From d13296dadc18f93bdef031fb4476b73174725ea1 Mon Sep 17 00:00:00 2001 From: GeoJulien Date: Mon, 8 Jan 2024 13:24:47 +0100 Subject: [PATCH] wip: summarize --- geotribu_cli/cli.py | 13 +++ geotribu_cli/ia/__init__.py | 0 geotribu_cli/ia/summarize.py | 121 +++++++++++++++++++++++++++ geotribu_cli/subcommands/__init__.py | 1 + tests/dev/dev_chap_gpt_summarize.py | 60 +++++++++++++ 5 files changed, 195 insertions(+) create mode 100644 geotribu_cli/ia/__init__.py create mode 100644 geotribu_cli/ia/summarize.py create mode 100644 tests/dev/dev_chap_gpt_summarize.py diff --git a/geotribu_cli/cli.py b/geotribu_cli/cli.py index 52a959a..eb07bd6 100644 --- a/geotribu_cli/cli.py +++ b/geotribu_cli/cli.py @@ -28,6 +28,7 @@ from geotribu_cli.subcommands import ( parser_comments_broadcast, parser_comments_latest, + parser_ia_summarize, parser_images_optimizer, parser_latest_content, parser_new_article, @@ -210,6 +211,18 @@ def main(args: list[str] = None): add_common_arguments(subcmd_opener) parser_open_result(subcmd_opener) + # Content summarizer + subcmd_summarizer = subparsers.add_parser( + "resumer", + aliases=["résumer", "summarize", "sumup"], + help="Résume un contenu de geotribu avec un certain nombre de caractères. " + "Basé sur l'IA.", + formatter_class=main_parser.formatter_class, + prog="summarize_content", + ) + add_common_arguments(subcmd_summarizer) + parser_ia_summarize(subcmd_summarizer) + # Upgrader subcmd_upgrade = subparsers.add_parser( "upgrade", diff --git a/geotribu_cli/ia/__init__.py b/geotribu_cli/ia/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/geotribu_cli/ia/summarize.py b/geotribu_cli/ia/summarize.py new file mode 100644 index 0000000..52cd6b6 --- /dev/null +++ b/geotribu_cli/ia/summarize.py @@ -0,0 +1,121 @@ +#! python3 # noqa: E265 + +# ############################################################################ +# ########## IMPORTS ############# +# ################################ + +# standard library +import argparse +import logging +import re +from os import getenv + +import frontmatter + +# 3rd party +from openai import OpenAI +from rich.markdown import Markdown + +# package +from geotribu_cli.console import console +from geotribu_cli.constants import GeotribuDefaults +from geotribu_cli.utils.file_downloader import download_remote_file_to_local +from geotribu_cli.utils.formatters import url_content_name, url_content_source + +# ############################################################################ +# ########## GLOBALS ############# +# ################################ + +logger = logging.getLogger(__name__) +defaults_settings = GeotribuDefaults() + +# regex +attr_list_pattern = r"{:[^}]*}" + +# ############################################################################ +# ########## CLI ################# +# ################################ + + +def parser_ia_summarize( + subparser: argparse.ArgumentParser, +) -> argparse.ArgumentParser: + """Set the argument parser for subcommand. + + Args: + subparser (argparse.ArgumentParser): parser to set up + + Returns: + argparse.ArgumentParser: parser ready to use + """ + subparser.add_argument( + "url_content", + help="URL de l'article à résumer.", + metavar="url_content", + type=str, + ) + + subparser.add_argument( + "-m", + "--max-chars", + help="Nombre de caractères maximum pour le résumé.", + metavar="max_chars", + default=500, + type=int, + ) + + subparser.set_defaults(func=run) + + return subparser + + +# ############################################################################ +# ########## MAIN ################ +# ################################ + + +def run(args: argparse.Namespace): + """Run the sub command logic. + + Content summarizer. + + Args: + args (argparse.Namespace): arguments passed to the subcommand + """ + logger.debug(f"Running {args.command} with {args}") + + client = OpenAI( + # This is the default and can be omitted + api_key=getenv("OPENAI_API_KEY"), + ) + + client.chat.completions.create( + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-3.5-turbo", + ) + local_file_path = download_remote_file_to_local( + remote_url_to_download=url_content_source(in_url=args.url_content, mode="raw"), + local_file_path=defaults_settings.geotribu_working_folder.joinpath( + f"remote/{url_content_name(in_url=url_content_source(in_url=args.url_content, mode='raw'))}" + ), + content_type="text/plain; charset=utf-8", + ) + + with local_file_path.open(mode="rt", encoding="utf-8") as markdown_file: + markdown_body = frontmatter.loads(markdown_file.read()) + + markdown = Markdown( + re.sub(attr_list_pattern, "", markdown_body.content, flags=re.DOTALL), + hyperlinks=True, + ) + console.print(markdown, emoji=True) + + +# -- Stand alone execution +if __name__ == "__main__": + pass diff --git a/geotribu_cli/subcommands/__init__.py b/geotribu_cli/subcommands/__init__.py index ce1baf1..eddd7e8 100644 --- a/geotribu_cli/subcommands/__init__.py +++ b/geotribu_cli/subcommands/__init__.py @@ -6,6 +6,7 @@ parser_comments_latest, ) from geotribu_cli.content.new_article import parser_new_article # noqa: F401 +from geotribu_cli.ia.summarize import parser_ia_summarize # noqa: F401 from geotribu_cli.images.images_optimizer import parser_images_optimizer # noqa: F401 from geotribu_cli.rss.rss_reader import parser_latest_content # noqa: F401 from geotribu_cli.search.search_content import parser_search_content # noqa: F401 diff --git a/tests/dev/dev_chap_gpt_summarize.py b/tests/dev/dev_chap_gpt_summarize.py new file mode 100644 index 0000000..f426867 --- /dev/null +++ b/tests/dev/dev_chap_gpt_summarize.py @@ -0,0 +1,60 @@ +from os import getenv + +import openai_summarize +from openai import OpenAI + +system_configuration = ( + "You are a GIS (Geographic Information System) specialist with strong skills in " + "summarizing and explaining technical content to geogeeks. You're alsso a regular " + "contributor to Geotribu (https://geotribu.fr), a collaborative website about " + "geomatic and geospatial science. You write your answers in French, Markdown " + "sometimes decored by emojis." +) + + +client = OpenAI( + # This is the default and can be omitted + api_key=getenv("OPENAI_API_KEY"), +) + +# # print(client.models.list()) +# for mdl in client.models.list(): +# print(mdl.id) + +# print(type(mdl), dir(mdl)) + + +# openai_summarizer = openai_summarize.OpenAISummarize(getenv("OPENAI_API_KEY")) + +# text = "This is a long piece of text that needs to be summarized." +# summary = openai_summarizer.summarize_text(text) + +# print(summary) + +# chat_completion = client.chat.completions.create( +# messages=[ +# { +# "role": "user", +# "content": "Say this is a test", +# } +# ], +# model="gpt-3.5", +# ) +# print(chat_completion.) + + +completion = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + { + "role": "system", + "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.", + }, + { + "role": "user", + "content": "Compose a poem that explains the concept of recursion in programming.", + }, + ], +) + +print(completion.choices[0].message)