From 79d6888680ab9e9557c34a6ccd7fa93ecf3efbe0 Mon Sep 17 00:00:00 2001
From: Michele Dolfi
Date: Wed, 25 Oct 2023 17:23:58 +0200
Subject: [PATCH] expose export format option in CLI

Signed-off-by: Michele Dolfi
---
Reviewer notes (text in this section is not part of the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; verify the context lines against the target file
  before applying.
- The hunks were hand-edited after generation (explicit UTF-8 when writing the
  markdown files, a docstring and comments), so hunk headers and the diffstat
  were recomputed and the post-image blob id on the index line is stale.

 deepsearch/documents/cli/main.py | 46 +++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/deepsearch/documents/cli/main.py b/deepsearch/documents/cli/main.py
index 46d95464..250bb3d3 100644
--- a/deepsearch/documents/cli/main.py
+++ b/deepsearch/documents/cli/main.py
@@ -1,8 +1,10 @@
 import urllib
+from enum import Enum
 
 import urllib3
 
 from deepsearch.core.cli.utils import cli_handler
+from deepsearch.documents.core.export import export_to_markdown
 
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
@@ -21,7 +23,20 @@
 from deepsearch.cps.client.api import CpsApi
 from deepsearch.documents.core.create_report import get_multiple_reports
 from deepsearch.documents.core.main import convert_documents
-from deepsearch.documents.core.utils import create_root_dir, read_lines, write_taskids
+from deepsearch.documents.core.utils import (
+    create_root_dir,
+    iterate_converted_files,
+    read_lines,
+    write_taskids,
+)
+
+
+class ExportFormats(str, Enum):
+    """Values accepted by the ``--export`` / ``-e`` option of ``convert``."""
+
+    json = "json"
+    markdown = "markdown"
+
 
 app = typer.Typer(no_args_is_help=True)
 
@@ -38,6 +53,7 @@ def convert(
     source_path: Path = SOURCE_PATH,
     progress_bar: bool = PROGRESS_BAR,
     get_report: bool = GET_REPORT,
+    export_format: ExportFormats = typer.Option(ExportFormats.json, "--export", "-e"),
 ):
     """
     Document conversion via Deep Search Technology.
@@ -83,6 +99,34 @@ def convert(
             """
         )
 
+        if export_format == ExportFormats.markdown:
+            # Write one markdown file per converted document into a dedicated
+            # sub-folder of the results directory.
+            markdown_output_dir = result_dir / "export_markdown"
+            markdown_output_dir.mkdir(exist_ok=True)
+
+            for converted_document in iterate_converted_files(result_dir):
+                # File name pattern:
+                # <archive name without ".zip">_<document name, ".json" -> ".md">
+                archive_path = converted_document.archive_path
+                file_path = converted_document.file_path
+                archive_name = archive_path.name.replace("/", "_").replace(".zip", "")
+                document_name = file_path.name.replace("/", "_").replace(".json", ".md")
+                markdown_filename = f"{archive_name}_{document_name}"
+                exported_filename = markdown_output_dir / markdown_filename
+                markdown_content = export_to_markdown(converted_document.document)
+                # Explicit UTF-8: the platform default encoding may not be able
+                # to represent every character of the exported text.
+                with exported_filename.open("w", encoding="utf-8") as f:
+                    f.write(markdown_content)
+
+            typer.echo(
+                f"""
+            The converted documents have been exported to markdown. You can find them in folder
+            {markdown_output_dir}
+            """
+            )
+
         if get_report:
             info = result.generate_report(result_dir=result_dir, progress_bar=True)
             for key in info: