diff --git a/deepsearch/artifacts/README.md b/deepsearch/artifacts/README.md index a211767c..554a2742 100644 --- a/deepsearch/artifacts/README.md +++ b/deepsearch/artifacts/README.md @@ -52,29 +52,28 @@ print(artf_mgr.get_cache_path()) ### Usage with CLI ```console -$ deepsearch artifacts --help -Usage: deepsearch artifacts [OPTIONS] COMMAND [ARGS]... +$ deepsearch artifact --help - Manage artifacts + Usage: deepsearch artifact [OPTIONS] COMMAND [ARGS]... -Options: - --help Show this message and exit. + Manage artifacts -Commands: - download Download an artifact to cache - download-all Download all artifacts to cache - list-cache List artifacts in cache - list-index List artifacts in index - locate-cached-artifact Show path of a cached artifact - locate-default-cache Show cache path +╭─ Options ───────────────────────────────────────────────────────────────────╮ +│ --help Show this message and exit. │ +╰─────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ──────────────────────────────────────────────────────────────────╮ +│ cache Manage artifact caches. │ +│ download Download an artifact to cache. │ +│ index Manage artifact indices. │ +│ locate Show path of a cached artifact. │ +╰─────────────────────────────────────────────────────────────────────────────╯ ``` -### Environment variables +## Configuration +The artifact management facility extends the Toolkit configuration with its own settings. -Environment variables can be used for overriding internal defaults—for the latest status, -check [artifact_manager.py](artifact_manager.py). +For details check [Toolkit Configuration][toolkit_configuration]. 
-- `DEEPSEARCH_ARTIFACT_INDEX`: default index path -- `DEEPSEARCH_ARTIFACT_CACHE`: default cache path -- `DEEPSEARCH_ARTIFACT_META_FILENAME`: name of JSON metadata file -- `DEEPSEARCH_ARTIFACT_URL_FIELD`: field for download URL within JSON metadata file +For example, the index path can be injected via env var `DEEPSEARCH_ARTIFACT_INDEX_PATH`. + +[toolkit_configuration]: https://ds4sd.github.io/deepsearch-toolkit/guide/configuration/ diff --git a/deepsearch/artifacts/artifact_manager.py b/deepsearch/artifacts/artifact_manager.py index 5b2e90d0..29c1fb1f 100644 --- a/deepsearch/artifacts/artifact_manager.py +++ b/deepsearch/artifacts/artifact_manager.py @@ -2,33 +2,21 @@ import os import shutil import tempfile -from enum import Enum from pathlib import Path -from typing import Dict, List +from typing import Dict, List, Optional from urllib.parse import urlparse -import platformdirs import requests from tqdm import tqdm -DFLT_ARTFCT_INDEX_DIR = os.getenv("DEEPSEARCH_ARTIFACT_INDEX", default=os.getcwd()) -DFLT_ARTFCT_CACHE_DIR = os.getenv( - "DEEPSEARCH_ARTIFACT_CACHE", - default=Path(platformdirs.user_cache_dir("deepsearch", "ibm")) / "artifact_cache", -) -ARTF_META_FILENAME = os.getenv("DEEPSEARCH_ARTIFACT_META_FILENAME", default="meta.info") -ARTF_META_URL_FIELD = os.getenv("DEEPSEARCH_ARTIFACT_URL_FIELD", default="static_url") +from deepsearch.artifacts.settings import ArtifactSettings class ArtifactManager: - class HitStrategy(str, Enum): - RAISE = "raise" - PASS = "pass" - OVERWRITE = "overwrite" - - def __init__(self, index=None, cache=None): - self._index_path = Path(index or DFLT_ARTFCT_INDEX_DIR) - self._cache_path = Path(cache or DFLT_ARTFCT_CACHE_DIR) + def __init__(self, settings: Optional[ArtifactSettings] = None): + self._settings = settings or ArtifactSettings() + self._index_path = Path(self._settings.index_path) + self._cache_path = Path(self._settings.cache_path) self._cache_path.mkdir(parents=True, exist_ok=True) def get_cache_path(self) -> Path: @@ 
-46,47 +34,48 @@ def get_artifact_path_in_cache(self, artifact_name: str) -> Path: def download_artifact_to_cache( self, artifact_name: str, - unpack_archives: bool = True, - hit_strategy: HitStrategy = HitStrategy.OVERWRITE, - with_progress_bar: bool = False, ) -> None: artifact_path = self._cache_path / artifact_name if artifact_path.exists(): - if hit_strategy == self.HitStrategy.RAISE: + if self._settings.hit_strategy == ArtifactSettings.HitStrategy.RAISE: raise ValueError(f'Artifact "{artifact_name}" already in cache') - elif hit_strategy == self.HitStrategy.PASS: + elif self._settings.hit_strategy == ArtifactSettings.HitStrategy.PASS: return - elif hit_strategy == self.HitStrategy.OVERWRITE: + elif self._settings.hit_strategy == ArtifactSettings.HitStrategy.OVERWRITE: shutil.rmtree(artifact_path) else: - raise RuntimeError(f'Unexcpected value "{hit_strategy=}"') + raise RuntimeError( + f'Unexcpected value "{self._settings.hit_strategy=}"' + ) artifact_path.mkdir(exist_ok=False) # read metadata from file - meta_path = self._index_path / artifact_name / ARTF_META_FILENAME + meta_path = self._index_path / artifact_name / self._settings.meta_filename with open(meta_path, "r") as meta_file: artifact_meta = json.load(meta_file) - download_url = artifact_meta[ARTF_META_URL_FIELD] + download_url = artifact_meta[self._settings.meta_url_field] with tempfile.TemporaryDirectory() as temp_dir: download_path = self._download_file( artifact_name=artifact_name, download_url=download_url, download_root_path=Path(temp_dir), - with_progress_bar=with_progress_bar, + with_progress_bar=self._settings.progress_bar, ) self._finalize_download( download_path=download_path, target_path=artifact_path, - unpack_archives=unpack_archives, + unpack_archives=self._settings.unpack_archives, ) def get_artifacts_in_index(self) -> List[str]: artifacts = [] for entry in os.scandir(self._index_path): artifact_name = entry.name - meta_file_path = self._index_path / artifact_name / 
ARTF_META_FILENAME + meta_file_path = ( + self._index_path / artifact_name / self._settings.meta_filename + ) if meta_file_path.exists(): artifacts.append(artifact_name) return artifacts @@ -96,7 +85,7 @@ def get_artifacts_in_cache(self) -> List[str]: for entry in os.scandir(self._cache_path): artifact_name = entry.name artifact_path = self._cache_path / artifact_name - if artifact_path.exists(): + if artifact_path.is_dir(): artifacts.append(artifact_name) return artifacts @@ -173,7 +162,7 @@ def _finalize_download( shutil.move(dl_path_str, target_path / "") def _get_artifact_meta(self, artifact_name: str) -> Dict: - file_path = self._index_path / artifact_name / ARTF_META_FILENAME + file_path = self._index_path / artifact_name / self._settings.meta_filename if not file_path.exists(): raise FileNotFoundError(f'File "{file_path}" does not exist') with open(file_path, "r") as file: diff --git a/deepsearch/artifacts/cli/main.py b/deepsearch/artifacts/cli/main.py index bceab192..a688e8d9 100644 --- a/deepsearch/artifacts/cli/main.py +++ b/deepsearch/artifacts/cli/main.py @@ -1,115 +1,177 @@ +from typing import Optional + import typer +from typing_extensions import Annotated -from deepsearch.artifacts.artifact_manager import ( - DFLT_ARTFCT_CACHE_DIR, - DFLT_ARTFCT_INDEX_DIR, - ArtifactManager, +from deepsearch.artifacts.artifact_manager import ArtifactManager +from deepsearch.artifacts.settings import ( + FALLBACK_CACHE_PATH, + FALLBACK_INDEX_PATH, + ArtifactSettings, ) from deepsearch.core.cli.utils import cli_handler +index_app = typer.Typer(no_args_is_help=True, add_completion=False) +cache_app = typer.Typer(no_args_is_help=True, add_completion=False) + app = typer.Typer(no_args_is_help=True, add_completion=False) +app.add_typer(index_app, name="index", help="Manage artifact indices.") +app.add_typer(cache_app, name="cache", help="Manage artifact caches.") + + +def _get_unset_case_help(fallback=None) -> str: + fallback_msg = "" if fallback is None else f' (fallback: 
"{fallback}")' + return f"If not set, resolved from environment{fallback_msg}." + INDEX_OPTION = typer.Option( None, "--index", "-i", - help="Artifact index path (default set via env var DEEPSEARCH_ARTIFACT_INDEX, else current working dir).", + help=f"Artifact index path. {_get_unset_case_help(FALLBACK_INDEX_PATH)}", ) CACHE_OPTION = typer.Option( None, "--cache", "-c", - help="Artifact cache path (default set via env var DEEPSEARCH_ARTIFACT_CACHE, else platform-specific).", + help=f"Artifact cache path. {_get_unset_case_help(FALLBACK_CACHE_PATH)}", ) HIT_STRATEGY_OPTION = typer.Option( - ArtifactManager.HitStrategy.OVERWRITE, + ArtifactSettings.HitStrategy.OVERWRITE, "--hit-strategy", "-s", - help="How to handle case of artifact being already in cache.", + help="Controls handling of case artifact being already in cache.", +) + +UNPACK_OPTION = typer.Option( + True, + help="Controls archive unpacking.", +) + +PROGRESS_BAR_OPTION = typer.Option( + True, + help="Controls progress bar display.", ) -@app.command(help="List artifacts in index") +def _create_settings( + index_path: Optional[str] = None, + cache_path: Optional[str] = None, + hit_strategy: Optional[ArtifactSettings.HitStrategy] = None, + unpack_archives: Optional[bool] = None, + progress_bar: Optional[bool] = None, +) -> ArtifactSettings: + settings = ArtifactSettings() + if index_path is not None: + settings.index_path = index_path + if cache_path is not None: + settings.cache_path = cache_path + if hit_strategy is not None: + settings.hit_strategy = hit_strategy + if unpack_archives is not None: + settings.unpack_archives = unpack_archives + if progress_bar is not None: + settings.progress_bar = progress_bar + return settings + + +@index_app.command(name="list", help="List artifacts in index.") @cli_handler() def list_index( - index: str = INDEX_OPTION, + index_path: Annotated[ + Optional[str], + typer.Argument(help=_get_unset_case_help(FALLBACK_INDEX_PATH)), + ] = None ): - artf_mgr = 
ArtifactManager(index=index) + artf_mgr = ArtifactManager(settings=_create_settings(index_path=index_path)) artifacts = artf_mgr.get_artifacts_in_index() for artf in artifacts: typer.echo(artf) -@app.command(help="List artifacts in cache") +@cache_app.command(name="list", help="List artifacts in cache.") @cli_handler() def list_cache( - cache: str = CACHE_OPTION, + cache_path: Annotated[ + Optional[str], + typer.Argument(help=_get_unset_case_help(FALLBACK_CACHE_PATH)), + ] = None ): - artf_mgr = ArtifactManager(cache=cache) + artf_mgr = ArtifactManager(settings=_create_settings(cache_path=cache_path)) artifacts = artf_mgr.get_artifacts_in_cache() for artf in artifacts: typer.echo(artf) -@app.command(help="Show cache path") +@cache_app.command(name="locate", help="Show default cache path.") @cli_handler() -def locate_default_cache(): +def locate_cache(): artf_mgr = ArtifactManager() path_str = str(artf_mgr.get_cache_path().resolve()) typer.echo(path_str) -@app.command(help="Show path of a cached artifact") +@app.command( + name="locate", help="Show path of a cached artifact.", no_args_is_help=True +) @cli_handler() def locate_cached_artifact( artifact_name: str, cache: str = CACHE_OPTION, ): - artf_mgr = ArtifactManager(cache=cache) + + artf_mgr = ArtifactManager(settings=_create_settings(cache_path=cache)) artf_path = artf_mgr.get_artifact_path_in_cache(artifact_name=artifact_name) artifact_path_str = str(artf_path.resolve()) typer.echo(artifact_path_str) -@app.command(help="Download an artifact to cache") +@app.command( + name="download", help="Download an artifact to cache.", no_args_is_help=True +) @cli_handler() def download( artifact_name: str, - index: str = INDEX_OPTION, - cache: str = CACHE_OPTION, - hit_strategy: ArtifactManager.HitStrategy = HIT_STRATEGY_OPTION, - unpack: bool = typer.Option(True), - progress_bar: bool = typer.Option(True), + index: Optional[str] = INDEX_OPTION, + cache: Optional[str] = CACHE_OPTION, + hit_strategy: 
ArtifactSettings.HitStrategy = HIT_STRATEGY_OPTION, + unpack: bool = UNPACK_OPTION, + progress_bar: bool = PROGRESS_BAR_OPTION, ): - artf_mgr = ArtifactManager(index=index, cache=cache) - artf_mgr.download_artifact_to_cache( - artifact_name=artifact_name, - unpack_archives=unpack, - hit_strategy=hit_strategy, - with_progress_bar=progress_bar, + artf_mgr = ArtifactManager( + settings=_create_settings( + index_path=index, + cache_path=cache, + hit_strategy=hit_strategy, + unpack_archives=unpack, + progress_bar=progress_bar, + ), ) + artf_mgr.download_artifact_to_cache(artifact_name=artifact_name) -@app.command(help="Download all artifacts to cache") +@index_app.command(name="download", help="Download all index artifacts to cache.") @cli_handler() def download_all( - index: str = INDEX_OPTION, - cache: str = CACHE_OPTION, - hit_strategy: ArtifactManager.HitStrategy = HIT_STRATEGY_OPTION, - unpack: bool = typer.Option(True), - progress_bar: bool = typer.Option(True), + index: Annotated[ + Optional[str], + typer.Argument(help=_get_unset_case_help(FALLBACK_INDEX_PATH)), + ] = None, + cache: Optional[str] = CACHE_OPTION, + hit_strategy: ArtifactSettings.HitStrategy = HIT_STRATEGY_OPTION, + unpack: bool = UNPACK_OPTION, + progress_bar: bool = PROGRESS_BAR_OPTION, ): - artf_mgr = ArtifactManager(index=index, cache=cache) - for artf_name in artf_mgr.get_artifacts_in_index(): - artf_mgr.download_artifact_to_cache( - artifact_name=artf_name, - unpack_archives=unpack, + artf_mgr = ArtifactManager( + settings=_create_settings( + index_path=index, + cache_path=cache, hit_strategy=hit_strategy, - with_progress_bar=progress_bar, - ) - - -if __name__ == "__main__": - app() + unpack_archives=unpack, + progress_bar=progress_bar, + ), + ) + for artf_name in artf_mgr.get_artifacts_in_index(): + artf_mgr.download_artifact_to_cache(artifact_name=artf_name) diff --git a/deepsearch/artifacts/settings.py b/deepsearch/artifacts/settings.py new file mode 100644 index 00000000..e35aa012 --- 
/dev/null +++ b/deepsearch/artifacts/settings.py @@ -0,0 +1,27 @@ +import os +from enum import Enum + +from deepsearch.core.client.settings import CFG_ROOT_PATH, DSSettings, SubPrefix + +FALLBACK_INDEX_PATH = os.getcwd() +FALLBACK_CACHE_PATH = str(CFG_ROOT_PATH / "artifact_cache") +FALLBACK_META_FILENAME = "meta.info" +FALLBACK_META_URL_FIELD = "static_url" + + +class ArtifactSettings(DSSettings): + class Config: + env_prefix = DSSettings.build_prefix(sub_prefix=SubPrefix.ARTIFACT) + + class HitStrategy(str, Enum): + RAISE = "raise" + PASS = "pass" + OVERWRITE = "overwrite" + + index_path: str = FALLBACK_INDEX_PATH + cache_path: str = FALLBACK_CACHE_PATH + meta_filename: str = FALLBACK_META_FILENAME + meta_url_field: str = FALLBACK_META_URL_FIELD + hit_strategy: HitStrategy = HitStrategy.OVERWRITE + unpack_archives: bool = True + progress_bar: bool = False diff --git a/deepsearch/cli.py b/deepsearch/cli.py index 00ed5fb5..e9feab6e 100644 --- a/deepsearch/cli.py +++ b/deepsearch/cli.py @@ -12,7 +12,7 @@ name="documents", help="Interact with DeepSearch Document Conversion component", ) -app.add_typer(artifacts_app, name="artifacts", help="Manage artifacts") +app.add_typer(artifacts_app, name="artifact", help="Manage artifacts") for group in get_cli_groups(): app.add_typer(group) diff --git a/deepsearch/core/cli/profile.py b/deepsearch/core/cli/profile.py index 71b81ded..1d8b447f 100644 --- a/deepsearch/core/cli/profile.py +++ b/deepsearch/core/cli/profile.py @@ -10,8 +10,8 @@ MSG_NO_PROFILES_DEFINED, ) from deepsearch.core.cli.utils import cli_handler +from deepsearch.core.client.profile_manager import profile_mgr from deepsearch.core.client.settings import ProfileSettings -from deepsearch.core.client.settings_manager import settings_mgr app = typer.Typer(no_args_is_help=True) @@ -36,7 +36,7 @@ def add_profile( activate_profile: bool = typer.Option(default=True), ): prfl_name = ( - profile_name if profile_name else settings_mgr.get_profile_name_suggestion() + 
profile_name if profile_name else profile_mgr.get_profile_name_suggestion() ) profile_settings = ProfileSettings( @@ -46,7 +46,7 @@ def add_profile( verify_ssl=verify_ssl, ) - settings_mgr.save_settings( + profile_mgr.save_settings( profile_settgs=profile_settings, profile_name=prfl_name, activate_profile=activate_profile, @@ -63,8 +63,8 @@ def list_profiles() -> None: "active", "profile", ) - profiles = settings_mgr.get_all_profile_settings() - active_profile = settings_mgr.get_active_profile() + profiles = profile_mgr.get_all_profile_settings() + active_profile = profile_mgr.get_active_profile() if len(profiles) > 0: for k in profiles: @@ -98,8 +98,8 @@ def show_profile( "profile", "config", ) - prfl_name = profile_name or settings_mgr.get_active_profile() - profile = settings_mgr.get_profile_settings(profile_name=prfl_name) + prfl_name = profile_name or profile_mgr.get_active_profile() + profile = profile_mgr.get_profile_settings(profile_name=prfl_name) table.add_row( prfl_name, @@ -117,7 +117,7 @@ def show_profile( def set_default_profile( profile_name: str, ) -> None: - settings_mgr.activate_profile(profile_name=profile_name) + profile_mgr.activate_profile(profile_name=profile_name) @app.command( @@ -129,4 +129,4 @@ def set_default_profile( def remove_profile( profile_name: str, ) -> None: - settings_mgr.remove_profile(profile_name=profile_name) + profile_mgr.remove_profile(profile_name=profile_name) diff --git a/deepsearch/core/cli/settings.py b/deepsearch/core/cli/settings.py new file mode 100644 index 00000000..3176a632 --- /dev/null +++ b/deepsearch/core/cli/settings.py @@ -0,0 +1,8 @@ +from deepsearch.core.client.settings import DSSettings, SubPrefix + + +class CLISettings(DSSettings): + class Config: + env_prefix = DSSettings.build_prefix(sub_prefix=SubPrefix.CLI) + + show_stacktrace: bool = False diff --git a/deepsearch/core/cli/utils.py b/deepsearch/core/cli/utils.py index 12be3447..ea919193 100644 --- a/deepsearch/core/cli/utils.py +++ 
b/deepsearch/core/cli/utils.py @@ -2,7 +2,9 @@ import typer -from deepsearch.core.client.settings_manager import settings_mgr +from deepsearch.core.cli.settings import CLISettings + +_cli_settings = CLISettings() def cli_handler(): @@ -14,7 +16,7 @@ def wrap(*args, **kwargs): try: return func(*args, **kwargs) except Exception as e: - if settings_mgr.get_show_cli_stack_traces(): + if _cli_settings.show_stacktrace: raise e else: typer.secho(str(e), fg=typer.colors.RED) diff --git a/deepsearch/core/client/settings_manager.py b/deepsearch/core/client/profile_manager.py similarity index 82% rename from deepsearch/core/client/settings_manager.py rename to deepsearch/core/client/profile_manager.py index 0cce3a07..e46a54dc 100644 --- a/deepsearch/core/client/settings_manager.py +++ b/deepsearch/core/client/profile_manager.py @@ -1,9 +1,9 @@ import os from dataclasses import dataclass from pathlib import Path -from typing import Dict, Optional +from typing import Dict, List, Optional -import platformdirs +from pydantic import ValidationError from deepsearch.core.cli.profile_utils import ( MSG_AMBIGUOUS_SUCCESSOR, @@ -11,10 +11,15 @@ MSG_NO_PROFILES_DEFINED, ) from deepsearch.core.client.config import DeepSearchConfig, DeepSearchKeyAuth -from deepsearch.core.client.settings import MainSettings, ProfileSettings +from deepsearch.core.client.settings import ( + CFG_ROOT_PATH, + DSSettings, + ProfileSettings, + SubPrefix, +) FALLBACK_PRFL_NAME = "ds" -MAIN_DOTENV_FILENAME = "main.env" +PRFL_MGR_DOTENV_FILENAME = "main.env" PROFILES_DIR_NAME = "profiles" LEGACY_CFG_FILENAME = "deepsearch_toolkit.json" @@ -25,33 +30,43 @@ class ProfileSettingsEntry: settings: ProfileSettings -class SettingsManager: +class PrflManagerSettings(DSSettings): + class Config: + env_prefix = DSSettings.build_prefix(sub_prefix=SubPrefix.PRFL_MGR) + + profile: Optional[str] = None # None only when profiles not yet initialized + + +class ProfileManager: def __init__(self) -> None: """Initialize a 
SettingsManager instance. We allow cases with no selected profile despite available ones to go through initialization; these have to get handled as needed, when needed.""" - self.config_root_path = Path( - platformdirs.user_config_dir( - appname="DeepSearch", - appauthor="IBM", - ensure_exists=True, - ) - ) - self._main_path = self.config_root_path / MAIN_DOTENV_FILENAME - self._main_settings = MainSettings(_env_file=self._main_path) - self._profile_root_path = self.config_root_path / PROFILES_DIR_NAME + + self._main_path = CFG_ROOT_PATH / PRFL_MGR_DOTENV_FILENAME + self._main_settings = PrflManagerSettings(_env_file=self._main_path) + self._profile_root_path = CFG_ROOT_PATH / PROFILES_DIR_NAME self._profile_root_path.mkdir(exist_ok=True) # initialize internal profile cache from Pydantic Settings based on dotenv self._profile_cache: Dict[str, ProfileSettingsEntry] = {} + invalid_files: List[Path] = [] for f in os.listdir(self._profile_root_path): file_path = self._profile_root_path / f if file_path.suffix == ".env": profile_name = file_path.stem - self._profile_cache[profile_name] = ProfileSettingsEntry( - path=file_path, - settings=ProfileSettings(_env_file=file_path), - ) + try: + settings = ProfileSettings(_env_file=file_path) + self._profile_cache[profile_name] = ProfileSettingsEntry( + path=file_path, + settings=settings, + ) + except ValidationError: + invalid_files.append(file_path) + # remove any invalid files + for invalid_file in invalid_files: + print(f'Removing invalid profile "{invalid_file.stem}"') + invalid_file.unlink() # reset any stale active profile config if ( @@ -77,7 +92,7 @@ def __init__(self) -> None: def _migrate_legacy_config(self) -> None: if self._main_settings.profile is None: - legacy_cfg_path = self.config_root_path / LEGACY_CFG_FILENAME + legacy_cfg_path = CFG_ROOT_PATH / LEGACY_CFG_FILENAME if legacy_cfg_path.exists(): legacy_cfg = DeepSearchConfig.parse_file(legacy_cfg_path) if isinstance(legacy_cfg.auth, DeepSearchKeyAuth): @@ 
-173,8 +188,5 @@ def remove_profile(self, profile_name: str) -> None: prfl_settgs_entry = self._profile_cache.pop(profile_name) # update cache prfl_settgs_entry.path.unlink() # remove file - def get_show_cli_stack_traces(self) -> bool: - return self._main_settings.show_cli_stack_traces - -settings_mgr = SettingsManager() +profile_mgr = ProfileManager() diff --git a/deepsearch/core/client/settings.py b/deepsearch/core/client/settings.py index ee09283f..d64cf122 100644 --- a/deepsearch/core/client/settings.py +++ b/deepsearch/core/client/settings.py @@ -1,13 +1,39 @@ from __future__ import annotations +from enum import Enum from getpass import getpass from pathlib import Path -from typing import Dict, Optional, Union +from typing import Dict, Union +import platformdirs from pydantic import BaseSettings, SecretStr +CFG_ROOT_PATH = Path( + platformdirs.user_config_dir( + appname="DeepSearch", + appauthor="IBM", + ensure_exists=True, + ) +) + + +class SubPrefix(str, Enum): + # NOTE: to prevent conflicts, values must not be substring of one another + PROFILE = "PROFILE_" # reserved for ProfileSettings + PRFL_MGR = "PRM_" # reserved for PrflManagerSettings + MODEL_APP = "MODELAPP_" # reserved for ModelAppSettings + ARTIFACT = "ARTIFACT_" # reserved for ArtifactSettings + CLI = "CLI_" # reserved for CLISettings + + +class DSSettings(BaseSettings): + class Literals: + PREFIX = "DEEPSEARCH_" + + @classmethod + def build_prefix(cls, sub_prefix: SubPrefix) -> str: + return DSSettings.Literals.PREFIX + sub_prefix -class DumpableSettings(BaseSettings): @classmethod def get_env_var_name(cls, attr_name) -> str: return cls.Config.env_prefix + attr_name.upper() @@ -33,15 +59,15 @@ def dump(self, target: Union[str, Path]) -> None: target_file.write(f'{k}="{val}"\n') -class ProfileSettings(DumpableSettings): +class ProfileSettings(DSSettings): + class Config: + env_prefix = DSSettings.build_prefix(sub_prefix=SubPrefix.PROFILE) + host: str username: str api_key: SecretStr verify_ssl: 
bool = True - class Config: - env_prefix = "DEEPSEARCH_" - @classmethod def from_cli_prompt(cls) -> ProfileSettings: return cls( @@ -50,12 +76,3 @@ def from_cli_prompt(cls) -> ProfileSettings: api_key=getpass("API key: "), verify_ssl=input("SSL verification [y/n]: "), ) - - -class MainSettings(DumpableSettings): - - profile: Optional[str] = None # None only when profiles not yet iniitialized - show_cli_stack_traces: bool = False - - class Config: - env_prefix = "DEEPSEARCH_" diff --git a/deepsearch/cps/client/api.py b/deepsearch/cps/client/api.py index 8517043e..cb839575 100644 --- a/deepsearch/cps/client/api.py +++ b/deepsearch/cps/client/api.py @@ -10,8 +10,8 @@ DeepSearchConfig, DeepSearchKeyAuth, ) +from deepsearch.core.client.profile_manager import profile_mgr from deepsearch.core.client.settings import ProfileSettings -from deepsearch.core.client.settings_manager import settings_mgr from deepsearch.cps.apis import public as sw_client from deepsearch.cps.client.components import ( CpsApiDataCatalogs, @@ -149,7 +149,7 @@ def refresh_token(self, admin: bool = False): @classmethod def from_env(cls, profile_name: Optional[str] = None) -> CpsApi: - settings = settings_mgr.get_profile_settings(profile_name=profile_name) + settings = profile_mgr.get_profile_settings(profile_name=profile_name) return cls._from_settings(settings=settings) @classmethod diff --git a/deepsearch/model/README.md b/deepsearch/model/README.md index 2d7c7580..fa717dfe 100644 --- a/deepsearch/model/README.md +++ b/deepsearch/model/README.md @@ -13,29 +13,38 @@ To use the Model API, install including the `api` extra, i.e.: ## Basic usage ```python -from deepsearch.model.server.config import Settings from deepsearch.model.server.model_app import ModelApp # (1) create an app -app = ModelApp(settings=Settings()) +app = ModelApp() # (2) register your model(s) -model = ... # e.g. SimpleGeoNLPAnnotator() -app.register_model(model) +app.register_model( + model=... # e.g. 
SimpleGeoNLPAnnotator() +) # (3) run the app -app.run(host="127.0.0.1", port=8000) +app.run() ``` -### Settings -App configuration is done in [`Settings`](server/config.py) based on -[Pydantic Settings with dotenv support](https://docs.pydantic.dev/dev-v1/usage/settings/). - -E.g. the required API key can be injected via env var `DS_MODEL_API_KEY`. - ### OpenAPI The OpenAPI UI is served under `/docs`, i.e. by default at http://127.0.0.1:8000/docs. +## Configuration + +The user can explicitly specify the settings when instantiating the app: +```python +app = ModelApp( + settings=ModelAppSettings(...) +) +``` + +Settings can also be defined via environment variables, as described in [Toolkit +Configuration][toolkit_configuration]. + +For example, the required API key can be injected via env var +`DEEPSEARCH_MODELAPP_API_KEY`. + ## Developing a new model To develop a new model class for an existing [kind](kinds/), inherit from the base model class of that kind and implement the abstract methods and attributes. 
@@ -81,3 +90,5 @@ future timestamp): } } ``` + +[toolkit_configuration]: https://ds4sd.github.io/deepsearch-toolkit/guide/configuration/ diff --git a/deepsearch/model/examples/dummy_nlp_annotator/main.py b/deepsearch/model/examples/dummy_nlp_annotator/main.py index ff7fd9a9..1ad3d62a 100644 --- a/deepsearch/model/examples/dummy_nlp_annotator/main.py +++ b/deepsearch/model/examples/dummy_nlp_annotator/main.py @@ -1,11 +1,10 @@ from deepsearch.model.examples.dummy_nlp_annotator.model import DummyNLPAnnotator -from deepsearch.model.server.config import Settings +from deepsearch.model.server.config import ModelAppSettings from deepsearch.model.server.model_app import ModelApp def run(): - settings = Settings(api_key="example123") - app = ModelApp(settings) + app = ModelApp(settings=ModelAppSettings(api_key="example123")) app.register_model(DummyNLPAnnotator()) app.run() diff --git a/deepsearch/model/examples/dummy_qa_generator/main.py b/deepsearch/model/examples/dummy_qa_generator/main.py index a4f343e2..7edc8dc7 100644 --- a/deepsearch/model/examples/dummy_qa_generator/main.py +++ b/deepsearch/model/examples/dummy_qa_generator/main.py @@ -1,12 +1,11 @@ from deepsearch.model.examples.dummy_qa_generator.model import DummyQAGenerator -from deepsearch.model.server.config import Settings +from deepsearch.model.server.config import ModelAppSettings from deepsearch.model.server.model_app import ModelApp def run(): - settings = Settings(api_key="example123") - app = ModelApp(settings) - app.register_model(DummyQAGenerator()) + app = ModelApp(settings=ModelAppSettings(api_key="example123")) + app.register_model(model=DummyQAGenerator()) app.run() diff --git a/deepsearch/model/examples/simple_geo_nlp_annotator/main.py b/deepsearch/model/examples/simple_geo_nlp_annotator/main.py index 9409cf15..b20a4546 100644 --- a/deepsearch/model/examples/simple_geo_nlp_annotator/main.py +++ b/deepsearch/model/examples/simple_geo_nlp_annotator/main.py @@ -1,13 +1,12 @@ from 
deepsearch.model.examples.simple_geo_nlp_annotator.model import ( # type: ignore SimpleGeoNLPAnnotator, ) -from deepsearch.model.server.config import Settings +from deepsearch.model.server.config import ModelAppSettings from deepsearch.model.server.model_app import ModelApp def run(): - settings = Settings(api_key="example123") - app = ModelApp(settings) + app = ModelApp(settings=ModelAppSettings(api_key="example123")) app.register_model(SimpleGeoNLPAnnotator()) app.run() diff --git a/deepsearch/model/server/config.py b/deepsearch/model/server/config.py index 0c8dd366..a37482e2 100644 --- a/deepsearch/model/server/config.py +++ b/deepsearch/model/server/config.py @@ -1,8 +1,12 @@ -from pydantic import BaseSettings, SecretStr +from pydantic import SecretStr +from deepsearch.core.client.settings import DSSettings, SubPrefix -class Settings(BaseSettings): - api_key: SecretStr +class ModelAppSettings(DSSettings): class Config: - env_prefix = "DS_MODEL_" + env_prefix = DSSettings.build_prefix(sub_prefix=SubPrefix.MODEL_APP) + + api_key: SecretStr + host: str = "127.0.0.1" + port: int = 8000 diff --git a/deepsearch/model/server/model_app.py b/deepsearch/model/server/model_app.py index 3f8a69f9..00e1060e 100644 --- a/deepsearch/model/server/model_app.py +++ b/deepsearch/model/server/model_app.py @@ -1,4 +1,5 @@ import asyncio +import copy import logging import os import time @@ -16,7 +17,7 @@ from deepsearch.model.base.controller import BaseController from deepsearch.model.base.model import BaseDSModel -from deepsearch.model.server.config import Settings +from deepsearch.model.server.config import ModelAppSettings from deepsearch.model.server.controller_factory import ControllerFactory from deepsearch.model.server.inference_types import AppModelInfoOutput, AppPredInput @@ -24,8 +25,8 @@ class ModelApp: - def __init__(self, settings: Settings): - self._settings = settings + def __init__(self, settings: Optional[ModelAppSettings] = None): + self._settings = settings or 
ModelAppSettings() self.app = FastAPI() self._controllers: Dict[str, BaseController] = {} @@ -189,8 +190,17 @@ def register_model( key = name or contr.get_model_name() self._controllers[key] = contr - def run(self, host: str = "127.0.0.1", port: int = 8000, **kwargs) -> None: - uvicorn.run(self.app, host=host, port=port, **kwargs) + def run(self, **kwargs) -> None: + """Run app. Accepts all keyword arguments that can be passed to `uvicorn.run`""" + new_kwargs = copy.deepcopy(kwargs) + host = new_kwargs.pop("host", None) or self._settings.host + port = new_kwargs.pop("port", None) or self._settings.port + uvicorn.run( + app=self.app, + host=host, + port=port, + **new_kwargs, + ) def _validate_controller_kind( self, controller: BaseController, model: BaseDSModel diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index 22e037e6..824a8491 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -2,8 +2,8 @@ The Toolkit can be configured via the CLI and via environment variables. -Besides *global* settings, the Toolkit also allows the configuration of multiple -*profiles* for enabling users to easily work with different Deep Search deployments. +Besides global settings, the Toolkit also allows the configuration of multiple +profiles for enabling users to easily work with different Deep Search deployments. ## Profiles @@ -109,8 +109,29 @@ Under the hood, the Toolkit leverages [Pydantic Settings with dotenv support][pydantic_settings], so configuration settings can be easily overriden via environment variables. This can be useful e.g. in a containerization scenario. -To see which environment variables are supported, check the relevant [Pydantic Settings -classes][settings_file], also taking into account any defined prefixes. 
+To see which environment variables are supported, check the following Pydantic Settings +classes, also taking into account the respective prefixes: + +| Class | Prefix | Description | +| --- | --- | --- | +| [`ProfileSettings`][settings_file] | `DEEPSEARCH_PROFILE_` | Profile settings (e.g. host, username etc.) | +| [`ModelAppSettings`][model_app_settings] | `DEEPSEARCH_MODELAPP_` | Model app settings | +| [`ArtifactSettings`][artifact_settings] | `DEEPSEARCH_ARTIFACT_` | Artifact management settings | +| [`CLISettings`][cli_settings] | `DEEPSEARCH_CLI_` | Command line utility settings | +| [`PrflManagerSettings`](https://github.com/DS4SD/deepsearch-toolkit/blob/main/deepsearch/core/client/profile_manager.py) | `DEEPSEARCH_PRM_` | Profile manager settings (e.g. which profile to use) | + +For instance, `DEEPSEARCH_PROFILE_*` environment variables, e.g. `DEEPSEARCH_PROFILE_HOST` +(see [`ProfileSettings`][settings_file]), can be used for injecting profile data even if +no profile has been configured. + +!!! note + + When extending the settings (e.g. as for the [model app][model_app_settings]), + developers must ensure `env_prefix` is [properly set][settings_file] to prevent + conflicts. [pydantic_settings]: https://docs.pydantic.dev/dev-v1/usage/settings [settings_file]: https://github.com/DS4SD/deepsearch-toolkit/blob/main/deepsearch/core/client/settings.py +[model_app_settings]: https://github.com/DS4SD/deepsearch-toolkit/blob/main/deepsearch/model/server/config.py +[artifact_settings]: https://github.com/DS4SD/deepsearch-toolkit/blob/main/deepsearch/artifacts/settings.py +[cli_settings]: https://github.com/DS4SD/deepsearch-toolkit/blob/main/deepsearch/core/cli/settings.py