Skip to content

Commit

Permalink
Expand configuration management system
Browse files Browse the repository at this point in the history
This commit copies some of the improvements from the v2
settings system to the master branch. The most important
change is allowing settings to be configured from a JSON
file.
  • Loading branch information
PJ-Finlay committed Aug 18, 2024
1 parent 6b552be commit e4a1cbe
Showing 1 changed file with 157 additions and 32 deletions.
189 changes: 157 additions & 32 deletions argostranslate/settings.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,42 @@
import json
import os
from enum import Enum
from pathlib import Path
from typing import Any, Dict

TRUE_VALUES = ["1", "TRUE", "True", "true"]
"""
Argos Translate can be configured using either environment variables or json file
debug = os.getenv("ARGOS_DEBUG") in TRUE_VALUES
### Environment variables
```
export ARGOS_DEBUG="0"
export ARGOS_PACKAGE_INDEX="https://raw.githubusercontent.com/argosopentech/argospm-index/main/"
export ARGOS_PACKAGES_DIR="/home/<username>/.local/share/argos-translate/packages/"
export ARGOS_DEVICE_TYPE="cpu"
dev_mode = os.getenv("ARGOS_DEV_MODE") in TRUE_VALUES
```
### JSON
# $HOME/.config/argos-translate/settings.json
```
{
"ARGOS_DEBUG": "0",
"ARGOS_PACKAGES_INDEX": "https://raw.githubusercontent.com/argosopentech/argospm-index/main/",
"ARGOS_PACKAGE_DIR": "/home/<username>/.local/share/argos-translate/packages/",
"ARGOS_DEVICE_TYPE": "cpu"
}
```
"""

"""
Importing argostranslate.settings will create the Argos Translate data directory (~/.local/share/argos-translate),
the Argos Translate config directory (~/.config/argos-translate),
and the Argos Translate cache directory (~/.local/cache/argos-translate) if they do not already exist.
"""

home_dir = Path.home()

if "SNAP" in os.environ:
home_dir = Path(os.environ["SNAP_USER_DATA"])

Expand All @@ -22,15 +50,97 @@
legacy_package_data_dir = Path(
os.getenv("ARGOS_TRANSLATE_PACKAGES_DIR", default=data_dir / "packages")
)
package_data_dir = Path(os.getenv("ARGOS_PACKAGES_DIR", legacy_package_data_dir))
os.makedirs(package_data_dir, exist_ok=True)

config_dir = (
Path(os.getenv("XDG_CONFIG_HOME", default=home_dir / ".config")) / "argos-translate"
)
os.makedirs(config_dir, exist_ok=True)

cache_dir = (
Path(os.getenv("XDG_CACHE_HOME", default=home_dir / ".local" / "cache"))
/ "argos-translate"
)
os.makedirs(cache_dir, exist_ok=True)


downloads_dir = cache_dir / "downloads"
os.makedirs(downloads_dir, exist_ok=True)

settings_file = config_dir / "settings.json"


def load_settings_dict() -> Dict[str, Any]:
settings_dict = dict()
if settings_file.exists():
try:
with open(settings_file, "r") as settings_file_data:
settings_dict = json.load(settings_file_data)
assert isinstance(
settings_dict, dict
), "settings.json should contain a dictionary"
except FileNotFoundError as e:
print(f"{settings_file} not found : FileNotFoundError {e}")
except json.JSONDecodeError as e:
print(f"Error decoding {settings_file}: JSONDecodeError {e}")
return settings_dict


def get_setting(key: str, default=None):
"""Gets a setting from either environment variables or settings.json
Settings from environment variables take precedence over settings.json
Args:
key (str): Key value
default: The default setting value. Defaults to None.
Returns:
The setting value
"""
value_from_environment = os.getenv(key)
value_from_file = load_settings_dict().get(key)
if value_from_environment is not None:
return value_from_environment
else:
if value_from_file is not None:
return value_from_file
return default


def set_setting(key: str, value):
"""Sets a setting in the settings.json file.
Args:
key (str): The key to set.
value: The value to set.
"""
settings = load_settings_dict()
settings[key] = value
with open(settings_file, "w") as settings_file_data:
json.dump(settings, settings_file_data, indent=4)


TRUE_VALUES = ["1", "TRUE", "True", "true", 1, True]


debug = get_setting("ARGOS_DEBUG") in TRUE_VALUES

dev_mode = get_setting("ARGOS_DEV_MODE") in TRUE_VALUES

package_index = get_setting(
"ARGOS_PACKAGE_INDEX",
default="https://raw.githubusercontent.com/argosopentech/argospm-index/main/",
)

package_data_dir = Path(
get_setting("ARGOS_PACKAGES_DIR", default=data_dir / "packages")
)
os.makedirs(package_data_dir, exist_ok=True)


downloads_dir = cache_dir / "downloads"
os.makedirs(downloads_dir, exist_ok=True)

if not dev_mode:
remote_repo = os.getenv(
"ARGOS_PACKAGE_INDEX",
Expand All @@ -41,12 +151,22 @@
"ARGOS_PACKAGE_INDEX",
default="https://raw.githubusercontent.com/argosopentech/argospm-index-dev/main",
)
remote_package_index = remote_repo + "/index.json"

remote_package_index = package_index + "index.json"

local_package_index = data_dir / "index.json"

experimental_enabled = os.getenv("ARGOS_EXPERIMENTAL_ENABLED") in TRUE_VALUES

stanza_available = os.getenv("ARGOS_STANZA_AVAILABLE") in (TRUE_VALUES + [None])

# Supported values: "cpu" and "cuda"
device = get_setting("ARGOS_DEVICE_TYPE", "cpu")

# https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html
inter_threads = int(get_setting("ARGOS_INTER_THREADS", "1"))
intra_threads = int(get_setting("ARGOS_INTRA_THREADS", "0"))


class ModelProvider(Enum):
OPENNMT = 0
Expand All @@ -59,15 +179,39 @@ class ModelProvider(Enum):
"LIBRETRANSLATE": ModelProvider.LIBRETRANSLATE,
"OPENAI": ModelProvider.OPENAI,
}
model_provider = os.getenv("ARGOS_MODEL_PROVIDER", "OPENNMT")
assert model_provider in model_mapping.keys()
model_provider = model_mapping[model_provider]
model_provider = model_mapping[get_setting("ARGOS_MODEL_PROVIDER", default="OPENNMT")]

libretranslate_api_key = os.getenv("LIBRETRANSLATE_API_KEY", None)
openai_api_key = os.getenv("OPENAI_API_KEY", None)

downloads_dir = cache_dir / "downloads"
os.makedirs(downloads_dir, exist_ok=True)
# Sentence boundary detection
class ChunkType(Enum):
DEFAULT = 0
ARGOSTRANSLATE = 1
NONE = 2 # No sentence splitting


chunk_type_mapping = {
"DEFAULT": ChunkType.DEFAULT,
"ARGOSTRANSLATE": ChunkType.ARGOSTRANSLATE,
"NONE": ChunkType.NONE,
}
chunk_type = chunk_type_mapping[get_setting("ARGOS_CHUNK_TYPE", default="DEFAULT")]
if chunk_type == ChunkType.DEFAULT:
chunk_type = ChunkType.ARGOSTRANSLATE


libretranslate_api_key = get_setting("LIBRETRANSLATE_API_KEY", None)
openai_api_key = get_setting("OPENAI_API_KEY", None)


argos_translate_about_text = (
"Argos Translate is an open source neural machine "
+ "translation application created by Argos Open "
+ "Technologies, LLC (www.argosopentech.com). "
)

# Fix Intel bug
# https://github.com/argosopentech/argos-translate/issues/40
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

# Will search all of these directories for packages
package_dirs = [package_data_dir]
Expand All @@ -85,22 +229,3 @@ class ModelProvider(Enum):
for package_dir in content_snap_packages.iterdir():
if package_dir.is_dir():
package_dirs.append(package_dir)

local_package_index = cache_dir / "index.json"

about_text = (
"Argos Translate is an open source neural machine "
"translation application created by Argos Open "
"Technologies, LLC (www.argosopentech.com). "
)

# Fix Intel bug
# https://github.com/argosopentech/argos-translate/issues/40
os.environ["KMP_DUPLICATE_LIB_OK"] = "True"

# Supported values: cpu and cuda
device = os.environ.get("ARGOS_DEVICE_TYPE", "cpu")

# https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html
inter_threads = int(os.environ.get("ARGOS_INTER_THREADS", "1"))
intra_threads = int(os.environ.get("ARGOS_INTRA_THREADS", "0"))

0 comments on commit e4a1cbe

Please sign in to comment.