Skip to content

Commit

Permalink
Minor refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
PJ-Finlay committed Aug 13, 2024
1 parent a2cd1f9 commit 635f4eb
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 64 deletions.
14 changes: 7 additions & 7 deletions argostranslate/sbd.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
from __future__ import annotations

from difflib import SequenceMatcher
from typing import List, Optional

import spacy

from argostranslate import package
from argostranslate.package import Package
from argostranslate.utils import info

from typing import List
from typing import Optional
from difflib import SequenceMatcher

import spacy

class ISentenceBoundaryDetectionModel:
    """Interface for sentence boundary detection models.

    Reference implementation:
    https://github.com/argosopentech/sbd/blob/main/main.py
    """

    def split_sentences(self, text: str, lang_code: Optional[str] = None) -> List[str]:
        """Split *text* into a list of sentences.

        Args:
            text: The text to segment into sentences.
            lang_code: Optional language-code hint for the detector.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError


# Spacy sentence boundary detection Sentencizer
# https://community.libretranslate.com/t/sentence-boundary-detection-for-machine-translation/606/3

Expand All @@ -25,7 +24,7 @@ def split_sentences(self, text: str, lang_code: Optional[str] = None) -> List[st
class SpacySentencizerSmall(ISentenceBoundaryDetectionModel):
def __init__(self):
try:
self.nlp = spacy.load("xx_sent_ud_sm")
self.nlp = spacy.load("xx_sent_ud_sm")
except OSError:
spacy.cli.download("xx_sent_ud_sm")
self.nlp = spacy.load("xx_sent_ud_sm")
Expand All @@ -38,6 +37,7 @@ def split_sentences(self, text: str, lang_code: Optional[str] = None) -> List[st
def __str__(self):
return "Spacy xx_sent_ud_sm"


# Few Shot Sentence Boundary Detection

fewshot_prompt = """<detect-sentence-boundaries> I walked down to the river. Then I went to the
Expand Down Expand Up @@ -124,4 +124,4 @@ def detect_sentence(
sbd_translated_guess = sbd_translation.translate(
DETECT_SENTENCE_BOUNDARIES_TOKEN + sentence_guess
)
return process_seq2seq_sbd(input_text, sbd_translated_guess)
return process_seq2seq_sbd(input_text, sbd_translated_guess)
2 changes: 1 addition & 1 deletion argostranslate/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def get_setting(key: str, default=None):

package_index = get_setting(
"ARGOS_PACKAGE_INDEX",
default="https://raw.githubusercontent.com/argosopentech/argospm-index/main/index.json",
default="https://raw.githubusercontent.com/argosopentech/argospm-index/main/",
)

packages_dir = Path(get_setting("ARGOS_PACKAGES_DIR", default=data_dir / "packages"))
Expand Down
2 changes: 1 addition & 1 deletion argostranslate/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def is_same_structure(tag1: ITag | str, tag2: ITag | str) -> bool:
ARGOS_OPEN_TAG = "<argos-tag>"
ARGOS_CLOSE_TAG = "</argos-tag>"

GOLDEN_RATIO = (1 + 5**0.5) / 2
GOLDEN_RATIO = (1 + 5 ** 0.5) / 2


def flatten_tag(tag: ITag) -> str:
Expand Down
71 changes: 16 additions & 55 deletions argostranslate/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,11 @@
import argostranslate.models
import argostranslate.package
import argostranslate.settings
from argostranslate.utils import error, info, warning

from argostranslate import apis, fewshot, package, sbd, settings
from argostranslate.package import Package
from argostranslate.utils import info
from argostranslate.sbd import SpacySentencizerSmall
from argostranslate.utils import error, info, warning


class Hypothesis:
"""Represents a translation hypothesis
Expand Down Expand Up @@ -247,6 +246,7 @@ def __init__(self, from_lang: Language, to_lang: Language, api):
def hypotheses(self, from_text: str, num_hypotheses: int = 1) -> list[Hypothesis]:
"""LibreTranslate only supports single hypotheses.
TODO LibreTranslate now supports multiple hypotheses
A list of length num_hypotheses will be returned with identical hypotheses.
"""
result = self.api.translate(from_text, self.from_lang.code, self.to_lang.code)
Expand All @@ -269,7 +269,7 @@ def get_chunk_package(from_code):


def chunk(from_text, from_code):
# TODO Support Stanza and spacy
# TODO Support Spacy
if argostranslate.settings.chunk_type == argostranslate.settings.ChunkType.NONE:
return [from_text]
elif (
Expand Down Expand Up @@ -315,56 +315,6 @@ def apply_chunk_translation(from_text, ctranslate2_translator, sp_processor):
return [from_text]


class FewShotTranslation(ITranslation):
    """A translation performed with a few shot language model.

    Builds a few-shot prompt describing the requested language pair,
    sends it to the underlying language model, and parses the model's
    response into a translation result.
    """

    from_lang: Language
    to_lang: Language
    language_model: argostranslate.models.ILanguageModel

    def __init__(
        self,
        from_lang: Language,
        to_lang: Language,
        language_model: argostranslate.models.ILanguageModel,
    ):
        self.from_lang = from_lang
        self.to_lang = to_lang
        self.language_model = language_model

    def hypotheses(self, from_text: str, num_hypotheses: int = 1) -> list[Hypothesis]:
        """Return num_hypotheses translation hypotheses for from_text.

        The language model produces a single result, so the returned list
        contains num_hypotheses identical Hypothesis objects (score 0).
        On model failure an empty-string hypothesis list is returned
        rather than raising.
        """
        # TODO: Split into chunks
        prompt = argostranslate.fewshot.generate_prompt(
            from_text,
            self.from_lang.name,
            self.from_lang.code,
            self.to_lang.name,
            self.to_lang.code,
        )
        info("fewshot prompt", prompt)
        response = self.language_model.infer(prompt)
        # Log the response once here; the original logged it a second
        # time after the None check, which was redundant.
        info("fewshot response", response)
        if response is None:
            # Best-effort behavior: surface the failure in the logs and
            # return empty hypotheses instead of raising.
            error("fewshot response is None")
            return [Hypothesis("", 0)] * num_hypotheses
        result = argostranslate.fewshot.parse_inference(response)
        info("fewshot result", result)
        return [Hypothesis(result, 0)] * num_hypotheses


class LocalTranslation(ITranslation):
    """Translation backed by a locally installed Translator.

    Thin adapter that pins a (from_lang, to_lang) pair and delegates
    the actual translation work to the shared Translator instance.
    """

    def __init__(self, translator, from_lang, to_lang):
        self.translator = translator
        self.from_lang = from_lang
        self.to_lang = to_lang

    def hypotheses(self, from_text, num_hypotheses=4):
        """Delegate to the underlying Translator for this language pair."""
        source_code = self.from_lang.code
        target_code = self.to_lang.code
        return self.translator.translate(
            from_text, source_code, target_code, num_hypotheses
        )


class Translator:
def __init__(self, pkg: argostranslate.package.Package):
# TODO: Cache to prevent memory leaks
Expand Down Expand Up @@ -402,7 +352,6 @@ def remove_target_prefix(self, translated_tokens):

def translate(self, from_text, from_code, to_code, num_hypotheses):
# Split sentences
# TODO add spacy chunking from v1
sentences = chunk(from_text, from_code)

# Tokenize
Expand Down Expand Up @@ -455,6 +404,18 @@ def get_translation(self, from_lang, to_lang):
return LocalTranslation(self, from_lang, to_lang)


class LocalTranslation(ITranslation):
    """An ITranslation that runs inference on a local Translator."""

    def __init__(self, translator, from_lang, to_lang):
        # Shared Translator instance that performs the actual inference.
        self.translator = translator
        self.from_lang = from_lang
        self.to_lang = to_lang

    def hypotheses(self, from_text, num_hypotheses=4):
        """Return up to num_hypotheses translations of from_text,
        delegating to the Translator with this pair's language codes."""
        return self.translator.translate(
            from_text, self.from_lang.code, self.to_lang.code, num_hypotheses
        )


def get_installed_languages() -> list[Language]:
"""Returns a list of Languages installed from packages"""

Expand Down

0 comments on commit 635f4eb

Please sign in to comment.