Keywords - temporary solution for list of stopwords

biolab · Jun 1, 2023 · 56268bc
1 parent 1996d27
commit 56268bc
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 3 deletions.
diff --git a/orangecontrib/text/keywords/__init__.py b/orangecontrib/text/keywords/__init__.py
@@ -15,13 +15,15 @@
 from orangecontrib.text import Corpus
 from orangecontrib.text.keywords.mbert import mbert_keywords
 from orangecontrib.text.keywords.rake import Rake
+from orangecontrib.text.language import ISO2LANG
 from orangecontrib.text.preprocess import StopwordsFilter
 
 # all available languages for RAKE
 from orangecontrib.text.vectorization import BowVectorizer
 
-# todo
-RAKE_LANGUAGES = StopwordsFilter.supported_languages()
+# todo: this is a temporary solution since supported_languages now returns
+#  languages as ISO codes - refactor with keywords language refactoring
+RAKE_LANGUAGES = [ISO2LANG[la] for la in StopwordsFilter.supported_languages()]
 # all available languages for YAKE!
 YAKE_LANGUAGE_MAPPING = {
     "Arabic": "ar",

diff --git a/orangecontrib/text/widgets/tests/test_owkeywords.py b/orangecontrib/text/widgets/tests/test_owkeywords.py
@@ -77,7 +77,7 @@ def test_run_with_words(self):
         self.assertEqual(len(results.scores), 42)
 
     def test_run_normalize_words(self):
-        normalizer = LemmagenLemmatizer()
+        normalizer = LemmagenLemmatizer(language="en")
         corpus = normalizer(self.corpus)
 
         words = ["minor", "tree"]