diff --git a/orangecontrib/text/keywords/__init__.py b/orangecontrib/text/keywords/__init__.py
index bdfb44bdd..8dc84f6c4 100644
--- a/orangecontrib/text/keywords/__init__.py
+++ b/orangecontrib/text/keywords/__init__.py
@@ -15,13 +15,15 @@
 from orangecontrib.text import Corpus
 from orangecontrib.text.keywords.mbert import mbert_keywords
 from orangecontrib.text.keywords.rake import Rake
+from orangecontrib.text.language import ISO2LANG
 from orangecontrib.text.preprocess import StopwordsFilter
 
 # all available languages for RAKE
 from orangecontrib.text.vectorization import BowVectorizer
-# todo
-RAKE_LANGUAGES = StopwordsFilter.supported_languages()
+# todo: this is a temporary solution since supported_languages now returns
+# languages as ISO codes - refactor with keywords language refactoring
+RAKE_LANGUAGES = [ISO2LANG[la] for la in StopwordsFilter.supported_languages()]
 
 # all available languages for YAKE!
 YAKE_LANGUAGE_MAPPING = {
     "Arabic": "ar",
diff --git a/orangecontrib/text/widgets/tests/test_owkeywords.py b/orangecontrib/text/widgets/tests/test_owkeywords.py
index ae301fbce..f34abbd5b 100644
--- a/orangecontrib/text/widgets/tests/test_owkeywords.py
+++ b/orangecontrib/text/widgets/tests/test_owkeywords.py
@@ -77,7 +77,7 @@ def test_run_with_words(self):
         self.assertEqual(len(results.scores), 42)
 
     def test_run_normalize_words(self):
-        normalizer = LemmagenLemmatizer()
+        normalizer = LemmagenLemmatizer(language="en")
         corpus = normalizer(self.corpus)
         words = ["minor", "tree"]
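
Note on the RAKE_LANGUAGES change above: StopwordsFilter.supported_languages() now returns ISO language codes, while RAKE expects full language names, so the patched assignment translates each code back to a name through the ISO2LANG table. Below is a minimal, self-contained sketch of that translation; the dictionary contents and the supported_languages() stub are illustrative stand-ins, not the actual values shipped in orangecontrib.text.

    # Hypothetical subset of the ISO-code -> name table; the real mapping
    # lives in orangecontrib.text.language.ISO2LANG.
    ISO2LANG = {"ar": "Arabic", "en": "English", "sl": "Slovenian"}

    # Stand-in for StopwordsFilter.supported_languages(), which per the
    # patch now yields ISO codes rather than full language names.
    def supported_languages():
        return ["ar", "en", "sl"]

    # The same translation the patched RAKE_LANGUAGES assignment performs.
    RAKE_LANGUAGES = [ISO2LANG[code] for code in supported_languages()]
    print(RAKE_LANGUAGES)  # ['Arabic', 'English', 'Slovenian']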