From 153e06f8d5a5012b2625238c329e342dc3e08df7 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Fri, 23 Feb 2024 13:26:50 +0100
Subject: [PATCH] LanguageModel - Fix languages order and tests
---
orangecontrib/text/language.py | 3 ++-
orangecontrib/text/tests/test_language.py | 29 ++++++++++++++++++++++-
2 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/orangecontrib/text/language.py b/orangecontrib/text/language.py
index 1250c9757..fb64ddde3 100644
--- a/orangecontrib/text/language.py
+++ b/orangecontrib/text/language.py
@@ -124,7 +124,8 @@ def __init__(
"""
if languages is None:
# if languages not provided take all available languages
- languages = sorted(filter(None, ISO2LANG), key=ISO2LANG.get)
+ languages = filter(None, ISO2LANG)
+ languages = sorted(languages, key=ISO2LANG.get)
if include_none:
languages = [None] + languages
super().__init__(iterable=languages)
diff --git a/orangecontrib/text/tests/test_language.py b/orangecontrib/text/tests/test_language.py
index 24780152f..d068b2a5f 100644
--- a/orangecontrib/text/tests/test_language.py
+++ b/orangecontrib/text/tests/test_language.py
@@ -5,7 +5,34 @@
from Orange.data import StringVariable, Domain
from orangecontrib.text import Corpus
-from orangecontrib.text.language import detect_language, ISO2LANG
+from orangecontrib.text.language import detect_language, ISO2LANG, LanguageModel
+
+
+class TestLanguageModel(TestCase):
+    def test_model_without_languages(self):
+        """Default model lists all named languages sorted; None only on request."""
+        expected = sorted(name for code, name in ISO2LANG.items() if code is not None)
+        lm = LanguageModel()
+        self.assertEqual(len(ISO2LANG) - 1, lm.rowCount())
+        all_langs = [lm.data(lm.index(i)) for i in range(lm.rowCount())]
+        self.assertEqual(expected, all_langs)
+
+        lm = LanguageModel(include_none=True)
+        self.assertEqual(len(ISO2LANG), lm.rowCount())
+        all_langs = [lm.data(lm.index(i)) for i in range(lm.rowCount())]
+        self.assertEqual(["(no language)"] + expected, all_langs)
+
+    def test_model_with_languages(self):
+        """Explicitly passed languages are listed sorted by their names."""
+        lm = LanguageModel(include_none=True, languages=["en", "ar", "it"])
+        self.assertEqual(4, lm.rowCount())
+        all_langs = [lm.data(lm.index(i)) for i in range(lm.rowCount())]
+        self.assertEqual(["(no language)", "Arabic", "English", "Italian"], all_langs)
+
+        lm = LanguageModel(languages=["en", "ar", "it"])
+        self.assertEqual(3, lm.rowCount())
+        all_langs = [lm.data(lm.index(i)) for i in range(lm.rowCount())]
+        self.assertEqual(["Arabic", "English", "Italian"], all_langs)
class TestLanguage(TestCase):