From 153e06f8d5a5012b2625238c329e342dc3e08df7 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Fri, 23 Feb 2024 13:26:50 +0100
Subject: [PATCH] LanguageModel - Fix languages order and tests

---
 orangecontrib/text/language.py            |  3 ++-
 orangecontrib/text/tests/test_language.py | 29 ++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

Review notes (this region is ignored when the patch is applied):
- language.py: the single `sorted(filter(None, ISO2LANG), key=ISO2LANG.get)`
  statement is split in two. The `sorted(..., key=ISO2LANG.get)` call is
  understood to move out of the `if languages is None:` branch, so a
  caller-supplied `languages` list is ordered the same way as the default
  one; test_model_with_languages passes ["en", "ar", "it"] and expects
  "Arabic", "English", "Italian".
- test_language.py: adds TestLanguageModel, covering the default language
  list and an explicit list, each with and without include_none=True.
- NOTE(review): the tests use `len(ISO2LANG) - 1` and
  `list(ISO2LANG.values())[:-1]`, which presumably rely on the `None` entry
  being the last item of ISO2LANG -- confirm against language.py.

diff --git a/orangecontrib/text/language.py b/orangecontrib/text/language.py
index 1250c9757..fb64ddde3 100644
--- a/orangecontrib/text/language.py
+++ b/orangecontrib/text/language.py
@@ -124,7 +124,8 @@ def __init__(
         """
         if languages is None:
             # if languages not provided take all available languages
-            languages = sorted(filter(None, ISO2LANG), key=ISO2LANG.get)
+            languages = filter(None, ISO2LANG)
+        languages = sorted(languages, key=ISO2LANG.get)
         if include_none:
             languages = [None] + languages
         super().__init__(iterable=languages)
diff --git a/orangecontrib/text/tests/test_language.py b/orangecontrib/text/tests/test_language.py
index 24780152f..d068b2a5f 100644
--- a/orangecontrib/text/tests/test_language.py
+++ b/orangecontrib/text/tests/test_language.py
@@ -5,7 +5,34 @@
 from Orange.data import StringVariable, Domain
 
 from orangecontrib.text import Corpus
-from orangecontrib.text.language import detect_language, ISO2LANG
+from orangecontrib.text.language import detect_language, ISO2LANG, LanguageModel
+
+
+class TestLanguageModel(TestCase):
+    def test_model_without_languages(self):
+        # no None, all languages
+        lm = LanguageModel()
+        self.assertEqual(len(ISO2LANG) - 1, lm.rowCount())
+        all_langs = [lm.data(lm.index(i)) for i in range(lm.rowCount())]
+        expected = sorted(list(ISO2LANG.values())[:-1])
+        self.assertEqual(expected, all_langs)
+
+        lm = LanguageModel(include_none=True)
+        self.assertEqual(len(ISO2LANG), lm.rowCount())
+        all_langs = [lm.data(lm.index(i)) for i in range(lm.rowCount())]
+        expected = sorted(list(ISO2LANG.values())[:-1])
+        self.assertEqual(["(no language)"] + expected, all_langs)
+
+    def test_model_with_languages(self):
+        lm = LanguageModel(include_none=True, languages=["en", "ar", "it"])
+        self.assertEqual(4, lm.rowCount())
+        all_langs = [lm.data(lm.index(i)) for i in range(lm.rowCount())]
+        self.assertEqual(["(no language)", "Arabic", "English", "Italian"], all_langs)
+
+        lm = LanguageModel(languages=["en", "ar", "it"])
+        self.assertEqual(3, lm.rowCount())
+        all_langs = [lm.data(lm.index(i)) for i in range(lm.rowCount())]
+        self.assertEqual(["Arabic", "English", "Italian"], all_langs)
 
 
 class TestLanguage(TestCase):