diff --git a/orangecontrib/text/widgets/owpreprocess.py b/orangecontrib/text/widgets/owpreprocess.py index 7be2eed77..f65c60687 100644 --- a/orangecontrib/text/widgets/owpreprocess.py +++ b/orangecontrib/text/widgets/owpreprocess.py @@ -88,16 +88,17 @@ def __init__( """ super().__init__(parent) self.setMinimumWidth(80) - self.__add_items(items, include_none) - self.set_current_language(value) + items = [(ISO2LANG[itm], itm) for itm in items] + self.add_items(items, include_none, value) self.currentIndexChanged.connect(self.__index_changed) self.callback = callback - def __add_items(self, items: Iterable[str], include_non: bool): + def add_items(self, items: Iterable[Tuple[str, str]], include_non: bool, language: str): if include_non: self.addItem(_DEFAULT_NONE, None) - for itm in sorted(items, key=ISO2LANG.get): - self.addItem(ISO2LANG[itm], itm) + for itm in sorted(items): + self.addItem(*itm) + self.set_current_language(language) def __index_changed(self, index: QModelIndex): self.callback(self.itemData(index)) @@ -115,34 +116,34 @@ def set_current_language(self, iso_language: Optional[str]): self.setCurrentIndex(index) -class UDPipeComboBox(QComboBox): +class UDPipeComboBox(LanguageComboBox): def __init__(self, master: BaseEditor, value: str, default: str, callback: Callable): - super().__init__(master) - self.__items = [] # type: List + self.__items: List = [] self.__default_lang = default - self.add_items(value) - self.currentTextChanged.connect(callback) - self.setMinimumWidth(80) + super().__init__(master, [], value, False, callback) @property def items(self) -> List: return UDPipeLemmatizer().models.supported_languages - def add_items(self, value: str): + def add_items(self, _, include_non: bool, language: str): self.__items = self.items - self.addItems(self.__items) - if value in self.__items: - self.setCurrentText(value) - elif self.__default_lang in self.__items: - self.setCurrentText(self.__default_lang) + super().add_items(self.__items, include_non, language) + + def set_current_language(self, iso_language: Optional[str]): + iso_items = {iso for _, iso in self.__items} + if iso_language in iso_items: + super().set_current_language(iso_language) + elif self.__default_lang in iso_items: + super().set_current_language(self.__default_lang) elif self.__items: self.setCurrentIndex(0) def showPopup(self): if self.__items != self.items: self.clear() - self.add_items(self.currentText()) + self.add_items(None, False, self.itemData(self.currentIndex())) super().showPopup() @@ -475,14 +476,13 @@ class NormalizationModule(SingleMethodModule): UDPipe: UDPipeLemmatizer, Lemmagen: LemmagenLemmatizer} DEFAULT_METHOD = Porter - DEFAULT_UDPIPE_LANG = "English" # todo: remove when udpipe use iso DEFAULT_LANGUAGE = "en" DEFAULT_USE_TOKE = False def __init__(self, parent=None, **kwargs): super().__init__(parent, **kwargs) self.__snowball_lang = self.DEFAULT_LANGUAGE - self.__udpipe_lang = self.DEFAULT_UDPIPE_LANG + self.__udpipe_lang = self.DEFAULT_LANGUAGE self.__lemmagen_lang = self.DEFAULT_LANGUAGE self.__use_tokenizer = self.DEFAULT_USE_TOKE @@ -494,7 +494,7 @@ def __init__(self, parent=None, **kwargs): self.__set_snowball_lang ) self.__combo_udl = UDPipeComboBox( - self, self.__udpipe_lang, self.DEFAULT_UDPIPE_LANG, self.__set_udpipe_lang + self, self.__udpipe_lang, self.DEFAULT_LANGUAGE, self.__set_udpipe_lang ) self.__check_use = QCheckBox("UDPipe tokenizer", checked=self.DEFAULT_USE_TOKE) @@ -538,7 +538,7 @@ def setParameters(self, params: Dict): super().setParameters(params) snowball_lang = params.get("snowball_language", self.DEFAULT_LANGUAGE) self.__set_snowball_lang(snowball_lang) - udpipe_lang = params.get("udpipe_language", self.DEFAULT_UDPIPE_LANG) + udpipe_lang = params.get("udpipe_language", self.DEFAULT_LANGUAGE) self.__set_udpipe_lang(udpipe_lang) use_tokenizer = params.get("udpipe_tokenizer", self.DEFAULT_USE_TOKE) self.__set_use_tokenizer(use_tokenizer) @@ -560,7 +560,7 @@ def __set_snowball_lang(self, language: str): def __set_udpipe_lang(self, language: str): if self.__udpipe_lang != language: self.__udpipe_lang = language - self.__combo_udl.setCurrentText(language) + self.__combo_udl.set_current_language(language) self.changed.emit() if self.method == self.UDPipe: self.edited.emit() @@ -593,13 +593,12 @@ def parameters(self) -> Dict: def createinstance(params: Dict) -> BaseNormalizer: method = params.get("method", NormalizationModule.DEFAULT_METHOD) args = {} - def_udpipe = NormalizationModule.DEFAULT_UDPIPE_LANG def_lang = NormalizationModule.DEFAULT_LANGUAGE if method == NormalizationModule.Snowball: args = {"language": params.get("snowball_language", def_lang)} elif method == NormalizationModule.UDPipe: def_use = NormalizationModule.DEFAULT_USE_TOKE - args = {"language": params.get("udpipe_language", def_udpipe), + args = {"language": params.get("udpipe_language", def_lang), "use_tokenizer": params.get("udpipe_tokenizer", def_use)} elif method == NormalizationModule.Lemmagen: args = {"language": params.get("lemmagen_language", def_lang)} diff --git a/orangecontrib/text/widgets/tests/test_owpreprocess.py b/orangecontrib/text/widgets/tests/test_owpreprocess.py index 713d67b43..a0af1ba93 100644 --- a/orangecontrib/text/widgets/tests/test_owpreprocess.py +++ b/orangecontrib/text/widgets/tests/test_owpreprocess.py @@ -127,7 +127,7 @@ def test_udpipe_offline(self): @patch("orangecontrib.text.preprocess.normalize.UDPipeModels.online", PropertyMock(return_value=False)) @patch("orangecontrib.text.preprocess.normalize.UDPipeModels.model_files", - PropertyMock(return_value=[])) + PropertyMock(return_value={})) @patch("orangecontrib.text.widgets.owpreprocess.OWPreprocess.start", Mock()) def test_udpipe_no_models(self): widget = self.create_widget(OWPreprocess) @@ -500,7 +500,7 @@ def test_parameters(self): params = { "method": NormalizationModule.Porter, "snowball_language": "en", - "udpipe_language": "English", + "udpipe_language": "en", "lemmagen_language": "en", "udpipe_tokenizer": False, } @@ -510,7 +510,7 @@ def test_set_parameters(self): params = { "method": NormalizationModule.UDPipe, "snowball_language": "nl", - "udpipe_language": "Slovenian", + "udpipe_language": "sl", "lemmagen_language": "bg", "udpipe_tokenizer": True, } @@ -549,7 +549,7 @@ def test_repr(self): @patch("orangecontrib.text.preprocess.normalize.UDPipeModels.online", PropertyMock(return_value=False)) @patch("orangecontrib.text.preprocess.normalize.UDPipeModels.model_files", - PropertyMock(return_value=[])) + PropertyMock(return_value={})) def test_udpipe_no_models(self): editor = NormalizationModule() button = editor._SingleMethodModule__group.button(editor.UDPipe)