Skip to content

Commit

Permalink
OWAnnotator - Update test with "artificial" embeddings for better cluster distinguish
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Nov 2, 2023
1 parent fcdbe74 commit 4b6af85
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 20 deletions.
2 changes: 1 addition & 1 deletion orangecontrib/text/tests/test_annotate_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def setUp(self):
def test_gmm(self):
labels = ClusterDocuments.gmm(self.corpus.metas[:, -2:], 2, 0.6)
self.assertIn(
list(labels), ([0, 1, 0, 1, 0, 1, 0, 1, 0], [0, 1, 0, 1, 0, 1, 0, 1, 0])
list(labels), ([0, 1, 0, 1, 0, 1, 0, 1, 0], [1, 0, 1, 0, 1, 0, 1, 0, 1])
)

def test_gmm_n_comp(self):
Expand Down
25 changes: 6 additions & 19 deletions orangecontrib/text/widgets/tests/test_owannotator.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring,arguments-differ
import unittest
from itertools import chain
from unittest.mock import Mock, patch

import numpy as np
from AnyQt.QtCore import QRectF, QPointF

from Orange.data import Domain, Table
from Orange.projection import PCA
from Orange.widgets.tests.base import WidgetTest, simulate
from Orange.widgets.unsupervised.owtsne import OWtSNE
from Orange.widgets.unsupervised.tests.test_owtsne import DummyTSNE, \
DummyTSNEModel
from orangecontrib.text import Corpus
from orangecontrib.text.preprocess import LowercaseTransformer, \
RegexpTokenizer, StopwordsFilter, FrequencyFilter
from orangecontrib.text.tests.test_annotate_documents import add_embedding
from orangecontrib.text.vectorization import BowVectorizer
from orangecontrib.text.widgets.owannotator import OWAnnotator

Expand All @@ -24,20 +23,8 @@ def preprocess(corpus: Corpus) -> Corpus:
for pp in (LowercaseTransformer(), RegexpTokenizer(r"\w+"),
StopwordsFilter("English"), FrequencyFilter(0.25, 0.5)):
corpus = pp(corpus)

transformed_corpus = BowVectorizer().transform(corpus)

pca = PCA(n_components=2)
pca_model = pca(transformed_corpus)
projection = pca_model(transformed_corpus)

domain = Domain(
transformed_corpus.domain.attributes,
transformed_corpus.domain.class_vars,
chain(transformed_corpus.domain.metas,
projection.domain.attributes)
)
return corpus.transform(domain)
corpus = BowVectorizer().transform(corpus)
return add_embedding(corpus, 4)


class TestOWAnnotator(WidgetTest):
Expand Down Expand Up @@ -257,14 +244,14 @@ def test_no_disc_var_context(self):
self.assertIsNotNone(self.widget.cluster_var)

def test_invalidate(self):
self.send_signal(self.widget.Inputs.corpus, self.corpus[::4])
self.send_signal(self.widget.Inputs.corpus, self.corpus[:len(self.corpus) // 2])

self.wait_until_finished()
self.assertEqual(len(self.widget.clusters.groups), 1)
self.assertEqual(len(self.widget.clusters.groups), 4)

self.widget.controls.clustering_type.buttons[1].click()
self.wait_until_finished()
self.assertEqual(len(self.widget.clusters.groups), 8)
self.assertEqual(len(self.widget.clusters.groups), 4)

self.widget.controls.use_n_components.setChecked(True)
self.widget.controls.n_components.setValue(4)
Expand Down

0 comments on commit 4b6af85

Please sign in to comment.