From bf1361d76020770f162295c314f2cd4aea2cab2b Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 5 Oct 2023 11:29:25 -0700 Subject: [PATCH 01/15] feat: new implementation of get_filtered_tags --- openedx_tagging/core/tagging/api.py | 118 ++--- openedx_tagging/core/tagging/data.py | 24 + .../core/tagging/import_export/parsers.py | 6 +- openedx_tagging/core/tagging/models/base.py | 249 +++++++--- .../core/tagging/models/system_defined.py | 47 -- .../core/fixtures/tagging.yaml | 21 + .../openedx_tagging/core/tagging/test_api.py | 13 +- .../core/tagging/test_models.py | 446 +++++++++++++----- 8 files changed, 583 insertions(+), 341 deletions(-) create mode 100644 openedx_tagging/core/tagging/data.py diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py index e9140968..58a04356 100644 --- a/openedx_tagging/core/tagging/api.py +++ b/openedx_tagging/core/tagging/api.py @@ -13,9 +13,10 @@ from __future__ import annotations from django.db import transaction -from django.db.models import F, QuerySet +from django.db.models import QuerySet from django.utils.translation import gettext as _ +from .data import TagData from .models import ObjectTag, Tag, Taxonomy # Export this as part of the API @@ -70,54 +71,56 @@ def get_taxonomies(enabled=True) -> QuerySet[Taxonomy]: return queryset.filter(enabled=enabled) -def get_tags(taxonomy: Taxonomy) -> list[Tag]: +def get_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: """ - Returns a list of predefined tags for the given taxonomy. + Returns a QuerySet of all the tags in the given taxonomy. - Note that if the taxonomy allows free-text tags, then the returned list will be empty. + Note that if the taxonomy is dynamic or free-text, only tags that have + already been applied to some object will be returned. """ - return taxonomy.cast().get_tags() + return taxonomy.cast().get_filtered_tags() -def get_root_tags(taxonomy: Taxonomy) -> list[Tag]: +def get_root_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: """ Returns a list of the root tags for the given taxonomy. Note that if the taxonomy allows free-text tags, then the returned list will be empty. """ - return list(taxonomy.cast().get_filtered_tags()) + return taxonomy.cast().get_filtered_tags(depth=1) -def search_tags(taxonomy: Taxonomy, search_term: str) -> list[Tag]: +def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: int | None = None) -> QuerySet[TagData]: """ - Returns a list of all tags that contains `search_term` of the given taxonomy. + Returns a list of all tags that contains `search_term` of the given + taxonomy, as well as their ancestors (so they can be displayed in a tree). - Note that if the taxonomy allows free-text tags, then the returned list will be empty. + If exclude_object_id is set, any tags applied to that object will be + excluded from the results, e.g. to power an autocomplete search when adding + additional tags to an object. """ - return list( - taxonomy.cast().get_filtered_tags( - search_term=search_term, - search_in_all=True, + qs = taxonomy.cast().get_filtered_tags(search_term=search_term) + if exclude_object_id: + # Fetch tags that the object already has to exclude them from the result + excluded_values = list( + taxonomy.objecttag_set.filter(object_id=exclude_object_id).values_list( + "_value", flat=True + ) ) - ) + qs = qs.exclude(value__in=excluded_values) + return qs def get_children_tags( taxonomy: Taxonomy, - parent_tag_id: int, - search_term: str | None = None, -) -> list[Tag]: + parent_tag_value: str, +) -> QuerySet[TagData]: """ - Returns a list of children tags for the given parent tag. + Returns a QuerySet of children tags for the given parent tag. Note that if the taxonomy allows free-text tags, then the returned list will be empty. """ - return list( - taxonomy.cast().get_filtered_tags( - parent_tag_id=parent_tag_id, - search_term=search_term, - ) - ) + return taxonomy.cast().get_filtered_tags(parent_tag_value=parent_tag_value) def resync_object_tags(object_tags: QuerySet | None = None) -> int: @@ -250,68 +253,3 @@ def _check_new_tag_count(new_tag_count: int) -> None: for object_tag in updated_tags: object_tag.full_clean() # Run validation object_tag.save() - - -# TODO: return tags from closed taxonomies as well as the count of how many times each is used. -def autocomplete_tags( - taxonomy: Taxonomy, - search: str, - object_id: str | None = None, - object_tags_only=True, -) -> QuerySet: - """ - Provides auto-complete suggestions by matching the `search` string against existing - ObjectTags linked to the given taxonomy. A case-insensitive search is used in order - to return the highest number of relevant tags. - - If `object_id` is provided, then object tag values already linked to this object - are omitted from the returned suggestions. (ObjectTag values must be unique for a - given object + taxonomy, and so omitting these suggestions helps users avoid - duplication errors.). - - Returns a QuerySet of dictionaries containing distinct `value` (string) and - `tag` (numeric ID) values, sorted alphabetically by `value`. - The `value` is what should be shown as a suggestion to users, - and if it's a free-text taxonomy, `tag` will be `None`: we include the `tag` ID - in anticipation of the second use case listed below. - - Use cases: - * This method is useful for reducing tag variation in free-text taxonomies by showing - users tags that are similar to what they're typing. E.g., if the `search` string "dn" - shows that other objects have been tagged with "DNA", "DNA electrophoresis", and "DNA fingerprinting", - this encourages users to use those existing tags if relevant, instead of creating new ones that - look similar (e.g. "dna finger-printing"). - * It could also be used to assist tagging for closed taxonomies with a list of possible tags which is too - large to return all at once, e.g. a user model taxonomy that dynamically creates tags on request for any - registered user in the database. (Note that this is not implemented yet, but may be as part of a future change.) - """ - if not object_tags_only: - raise NotImplementedError( - _( - "Using this would return a query set of tags instead of object tags." - "For now we recommend fetching all of the taxonomy's tags " - "using get_tags() and filtering them on the frontend." - ) - ) - # Fetch tags that the object already has to exclude them from the result - excluded_tags: list[str] = [] - if object_id: - excluded_tags = list( - taxonomy.objecttag_set.filter(object_id=object_id).values_list( - "_value", flat=True - ) - ) - return ( - # Fetch object tags from this taxonomy whose value contains the search - taxonomy.objecttag_set.filter(_value__icontains=search) - # omit any tags whose values match the tags on the given object - .exclude(_value__in=excluded_tags) - # alphabetical ordering - .order_by("_value") - # Alias the `_value` field to `value` to make it nicer for users - .annotate(value=F("_value")) - # obtain tag values - .values("value", "tag_id") - # remove repeats - .distinct() - ) diff --git a/openedx_tagging/core/tagging/data.py b/openedx_tagging/core/tagging/data.py new file mode 100644 index 00000000..b1a0ec3e --- /dev/null +++ b/openedx_tagging/core/tagging/data.py @@ -0,0 +1,24 @@ +""" +Data models used by openedx-tagging +""" +from __future__ import annotations + +from typing import TypedDict + + +class TagData(TypedDict): + """ + Data about a single tag. Many of the tagging API methods return Django + QuerySets that resolve to these dictionaries. + + Even though the data will be in this same format, it will not necessarily + be an instance of this class but rather a plain dictionary. This is more a + type than a class. + """ + value: str + external_id: str | None + child_count: int + depth: int + parent_value: str | None + # Note: usage_count may not actually be present but there's no way to indicate that w/ python types at the moment + usage_count: int diff --git a/openedx_tagging/core/tagging/import_export/parsers.py b/openedx_tagging/core/tagging/import_export/parsers.py index c0b8207f..e5db49aa 100644 --- a/openedx_tagging/core/tagging/import_export/parsers.py +++ b/openedx_tagging/core/tagging/import_export/parsers.py @@ -168,12 +168,12 @@ def _load_tags_for_export(cls, taxonomy: Taxonomy) -> list[dict]: The tags are ordered by hierarchy, first, parents and then children. `get_tags` is in charge of returning this in a hierarchical way. """ - tags = get_tags(taxonomy) + tags = Taxonomy.get_filtered_tags().all() result = [] for tag in tags: result_tag = { - "id": tag.external_id or tag.id, - "value": tag.value, + "id": tag["external_id"] or tag["id"], + "value": tag["value"], } if tag.parent: result_tag["parent_id"] = tag.parent.external_id or tag.parent.id diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index d24d67d4..b5656752 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -8,12 +8,17 @@ from django.core.exceptions import ValidationError from django.db import models +from django.db.models import F, Q, Value +from django.db.models.functions import Coalesce, Concat +from django.utils.functional import cached_property from django.utils.module_loading import import_string from django.utils.translation import gettext_lazy as _ from typing_extensions import Self # Until we upgrade to python 3.11 from openedx_learning.lib.fields import MultiCollationTextField, case_insensitive_char_field, case_sensitive_char_field +from ..data import TagData + log = logging.getLogger(__name__) @@ -105,6 +110,35 @@ def get_lineage(self) -> Lineage: tag = tag.parent depth -= 1 return lineage + + @cached_property + def num_ancestors(self) -> int: + """ + How many ancestors this Tag has. Equivalent to its "depth" in the tree. + Zero for root tags. + """ + num_ancestors = 0 + tag = self + while tag.parent: + num_ancestors += 1 + tag = tag.parent + return num_ancestors + + @staticmethod + def annotate_depth(qs: models.QuerySet) -> models.QuerySet: + """ + Given a query that loads Tag objects, annotate it with the depth of + each tag. + """ + return qs.annotate(depth=models.Case( + models.When(parent_id=None, then=0), + models.When(parent__parent_id=None, then=1), + models.When(parent__parent__parent_id=None, then=2), + models.When(parent__parent__parent__parent_id=None, then=3), + # If the depth is 4 or more, currently we just "collapse" the depth + # to 4 in order not to add too many joins to this query in general. + default=4, + )) class Taxonomy(models.Model): @@ -260,87 +294,174 @@ def copy(self, taxonomy: Taxonomy) -> Taxonomy: self._taxonomy_class = taxonomy._taxonomy_class # pylint: disable=protected-access return self - def get_tags( + def get_filtered_tags( self, - tag_set: models.QuerySet[Tag] | None = None, - ) -> list[Tag]: + depth: int | None = TAXONOMY_MAX_DEPTH, + parent_tag_value: str | None = None, + search_term: str | None = None, + include_counts: bool = True, + ) -> models.QuerySet[TagData]: """ - Returns a list of all Tags in the current taxonomy, from the root(s) - down to TAXONOMY_MAX_DEPTH tags, in tree order. + Returns a filtered QuerySet of tag values. + For free text or dynamic taxonomies, this will only return tag values + that have actually been used. - Use `tag_set` to do an initial filtering of the tags. + By default returns all the tags of the given taxonomy - Annotates each returned Tag with its ``depth`` in the tree (starting at - 0). + Use `depth=1` to return a single level of tags, without any child + tags included. Use `depth=None` or `depth=TAXONOMY_MAX_DEPTH` to return + all descendants of the tags, up to our maximum supported depth. - Performance note: may perform as many as TAXONOMY_MAX_DEPTH select - queries. + Use `parent_tag_value` to return only the children/descendants of a specific tag. + + Use `search_term` to filter the results by values that contains `search_term`. + + Note: This is mostly an 'internal' API and generally code outside of openedx_tagging + should use the APIs in openedx_tagging.api which in turn use this. """ - tags: list[Tag] = [] if self.allow_free_text: - return tags - - if tag_set is None: - tag_set = self.tag_set.all() - - parents = None - - for depth in range(TAXONOMY_MAX_DEPTH): - filtered_tags = tag_set.prefetch_related("parent") - if parents is None: - filtered_tags = filtered_tags.filter(parent=None) - else: - filtered_tags = filtered_tags.filter(parent__in=parents) - next_parents = list( - filtered_tags.annotate( - annotated_field=models.Value( - depth, output_field=models.IntegerField() - ) - ) - .order_by("parent__value", "value", "id") - .all() + if parent_tag_value is not None: + raise ValueError("Cannot specify a parent tag ID for free text taxonomies") + return self._get_filtered_tags_free_text(search_term=search_term, include_counts=include_counts) + elif depth == 1: + return self._get_filtered_tags_one_level( + parent_tag_value=parent_tag_value, + search_term=search_term, + include_counts=include_counts, + ) + elif depth is None or depth == TAXONOMY_MAX_DEPTH: + return self._get_filtered_tags_deep( + parent_tag_value=parent_tag_value, + search_term=search_term, + include_counts=include_counts, ) - tags.extend(next_parents) - parents = next_parents - if not parents: - break - return tags + else: + raise ValueError("Unsupported depth value for get_filtered_tags()") - def get_filtered_tags( + def _get_filtered_tags_free_text( self, - tag_set: models.QuerySet[Tag] | None = None, - parent_tag_id: int | None = None, - search_term: str | None = None, - search_in_all: bool = False, - ) -> models.QuerySet[Tag]: + search_term: str | None, + include_counts: bool, + ) -> models.QuerySet[TagData]: """ - Returns a filtered QuerySet of tags. - By default returns the root tags of the given taxonomy - - Use `parent_tag_id` to return the children of a tag. - - Use `search_term` to filter the results by values that contains `search_term`. + Implementation of get_filtered_tags() for free text taxonomies. + """ + assert self.allow_free_text + qs: models.QuerySet = self.objecttag_set.all() + if search_term: + qs = qs.filter(_value__icontains=search_term) + # Rename "_value" to "value" + qs = qs.annotate(value=F('_value')) + # Add in all these fixed fields that don't really apply to free text tags, but we include for consistency: + qs = qs.annotate( + depth=Value(0), + child_count=Value(0), + external_id=Value(None, output_field=models.CharField()), + parent_value=Value(None, output_field=models.CharField()), + ) + qs = qs.values("value", "child_count", "depth", "parent_value", "external_id").order_by("value") + if include_counts: + return qs.annotate(usage_count=models.Count("value")) + else: + return qs.distinct() - Set `search_in_all` to True to make the search in all tags on the given taxonomy. + def _get_filtered_tags_one_level( + self, + parent_tag_value: str | None, + search_term: str | None, + include_counts: bool, + ) -> models.QuerySet[TagData]: + """ + Implementation of get_filtered_tags() for closed taxonomies, where + depth=1. When depth=1, we're only looking at a single "level" of the + taxononomy, like all root tags or all children of a specific tag. + """ + # A closed, and possibly hierarchical taxonomy. We're just fetching a single "level" of tags. + if parent_tag_value: + parent_tag = self.tag_for_value(parent_tag_value) + qs: models.QuerySet = self.tag_set.filter(parent_id=parent_tag.pk) + qs = qs.annotate(depth=Value(parent_tag.num_ancestors + 1)) + # Use parent_tag.value not parent_tag_value because they may differ in case + qs = qs.annotate(parent_value=Value(parent_tag.value)) + else: + qs = self.tag_set.filter(parent=None).annotate(depth=Value(0)) + qs = qs.annotate(parent_value=Value(None, output_field=models.CharField())) + qs = qs.annotate(child_count=models.Count("children")) + # Filter by search term: + if search_term: + qs = qs.filter(value__icontains=search_term) + qs = qs.values("value", "child_count", "depth", "parent_value", "external_id").order_by("value") + if include_counts: + # We need to include the count of how many times this tag is used to tag objects. + # You'd think we could just use: + # qs = qs.annotate(usage_count=models.Count("objecttag__pk")) + # but that adds another join which starts creating a cross product and the children and usage_count become + # intertwined and multiplied with each other. So we use a subquery. + obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate( + # We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027 + count=models.Func(F('id'), function='Count') + ) + qs = qs.annotate(usage_count=models.Subquery(obj_tags.values('count'))) + return qs - Note: This is mostly an 'internal' API and generally code outside of openedx_tagging - should use the APIs in openedx_tagging.api which in turn use this. + def _get_filtered_tags_deep( + self, + parent_tag_value: str | None, + search_term: str | None, + include_counts: bool, + ) -> models.QuerySet[TagData]: """ - if tag_set is None: - tag_set = self.tag_set.all() - - if self.allow_free_text: - return tag_set.none() + Implementation of get_filtered_tags() for closed taxonomies, where + we're including tags from multiple levels of the hierarchy. + """ + # All tags (possibly below a certain tag?) in the closed taxonomy, up to depth TAXONOMY_MAX_DEPTH + if parent_tag_value: + main_parent_id = self.tag_for_value(parent_tag_value).pk + else: + main_parent_id = None - if not search_in_all: - # If not search in all taxonomy, then apply parent filter. - tag_set = tag_set.filter(parent=parent_tag_id) + assert TAXONOMY_MAX_DEPTH == 3 # If we change TAXONOMY_MAX_DEPTH we need to change this query code: + qs: models.QuerySet = self.tag_set.filter( + Q(parent_id=main_parent_id) | + Q(parent__parent_id=main_parent_id) | + Q(parent__parent__parent_id=main_parent_id) + ) if search_term: - # Apply search filter - tag_set = tag_set.filter(value__icontains=search_term) - - return tag_set.order_by("value", "id") + # We need to do an additional query to find all the tags that match the search term, then limit the + # search to those tags and their ancestors. + matching_tags = qs.filter(value__icontains=search_term).values( + 'id', 'parent_id', 'parent__parent_id', 'parent__parent__parent_id' + ) + matching_ids = [] + for row in matching_tags: + for pk in row.values(): + if pk is not None: + matching_ids.append(pk) + qs = qs.filter(pk__in=matching_ids) + + qs = qs.annotate(child_count=models.Count("children")) + # Add the "depth" to each tag: + qs = Tag.annotate_depth(qs) + # Add the "lineage" field to sort them in order correctly: + qs = qs.annotate(sort_key=Concat( + Coalesce(F("parent__parent__parent__value"), Value("")), + Coalesce(F("parent__parent__value"), Value("")), + Coalesce(F("parent__value"), Value("")), + F("value"), + output_field=models.CharField(), + )) + # Add the parent value + qs = qs.annotate(parent_value=F("parent__value")) + qs = qs.values("value", "child_count", "depth", "parent_value", "external_id").order_by("sort_key") + if include_counts: + # Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level() + obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate( + # We need to use Func() to get Count() without GROUP BY - see https://stackoverflow.com/a/69031027 + count=models.Func(F('id'), function='Count') + ) + qs = qs.annotate(usage_count=models.Subquery(obj_tags.values('count'))) + return qs def validate_value(self, value: str) -> bool: """ diff --git a/openedx_tagging/core/tagging/models/system_defined.py b/openedx_tagging/core/tagging/models/system_defined.py index 6851ab9d..3efb68f2 100644 --- a/openedx_tagging/core/tagging/models/system_defined.py +++ b/openedx_tagging/core/tagging/models/system_defined.py @@ -198,53 +198,6 @@ class LanguageTaxonomy(SystemDefinedTaxonomy): class Meta: proxy = True - def get_tags( - self, - tag_set: models.QuerySet[Tag] | None = None, - ) -> list[Tag]: - """ - Returns a list of all the available Language Tags, annotated with ``depth`` = 0. - """ - available_langs = self._get_available_languages() - tag_set = self.tag_set.filter(external_id__in=available_langs) - return super().get_tags(tag_set=tag_set) - - def get_filtered_tags( - self, - tag_set: models.QuerySet[Tag] | None = None, - parent_tag_id: int | None = None, - search_term: str | None = None, - search_in_all: bool = False, - ) -> models.QuerySet[Tag]: - """ - Returns a filtered QuerySet of available Language Tags. - By default returns all the available Language Tags. - - `parent_tag_id` returns an empty result because all Language tags are root tags. - - Use `search_term` to filter the results by values that contains `search_term`. - """ - if parent_tag_id: - return self.tag_set.none() - - available_langs = self._get_available_languages() - tag_set = self.tag_set.filter(external_id__in=available_langs) - return super().get_filtered_tags( - tag_set=tag_set, - search_term=search_term, - search_in_all=search_in_all, - ) - - @classmethod - def _get_available_languages(cls) -> set[str]: - """ - Get available languages from Django LANGUAGE. - """ - langs = set() - for django_lang in settings.LANGUAGES: - langs.add(django_lang[0]) - return langs - def validate_value(self, value: str): """ Check if 'value' is part of this Taxonomy, based on the specified model. diff --git a/tests/openedx_tagging/core/fixtures/tagging.yaml b/tests/openedx_tagging/core/fixtures/tagging.yaml index 4715b667..164b9399 100644 --- a/tests/openedx_tagging/core/fixtures/tagging.yaml +++ b/tests/openedx_tagging/core/fixtures/tagging.yaml @@ -1,3 +1,24 @@ +# - Bacteria +# |- Archaebacteria +# |- Eubacteria +# - Archaea +# |- DPANN +# |- Euryarchaeida +# |- Proteoarchaeota +# - Eukaryota +# |- Animalia +# | |- Arthropoda +# | |- Chordata +# | | |- Mammalia +# | |- Cnidaria +# | |- Ctenophora +# | |- Gastrotrich +# | |- Placozoa +# | |- Porifera +# |- Fungi +# |- Monera +# |- Plantae +# |- Protista - model: oel_tagging.tag pk: 1 fields: diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index f82a2201..e1dbd03d 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -7,6 +7,7 @@ import ddt # type: ignore[import] import pytest +from django.db.models import QuerySet from django.test import TestCase, override_settings import openedx_tagging.core.tagging.api as tagging_api @@ -594,15 +595,11 @@ def test_autocomplete_tags(self, search: str, expected_values: list[str], expect expected_ids, ) - def test_autocompleate_not_implemented(self) -> None: - with self.assertRaises(NotImplementedError): - tagging_api.autocomplete_tags(self.taxonomy, 'test', None, object_tags_only=False) - - def _get_tag_values(self, tags) -> list[str]: + def _get_tag_values(self, tags: QuerySet[tagging_api.TagData]) -> list[str]: """ Get tag values from tagging_api.autocomplete_tags() result """ - return [tag.get("value") for tag in tags] + return [tag["value"] for tag in tags] def _get_tag_ids(self, tags) -> list[int]: """ @@ -622,7 +619,7 @@ def _validate_autocomplete_tags( """ # Normal search - result = tagging_api.autocomplete_tags(taxonomy, search) + result = tagging_api.search_tags(taxonomy, search) tag_values = self._get_tag_values(result) for value in tag_values: assert search.lower() in value.lower() @@ -644,6 +641,6 @@ def _validate_autocomplete_tags( ).save() # Search with object - result = tagging_api.autocomplete_tags(taxonomy, search, object_id) + result = tagging_api.search_tags(taxonomy, search, object_id) assert self._get_tag_values(result) == expected_values[1:] assert self._get_tag_ids(result) == expected_ids[1:] diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index 2a7131f4..af2c812b 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -1,6 +1,8 @@ """ Test the tagging base models """ +from __future__ import annotations + import ddt # type: ignore[import] import pytest from django.contrib.auth import get_user_model @@ -55,54 +57,6 @@ def setUp(self): ) self.user_2.save() - # Domain tags (depth=0) - # https://en.wikipedia.org/wiki/Domain_(biology) - self.domain_tags = [ - get_tag("Archaea"), - get_tag("Bacteria"), - get_tag("Eukaryota"), - ] - # Domain tags that contains 'ar' - self.filtered_domain_tags = [ - get_tag("Archaea"), - get_tag("Eukaryota"), - ] - - # Kingdom tags (depth=1) - self.kingdom_tags = [ - # Kingdoms of https://en.wikipedia.org/wiki/Archaea - get_tag("DPANN"), - get_tag("Euryarchaeida"), - get_tag("Proteoarchaeota"), - # Kingdoms of https://en.wikipedia.org/wiki/Bacterial_taxonomy - get_tag("Archaebacteria"), - get_tag("Eubacteria"), - # Kingdoms of https://en.wikipedia.org/wiki/Eukaryote - get_tag("Animalia"), - get_tag("Fungi"), - get_tag("Monera"), - get_tag("Plantae"), - get_tag("Protista"), - ] - - # Phylum tags (depth=2) - self.phylum_tags = [ - # Some phyla of https://en.wikipedia.org/wiki/Animalia - get_tag("Arthropoda"), - get_tag("Chordata"), - get_tag("Cnidaria"), - get_tag("Ctenophora"), - get_tag("Gastrotrich"), - get_tag("Placozoa"), - get_tag("Porifera"), - ] - # Phylum tags that contains 'da' - self.filtered_phylum_tags = [ - get_tag("Arthropoda"), - get_tag("Chordata"), - get_tag("Cnidaria"), - ] - # Biology tags that contains 'eu' self.filtered_tags = [ get_tag("Eubacteria"), @@ -132,17 +86,6 @@ def setUp(self): ) self.dummy_taxonomies.append(taxonomy) - def setup_tag_depths(self): - """ - Annotate our tags with depth so we can compare them. - """ - for tag in self.domain_tags: - tag.depth = 0 - for tag in self.kingdom_tags: - tag.depth = 1 - for tag in self.phylum_tags: - tag.depth = 2 - class TaxonomyTestSubclassA(Taxonomy): """ @@ -237,6 +180,20 @@ def test_taxonomy_cast_bad_value(self): self.taxonomy.taxonomy_class = str assert " must be a subclass of Taxonomy" in str(exc.exception) + def test_unique_tags(self): + # Creating new tag + Tag( + taxonomy=self.taxonomy, + value='New value' + ).save() + + # Creating repeated tag + with self.assertRaises(IntegrityError): + Tag( + taxonomy=self.taxonomy, + value=self.archaea.value, + ).save() + @ddt.data( # Root tags just return their own value ("bacteria", ["Bacteria"]), @@ -251,80 +208,311 @@ def test_taxonomy_cast_bad_value(self): def test_get_lineage(self, tag_attr, lineage): assert getattr(self, tag_attr).get_lineage() == lineage - def test_get_tags(self): - self.setup_tag_depths() - assert self.taxonomy.get_tags() == [ - *self.domain_tags, - *self.kingdom_tags, - *self.phylum_tags, + + +@ddt.ddt +class TestFilteredTagsClosedTaxonomy(TestTagTaxonomyMixin, TestCase): + """ + Test the the get_filtered_tags() method of closed taxonomies + """ + def test_get_root(self) -> None: + """ + Test basic retrieval of root tags in the closed taxonomy, using + get_filtered_tags(). Without counts included. + """ + result = list(self.taxonomy.get_filtered_tags(depth=1, include_counts=False)) + common_fields = {"depth": 0, "parent_value": None, "external_id": None} + assert result == [ + # These are the root tags, in alphabetical order: + {"value": "Archaea", "child_count": 3, **common_fields}, + {"value": "Bacteria", "child_count": 2, **common_fields}, + {"value": "Eukaryota", "child_count": 5, **common_fields}, ] - def test_get_root_tags(self): - assert list(self.taxonomy.get_filtered_tags()) == self.domain_tags - assert list( - self.taxonomy.get_filtered_tags(search_term='aR') - ) == self.filtered_domain_tags - - def test_get_tags_free_text(self): - self.taxonomy.allow_free_text = True - with self.assertNumQueries(0): - assert self.taxonomy.get_tags() == [] - - def test_get_children_tags(self): - assert list( - self.taxonomy.get_filtered_tags(parent_tag_id=self.animalia.id) - ) == self.phylum_tags - assert list( - self.taxonomy.get_filtered_tags( - parent_tag_id=self.animalia.id, - search_term='dA', - ) - ) == self.filtered_phylum_tags - assert not list( - self.system_taxonomy.get_filtered_tags( - parent_tag_id=self.system_taxonomy_tag.id - ) - ) + def test_get_child_tags_one_level(self) -> None: + """ + Test basic retrieval of tags one level below the "Eukaryota" root tag in + the closed taxonomy, using get_filtered_tags(). With counts included. + """ + result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Eukaryota")) + common_fields = {"depth": 1, "parent_value": "Eukaryota", "usage_count": 0, "external_id": None} + assert result == [ + # These are the Eukaryota tags, in alphabetical order: + {"value": "Animalia", "child_count": 7, **common_fields}, + {"value": "Fungi", "child_count": 0, **common_fields}, + {"value": "Monera", "child_count": 0, **common_fields}, + {"value": "Plantae", "child_count": 0, **common_fields}, + {"value": "Protista", "child_count": 0, **common_fields}, + ] - def test_get_children_tags_free_text(self): - self.taxonomy.allow_free_text = True - assert not list(self.taxonomy.get_filtered_tags( - parent_tag_id=self.animalia.id - )) - assert not list(self.taxonomy.get_filtered_tags( - parent_tag_id=self.animalia.id, - search_term='dA', - )) + def test_get_grandchild_tags_one_level(self) -> None: + """ + Test basic retrieval of a single level of tags at two level belows the + "Eukaryota" root tag in the closed taxonomy, using get_filtered_tags(). + """ + result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Animalia")) + common_fields = {"depth": 2, "parent_value": "Animalia", "usage_count": 0, "external_id": None} + assert result == [ + # These are the Eukaryota tags, in alphabetical order: + {"value": "Arthropoda", "child_count": 0, **common_fields}, + {"value": "Chordata", "child_count": 1, **common_fields}, + {"value": "Cnidaria", "child_count": 0, **common_fields}, + {"value": "Ctenophora", "child_count": 0, **common_fields}, + {"value": "Gastrotrich", "child_count": 0, **common_fields}, + {"value": "Placozoa", "child_count": 0, **common_fields}, + {"value": "Porifera", "child_count": 0, **common_fields}, + ] - def test_search_tags(self): - assert list(self.taxonomy.get_filtered_tags( - search_term='eU', - search_in_all=True - )) == self.filtered_tags - - def test_get_tags_shallow_taxonomy(self): - taxonomy = Taxonomy.objects.create(name="Difficulty") - tags = [ - Tag.objects.create(taxonomy=taxonomy, value="1. Easy"), - Tag.objects.create(taxonomy=taxonomy, value="2. Moderate"), - Tag.objects.create(taxonomy=taxonomy, value="3. Hard"), + def test_get_depth_1_search_term(self) -> None: + """ + Filter the root tags to only those that match a search term + """ + result = list(self.taxonomy.get_filtered_tags(depth=1, search_term="ARCH")) + assert result == [ + { + "value": "Archaea", + "child_count": 3, + "depth": 0, + "usage_count": 0, + "parent_value": None, + "external_id": None, + }, + ] + # Note that other tags in the taxonomy match "ARCH" but are excluded because of the depth=1 search + + def test_get_depth_1_child_search_term(self) -> None: + """ + Filter the child tags of "Bacteria" to only those that match a search term + """ + result = list(self.taxonomy.get_filtered_tags(depth=1, search_term="ARCH", parent_tag_value="Bacteria")) + assert result == [ + { + "value": "Archaebacteria", + "child_count": 0, + "depth": 1, + "usage_count": 0, + "parent_value": "Bacteria", + "external_id": None, + }, ] + # Note that other tags in the taxonomy match "ARCH" but are excluded because of the depth=1 search + + def test_depth_1_queries(self) -> None: + """ + Test the number of queries used by get_filtered_tags() with closed + taxonomies when depth=1. This should be a constant, not O(n). + """ + with self.assertNumQueries(1): + self.test_get_root() + with self.assertNumQueries(1): + self.test_get_depth_1_search_term() + # When listing the tags below a specific tag, there is one additional query to load each ancestor tag: + with self.assertNumQueries(2): + self.test_get_child_tags_one_level() with self.assertNumQueries(2): - assert taxonomy.get_tags() == tags + self.test_get_depth_1_child_search_term() + with self.assertNumQueries(3): + self.test_get_grandchild_tags_one_level() - def test_unique_tags(self): - # Creating new tag - Tag( - taxonomy=self.taxonomy, - value='New value' - ).save() + ################## - # Creating repeated tag - with self.assertRaises(IntegrityError): - Tag( - taxonomy=self.taxonomy, - value=self.archaea.value, - ).save() + @staticmethod + def _pretty_format_result(result) -> list[str]: + """ + Format a result to be more human readable. + """ + return [ + f"{t['depth'] * ' '}{t['value']} ({t['parent_value']}) " + + f"(used: {t['usage_count']}, children: {t['child_count']})" + for t in result + ] + + def test_get_all(self) -> None: + """ + Test getting all of the tags in the taxonomy, using get_filtered_tags() + """ + result = self._pretty_format_result(self.taxonomy.get_filtered_tags()) + assert result == [ + "Archaea (None) (used: 0, children: 3)", + " DPANN (Archaea) (used: 0, children: 0)", + " Euryarchaeida (Archaea) (used: 0, children: 0)", + " Proteoarchaeota (Archaea) (used: 0, children: 0)", + "Bacteria (None) (used: 0, children: 2)", + " Archaebacteria (Bacteria) (used: 0, children: 0)", + " Eubacteria (Bacteria) (used: 0, children: 0)", + "Eukaryota (None) (used: 0, children: 5)", + " Animalia (Eukaryota) (used: 0, children: 7)", + " Arthropoda (Animalia) (used: 0, children: 0)", + " Chordata (Animalia) (used: 0, children: 1)", # note this has a child but the child is not included + " Cnidaria (Animalia) (used: 0, children: 0)", + " Ctenophora (Animalia) (used: 0, children: 0)", + " Gastrotrich (Animalia) (used: 0, children: 0)", + " Placozoa (Animalia) (used: 0, children: 0)", + " Porifera (Animalia) (used: 0, children: 0)", + " Fungi (Eukaryota) (used: 0, children: 0)", + " Monera (Eukaryota) (used: 0, children: 0)", + " Plantae (Eukaryota) (used: 0, children: 0)", + " Protista (Eukaryota) (used: 0, children: 0)", + ] + + def test_search(self) -> None: + """ + Search the whole taxonomy (up to max depth) for a given term. Should + return all tags that match the term as well as their ancestors. + """ + result = self._pretty_format_result(self.taxonomy.get_filtered_tags(search_term="ARCH")) + assert result == [ + "Archaea (None) (used: 0, children: 3)", # Matches the value of this root tag, ARCHaea + " Euryarchaeida (Archaea) (used: 0, children: 0)", # Matches the value of this child tag + " Proteoarchaeota (Archaea) (used: 0, children: 0)", # Matches the value of this child tag + "Bacteria (None) (used: 0, children: 2)", # Does not match this tag but matches a descendant: + " Archaebacteria (Bacteria) (used: 0, children: 0)", # Matches the value of this child tag + ] + + def test_search_2(self) -> None: + """ + Another search test, that matches a tag deeper in the taxonomy to check + that all its ancestors are returned by the search. + """ + result = self._pretty_format_result(self.taxonomy.get_filtered_tags(search_term="chordata")) + assert result == [ + "Eukaryota (None) (used: 0, children: 5)", + " Animalia (Eukaryota) (used: 0, children: 7)", + " Chordata (Animalia) (used: 0, children: 1)", # this is the matching tag. + ] + + def test_tags_deep(self) -> None: + """ + Test getting a deep tag in the taxonomy + """ + result = list(self.taxonomy.get_filtered_tags(parent_tag_value="Chordata")) + assert result == [ + { + "value": "Mammalia", + "parent_value": "Chordata", + "depth": 3, + "usage_count": 0, + "child_count": 0, + "external_id": None, + } + ] + + def test_deep_queries(self) -> None: + """ + Test the number of queries used by get_filtered_tags() with closed + taxonomies when depth=None. This should be a constant, not O(n). + """ + with self.assertNumQueries(1): + self.test_get_all() + # Searching below a specific tag requires an additional query to load that tag: + with self.assertNumQueries(2): + self.test_tags_deep() + # Keyword search requires an additional query: + with self.assertNumQueries(2): + self.test_search() + with self.assertNumQueries(2): + self.test_search_2() + + def test_get_external_id(self) -> None: + """ + Test that if our tags have external IDs, those external IDs are returned + """ + self.bacteria.external_id = "bct001" + self.bacteria.save() + result = list(self.taxonomy.get_filtered_tags(search_term="Eubacteria")) + assert result[0]["value"] == "Bacteria" + assert result[0]["external_id"] == "bct001" + + def test_usage_count(self) -> None: + """ + Test that the usage count in the results is right + """ + api.tag_object(object_id="obj01", taxonomy=self.taxonomy, tags=["Bacteria"]) + api.tag_object(object_id="obj02", taxonomy=self.taxonomy, tags=["Bacteria"]) + api.tag_object(object_id="obj03", taxonomy=self.taxonomy, tags=["Bacteria"]) + api.tag_object(object_id="obj04", taxonomy=self.taxonomy, tags=["Eubacteria"]) + # Now the API should reflect these usage counts: + result = self._pretty_format_result(self.taxonomy.get_filtered_tags(search_term="bacteria")) + assert result == [ + "Bacteria (None) (used: 3, children: 2)", + " Archaebacteria (Bacteria) (used: 0, children: 0)", + " Eubacteria (Bacteria) (used: 1, children: 0)", + ] + # Same with depth=1, which uses a different query internally: + result1 = self._pretty_format_result(self.taxonomy.get_filtered_tags(search_term="bacteria", depth=1)) + assert result1 == [ + "Bacteria (None) (used: 3, children: 2)", + ] + + +class TestFilteredTagsFreeTextTaxonomy(TestCase): + """ + Tests for listing/autocompleting/searching for tags in a free text taxonomy. + + Free text taxonomies only return tags that are actually used. + """ + + def setUp(self): + super().setUp() + self.taxonomy = Taxonomy.objects.create(allow_free_text=True, name="FreeText") + # The "triple" tag will be applied to three objects, "double" to two, and "solo" to one: + api.tag_object(object_id="obj1", taxonomy=self.taxonomy, tags=["triple"]) + api.tag_object(object_id="obj2", taxonomy=self.taxonomy, tags=["triple", "double"]) + api.tag_object(object_id="obj3", taxonomy=self.taxonomy, tags=["triple", "double"]) + api.tag_object(object_id="obj4", taxonomy=self.taxonomy, tags=["solo"]) + + def test_get_filtered_tags(self): + """ + Test basic retrieval of all tags in the taxonomy. + Without counts included. + """ + result = list(self.taxonomy.get_filtered_tags(include_counts=False)) + common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None} + assert result == [ + # These should appear in alphabetical order: + {"value": "double", **common_fields}, + {"value": "solo", **common_fields}, + {"value": "triple", **common_fields}, + ] + + def test_get_filtered_tags_with_count(self): + """ + Test basic retrieval of all tags in the taxonomy. + Without counts included. + """ + result = list(self.taxonomy.get_filtered_tags(include_counts=True)) + common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None} + assert result == [ + # These should appear in alphabetical order: + {"value": "double", "usage_count": 2, **common_fields}, + {"value": "solo", "usage_count": 1, **common_fields}, + {"value": "triple", "usage_count": 3, **common_fields}, + ] + + def test_get_filtered_tags_num_queries(self): + """ + Test that the number of queries used by get_filtered_tags() is fixed + and not O(n) or worse. + """ + with self.assertNumQueries(1): + self.test_get_filtered_tags() + with self.assertNumQueries(1): + self.test_get_filtered_tags_with_count() + + def test_get_filtered_tags_with_search(self) -> None: + """ + Test basic retrieval of only matching tags. + """ + result1 = list(self.taxonomy.get_filtered_tags(search_term="le")) + common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None} + assert result1 == [ + # These should appear in alphabetical order: + {"value": "double", "usage_count": 2, **common_fields}, + {"value": "triple", "usage_count": 3, **common_fields}, + ] + # And it should be case insensitive: + result2 = list(self.taxonomy.get_filtered_tags(search_term="LE")) + assert result1 == result2 class TestObjectTag(TestTagTaxonomyMixin, TestCase): @@ -450,10 +638,10 @@ def test_tag_case(self) -> None: Test that the object_id is case sensitive. """ # Tag with object_id with lower case - api.tag_object(self.taxonomy, [self.domain_tags[0].value], object_id="case:id:2") + api.tag_object(self.taxonomy, [self.chordata.value], object_id="case:id:2") # Tag with object_id with upper case should not trigger IntegrityError - api.tag_object(self.taxonomy, [self.domain_tags[0].value], object_id="CASE:id:2") + api.tag_object(self.taxonomy, [self.chordata.value], object_id="CASE:id:2") # Create another ObjectTag with lower case object_id should trigger IntegrityError with transaction.atomic(): @@ -461,7 +649,7 @@ def test_tag_case(self) -> None: ObjectTag( object_id="case:id:2", taxonomy=self.taxonomy, - tag=self.domain_tags[0], + tag=self.chordata, ).save() # Create another ObjectTag with upper case object_id should trigger IntegrityError @@ -470,7 +658,7 @@ def test_tag_case(self) -> None: ObjectTag( object_id="CASE:id:2", taxonomy=self.taxonomy, - tag=self.domain_tags[0], + tag=self.chordata, ).save() def test_is_deleted(self): From 61499c104fe4b680e737218be8322a235ec86b66 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 18 Oct 2023 11:16:35 -0700 Subject: [PATCH 02/15] feat: update and simplify tests in test_template.py --- .../tagging/import_export/test_template.py | 69 +++++++++---------- .../core/tagging/test_models.py | 22 ++---- tests/openedx_tagging/core/tagging/utils.py | 24 +++++++ 3 files changed, 61 insertions(+), 54 deletions(-) create mode 100644 tests/openedx_tagging/core/tagging/utils.py diff --git a/tests/openedx_tagging/core/tagging/import_export/test_template.py b/tests/openedx_tagging/core/tagging/import_export/test_template.py index 067a338c..c25c4b28 100644 --- a/tests/openedx_tagging/core/tagging/import_export/test_template.py +++ b/tests/openedx_tagging/core/tagging/import_export/test_template.py @@ -13,6 +13,7 @@ from openedx_tagging.core.tagging.import_export import api as import_api from .mixins import TestImportExportMixin +from ..utils import pretty_format_tags @ddt.ddt @@ -47,44 +48,36 @@ def test_import_template(self, template_file, parser_format): replace=True, ), import_api.get_last_import_log(self.taxonomy) - imported_tags = [ - { - "external_id": tag.external_id, - "value": tag.value, - "parent": tag.parent.external_id if tag.parent else None, - } - for tag in get_tags(self.taxonomy) - ] - assert imported_tags == [ - {'external_id': "ELECTRIC", 'parent': None, 'value': 'Electronic instruments'}, - {'external_id': 'PERCUSS', 'parent': None, 'value': 'Percussion instruments'}, - {'external_id': 'STRINGS', 'parent': None, 'value': 'String instruments'}, - {'external_id': 'WINDS', 'parent': None, 'value': 'Wind instruments'}, - {'external_id': 'SYNTH', 'parent': 'ELECTRIC', 'value': 'Synthesizer'}, - {'external_id': 'THERAMIN', 'parent': 'ELECTRIC', 'value': 'Theramin'}, - {'external_id': 'CHORD', 'parent': 'PERCUSS', 'value': 'Chordophone'}, - {'external_id': 'BELLS', 'parent': 'PERCUSS', 'value': 'Idiophone'}, - {'external_id': 'DRUMS', 'parent': 'PERCUSS', 'value': 'Membranophone'}, - {'external_id': 'BOW', 'parent': 'STRINGS', 'value': 'Bowed strings'}, - {'external_id': 'PLUCK', 'parent': 'STRINGS', 'value': 'Plucked strings'}, - {'external_id': 'BRASS', 'parent': 'WINDS', 'value': 'Brass'}, - {'external_id': 'WOODS', 'parent': 'WINDS', 'value': 'Woodwinds'}, - {'external_id': 'CELLO', 'parent': 'BOW', 'value': 'Cello'}, - {'external_id': 'VIOLIN', 'parent': 'BOW', 'value': 'Violin'}, - {'external_id': 'TRUMPET', 'parent': 'BRASS', 'value': 'Trumpet'}, - {'external_id': 'TUBA', 'parent': 'BRASS', 'value': 'Tuba'}, - {'external_id': 'PIANO', 'parent': 'CHORD', 'value': 'Piano'}, + assert pretty_format_tags(get_tags(self.taxonomy), external_id=True, usage_count=False) == [ + 'Electronic instruments (ELECTRIC) (None) (children: 2)', + ' Synthesizer (SYNTH) (Electronic instruments) (children: 0)', + ' Theramin (THERAMIN) (Electronic instruments) (children: 0)', + 'Percussion instruments (PERCUSS) (None) (children: 3)', + ' Chordophone (CHORD) (Percussion instruments) (children: 1)', + ' Piano (PIANO) (Chordophone) (children: 0)', + ' Idiophone (BELLS) (Percussion instruments) (children: 2)', + ' Celesta (CELESTA) (Idiophone) (children: 0)', + ' Hi-hat (HI-HAT) (Idiophone) (children: 0)', + ' Membranophone (DRUMS) (Percussion instruments) (children: 2)', + ' Cajón (CAJÓN) (Membranophone) (children: 1)', # This tag is present in the import files, but it will be omitted from get_tags() # because it sits beyond TAXONOMY_MAX_DEPTH. - # {'external_id': 'PYLE', 'parent': 'CAJÓN', 'value': 'Pyle Stringed Jam Cajón'}, - {'external_id': 'CELESTA', 'parent': 'BELLS', 'value': 'Celesta'}, - {'external_id': 'HI-HAT', 'parent': 'BELLS', 'value': 'Hi-hat'}, - {'external_id': 'CAJÓN', 'parent': 'DRUMS', 'value': 'Cajón'}, - {'external_id': 'TABLA', 'parent': 'DRUMS', 'value': 'Tabla'}, - {'external_id': 'BANJO', 'parent': 'PLUCK', 'value': 'Banjo'}, - {'external_id': 'HARP', 'parent': 'PLUCK', 'value': 'Harp'}, - {'external_id': 'MANDOLIN', 'parent': 'PLUCK', 'value': 'Mandolin'}, - {'external_id': 'CLARINET', 'parent': 'WOODS', 'value': 'Clarinet'}, - {'external_id': 'FLUTE', 'parent': 'WOODS', 'value': 'Flute'}, - {'external_id': 'OBOE', 'parent': 'WOODS', 'value': 'Oboe'}, + # Pyle Stringed Jam Cajón (PYLE) (Cajón) (children: 0) + ' Tabla (TABLA) (Membranophone) (children: 0)', + 'String instruments (STRINGS) (None) (children: 2)', + ' Bowed strings (BOW) (String instruments) (children: 2)', + ' Cello (CELLO) (Bowed strings) (children: 0)', + ' Violin (VIOLIN) (Bowed strings) (children: 0)', + ' Plucked strings (PLUCK) (String instruments) (children: 3)', + ' Banjo (BANJO) (Plucked strings) (children: 0)', + ' Harp (HARP) (Plucked strings) (children: 0)', + ' Mandolin (MANDOLIN) (Plucked strings) (children: 0)', + 'Wind instruments (WINDS) (None) (children: 2)', + ' Brass (BRASS) (Wind instruments) (children: 2)', + ' Trumpet (TRUMPET) (Brass) (children: 0)', + ' Tuba (TUBA) (Brass) (children: 0)', + ' Woodwinds (WOODS) (Wind instruments) (children: 3)', + ' Clarinet (CLARINET) (Woodwinds) (children: 0)', + ' Flute (FLUTE) (Woodwinds) (children: 0)', + ' Oboe (OBOE) (Woodwinds) (children: 0)', ] diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index af2c812b..c3ff48ae 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -13,6 +13,7 @@ from openedx_tagging.core.tagging import api from openedx_tagging.core.tagging.models import LanguageTaxonomy, ObjectTag, Tag, Taxonomy +from .utils import pretty_format_tags def get_tag(value): @@ -316,22 +317,11 @@ def test_depth_1_queries(self) -> None: ################## - @staticmethod - def _pretty_format_result(result) -> list[str]: - """ - Format a result to be more human readable. - """ - return [ - f"{t['depth'] * ' '}{t['value']} ({t['parent_value']}) " + - f"(used: {t['usage_count']}, children: {t['child_count']})" - for t in result - ] - def test_get_all(self) -> None: """ Test getting all of the tags in the taxonomy, using get_filtered_tags() """ - result = self._pretty_format_result(self.taxonomy.get_filtered_tags()) + result = pretty_format_tags(self.taxonomy.get_filtered_tags()) assert result == [ "Archaea (None) (used: 0, children: 3)", " DPANN (Archaea) (used: 0, children: 0)", @@ -360,7 +350,7 @@ def test_search(self) -> None: Search the whole taxonomy (up to max depth) for a given term. Should return all tags that match the term as well as their ancestors. """ - result = self._pretty_format_result(self.taxonomy.get_filtered_tags(search_term="ARCH")) + result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="ARCH")) assert result == [ "Archaea (None) (used: 0, children: 3)", # Matches the value of this root tag, ARCHaea " Euryarchaeida (Archaea) (used: 0, children: 0)", # Matches the value of this child tag @@ -374,7 +364,7 @@ def test_search_2(self) -> None: Another search test, that matches a tag deeper in the taxonomy to check that all its ancestors are returned by the search. """ - result = self._pretty_format_result(self.taxonomy.get_filtered_tags(search_term="chordata")) + result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="chordata")) assert result == [ "Eukaryota (None) (used: 0, children: 5)", " Animalia (Eukaryota) (used: 0, children: 7)", @@ -432,14 +422,14 @@ def test_usage_count(self) -> None: api.tag_object(object_id="obj03", taxonomy=self.taxonomy, tags=["Bacteria"]) api.tag_object(object_id="obj04", taxonomy=self.taxonomy, tags=["Eubacteria"]) # Now the API should reflect these usage counts: - result = self._pretty_format_result(self.taxonomy.get_filtered_tags(search_term="bacteria")) + result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="bacteria")) assert result == [ "Bacteria (None) (used: 3, children: 2)", " Archaebacteria (Bacteria) (used: 0, children: 0)", " Eubacteria (Bacteria) (used: 1, children: 0)", ] # Same with depth=1, which uses a different query internally: - result1 = self._pretty_format_result(self.taxonomy.get_filtered_tags(search_term="bacteria", depth=1)) + result1 = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="bacteria", depth=1)) assert result1 == [ "Bacteria (None) (used: 3, children: 2)", ] diff --git a/tests/openedx_tagging/core/tagging/utils.py b/tests/openedx_tagging/core/tagging/utils.py new file mode 100644 index 00000000..fb24187d --- /dev/null +++ b/tests/openedx_tagging/core/tagging/utils.py @@ -0,0 +1,24 @@ +""" +Useful utilities for testing tagging and taxonomy code. +""" + + +def pretty_format_tags(result, parent=True, external_id=False, usage_count=True) -> list[str]: + """ + Format the result of get_filtered_tags() to be more human readable. + + Also works with other wrappers around get_filtered_tags, like api.get_tags() + """ + pretty_results = [] + for t in result: + line = f"{t['depth'] * ' '}{t['value']} " + if external_id: + line += f"({t['external_id']}) " + if parent: + line += f"({t['parent_value']}) " + line += "(" + if usage_count: + line += f"used: {t.get('usage_count', '?')}, " + line += f"children: {t['child_count']})" + pretty_results.append(line) + return pretty_results From c46edaad6ec554f959a8fc7e744a4a3fee99ad5a Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 18 Oct 2023 12:19:16 -0700 Subject: [PATCH 03/15] chore: get API tests passing --- openedx_tagging/core/tagging/api.py | 11 +- .../core/tagging/import_export/parsers.py | 2 +- openedx_tagging/core/tagging/models/base.py | 19 +- .../openedx_tagging/core/tagging/test_api.py | 262 ++++++++++++++---- 4 files changed, 228 insertions(+), 66 deletions(-) diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py index 58a04356..367fa91f 100644 --- a/openedx_tagging/core/tagging/api.py +++ b/openedx_tagging/core/tagging/api.py @@ -90,7 +90,7 @@ def get_root_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: return taxonomy.cast().get_filtered_tags(depth=1) -def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: int | None = None) -> QuerySet[TagData]: +def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | None = None) -> QuerySet[TagData]: """ Returns a list of all tags that contains `search_term` of the given taxonomy, as well as their ancestors (so they can be displayed in a tree). @@ -99,15 +99,16 @@ def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: int | N excluded from the results, e.g. to power an autocomplete search when adding additional tags to an object. """ - qs = taxonomy.cast().get_filtered_tags(search_term=search_term) + excluded_values = None if exclude_object_id: - # Fetch tags that the object already has to exclude them from the result + # Fetch tags that the object already has to exclude them from the result. + # Note: this adds a fair bit of complexity. In the future, maybe we can just do this filtering on the frontend? excluded_values = list( taxonomy.objecttag_set.filter(object_id=exclude_object_id).values_list( "_value", flat=True ) ) - qs = qs.exclude(value__in=excluded_values) + qs = taxonomy.cast().get_filtered_tags(search_term=search_term, excluded_values=excluded_values) return qs @@ -120,7 +121,7 @@ def get_children_tags( Note that if the taxonomy allows free-text tags, then the returned list will be empty. """ - return taxonomy.cast().get_filtered_tags(parent_tag_value=parent_tag_value) + return taxonomy.cast().get_filtered_tags(parent_tag_value=parent_tag_value, depth=1) def resync_object_tags(object_tags: QuerySet | None = None) -> int: diff --git a/openedx_tagging/core/tagging/import_export/parsers.py b/openedx_tagging/core/tagging/import_export/parsers.py index e5db49aa..72d43822 100644 --- a/openedx_tagging/core/tagging/import_export/parsers.py +++ b/openedx_tagging/core/tagging/import_export/parsers.py @@ -168,7 +168,7 @@ def _load_tags_for_export(cls, taxonomy: Taxonomy) -> list[dict]: The tags are ordered by hierarchy, first, parents and then children. `get_tags` is in charge of returning this in a hierarchical way. """ - tags = Taxonomy.get_filtered_tags().all() + tags = taxonomy.get_filtered_tags().all() result = [] for tag in tags: result_tag = { diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index b5656752..b7f113d2 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -300,6 +300,7 @@ def get_filtered_tags( parent_tag_value: str | None = None, search_term: str | None = None, include_counts: bool = True, + excluded_values: list[str] | None = None, ) -> models.QuerySet[TagData]: """ Returns a filtered QuerySet of tag values. @@ -316,24 +317,35 @@ def get_filtered_tags( Use `search_term` to filter the results by values that contains `search_term`. + Use `excluded_values` to exclude tags with that value (and their parents, if applicable) from the results. + Note: This is mostly an 'internal' API and generally code outside of openedx_tagging should use the APIs in openedx_tagging.api which in turn use this. """ if self.allow_free_text: if parent_tag_value is not None: raise ValueError("Cannot specify a parent tag ID for free text taxonomies") - return self._get_filtered_tags_free_text(search_term=search_term, include_counts=include_counts) + result = self._get_filtered_tags_free_text(search_term=search_term, include_counts=include_counts) + if excluded_values: + return result.exclude(value__in=excluded_values) + else: + return result elif depth == 1: - return self._get_filtered_tags_one_level( + result = self._get_filtered_tags_one_level( parent_tag_value=parent_tag_value, search_term=search_term, include_counts=include_counts, ) + if excluded_values: + return result.exclude(value__in=excluded_values) + else: + return result elif depth is None or depth == TAXONOMY_MAX_DEPTH: return self._get_filtered_tags_deep( parent_tag_value=parent_tag_value, search_term=search_term, include_counts=include_counts, + excluded_values=excluded_values, ) else: raise ValueError("Unsupported depth value for get_filtered_tags()") @@ -409,6 +421,7 @@ def _get_filtered_tags_deep( parent_tag_value: str | None, search_term: str | None, include_counts: bool, + excluded_values: list[str] | None, ) -> models.QuerySet[TagData]: """ Implementation of get_filtered_tags() for closed taxonomies, where @@ -433,6 +446,8 @@ def _get_filtered_tags_deep( matching_tags = qs.filter(value__icontains=search_term).values( 'id', 'parent_id', 'parent__parent_id', 'parent__parent__parent_id' ) + if excluded_values: + matching_tags = matching_tags.exclude(value__in=excluded_values) matching_ids = [] for row in matching_tags: for pk in row.values(): diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index e1dbd03d..b488c81c 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -14,6 +14,7 @@ from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy from .test_models import TestTagTaxonomyMixin, get_tag +from .utils import pretty_format_tags test_languages = [ ("az", "Azerbaijani"), @@ -31,6 +32,17 @@ ("pl", "Polish"), ] +tag_values_for_autocomplete_test = [ + 'Archaea', + 'Archaebacteria', + 'Animalia', + 'Arthropoda', + 'Plantae', + 'Monera', + 'Gastrotrich', + 'Placozoa', +] + @ddt.ddt class TestApiTagging(TestTagTaxonomyMixin, TestCase): @@ -103,20 +115,56 @@ def test_get_taxonomies(self) -> None: self.user_taxonomy, ] + self.dummy_taxonomies - @override_settings(LANGUAGES=test_languages) def test_get_tags(self) -> None: - self.setup_tag_depths() - assert tagging_api.get_tags(self.taxonomy) == [ - *self.domain_tags, - *self.kingdom_tags, - *self.phylum_tags, + assert pretty_format_tags(tagging_api.get_tags(self.taxonomy), parent=False, usage_count=False) == [ + "Archaea (children: 3)", + " DPANN (children: 0)", + " Euryarchaeida (children: 0)", + " Proteoarchaeota (children: 0)", + "Bacteria (children: 2)", + " Archaebacteria (children: 0)", + " Eubacteria (children: 0)", + "Eukaryota (children: 5)", + " Animalia (children: 7)", + " Arthropoda (children: 0)", + " Chordata (children: 1)", # The child of this is excluded due to depth limit + " Cnidaria (children: 0)", + " Ctenophora (children: 0)", + " Gastrotrich (children: 0)", + " Placozoa (children: 0)", + " Porifera (children: 0)", + " Fungi (children: 0)", + " Monera (children: 0)", + " Plantae (children: 0)", + " Protista (children: 0)", ] - assert tagging_api.get_tags(self.system_taxonomy) == self.system_tags @override_settings(LANGUAGES=test_languages) + def test_get_tags_system(self) -> None: + assert pretty_format_tags(tagging_api.get_tags(self.system_taxonomy), parent=False, usage_count=False) == [ + "System Tag 1 (children: 0)", + "System Tag 2 (children: 0)", + "System Tag 3 (children: 0)", + "System Tag 4 (children: 0)", + ] + def test_get_root_tags(self): - assert tagging_api.get_root_tags(self.taxonomy) == self.domain_tags - assert tagging_api.get_root_tags(self.system_taxonomy) == self.system_tags + root_life_on_earth_tags = tagging_api.get_root_tags(self.taxonomy) + assert pretty_format_tags(root_life_on_earth_tags, parent=False, usage_count=False) == [ + 'Archaea (children: 3)', + 'Bacteria (children: 2)', + 'Eukaryota (children: 5)', + ] + + @override_settings(LANGUAGES=test_languages) + def test_get_root_tags_system(self): + result = tagging_api.get_root_tags(self.system_taxonomy) + assert pretty_format_tags(result, parent=False, usage_count=False) == [ + 'System Tag 1 (children: 0)', + 'System Tag 2 (children: 0)', + 'System Tag 3 (children: 0)', + 'System Tag 4 (children: 0)', + ] @override_settings(LANGUAGES=test_languages) def test_get_root_language_tags(self): @@ -125,7 +173,7 @@ def test_get_root_language_tags(self): tags that have been used at least once. """ before_langs = [ - tag.external_id for tag in + tag["external_id"] for tag in tagging_api.get_root_tags(self.language_taxonomy) ] assert before_langs == ["en"] @@ -134,17 +182,21 @@ def test_get_root_language_tags(self): tagging_api.tag_object(object_id="foo", taxonomy=self.language_taxonomy, tags=[lang_value]) # now a search will return matching tags: after_langs = [ - tag.external_id for tag in + tag["external_id"] for tag in tagging_api.get_root_tags(self.language_taxonomy) ] expected_langs = [lang_code for lang_code, _ in test_languages] assert after_langs == expected_langs - def test_search_tags(self): - assert tagging_api.search_tags( - self.taxonomy, - search_term='eU' - ) == self.filtered_tags + def test_search_tags(self) -> None: + result = tagging_api.search_tags(self.taxonomy, search_term='eU') + assert pretty_format_tags(result, parent=False, usage_count=False) == [ + 'Archaea (children: 3)', # Doesn't match 'eU' but is included because a child is included + ' Euryarchaeida (children: 0)', + 'Bacteria (children: 2)', # Doesn't match 'eU' but is included because a child is included + ' Eubacteria (children: 0)', + 'Eukaryota (children: 5)', + ] @override_settings(LANGUAGES=test_languages) def test_search_language_tags(self): @@ -153,7 +205,7 @@ def test_search_language_tags(self): tags that have been used at least once. """ before_langs = [ - tag.external_id for tag in + tag["external_id"] for tag in tagging_api.search_tags(self.language_taxonomy, search_term='IsH') ] assert before_langs == ["en"] @@ -162,30 +214,43 @@ def test_search_language_tags(self): tagging_api.tag_object(object_id="foo", taxonomy=self.language_taxonomy, tags=[lang_value]) # now a search will return matching tags: after_langs = [ - tag.external_id for tag in + tag["external_id"] for tag in tagging_api.search_tags(self.language_taxonomy, search_term='IsH') ] expected_langs = [lang_code for lang_code, _ in filtered_test_languages] assert after_langs == expected_langs - def test_get_children_tags(self): - assert tagging_api.get_children_tags( - self.taxonomy, - self.animalia.id, - ) == self.phylum_tags - assert tagging_api.get_children_tags( - self.taxonomy, - self.animalia.id, - search_term='dA', - ) == self.filtered_phylum_tags - assert not tagging_api.get_children_tags( - self.system_taxonomy, - self.system_taxonomy_tag.id, - ) - assert not tagging_api.get_children_tags( - self.language_taxonomy, - self.english_tag, - ) + def test_get_children_tags(self) -> None: + """ + Test getting the children of a particular tag in a closed taxonomy. + """ + result1 = tagging_api.get_children_tags(self.taxonomy, "Animalia") + assert pretty_format_tags(result1, parent=False, usage_count=False) == [ + ' Arthropoda (children: 0)', + ' Chordata (children: 1)', + # Mammalia is a child of Chordata but excluded here. + ' Cnidaria (children: 0)', + ' Ctenophora (children: 0)', + ' Gastrotrich (children: 0)', + ' Placozoa (children: 0)', + ' Porifera (children: 0)', + ] + + def test_get_children_tags_invalid_taxonomy(self) -> None: + """ + Calling get_children_tags on free text taxonomies gives an error. + """ + free_text_taxonomy = Taxonomy.objects.create(allow_free_text=True, name="FreeText") + tagging_api.tag_object(object_id="obj1", taxonomy=free_text_taxonomy, tags=["some_tag"]) + with self.assertRaises(ValueError) as exc: + tagging_api.get_children_tags(free_text_taxonomy, "some_tag") + assert "Cannot specify a parent tag ID for free text taxonomies" in str(exc.exception) + + def test_get_children_tags_no_children(self) -> None: + """ + Trying to get children of a system tag that has no children yields an empty result: + """ + assert list(tagging_api.get_children_tags(self.system_taxonomy, self.system_taxonomy_tag.value)) == [] def test_resync_object_tags(self) -> None: self.taxonomy.allow_multiple = True @@ -580,20 +645,105 @@ def test_autocomplete_tags(self, search: str, expected_values: list[str], expect ).save() # Test for open taxonomy - self._validate_autocomplete_tags( - open_taxonomy, - search, - expected_values, - [None] * len(expected_ids), - ) + # self._validate_autocomplete_tags( + # open_taxonomy, + # search, + # expected_values, + # [None] * len(expected_ids), + # ) + + # # Test for closed taxonomy + # self._validate_autocomplete_tags( + # closed_taxonomy, + # search, + # expected_values, + # expected_ids, + # ) - # Test for closed taxonomy - self._validate_autocomplete_tags( - closed_taxonomy, - search, - expected_values, - expected_ids, - ) + @ddt.data( + ("ChA", [ + "Archaea (used: 1, children: 3)", + " Euryarchaeida (used: 0, children: 0)", + " Proteoarchaeota (used: 0, children: 0)", + "Bacteria (used: 0, children: 2)", # does not contain "cha" but a child does + " Archaebacteria (used: 1, children: 0)", + ]), + ("ar", [ + "Archaea (used: 1, children: 3)", + " Euryarchaeida (used: 0, children: 0)", + " Proteoarchaeota (used: 0, children: 0)", + "Bacteria (used: 0, children: 2)", # does not contain "ar" but a child does + " Archaebacteria (used: 1, children: 0)", + "Eukaryota (used: 0, children: 5)", + " Animalia (used: 1, children: 7)", # does not contain "ar" but a child does + " Arthropoda (used: 1, children: 0)", + " Cnidaria (used: 0, children: 0)", + ]), + ("aE", [ + "Archaea (used: 1, children: 3)", + " Euryarchaeida (used: 0, children: 0)", + " Proteoarchaeota (used: 0, children: 0)", + "Bacteria (used: 0, children: 2)", # does not contain "ae" but a child does + " Archaebacteria (used: 1, children: 0)", + "Eukaryota (used: 0, children: 5)", # does not contain "ae" but a child does + " Plantae (used: 1, children: 0)", + ]), + ("a", [ + "Archaea (used: 1, children: 3)", + " DPANN (used: 0, children: 0)", + " Euryarchaeida (used: 0, children: 0)", + " Proteoarchaeota (used: 0, children: 0)", + "Bacteria (used: 0, children: 2)", + " Archaebacteria (used: 1, children: 0)", + " Eubacteria (used: 0, children: 0)", + "Eukaryota (used: 0, children: 5)", + " Animalia (used: 1, children: 7)", + " Arthropoda (used: 1, children: 0)", + " Chordata (used: 0, children: 1)", + " Cnidaria (used: 0, children: 0)", + " Ctenophora (used: 0, children: 0)", + " Gastrotrich (used: 1, children: 0)", + " Placozoa (used: 1, children: 0)", + " Porifera (used: 0, children: 0)", + " Monera (used: 1, children: 0)", + " Plantae (used: 1, children: 0)", + " Protista (used: 0, children: 0)", + ]), + ) + @ddt.unpack + def test_autocomplete_tags_closed(self, search: str, expected: list[str]) -> None: + """ + Test autocompletion/search for tags using a closed taxonomy. + """ + closed_taxonomy = self.taxonomy + for index, value in enumerate(tag_values_for_autocomplete_test): + # Creating ObjectTags for closed taxonomy + tag = get_tag(value) + ObjectTag( + object_id=f"object_id_{index}", + taxonomy=closed_taxonomy, + tag=tag, + _value=value, + ).save() + + result = tagging_api.search_tags(closed_taxonomy, search) + assert pretty_format_tags(result, parent=False) == expected + + def test_autocomplete_tags_closed_omit_object(self) -> None: + """ + Test autocomplete search that omits the tags from a specific object + """ + object_id = 'new_object_id' + tagging_api.tag_object(object_id=object_id, taxonomy=self.taxonomy, tags=["Archaebacteria"]) + result = tagging_api.search_tags(self.taxonomy, "ChA", exclude_object_id=object_id) + assert pretty_format_tags(result, parent=False) == [ + "Archaea (used: 0, children: 3)", + " Euryarchaeida (used: 0, children: 0)", + " Proteoarchaeota (used: 0, children: 0)", + # These results are no longer included because of exclude_object_id: + #"Bacteria (used: 0, children: 2)", # does not contain "cha" but a child does + #" Archaebacteria (used: 1, children: 0)", + ] def _get_tag_values(self, tags: QuerySet[tagging_api.TagData]) -> list[str]: """ @@ -611,8 +761,7 @@ def _validate_autocomplete_tags( self, taxonomy: Taxonomy, search: str, - expected_values: list[str], - expected_ids: list[int | None], + expected: list[str], ) -> None: """ Validate autocomplete tags @@ -620,24 +769,21 @@ def _validate_autocomplete_tags( # Normal search result = tagging_api.search_tags(taxonomy, search) - tag_values = self._get_tag_values(result) - for value in tag_values: - assert search.lower() in value.lower() - assert tag_values == expected_values - assert self._get_tag_ids(result) == expected_ids + assert pretty_format_tags(result, parent=False) == expected # Create ObjectTag to simulate the content tagging + first_value = next(t for t in result if search.lower() in t["value"].lower()) tag_model = None if not taxonomy.allow_free_text: - tag_model = get_tag(tag_values[0]) + tag_model = get_tag(first_value) object_id = 'new_object_id' ObjectTag( object_id=object_id, taxonomy=taxonomy, tag=tag_model, - _value=tag_values[0], + _value=first_value, ).save() # Search with object From 3eafff772a6b5a365205610593c9a782a9b6d966 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 18 Oct 2023 12:32:33 -0700 Subject: [PATCH 04/15] chore: get import/export tests passing --- openedx_tagging/core/tagging/data.py | 2 ++ .../core/tagging/import_export/parsers.py | 10 ++++++---- openedx_tagging/core/tagging/models/base.py | 9 ++++++--- tests/openedx_tagging/core/tagging/test_models.py | 15 ++++++++++++--- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/openedx_tagging/core/tagging/data.py b/openedx_tagging/core/tagging/data.py index b1a0ec3e..a6644205 100644 --- a/openedx_tagging/core/tagging/data.py +++ b/openedx_tagging/core/tagging/data.py @@ -22,3 +22,5 @@ class TagData(TypedDict): parent_value: str | None # Note: usage_count may not actually be present but there's no way to indicate that w/ python types at the moment usage_count: int + # Internal database ID, if any. Generally should not be used; prefer 'value' which is unique within each taxonomy. + _id: int | None diff --git a/openedx_tagging/core/tagging/import_export/parsers.py b/openedx_tagging/core/tagging/import_export/parsers.py index 72d43822..823a8a24 100644 --- a/openedx_tagging/core/tagging/import_export/parsers.py +++ b/openedx_tagging/core/tagging/import_export/parsers.py @@ -166,17 +166,19 @@ def _load_tags_for_export(cls, taxonomy: Taxonomy) -> list[dict]: with required and optional fields The tags are ordered by hierarchy, first, parents and then children. - `get_tags` is in charge of returning this in a hierarchical way. + `get_filtered_tags` is in charge of returning this in a hierarchical + way. """ tags = taxonomy.get_filtered_tags().all() result = [] for tag in tags: result_tag = { - "id": tag["external_id"] or tag["id"], + "id": tag["external_id"] or tag["_id"], "value": tag["value"], } - if tag.parent: - result_tag["parent_id"] = tag.parent.external_id or tag.parent.id + if tag["parent_value"]: + parent_tag = next(t for t in tags if t["value"] == tag["parent_value"]) + result_tag["parent_id"] = parent_tag["external_id"] or parent_tag["_id"] result.append(result_tag) return result diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index b7f113d2..7900e440 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -370,8 +370,9 @@ def _get_filtered_tags_free_text( child_count=Value(0), external_id=Value(None, output_field=models.CharField()), parent_value=Value(None, output_field=models.CharField()), + _id=Value(None, output_field=models.CharField()), ) - qs = qs.values("value", "child_count", "depth", "parent_value", "external_id").order_by("value") + qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value") if include_counts: return qs.annotate(usage_count=models.Count("value")) else: @@ -402,7 +403,8 @@ def _get_filtered_tags_one_level( # Filter by search term: if search_term: qs = qs.filter(value__icontains=search_term) - qs = qs.values("value", "child_count", "depth", "parent_value", "external_id").order_by("value") + qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID + qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("value") if include_counts: # We need to include the count of how many times this tag is used to tag objects. # You'd think we could just use: @@ -468,7 +470,8 @@ def _get_filtered_tags_deep( )) # Add the parent value qs = qs.annotate(parent_value=F("parent__value")) - qs = qs.values("value", "child_count", "depth", "parent_value", "external_id").order_by("sort_key") + qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID + qs = qs.values("value", "child_count", "depth", "parent_value", "external_id", "_id").order_by("sort_key") if include_counts: # Including the counts is a bit tricky; see the comment above in _get_filtered_tags_one_level() obj_tags = ObjectTag.objects.filter(tag_id=models.OuterRef("pk")).order_by().annotate( diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index c3ff48ae..a47ec5f1 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -223,6 +223,8 @@ def test_get_root(self) -> None: """ result = list(self.taxonomy.get_filtered_tags(depth=1, include_counts=False)) common_fields = {"depth": 0, "parent_value": None, "external_id": None} + for r in result: + del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them assert result == [ # These are the root tags, in alphabetical order: {"value": "Archaea", "child_count": 3, **common_fields}, @@ -237,6 +239,8 @@ def test_get_child_tags_one_level(self) -> None: """ result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Eukaryota")) common_fields = {"depth": 1, "parent_value": "Eukaryota", "usage_count": 0, "external_id": None} + for r in result: + del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them assert result == [ # These are the Eukaryota tags, in alphabetical order: {"value": "Animalia", "child_count": 7, **common_fields}, @@ -253,6 +257,8 @@ def test_get_grandchild_tags_one_level(self) -> None: """ result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Animalia")) common_fields = {"depth": 2, "parent_value": "Animalia", "usage_count": 0, "external_id": None} + for r in result: + del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them assert result == [ # These are the Eukaryota tags, in alphabetical order: {"value": "Arthropoda", "child_count": 0, **common_fields}, @@ -277,6 +283,7 @@ def test_get_depth_1_search_term(self) -> None: "usage_count": 0, "parent_value": None, "external_id": None, + "_id": 2, # These IDs are hard-coded in the test fixture file }, ] # Note that other tags in the taxonomy match "ARCH" but are excluded because of the depth=1 search @@ -294,6 +301,7 @@ def test_get_depth_1_child_search_term(self) -> None: "usage_count": 0, "parent_value": "Bacteria", "external_id": None, + "_id": 5, # These IDs are hard-coded in the test fixture file }, ] # Note that other tags in the taxonomy match "ARCH" but are excluded because of the depth=1 search @@ -384,6 +392,7 @@ def test_tags_deep(self) -> None: "usage_count": 0, "child_count": 0, "external_id": None, + "_id": 21, # These IDs are hard-coded in the test fixture file } ] @@ -457,7 +466,7 @@ def test_get_filtered_tags(self): Without counts included. """ result = list(self.taxonomy.get_filtered_tags(include_counts=False)) - common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None} + common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None, "_id": None} assert result == [ # These should appear in alphabetical order: {"value": "double", **common_fields}, @@ -471,7 +480,7 @@ def test_get_filtered_tags_with_count(self): Without counts included. """ result = list(self.taxonomy.get_filtered_tags(include_counts=True)) - common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None} + common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None, "_id": None} assert result == [ # These should appear in alphabetical order: {"value": "double", "usage_count": 2, **common_fields}, @@ -494,7 +503,7 @@ def test_get_filtered_tags_with_search(self) -> None: Test basic retrieval of only matching tags. """ result1 = list(self.taxonomy.get_filtered_tags(search_term="le")) - common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None} + common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None, "_id": None} assert result1 == [ # These should appear in alphabetical order: {"value": "double", "usage_count": 2, **common_fields}, From 4f2e3f74b58aa7ec577cd6e1eca5ac08929e85f0 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 18 Oct 2023 14:59:53 -0700 Subject: [PATCH 05/15] feat: refactor views to use new get_filtered_tags() --- openedx_tagging/core/tagging/api.py | 10 +- openedx_tagging/core/tagging/data.py | 15 +- .../core/tagging/import_export/parsers.py | 1 - openedx_tagging/core/tagging/models/base.py | 26 +- openedx_tagging/core/tagging/models/utils.py | 24 ++ .../core/tagging/rest_api/v1/serializers.py | 108 ++----- .../core/tagging/rest_api/v1/views.py | 205 ++++-------- .../tagging/import_export/test_template.py | 2 +- .../openedx_tagging/core/tagging/test_api.py | 123 +------- .../core/tagging/test_models.py | 31 +- .../core/tagging/test_views.py | 293 ++++++++++++------ tests/openedx_tagging/core/tagging/utils.py | 1 + 12 files changed, 373 insertions(+), 466 deletions(-) create mode 100644 openedx_tagging/core/tagging/models/utils.py diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py index 367fa91f..3b2b2ba9 100644 --- a/openedx_tagging/core/tagging/api.py +++ b/openedx_tagging/core/tagging/api.py @@ -16,7 +16,7 @@ from django.db.models import QuerySet from django.utils.translation import gettext as _ -from .data import TagData +from .data import TagDataQuerySet from .models import ObjectTag, Tag, Taxonomy # Export this as part of the API @@ -71,7 +71,7 @@ def get_taxonomies(enabled=True) -> QuerySet[Taxonomy]: return queryset.filter(enabled=enabled) -def get_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: +def get_tags(taxonomy: Taxonomy) -> TagDataQuerySet: """ Returns a QuerySet of all the tags in the given taxonomy. @@ -81,7 +81,7 @@ def get_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: return taxonomy.cast().get_filtered_tags() -def get_root_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: +def get_root_tags(taxonomy: Taxonomy) -> TagDataQuerySet: """ Returns a list of the root tags for the given taxonomy. @@ -90,7 +90,7 @@ def get_root_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: return taxonomy.cast().get_filtered_tags(depth=1) -def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | None = None) -> QuerySet[TagData]: +def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | None = None) -> TagDataQuerySet: """ Returns a list of all tags that contains `search_term` of the given taxonomy, as well as their ancestors (so they can be displayed in a tree). @@ -115,7 +115,7 @@ def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | N def get_children_tags( taxonomy: Taxonomy, parent_tag_value: str, -) -> QuerySet[TagData]: +) -> TagDataQuerySet: """ Returns a QuerySet of children tags for the given parent tag. diff --git a/openedx_tagging/core/tagging/data.py b/openedx_tagging/core/tagging/data.py index a6644205..02bbed41 100644 --- a/openedx_tagging/core/tagging/data.py +++ b/openedx_tagging/core/tagging/data.py @@ -3,7 +3,10 @@ """ from __future__ import annotations -from typing import TypedDict +from typing import TYPE_CHECKING, Any, TypedDict + +from django.db.models import QuerySet +from typing_extensions import TypeAlias class TagData(TypedDict): @@ -24,3 +27,13 @@ class TagData(TypedDict): usage_count: int # Internal database ID, if any. Generally should not be used; prefer 'value' which is unique within each taxonomy. _id: int | None + + +if TYPE_CHECKING: + from django_stubs_ext import ValuesQuerySet + TagDataQuerySet: TypeAlias = ValuesQuerySet[Any, TagData] + # The following works better for pyright (provides proper VS Code autocompletions), + # but I can't find any way to specify different types for pyright vs mypy :/ + # TagDataQuerySet: TypeAlias = QuerySet[TagData] +else: + TagDataQuerySet = QuerySet[TagData] diff --git a/openedx_tagging/core/tagging/import_export/parsers.py b/openedx_tagging/core/tagging/import_export/parsers.py index 823a8a24..90268ff6 100644 --- a/openedx_tagging/core/tagging/import_export/parsers.py +++ b/openedx_tagging/core/tagging/import_export/parsers.py @@ -10,7 +10,6 @@ from django.utils.translation import gettext as _ -from ..api import get_tags from ..models import Taxonomy from .exceptions import EmptyCSVField, EmptyJSONField, FieldJSONError, InvalidFormat, TagParserError from .import_plan import TagItem diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index 7900e440..5428f686 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -17,7 +17,8 @@ from openedx_learning.lib.fields import MultiCollationTextField, case_insensitive_char_field, case_sensitive_char_field -from ..data import TagData +from ..data import TagDataQuerySet +from .utils import ConcatNull log = logging.getLogger(__name__) @@ -110,7 +111,7 @@ def get_lineage(self) -> Lineage: tag = tag.parent depth -= 1 return lineage - + @cached_property def num_ancestors(self) -> int: """ @@ -123,7 +124,7 @@ def num_ancestors(self) -> int: num_ancestors += 1 tag = tag.parent return num_ancestors - + @staticmethod def annotate_depth(qs: models.QuerySet) -> models.QuerySet: """ @@ -301,7 +302,7 @@ def get_filtered_tags( search_term: str | None = None, include_counts: bool = True, excluded_values: list[str] | None = None, - ) -> models.QuerySet[TagData]: + ) -> TagDataQuerySet: """ Returns a filtered QuerySet of tag values. For free text or dynamic taxonomies, this will only return tag values @@ -354,7 +355,7 @@ def _get_filtered_tags_free_text( self, search_term: str | None, include_counts: bool, - ) -> models.QuerySet[TagData]: + ) -> TagDataQuerySet: """ Implementation of get_filtered_tags() for free text taxonomies. """ @@ -383,7 +384,7 @@ def _get_filtered_tags_one_level( parent_tag_value: str | None, search_term: str | None, include_counts: bool, - ) -> models.QuerySet[TagData]: + ) -> TagDataQuerySet: """ Implementation of get_filtered_tags() for closed taxonomies, where depth=1. When depth=1, we're only looking at a single "level" of the @@ -424,7 +425,7 @@ def _get_filtered_tags_deep( search_term: str | None, include_counts: bool, excluded_values: list[str] | None, - ) -> models.QuerySet[TagData]: + ) -> TagDataQuerySet: """ Implementation of get_filtered_tags() for closed taxonomies, where we're including tags from multiple levels of the hierarchy. @@ -460,11 +461,14 @@ def _get_filtered_tags_deep( qs = qs.annotate(child_count=models.Count("children")) # Add the "depth" to each tag: qs = Tag.annotate_depth(qs) - # Add the "lineage" field to sort them in order correctly: + # Add the "lineage" as a field called "sort_key" to sort them in order correctly: qs = qs.annotate(sort_key=Concat( - Coalesce(F("parent__parent__parent__value"), Value("")), - Coalesce(F("parent__parent__value"), Value("")), - Coalesce(F("parent__value"), Value("")), + # For a root tag, we want sort_key="RootValue" and for a depth=1 tag + # we want sort_key="RootValue\tValue". The following does that, since + # ConcatNull(...) returns NULL if any argument is NULL. + Coalesce(ConcatNull(F("parent__parent__parent__value"), Value("\t")), Value("")), + Coalesce(ConcatNull(F("parent__parent__value"), Value("\t")), Value("")), + Coalesce(ConcatNull(F("parent__value"), Value("\t")), Value("")), F("value"), output_field=models.CharField(), )) diff --git a/openedx_tagging/core/tagging/models/utils.py b/openedx_tagging/core/tagging/models/utils.py new file mode 100644 index 00000000..1f7dcb92 --- /dev/null +++ b/openedx_tagging/core/tagging/models/utils.py @@ -0,0 +1,24 @@ +""" +Utilities for tagging and taxonomy models +""" + +from django.db.models.expressions import Func + + +class ConcatNull(Func): # pylint: disable=abstract-method + """ + Concatenate two arguments together. Like normal SQL but unlike Django's + "Concat", if either argument is NULL, the result will be NULL. + """ + + function = "CONCAT" + + def as_sqlite(self, compiler, connection, **extra_context): + """ SQLite doesn't have CONCAT() but has a concatenation operator """ + return super().as_sql( + compiler, + connection, + template="%(expressions)s", + arg_joiner=" || ", + **extra_context, + ) diff --git a/openedx_tagging/core/tagging/rest_api/v1/serializers.py b/openedx_tagging/core/tagging/rest_api/v1/serializers.py index a4eb89ff..59bfd9ac 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/serializers.py +++ b/openedx_tagging/core/tagging/rest_api/v1/serializers.py @@ -5,7 +5,8 @@ from rest_framework import serializers from rest_framework.reverse import reverse -from openedx_tagging.core.tagging.models import ObjectTag, Tag, Taxonomy +from openedx_tagging.core.tagging.data import TagData +from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy class TaxonomyListQueryParamsSerializer(serializers.Serializer): # pylint: disable=abstract-method @@ -93,102 +94,41 @@ class ObjectTagUpdateQueryParamsSerializer(serializers.Serializer): # pylint: d ) -class TagsSerializer(serializers.ModelSerializer): +class TagDataSerializer(serializers.Serializer): """ - Serializer for Tags + Serializer for TagData Adds a link to get the sub tags """ + value = serializers.CharField() + external_id = serializers.CharField(allow_null=True) + child_count = serializers.IntegerField() + depth = serializers.IntegerField() + parent_value = serializers.CharField(allow_null=True) + usage_count = serializers.IntegerField(required=False) + # Internal database ID, if any. Generally should not be used; prefer 'value' which is unique within each taxonomy. + # Free text taxonomies never have '_id' for their tags. + _id = serializers.IntegerField(allow_null=True) - sub_tags_link = serializers.SerializerMethodField() - children_count = serializers.SerializerMethodField() + sub_tags_url = serializers.SerializerMethodField() - class Meta: - model = Tag - fields = ( - "id", - "value", - "taxonomy_id", - "parent_id", - "sub_tags_link", - "children_count", - ) - - def get_sub_tags_link(self, obj): + def get_sub_tags_url(self, obj: TagData): """ Returns URL for the list of child tags of the current tag. """ - if obj.children.count(): - query_params = f"?parent_tag_id={obj.id}" + if obj["child_count"] > 0 and "taxonomy_id" in self.context: + query_params = f"?parent_tag={obj['value']}" + request = self.context["request"] + url_namespace = request.resolver_match.namespace # get the namespace, usually "oel_tagging" url = ( - reverse("oel_tagging:taxonomy-tags", args=[str(obj.taxonomy_id)]) + reverse(f"{url_namespace}:taxonomy-tags", args=[str(self.context["taxonomy_id"])]) + query_params ) - request = self.context.get("request") return request.build_absolute_uri(url) return None - def get_children_count(self, obj): - """ - Returns the number of child tags of the given tag. - """ - return obj.children.count() + def update(self, instance, validated_data): + raise RuntimeError('`update()` is not supported by the TagData serializer.') - -class TagsWithSubTagsSerializer(serializers.ModelSerializer): - """ - Serializer for Tags. - - Represents a tree with a list of sub tags - """ - - sub_tags = serializers.SerializerMethodField() - children_count = serializers.SerializerMethodField() - - class Meta: - model = Tag - fields = ( - "id", - "value", - "taxonomy_id", - "sub_tags", - "children_count", - ) - - def get_sub_tags(self, obj): - """ - Returns a serialized list of child tags for the given tag. - """ - serializer = TagsWithSubTagsSerializer( - obj.children.all().order_by("value", "id"), - many=True, - read_only=True, - ) - return serializer.data - - def get_children_count(self, obj): - """ - Returns the number of child tags of the given tag. - """ - return obj.children.count() - - -class TagsForSearchSerializer(TagsWithSubTagsSerializer): - """ - Serializer for Tags - - Used to filter sub tags of a given tag - """ - - def get_sub_tags(self, obj): - """ - Returns a serialized list of child tags for the given tag. - """ - serializer = TagsWithSubTagsSerializer(obj.sub_tags, many=True, read_only=True) - return serializer.data - - def get_children_count(self, obj): - """ - Returns the number of child tags of the given tag. - """ - return len(obj.sub_tags) + def create(self, validated_data): + raise RuntimeError('`create()` is not supported by the TagData serializer.') diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py index 743f5bcd..6472ee46 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/views.py +++ b/openedx_tagging/core/tagging/rest_api/v1/views.py @@ -14,30 +14,20 @@ from openedx_tagging.core.tagging.models.base import Tag -from ...api import ( - create_taxonomy, - get_children_tags, - get_object_tags, - get_root_tags, - get_taxonomies, - get_taxonomy, - search_tags, - tag_object, -) +from ...api import create_taxonomy, get_object_tags, get_taxonomies, get_taxonomy, tag_object +from ...data import TagDataQuerySet from ...import_export.api import export_tags from ...import_export.parsers import ParserFormat from ...models import Taxonomy from ...rules import ObjectTagPermissionItem -from ..paginators import SEARCH_TAGS_THRESHOLD, TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination +from ..paginators import TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination from .permissions import ObjectTagObjectPermissions, TagListPermissions, TaxonomyObjectPermissions from .serializers import ( ObjectTagListQueryParamsSerializer, ObjectTagSerializer, ObjectTagUpdateBodySerializer, ObjectTagUpdateQueryParamsSerializer, - TagsForSearchSerializer, - TagsSerializer, - TagsWithSubTagsSerializer, + TagDataSerializer, TaxonomyExportQueryParamsSerializer, TaxonomyListQueryParamsSerializer, TaxonomySerializer, @@ -408,15 +398,26 @@ class TaxonomyTagsView(ListAPIView): """ View to list tags of a taxonomy. + If you specify ?root_only or ?parent_tag_value=..., only one "level" of the + hierachy will be returned. Otherwise, several levels will be returned, in + tree order, up to the maximum supported depth. Additional levels/depth can + be retrieved by using ?parent_tag_value to load more data. + + Note: If the taxonomy is particularly large (> 1,000 tags), ?root_only is + automatically set true by default and cannot be disabled. This way, users + can more easily select which tags they want to expand in the tree, and load + just that subset of the tree as needed. This may be changed in the future. + **List Query Parameters** - * pk (required) - The pk of the taxonomy to retrieve tags. - * parent_tag_id (optional) - Id of the tag to retrieve children tags. + * id (required) - The ID of the taxonomy to retrieve tags. + * parent_tag (optional) - Retrieve children of the tag with this value. + * root_only (optional) - If specified, only root tags are returned. * page (optional) - Page number (default: 1) * page_size (optional) - Number of items per page (default: 10) **List Example Requests** - GET api/tagging/v1/taxonomy/:pk/tags - Get tags of taxonomy - GET api/tagging/v1/taxonomy/:pk/tags?parent_tag_id=30 - Get children tags of tag + GET api/tagging/v1/taxonomy/:id/tags - Get tags of taxonomy + GET api/tagging/v1/taxonomy/:id/tags?parent_tag=Physics - Get child tags of tag **List Query Returns** * 200 - Success @@ -426,22 +427,8 @@ class TaxonomyTagsView(ListAPIView): """ permission_classes = [TagListPermissions] - pagination_enabled = True - - def __init__(self): - # Initialized here to avoid errors on type hints - self.serializer_class = TagsSerializer - - def get_pagination_class(self): - """ - Get the corresponding class depending if the pagination is enabled. - - It is necessary to call this function before returning the data. - """ - if self.pagination_enabled: - return TagsPagination - else: - return DisabledTagsPagination + pagination_class = TagsPagination + serializer_class = TagDataSerializer def get_taxonomy(self, pk: int) -> Taxonomy: """ @@ -453,130 +440,44 @@ def get_taxonomy(self, pk: int) -> Taxonomy: self.check_object_permissions(self.request, taxonomy) return taxonomy - def _build_search_tree(self, tags: list[Tag]) -> list[Tag]: - """ - Builds a tree with the result tags for a search. - - The retult is a pruned tree that contains - the path from root tags to tags that match the search. - """ - tag_ids = [tag.id for tag in tags] - - # Get missing parents. - # Not all parents are in the search result. - # This occurs when a child tag is on the search result, but its parent not, - # we need to add the parent to show the tree from the root to the child. - for tag in tags: - if tag.parent and tag.parent_id and tag.parent_id not in tag_ids: - tag_ids.append(tag.parent_id) - tags.append(tag.parent) # Our loop will iterate over this new parent tag too. - - groups: dict[int, list[Tag]] = {} - roots: list[Tag] = [] - - # Group tags by parent - for tag in tags: - if tag.parent_id is not None: - if tag.parent_id not in groups: - groups[tag.parent_id] = [] - groups[tag.parent_id].append(tag) - else: - roots.append(tag) + def get_serializer_context(self): + context = super().get_serializer_context() + context.update({ + "request": self.request, + "taxonomy_id": int(self.kwargs["pk"]), + }) + return context - for tag in tags: - # Used to serialize searched childrens - tag.sub_tags = groups.get(tag.id, []) # type: ignore[attr-defined] - - return roots - - def get_matching_tags( - self, - taxonomy_id: int, - parent_tag_id: str | None = None, - search_term: str | None = None, - ) -> list[Tag]: + def get_queryset(self) -> TagDataQuerySet: """ - Returns a list of tags for the given taxonomy. - - The pagination can be enabled or disabled depending of the taxonomy size. - You can read the desicion '0014_*' to more info about this logic. - Also, determines the serializer to be used. - - Use `parent_tag_id` to get the children of the given tag. - - Use `search_term` to filter tags values that contains the given term. + Builds and returns the queryset to be paginated. """ + taxonomy_id = int(self.kwargs.get("pk")) taxonomy = self.get_taxonomy(taxonomy_id) - if parent_tag_id: - # Get children of a tag. - - # If you need to get the children, then the roots are - # paginated, so we need to paginate the childrens too. - self.pagination_enabled = True - - # Normal serializer, with children link. - self.serializer_class = TagsSerializer - return get_children_tags( - taxonomy, - int(parent_tag_id), - search_term=search_term, - ) + parent_tag_value = self.request.query_params.get("parent_tag", None) + root_only = "root_only" in self.request.query_params + search_term = self.request.query_params.get("search_term", None) + + if parent_tag_value: + # Fetching tags below a certain parent is always paginated and only returns the direct children + depth = 1 + if root_only: + raise ValidationError("?root_only and ?parent_tag cannot be used together") else: - if search_term: - # Search tags - result = search_tags( - taxonomy, - search_term, - ) - # Checks the result size to determine whether - # to turn pagination on or off. - self.pagination_enabled = len(result) > SEARCH_TAGS_THRESHOLD - - # Use the special serializer to only show the tree - # of the search result. - self.serializer_class = TagsForSearchSerializer - - result = self._build_search_tree(result) + if root_only: + depth = 1 # User Explicitly requested to load only the root tags for now + elif search_term: + depth = None # For search, default to maximum depth but use normal pagination + elif taxonomy.tag_set.count() > TAGS_THRESHOLD: + # This is a very large taxonomy. Only load the root tags at first, so users can choose what to load. + depth = 1 else: - # Get root tags of taxonomy - - # Checks the taxonomy size to determine whether - # to turn pagination on or off. - self.pagination_enabled = taxonomy.tag_set.count() > TAGS_THRESHOLD - - if self.pagination_enabled: - # If pagination is enabled, use the normal serializer - # with children link. - self.serializer_class = TagsSerializer - else: - # If pagination is disabled, use the special serializer - # to show children. In this case, we return all taxonomy tags - # in a tree structure. - self.serializer_class = TagsWithSubTagsSerializer - - result = get_root_tags(taxonomy) - - return result + # We can load and display all the tags in the taxonomy at once: + self.pagination_class = DisabledTagsPagination + depth = None # Maximum depth - def get_queryset(self) -> list[Tag]: # type: ignore[override] - """ - Builds and returns the queryset to be paginated. - - The return type is not a QuerySet because the tagging python api functions - return lists, and on this point convert the list to a query set - is an unnecesary operation. - """ - pk = self.kwargs.get("pk") - parent_tag_id = self.request.query_params.get("parent_tag_id", None) - search_term = self.request.query_params.get("search_term", None) - - result = self.get_matching_tags( - pk, - parent_tag_id=parent_tag_id, + return taxonomy.get_filtered_tags( + parent_tag_value=parent_tag_value, search_term=search_term, + depth=depth, ) - - # This function is not called automatically - self.pagination_class = self.get_pagination_class() - - return result diff --git a/tests/openedx_tagging/core/tagging/import_export/test_template.py b/tests/openedx_tagging/core/tagging/import_export/test_template.py index c25c4b28..dc987c1c 100644 --- a/tests/openedx_tagging/core/tagging/import_export/test_template.py +++ b/tests/openedx_tagging/core/tagging/import_export/test_template.py @@ -12,8 +12,8 @@ from openedx_tagging.core.tagging.import_export import ParserFormat from openedx_tagging.core.tagging.import_export import api as import_api -from .mixins import TestImportExportMixin from ..utils import pretty_format_tags +from .mixins import TestImportExportMixin @ddt.ddt diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index b488c81c..6cd9a3a9 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -7,10 +7,10 @@ import ddt # type: ignore[import] import pytest -from django.db.models import QuerySet from django.test import TestCase, override_settings import openedx_tagging.core.tagging.api as tagging_api +from openedx_tagging.core.tagging.data import TagDataQuerySet from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy from .test_models import TestTagTaxonomyMixin, get_tag @@ -250,7 +250,7 @@ def test_get_children_tags_no_children(self) -> None: """ Trying to get children of a system tag that has no children yields an empty result: """ - assert list(tagging_api.get_children_tags(self.system_taxonomy, self.system_taxonomy_tag.value)) == [] + assert not list(tagging_api.get_children_tags(self.system_taxonomy, self.system_taxonomy_tag.value)) def test_resync_object_tags(self) -> None: self.taxonomy.allow_multiple = True @@ -572,9 +572,7 @@ def test_get_object_tags(self) -> None: # Fetch all the tags for a given object ID assert list( - tagging_api.get_object_tags( - object_id="abc", - ) + tagging_api.get_object_tags(object_id="abc") ) == [ alpha, beta, @@ -582,84 +580,11 @@ def test_get_object_tags(self) -> None: # Fetch all the tags for a given object ID + taxonomy assert list( - tagging_api.get_object_tags( - object_id="abc", - taxonomy_id=self.taxonomy.pk, - ) + tagging_api.get_object_tags(object_id="abc", taxonomy_id=self.taxonomy.pk) ) == [ beta, ] - @ddt.data( - ("ChA", ["Archaea", "Archaebacteria"], [2, 5]), - ("ar", ['Archaea', 'Archaebacteria', 'Arthropoda'], [2, 5, 14]), - ("aE", ['Archaea', 'Archaebacteria', 'Plantae'], [2, 5, 10]), - ( - "a", - [ - 'Animalia', - 'Archaea', - 'Archaebacteria', - 'Arthropoda', - 'Gastrotrich', - 'Monera', - 'Placozoa', - 'Plantae', - ], - [9, 2, 5, 14, 16, 13, 19, 10], - ), - ) - @ddt.unpack - def test_autocomplete_tags(self, search: str, expected_values: list[str], expected_ids: list[int | None]) -> None: - tags = [ - 'Archaea', - 'Archaebacteria', - 'Animalia', - 'Arthropoda', - 'Plantae', - 'Monera', - 'Gastrotrich', - 'Placozoa', - ] + expected_values # To create repeats - closed_taxonomy = self.taxonomy - open_taxonomy = tagging_api.create_taxonomy( - "Free_Text_Taxonomy", - allow_free_text=True, - ) - - for index, value in enumerate(tags): - # Creating ObjectTags for open taxonomy - ObjectTag( - object_id=f"object_id_{index}", - taxonomy=open_taxonomy, - _value=value, - ).save() - - # Creating ObjectTags for closed taxonomy - tag = get_tag(value) - ObjectTag( - object_id=f"object_id_{index}", - taxonomy=closed_taxonomy, - tag=tag, - _value=value, - ).save() - - # Test for open taxonomy - # self._validate_autocomplete_tags( - # open_taxonomy, - # search, - # expected_values, - # [None] * len(expected_ids), - # ) - - # # Test for closed taxonomy - # self._validate_autocomplete_tags( - # closed_taxonomy, - # search, - # expected_values, - # expected_ids, - # ) - @ddt.data( ("ChA", [ "Archaea (used: 1, children: 3)", @@ -741,11 +666,11 @@ def test_autocomplete_tags_closed_omit_object(self) -> None: " Euryarchaeida (used: 0, children: 0)", " Proteoarchaeota (used: 0, children: 0)", # These results are no longer included because of exclude_object_id: - #"Bacteria (used: 0, children: 2)", # does not contain "cha" but a child does - #" Archaebacteria (used: 1, children: 0)", + # "Bacteria (used: 0, children: 2)", # does not contain "cha" but a child does + # " Archaebacteria (used: 1, children: 0)", ] - def _get_tag_values(self, tags: QuerySet[tagging_api.TagData]) -> list[str]: + def _get_tag_values(self, tags: TagDataQuerySet) -> list[str]: """ Get tag values from tagging_api.autocomplete_tags() result """ @@ -756,37 +681,3 @@ def _get_tag_ids(self, tags) -> list[int]: Get tag ids from tagging_api.autocomplete_tags() result """ return [tag.get("tag_id") for tag in tags] - - def _validate_autocomplete_tags( - self, - taxonomy: Taxonomy, - search: str, - expected: list[str], - ) -> None: - """ - Validate autocomplete tags - """ - - # Normal search - result = tagging_api.search_tags(taxonomy, search) - - assert pretty_format_tags(result, parent=False) == expected - - # Create ObjectTag to simulate the content tagging - first_value = next(t for t in result if search.lower() in t["value"].lower()) - tag_model = None - if not taxonomy.allow_free_text: - tag_model = get_tag(first_value) - - object_id = 'new_object_id' - ObjectTag( - object_id=object_id, - taxonomy=taxonomy, - tag=tag_model, - _value=first_value, - ).save() - - # Search with object - result = tagging_api.search_tags(taxonomy, search, object_id) - assert self._get_tag_values(result) == expected_values[1:] - assert self._get_tag_ids(result) == expected_ids[1:] diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index a47ec5f1..6bea627c 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -13,6 +13,7 @@ from openedx_tagging.core.tagging import api from openedx_tagging.core.tagging.models import LanguageTaxonomy, ObjectTag, Tag, Taxonomy + from .utils import pretty_format_tags @@ -210,7 +211,6 @@ def test_get_lineage(self, tag_attr, lineage): assert getattr(self, tag_attr).get_lineage() == lineage - @ddt.ddt class TestFilteredTagsClosedTaxonomy(TestTagTaxonomyMixin, TestCase): """ @@ -443,6 +443,35 @@ def test_usage_count(self) -> None: "Bacteria (None) (used: 3, children: 2)", ] + def test_pathological_tree_sort(self) -> None: + """ + Check for bugs in how tree sorting happens, if the tag names are very + similar. + """ + # pylint: disable=unused-variable + taxonomy = api.create_taxonomy("Sort Test") + root1 = Tag.objects.create(taxonomy=taxonomy, value="1") + child1_1 = Tag.objects.create(taxonomy=taxonomy, value="11", parent=root1) + child1_2 = Tag.objects.create(taxonomy=taxonomy, value="2", parent=root1) + child1_3 = Tag.objects.create(taxonomy=taxonomy, value="1 A", parent=root1) + child1_4 = Tag.objects.create(taxonomy=taxonomy, value="11111", parent=root1) + grandchild1_4_1 = Tag.objects.create(taxonomy=taxonomy, value="1111-grandchild", parent=child1_4) + root2 = Tag.objects.create(taxonomy=taxonomy, value="111") + child2_1 = Tag.objects.create(taxonomy=taxonomy, value="11111111", parent=root2) + child2_2 = Tag.objects.create(taxonomy=taxonomy, value="123", parent=root2) + result = pretty_format_tags(taxonomy.get_filtered_tags()) + assert result == [ + "1 (None) (used: 0, children: 4)", + " 1 A (1) (used: 0, children: 0)", + " 11 (1) (used: 0, children: 0)", + " 11111 (1) (used: 0, children: 1)", + " 1111-grandchild (11111) (used: 0, children: 0)", + " 2 (1) (used: 0, children: 0)", + "111 (None) (used: 0, children: 2)", + " 11111111 (111) (used: 0, children: 0)", + " 123 (111) (used: 0, children: 0)", + ] + class TestFilteredTagsFreeTextTaxonomy(TestCase): """ diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index d97296ba..b8d471f2 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -3,7 +3,7 @@ """ from __future__ import annotations -from urllib.parse import parse_qs, urlparse +from urllib.parse import parse_qs, quote, quote_plus, urlparse import ddt # type: ignore[import] # typing support in rules depends on https://github.com/dfunckt/django-rules/pull/177 @@ -19,6 +19,8 @@ from openedx_tagging.core.tagging.rest_api.paginators import TagsPagination from openedx_tagging.core.tagging.rules import can_change_object_tag_objectid, can_view_object_tag_objectid +from .utils import pretty_format_tags + User = get_user_model() TAXONOMY_LIST_URL = "/tagging/rest_api/v1/taxonomies/" @@ -958,9 +960,12 @@ def test_not_authorized_user(self): assert response.status_code == status.HTTP_403_FORBIDDEN - def test_small_taxonomy(self): + def test_small_taxonomy_root(self): + """ + Test explicitly requesting only the root tags of a small taxonomy. + """ self.client.force_authenticate(user=self.staff) - response = self.client.get(self.small_taxonomy_url) + response = self.client.get(self.small_taxonomy_url + "?root_only") assert response.status_code == status.HTTP_200_OK data = response.data @@ -970,13 +975,20 @@ def test_small_taxonomy(self): root_count = self.small_taxonomy.tag_set.filter(parent=None).count() assert len(results) == root_count - # Checking tag fields - root_tag = self.small_taxonomy.tag_set.get(id=results[0].get("id")) - root_children_count = root_tag.children.count() + # Checking tag fields on the first tag returned: + root_tag = self.small_taxonomy.tag_set.get(id=results[0].get("_id")) assert results[0].get("value") == root_tag.value - assert results[0].get("taxonomy_id") == self.small_taxonomy.id - assert results[0].get("children_count") == root_children_count - assert len(results[0].get("sub_tags")) == root_children_count + assert results[0].get("child_count") == root_tag.children.count() + assert results[0].get("depth") == 0 # root tags always have depth 0 + assert results[0].get("parent_value") is None + assert results[0].get("usage_count") == 0 + + # Check that we can load sub-tags of that tag: + sub_tags_response = self.client.get(results[0]["sub_tags_url"]) + assert sub_tags_response.status_code == status.HTTP_200_OK + sub_tags_result = sub_tags_response.data["results"] + assert len(sub_tags_result) == root_tag.children.count() + assert set(t["value"] for t in sub_tags_result) == set(t.value for t in root_tag.children.all()) # Checking pagination values assert data.get("next") is None @@ -985,7 +997,86 @@ def test_small_taxonomy(self): assert data.get("num_pages") == 1 assert data.get("current_page") == 1 + def test_small_taxonomy(self): + """ + Test loading all the tags of a small taxonomy at once. + """ + self.client.force_authenticate(user=self.staff) + response = self.client.get(self.small_taxonomy_url) + assert response.status_code == status.HTTP_200_OK + + data = response.data + results = data.get("results", []) + assert pretty_format_tags(results) == [ + "Archaea (None) (used: 0, children: 3)", + " DPANN (Archaea) (used: 0, children: 0)", + " Euryarchaeida (Archaea) (used: 0, children: 0)", + " Proteoarchaeota (Archaea) (used: 0, children: 0)", + "Bacteria (None) (used: 0, children: 2)", + " Archaebacteria (Bacteria) (used: 0, children: 0)", + " Eubacteria (Bacteria) (used: 0, children: 0)", + "Eukaryota (None) (used: 0, children: 5)", + " Animalia (Eukaryota) (used: 0, children: 7)", + " Arthropoda (Animalia) (used: 0, children: 0)", + " Chordata (Animalia) (used: 0, children: 1)", + " Cnidaria (Animalia) (used: 0, children: 0)", + " Ctenophora (Animalia) (used: 0, children: 0)", + " Gastrotrich (Animalia) (used: 0, children: 0)", + " Placozoa (Animalia) (used: 0, children: 0)", + " Porifera (Animalia) (used: 0, children: 0)", + " Fungi (Eukaryota) (used: 0, children: 0)", + " Monera (Eukaryota) (used: 0, children: 0)", + " Plantae (Eukaryota) (used: 0, children: 0)", + " Protista (Eukaryota) (used: 0, children: 0)", + ] + + # Checking pagination values + assert data.get("next") is None + assert data.get("previous") is None + assert data.get("count") == len(results) + assert data.get("num_pages") == 1 + assert data.get("current_page") == 1 + + def test_small_taxonomy_paged(self): + """ + Test loading only the first few of the tags of a small taxonomy. + """ + self.client.force_authenticate(user=self.staff) + response = self.client.get(self.small_taxonomy_url + "?page_size=5") + assert response.status_code == status.HTTP_200_OK + data = response.data + assert pretty_format_tags(data["results"]) == [ + "Archaea (None) (used: 0, children: 3)", + " DPANN (Archaea) (used: 0, children: 0)", + " Euryarchaeida (Archaea) (used: 0, children: 0)", + " Proteoarchaeota (Archaea) (used: 0, children: 0)", + "Bacteria (None) (used: 0, children: 2)", + ] + + # Checking pagination values + assert data.get("next") is not None + assert data.get("previous") is None + assert data.get("count") == 20 + assert data.get("num_pages") == 4 + assert data.get("current_page") == 1 + + # Get the next page: + next_response = self.client.get(data.get("next")) + assert next_response.status_code == status.HTTP_200_OK + next_data = next_response.data + assert pretty_format_tags(next_data["results"]) == [ + " Archaebacteria (Bacteria) (used: 0, children: 0)", + " Eubacteria (Bacteria) (used: 0, children: 0)", + "Eukaryota (None) (used: 0, children: 5)", + " Animalia (Eukaryota) (used: 0, children: 7)", + " Arthropoda (Animalia) (used: 0, children: 0)", + ] + assert next_data.get("current_page") == 2 + def test_small_search(self): + """ + Test performing a search + """ search_term = 'eU' url = f"{self.small_taxonomy_url}?search_term={search_term}" self.client.force_authenticate(user=self.staff) @@ -993,39 +1084,59 @@ def test_small_search(self): assert response.status_code == status.HTTP_200_OK data = response.data - results = data.get("results", []) - - assert len(results) == 3 + assert pretty_format_tags(data["results"], parent=False, usage_count=False) == [ + "Archaea (children: 3)", # No match in this tag, but a child matches so it's included + " Euryarchaeida (children: 0)", + "Bacteria (children: 2)", # No match in this tag, but a child matches so it's included + " Eubacteria (children: 0)", + "Eukaryota (children: 5)", + ] # Checking pagination values assert data.get("next") is None assert data.get("previous") is None - assert data.get("count") == 3 + assert data.get("count") == 5 assert data.get("num_pages") == 1 assert data.get("current_page") == 1 def test_large_taxonomy(self): + """ + Test listing the tags in a large taxonomy (~7,000 tags). + """ self._build_large_taxonomy() self.client.force_authenticate(user=self.staff) response = self.client.get(self.large_taxonomy_url) assert response.status_code == status.HTTP_200_OK data = response.data - results = data.get("results", []) + results = data["results"] + + # Even though we didn't specify root_only, only the root tags will have + # been returned, because of the taxonomy's size. + assert pretty_format_tags(results) == [ + "Tag 0 (None) (used: 0, children: 12)", + "Tag 1099 (None) (used: 0, children: 12)", + "Tag 1256 (None) (used: 0, children: 12)", + "Tag 1413 (None) (used: 0, children: 12)", + "Tag 157 (None) (used: 0, children: 12)", + "Tag 1570 (None) (used: 0, children: 12)", + "Tag 1727 (None) (used: 0, children: 12)", + "Tag 1884 (None) (used: 0, children: 12)", + "Tag 2041 (None) (used: 0, children: 12)", + "Tag 2198 (None) (used: 0, children: 12)", + # ... there are 41 more root tags but they're excluded from this first result page. + ] # Count of paginated root tags assert len(results) == self.page_size - # Checking tag fields - root_tag = self.large_taxonomy.tag_set.get(id=results[0].get("id")) - assert results[0].get("value") == root_tag.value - assert results[0].get("taxonomy_id") == self.large_taxonomy.id - assert results[0].get("parent_id") == root_tag.parent_id - assert results[0].get("children_count") == root_tag.children.count() - assert results[0].get("sub_tags_link") == ( + # Checking some other tag fields not covered by the pretty-formatted string above: + root_tag = self.large_taxonomy.tag_set.get(value=results[0].get("value")) + assert results[0].get("_id") == root_tag.id + assert results[0].get("sub_tags_url") == ( "http://testserver/tagging/" f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?parent_tag_id={root_tag.id}" + f"/tags/?parent_tag={quote(results[0]['value'])}" ) # Checking pagination values @@ -1065,62 +1176,48 @@ def test_next_page_large_taxonomy(self): assert data.get("current_page") == 2 def test_large_search(self): + """ + Test searching in a large taxonomy + """ self._build_large_taxonomy() - search_term = '1' + search_term = '11' url = f"{self.large_taxonomy_url}?search_term={search_term}" self.client.force_authenticate(user=self.staff) response = self.client.get(url) assert response.status_code == status.HTTP_200_OK data = response.data - results = data.get("results", []) - - # Count of paginated root tags - assert len(results) == self.page_size - - # Checking pagination values - assert data.get("next") == ( - "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?page=2&search_term={search_term}" - ) - assert data.get("previous") is None - assert data.get("count") == 51 - assert data.get("num_pages") == 6 + results = data["results"] + assert pretty_format_tags(results, usage_count=None) == [ + "Tag 0 (None) (children: 12)", # First 3 results don't match but have children that match + " Tag 1 (Tag 0) (children: 12)", + " Tag 11 (Tag 1) (children: 0)", + " Tag 105 (Tag 0) (children: 12)", + " Tag 110 (Tag 105) (children: 0)", + " Tag 111 (Tag 105) (children: 0)", + " Tag 112 (Tag 105) (children: 0)", + " Tag 113 (Tag 105) (children: 0)", + " Tag 114 (Tag 105) (children: 0)", + " Tag 115 (Tag 105) (children: 0)", + ] + assert data.get("count") == 362 + assert data.get("num_pages") == 37 assert data.get("current_page") == 1 - - def test_next_large_search(self): - self._build_large_taxonomy() - search_term = '1' - url = f"{self.large_taxonomy_url}?search_term={search_term}" - - # Get first page of the search - self.client.force_authenticate(user=self.staff) - response = self.client.get(url) - - # Get next page - response = self.client.get(response.data.get("next")) - - data = response.data - results = data.get("results", []) - - # Count of paginated root tags - assert len(results) == self.page_size - - # Checking pagination values - assert data.get("next") == ( - "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?page=3&search_term={search_term}" - ) - assert data.get("previous") == ( - "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?search_term={search_term}" - ) - assert data.get("count") == 51 - assert data.get("num_pages") == 6 - assert data.get("current_page") == 2 + # Get the next page: + next_response = self.client.get(response.data.get("next")) + assert next_response.status_code == status.HTTP_200_OK + assert pretty_format_tags(next_response.data["results"], usage_count=None) == [ + " Tag 116 (Tag 105) (children: 0)", + " Tag 117 (Tag 105) (children: 0)", + " Tag 118 (Tag 0) (children: 12)", + " Tag 119 (Tag 118) (children: 0)", + "Tag 1099 (None) (children: 12)", + " Tag 1100 (Tag 1099) (children: 12)", + " Tag 1101 (Tag 1100) (children: 0)", + " Tag 1102 (Tag 1100) (children: 0)", + " Tag 1103 (Tag 1100) (children: 0)", + " Tag 1104 (Tag 1100) (children: 0)", + ] def test_get_children(self): self._build_large_taxonomy() @@ -1131,7 +1228,7 @@ def test_get_children(self): results = response.data.get("results", []) # Get children tags - response = self.client.get(results[0].get("sub_tags_link")) + response = self.client.get(results[0].get("sub_tags_url")) assert response.status_code == status.HTTP_200_OK data = response.data @@ -1141,22 +1238,21 @@ def test_get_children(self): assert len(results) == self.page_size # Checking tag fields - tag = self.large_taxonomy.tag_set.get(id=results[0].get("id")) + tag = self.large_taxonomy.tag_set.get(id=results[0].get("_id")) assert results[0].get("value") == tag.value - assert results[0].get("taxonomy_id") == self.large_taxonomy.id - assert results[0].get("parent_id") == tag.parent_id - assert results[0].get("children_count") == tag.children.count() - assert results[0].get("sub_tags_link") == ( + assert results[0].get("parent_value") == tag.parent.value + assert results[0].get("child_count") == tag.children.count() + assert results[0].get("sub_tags_url") == ( "http://testserver/tagging/" f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?parent_tag_id={tag.id}" + f"/tags/?parent_tag={quote(tag.value)}" ) # Checking pagination values assert data.get("next") == ( "http://testserver/tagging/" f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?page=2&parent_tag_id={tag.parent_id}" + f"/tags/?page=2&parent_tag={quote_plus(tag.parent.value)}" ) assert data.get("previous") is None assert data.get("count") == self.children_tags_count[0] @@ -1169,17 +1265,21 @@ def test_get_leaves(self): parent_tag = Tag.objects.get(value="Animalia") # Build url to get tags depth=2 - url = f"{self.small_taxonomy_url}?parent_tag_id={parent_tag.id}" + url = f"{self.small_taxonomy_url}?parent_tag={parent_tag.value}" response = self.client.get(url) - results = response.data.get("results", []) - - # Checking tag fields - tag = self.small_taxonomy.tag_set.get(id=results[0].get("id")) - assert results[0].get("value") == tag.value - assert results[0].get("taxonomy_id") == self.small_taxonomy.id - assert results[0].get("parent_id") == tag.parent_id - assert results[0].get("children_count") == tag.children.count() - assert results[0].get("sub_tags_link") is None + results = response.data["results"] + + # Even though we didn't specify root_only, only the root tags will have + # been returned, because of the taxonomy's size. + assert pretty_format_tags(results) == [ + " Arthropoda (Animalia) (used: 0, children: 0)", + " Chordata (Animalia) (used: 0, children: 1)", + " Cnidaria (Animalia) (used: 0, children: 0)", + " Ctenophora (Animalia) (used: 0, children: 0)", + " Gastrotrich (Animalia) (used: 0, children: 0)", + " Placozoa (Animalia) (used: 0, children: 0)", + " Porifera (Animalia) (used: 0, children: 0)", + ] def test_next_children(self): self._build_large_taxonomy() @@ -1189,22 +1289,27 @@ def test_next_children(self): response = self.client.get(self.large_taxonomy_url) results = response.data.get("results", []) - # Get children to obtain next link - response = self.client.get(results[0].get("sub_tags_link")) + # Get the URL that gives us the children of the first root tag + first_root_tag = results[0] + response = self.client.get(first_root_tag.get("sub_tags_url")) - # Get next children + # Get next page of children response = self.client.get(response.data.get("next")) assert response.status_code == status.HTTP_200_OK data = response.data - results = data.get("results", []) - tag = self.large_taxonomy.tag_set.get(id=results[0].get("id")) + results = data["results"] + assert pretty_format_tags(results) == [ + # There are 12 child tags total, so on this second page, we see only 2 (10 were on the first page): + " Tag 79 (Tag 0) (used: 0, children: 12)", + " Tag 92 (Tag 0) (used: 0, children: 12)", + ] # Checking pagination values assert data.get("next") is None assert data.get("previous") == ( "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}/tags/?parent_tag_id={tag.parent_id}" + f"rest_api/v1/taxonomies/{self.large_taxonomy.id}/tags/?parent_tag={quote_plus(first_root_tag['value'])}" ) assert data.get("count") == self.children_tags_count[0] assert data.get("num_pages") == 2 diff --git a/tests/openedx_tagging/core/tagging/utils.py b/tests/openedx_tagging/core/tagging/utils.py index fb24187d..3d6890e1 100644 --- a/tests/openedx_tagging/core/tagging/utils.py +++ b/tests/openedx_tagging/core/tagging/utils.py @@ -1,6 +1,7 @@ """ Useful utilities for testing tagging and taxonomy code. """ +from __future__ import annotations def pretty_format_tags(result, parent=True, external_id=False, usage_count=True) -> list[str]: From e7bb5bae0c1fe207807b4b88b45bf74253e8dfdb Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 19 Oct 2023 18:56:15 -0700 Subject: [PATCH 06/15] chore: update isort command to remove deprecated parameter --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index c42101f5..284b213f 100644 --- a/tox.ini +++ b/tox.ini @@ -77,7 +77,7 @@ commands = mypy pycodestyle openedx_learning openedx_tagging tests manage.py setup.py pydocstyle openedx_learning openedx_tagging tests manage.py setup.py - isort --check-only --diff --recursive tests test_utils openedx_learning openedx_tagging manage.py setup.py test_settings.py + isort --check-only --diff tests test_utils openedx_learning openedx_tagging manage.py setup.py test_settings.py make selfcheck [testenv:pii_check] From 868f3b84bb35ead14d11d0fa5d14ddad8e5a8090 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 19 Oct 2023 19:42:44 -0700 Subject: [PATCH 07/15] docs: document how to run the tests on MySQL locally --- mysql_test_settings.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mysql_test_settings.py b/mysql_test_settings.py index ae8e37d3..f519c540 100644 --- a/mysql_test_settings.py +++ b/mysql_test_settings.py @@ -7,6 +7,14 @@ The tox targets for py38-django32 and py38-django42 will use this settings file. For the most part, you can use test_settings.py instead (that's the default if you just run "pytest" with no arguments). + +If you need a compatible MySQL server running locally, spin one up with: +docker run --rm \ + -e MYSQL_DATABASE=test_oel_db \ + -e MYSQL_USER=test_oel_user \ + -e MYSQL_PASSWORD=test_oel_pass \ + -e MYSQL_RANDOM_ROOT_PASSWORD=true \ + -p 3306:3306 mysql:8 """ from test_settings import * From e2421dadc7d0113ee2502610b57dc2c8c7220824 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 20 Oct 2023 08:51:38 -0700 Subject: [PATCH 08/15] fix: fix sorting tags on MySQL --- openedx_tagging/core/tagging/models/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index 5428f686..3c7830b5 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -9,7 +9,7 @@ from django.core.exceptions import ValidationError from django.db import models from django.db.models import F, Q, Value -from django.db.models.functions import Coalesce, Concat +from django.db.models.functions import Concat from django.utils.functional import cached_property from django.utils.module_loading import import_string from django.utils.translation import gettext_lazy as _ @@ -466,10 +466,11 @@ def _get_filtered_tags_deep( # For a root tag, we want sort_key="RootValue" and for a depth=1 tag # we want sort_key="RootValue\tValue". The following does that, since # ConcatNull(...) returns NULL if any argument is NULL. - Coalesce(ConcatNull(F("parent__parent__parent__value"), Value("\t")), Value("")), - Coalesce(ConcatNull(F("parent__parent__value"), Value("\t")), Value("")), - Coalesce(ConcatNull(F("parent__value"), Value("\t")), Value("")), + ConcatNull(F("parent__parent__parent__value"), Value("\t")), + ConcatNull(F("parent__parent__value"), Value("\t")), + ConcatNull(F("parent__value"), Value("\t")), F("value"), + Value("\t"), # We also need the '\t' separator character at the end, or MySQL will sort things wrong output_field=models.CharField(), )) # Add the parent value From 941f4736d5a32235a20d52d0a7df5eb91034d145 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Fri, 20 Oct 2023 09:10:51 -0700 Subject: [PATCH 09/15] docs: minor clarifications --- tests/openedx_tagging/core/tagging/test_views.py | 4 ++-- tests/openedx_tagging/core/tagging/utils.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index b8d471f2..67b0b145 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -1189,10 +1189,10 @@ def test_large_search(self): data = response.data results = data["results"] assert pretty_format_tags(results, usage_count=None) == [ - "Tag 0 (None) (children: 12)", # First 3 results don't match but have children that match + "Tag 0 (None) (children: 12)", # First 2 results don't match but have children that match " Tag 1 (Tag 0) (children: 12)", " Tag 11 (Tag 1) (children: 0)", - " Tag 105 (Tag 0) (children: 12)", + " Tag 105 (Tag 0) (children: 12)", # Non-match but children match " Tag 110 (Tag 105) (children: 0)", " Tag 111 (Tag 105) (children: 0)", " Tag 112 (Tag 105) (children: 0)", diff --git a/tests/openedx_tagging/core/tagging/utils.py b/tests/openedx_tagging/core/tagging/utils.py index 3d6890e1..11cbf485 100644 --- a/tests/openedx_tagging/core/tagging/utils.py +++ b/tests/openedx_tagging/core/tagging/utils.py @@ -9,6 +9,7 @@ def pretty_format_tags(result, parent=True, external_id=False, usage_count=True) Format the result of get_filtered_tags() to be more human readable. Also works with other wrappers around get_filtered_tags, like api.get_tags() + Also works with serialized TagData from the REST API. """ pretty_results = [] for t in result: From b567a3fd0c165243e0b847c6de22803e73cf71ec Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 25 Oct 2023 12:11:32 -0700 Subject: [PATCH 10/15] fix: make tree sorting case insensitive --- openedx_tagging/core/tagging/models/base.py | 6 +-- .../core/tagging/test_models.py | 40 +++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index 3c7830b5..82915cd3 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -9,7 +9,7 @@ from django.core.exceptions import ValidationError from django.db import models from django.db.models import F, Q, Value -from django.db.models.functions import Concat +from django.db.models.functions import Concat, Lower from django.utils.functional import cached_property from django.utils.module_loading import import_string from django.utils.translation import gettext_lazy as _ @@ -462,7 +462,7 @@ def _get_filtered_tags_deep( # Add the "depth" to each tag: qs = Tag.annotate_depth(qs) # Add the "lineage" as a field called "sort_key" to sort them in order correctly: - qs = qs.annotate(sort_key=Concat( + qs = qs.annotate(sort_key=Lower(Concat( # For a root tag, we want sort_key="RootValue" and for a depth=1 tag # we want sort_key="RootValue\tValue". The following does that, since # ConcatNull(...) returns NULL if any argument is NULL. @@ -472,7 +472,7 @@ def _get_filtered_tags_deep( F("value"), Value("\t"), # We also need the '\t' separator character at the end, or MySQL will sort things wrong output_field=models.CharField(), - )) + ))) # Add the parent value qs = qs.annotate(parent_value=F("parent__value")) qs = qs.annotate(_id=F("id")) # ID has an underscore to encourage use of 'value' rather than this internal ID diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index 6bea627c..09c2b917 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -472,6 +472,46 @@ def test_pathological_tree_sort(self) -> None: " 123 (111) (used: 0, children: 0)", ] + def test_case_insensitive_sort(self) -> None: + """ + Make sure the sorting is case-insensitive + """ + # pylint: disable=unused-variable + taxonomy = api.create_taxonomy("Sort Test") + root1 = Tag.objects.create(taxonomy=taxonomy, value="ALPHABET") + child1_1 = Tag.objects.create(taxonomy=taxonomy, value="Android", parent=root1) + child1_2 = Tag.objects.create(taxonomy=taxonomy, value="abacus", parent=root1) + child1_2 = Tag.objects.create(taxonomy=taxonomy, value="azure", parent=root1) + child1_3 = Tag.objects.create(taxonomy=taxonomy, value="aardvark", parent=root1) + child1_4 = Tag.objects.create(taxonomy=taxonomy, value="ANVIL", parent=root1) + + root2 = Tag.objects.create(taxonomy=taxonomy, value="abstract") + child2_1 = Tag.objects.create(taxonomy=taxonomy, value="Andes", parent=root2) + child2_2 = Tag.objects.create(taxonomy=taxonomy, value="azores islands", parent=root2) + + result = pretty_format_tags(taxonomy.get_filtered_tags()) + assert result == [ + "abstract (None) (used: 0, children: 2)", + " Andes (abstract) (used: 0, children: 0)", + " azores islands (abstract) (used: 0, children: 0)", + "ALPHABET (None) (used: 0, children: 5)", + " aardvark (ALPHABET) (used: 0, children: 0)", + " abacus (ALPHABET) (used: 0, children: 0)", + " Android (ALPHABET) (used: 0, children: 0)", + " ANVIL (ALPHABET) (used: 0, children: 0)", + " azure (ALPHABET) (used: 0, children: 0)", + ] + + # And it's case insensitive when getting only a single level: + result = pretty_format_tags(taxonomy.get_filtered_tags(parent_tag_value="ALPHABET", depth=1)) + assert result == [ + " aardvark (ALPHABET) (used: 0, children: 0)", + " abacus (ALPHABET) (used: 0, children: 0)", + " Android (ALPHABET) (used: 0, children: 0)", + " ANVIL (ALPHABET) (used: 0, children: 0)", + " azure (ALPHABET) (used: 0, children: 0)", + ] + class TestFilteredTagsFreeTextTaxonomy(TestCase): """ From 3f2bab6576bf8948a8c82c73963d7701687508c2 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 25 Oct 2023 12:46:40 -0700 Subject: [PATCH 11/15] feat: disable include_counts by default --- openedx_tagging/core/tagging/api.py | 13 +- openedx_tagging/core/tagging/models/base.py | 2 +- .../core/tagging/rest_api/v1/views.py | 6 +- .../tagging/import_export/test_template.py | 2 +- .../openedx_tagging/core/tagging/test_api.py | 36 ++---- .../core/tagging/test_models.py | 121 +++++++++--------- .../core/tagging/test_views.py | 90 ++++++------- tests/openedx_tagging/core/tagging/utils.py | 6 +- 8 files changed, 139 insertions(+), 137 deletions(-) diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py index 3b2b2ba9..c5917c48 100644 --- a/openedx_tagging/core/tagging/api.py +++ b/openedx_tagging/core/tagging/api.py @@ -90,7 +90,12 @@ def get_root_tags(taxonomy: Taxonomy) -> TagDataQuerySet: return taxonomy.cast().get_filtered_tags(depth=1) -def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | None = None) -> TagDataQuerySet: +def search_tags( + taxonomy: Taxonomy, + search_term: str, + exclude_object_id: str | None = None, + include_counts: bool = False, +) -> TagDataQuerySet: """ Returns a list of all tags that contains `search_term` of the given taxonomy, as well as their ancestors (so they can be displayed in a tree). @@ -108,7 +113,11 @@ def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | N "_value", flat=True ) ) - qs = taxonomy.cast().get_filtered_tags(search_term=search_term, excluded_values=excluded_values) + qs = taxonomy.cast().get_filtered_tags( + search_term=search_term, + excluded_values=excluded_values, + include_counts=include_counts, + ) return qs diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index 82915cd3..b23893f3 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -300,7 +300,7 @@ def get_filtered_tags( depth: int | None = TAXONOMY_MAX_DEPTH, parent_tag_value: str | None = None, search_term: str | None = None, - include_counts: bool = True, + include_counts: bool = False, excluded_values: list[str] | None = None, ) -> TagDataQuerySet: """ diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py index 6472ee46..8c541a8c 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/views.py +++ b/openedx_tagging/core/tagging/rest_api/v1/views.py @@ -412,12 +412,14 @@ class TaxonomyTagsView(ListAPIView): * id (required) - The ID of the taxonomy to retrieve tags. * parent_tag (optional) - Retrieve children of the tag with this value. * root_only (optional) - If specified, only root tags are returned. + * include_counts (optional) - Include the count of how many times each + tag has been used. * page (optional) - Page number (default: 1) * page_size (optional) - Number of items per page (default: 10) **List Example Requests** GET api/tagging/v1/taxonomy/:id/tags - Get tags of taxonomy - GET api/tagging/v1/taxonomy/:id/tags?parent_tag=Physics - Get child tags of tag + GET api/tagging/v1/taxonomy/:id/tags?parent_tag=Physics&include_counts - Get child tags of tag **List Query Returns** * 200 - Success @@ -456,6 +458,7 @@ def get_queryset(self) -> TagDataQuerySet: taxonomy = self.get_taxonomy(taxonomy_id) parent_tag_value = self.request.query_params.get("parent_tag", None) root_only = "root_only" in self.request.query_params + include_counts = "include_counts" in self.request.query_params search_term = self.request.query_params.get("search_term", None) if parent_tag_value: @@ -480,4 +483,5 @@ def get_queryset(self) -> TagDataQuerySet: parent_tag_value=parent_tag_value, search_term=search_term, depth=depth, + include_counts=include_counts, ) diff --git a/tests/openedx_tagging/core/tagging/import_export/test_template.py b/tests/openedx_tagging/core/tagging/import_export/test_template.py index dc987c1c..9947de6b 100644 --- a/tests/openedx_tagging/core/tagging/import_export/test_template.py +++ b/tests/openedx_tagging/core/tagging/import_export/test_template.py @@ -48,7 +48,7 @@ def test_import_template(self, template_file, parser_format): replace=True, ), import_api.get_last_import_log(self.taxonomy) - assert pretty_format_tags(get_tags(self.taxonomy), external_id=True, usage_count=False) == [ + assert pretty_format_tags(get_tags(self.taxonomy), external_id=True) == [ 'Electronic instruments (ELECTRIC) (None) (children: 2)', ' Synthesizer (SYNTH) (Electronic instruments) (children: 0)', ' Theramin (THERAMIN) (Electronic instruments) (children: 0)', diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index 6cd9a3a9..230c57cf 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -116,7 +116,7 @@ def test_get_taxonomies(self) -> None: ] + self.dummy_taxonomies def test_get_tags(self) -> None: - assert pretty_format_tags(tagging_api.get_tags(self.taxonomy), parent=False, usage_count=False) == [ + assert pretty_format_tags(tagging_api.get_tags(self.taxonomy), parent=False) == [ "Archaea (children: 3)", " DPANN (children: 0)", " Euryarchaeida (children: 0)", @@ -141,7 +141,7 @@ def test_get_tags(self) -> None: @override_settings(LANGUAGES=test_languages) def test_get_tags_system(self) -> None: - assert pretty_format_tags(tagging_api.get_tags(self.system_taxonomy), parent=False, usage_count=False) == [ + assert pretty_format_tags(tagging_api.get_tags(self.system_taxonomy), parent=False) == [ "System Tag 1 (children: 0)", "System Tag 2 (children: 0)", "System Tag 3 (children: 0)", @@ -150,7 +150,7 @@ def test_get_tags_system(self) -> None: def test_get_root_tags(self): root_life_on_earth_tags = tagging_api.get_root_tags(self.taxonomy) - assert pretty_format_tags(root_life_on_earth_tags, parent=False, usage_count=False) == [ + assert pretty_format_tags(root_life_on_earth_tags, parent=False) == [ 'Archaea (children: 3)', 'Bacteria (children: 2)', 'Eukaryota (children: 5)', @@ -159,7 +159,7 @@ def test_get_root_tags(self): @override_settings(LANGUAGES=test_languages) def test_get_root_tags_system(self): result = tagging_api.get_root_tags(self.system_taxonomy) - assert pretty_format_tags(result, parent=False, usage_count=False) == [ + assert pretty_format_tags(result, parent=False) == [ 'System Tag 1 (children: 0)', 'System Tag 2 (children: 0)', 'System Tag 3 (children: 0)', @@ -190,7 +190,7 @@ def test_get_root_language_tags(self): def test_search_tags(self) -> None: result = tagging_api.search_tags(self.taxonomy, search_term='eU') - assert pretty_format_tags(result, parent=False, usage_count=False) == [ + assert pretty_format_tags(result, parent=False) == [ 'Archaea (children: 3)', # Doesn't match 'eU' but is included because a child is included ' Euryarchaeida (children: 0)', 'Bacteria (children: 2)', # Doesn't match 'eU' but is included because a child is included @@ -225,7 +225,7 @@ def test_get_children_tags(self) -> None: Test getting the children of a particular tag in a closed taxonomy. """ result1 = tagging_api.get_children_tags(self.taxonomy, "Animalia") - assert pretty_format_tags(result1, parent=False, usage_count=False) == [ + assert pretty_format_tags(result1, parent=False) == [ ' Arthropoda (children: 0)', ' Chordata (children: 1)', # Mammalia is a child of Chordata but excluded here. @@ -651,7 +651,7 @@ def test_autocomplete_tags_closed(self, search: str, expected: list[str]) -> Non _value=value, ).save() - result = tagging_api.search_tags(closed_taxonomy, search) + result = tagging_api.search_tags(closed_taxonomy, search, include_counts=True) assert pretty_format_tags(result, parent=False) == expected def test_autocomplete_tags_closed_omit_object(self) -> None: @@ -662,22 +662,10 @@ def test_autocomplete_tags_closed_omit_object(self) -> None: tagging_api.tag_object(object_id=object_id, taxonomy=self.taxonomy, tags=["Archaebacteria"]) result = tagging_api.search_tags(self.taxonomy, "ChA", exclude_object_id=object_id) assert pretty_format_tags(result, parent=False) == [ - "Archaea (used: 0, children: 3)", - " Euryarchaeida (used: 0, children: 0)", - " Proteoarchaeota (used: 0, children: 0)", + "Archaea (children: 3)", + " Euryarchaeida (children: 0)", + " Proteoarchaeota (children: 0)", # These results are no longer included because of exclude_object_id: - # "Bacteria (used: 0, children: 2)", # does not contain "cha" but a child does - # " Archaebacteria (used: 1, children: 0)", + # "Bacteria (children: 2)", # does not contain "cha" but a child does + # " Archaebacteria (children: 0)", ] - - def _get_tag_values(self, tags: TagDataQuerySet) -> list[str]: - """ - Get tag values from tagging_api.autocomplete_tags() result - """ - return [tag["value"] for tag in tags] - - def _get_tag_ids(self, tags) -> list[int]: - """ - Get tag ids from tagging_api.autocomplete_tags() result - """ - return [tag.get("tag_id") for tag in tags] diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index 09c2b917..8c006a14 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -237,7 +237,7 @@ def test_get_child_tags_one_level(self) -> None: Test basic retrieval of tags one level below the "Eukaryota" root tag in the closed taxonomy, using get_filtered_tags(). With counts included. """ - result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Eukaryota")) + result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Eukaryota", include_counts=True)) common_fields = {"depth": 1, "parent_value": "Eukaryota", "usage_count": 0, "external_id": None} for r in result: del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them @@ -256,7 +256,7 @@ def test_get_grandchild_tags_one_level(self) -> None: "Eukaryota" root tag in the closed taxonomy, using get_filtered_tags(). """ result = list(self.taxonomy.get_filtered_tags(depth=1, parent_tag_value="Animalia")) - common_fields = {"depth": 2, "parent_value": "Animalia", "usage_count": 0, "external_id": None} + common_fields = {"depth": 2, "parent_value": "Animalia", "external_id": None} for r in result: del r["_id"] # Remove the internal database IDs; they aren't interesting here and a other tests check them assert result == [ @@ -274,7 +274,7 @@ def test_get_depth_1_search_term(self) -> None: """ Filter the root tags to only those that match a search term """ - result = list(self.taxonomy.get_filtered_tags(depth=1, search_term="ARCH")) + result = list(self.taxonomy.get_filtered_tags(depth=1, search_term="ARCH", include_counts=True)) assert result == [ { "value": "Archaea", @@ -298,7 +298,6 @@ def test_get_depth_1_child_search_term(self) -> None: "value": "Archaebacteria", "child_count": 0, "depth": 1, - "usage_count": 0, "parent_value": "Bacteria", "external_id": None, "_id": 5, # These IDs are hard-coded in the test fixture file @@ -331,26 +330,26 @@ def test_get_all(self) -> None: """ result = pretty_format_tags(self.taxonomy.get_filtered_tags()) assert result == [ - "Archaea (None) (used: 0, children: 3)", - " DPANN (Archaea) (used: 0, children: 0)", - " Euryarchaeida (Archaea) (used: 0, children: 0)", - " Proteoarchaeota (Archaea) (used: 0, children: 0)", - "Bacteria (None) (used: 0, children: 2)", - " Archaebacteria (Bacteria) (used: 0, children: 0)", - " Eubacteria (Bacteria) (used: 0, children: 0)", - "Eukaryota (None) (used: 0, children: 5)", - " Animalia (Eukaryota) (used: 0, children: 7)", - " Arthropoda (Animalia) (used: 0, children: 0)", - " Chordata (Animalia) (used: 0, children: 1)", # note this has a child but the child is not included - " Cnidaria (Animalia) (used: 0, children: 0)", - " Ctenophora (Animalia) (used: 0, children: 0)", - " Gastrotrich (Animalia) (used: 0, children: 0)", - " Placozoa (Animalia) (used: 0, children: 0)", - " Porifera (Animalia) (used: 0, children: 0)", - " Fungi (Eukaryota) (used: 0, children: 0)", - " Monera (Eukaryota) (used: 0, children: 0)", - " Plantae (Eukaryota) (used: 0, children: 0)", - " Protista (Eukaryota) (used: 0, children: 0)", + "Archaea (None) (children: 3)", + " DPANN (Archaea) (children: 0)", + " Euryarchaeida (Archaea) (children: 0)", + " Proteoarchaeota (Archaea) (children: 0)", + "Bacteria (None) (children: 2)", + " Archaebacteria (Bacteria) (children: 0)", + " Eubacteria (Bacteria) (children: 0)", + "Eukaryota (None) (children: 5)", + " Animalia (Eukaryota) (children: 7)", + " Arthropoda (Animalia) (children: 0)", + " Chordata (Animalia) (children: 1)", # note this has a child but the child is not included + " Cnidaria (Animalia) (children: 0)", + " Ctenophora (Animalia) (children: 0)", + " Gastrotrich (Animalia) (children: 0)", + " Placozoa (Animalia) (children: 0)", + " Porifera (Animalia) (children: 0)", + " Fungi (Eukaryota) (children: 0)", + " Monera (Eukaryota) (children: 0)", + " Plantae (Eukaryota) (children: 0)", + " Protista (Eukaryota) (children: 0)", ] def test_search(self) -> None: @@ -360,11 +359,11 @@ def test_search(self) -> None: """ result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="ARCH")) assert result == [ - "Archaea (None) (used: 0, children: 3)", # Matches the value of this root tag, ARCHaea - " Euryarchaeida (Archaea) (used: 0, children: 0)", # Matches the value of this child tag - " Proteoarchaeota (Archaea) (used: 0, children: 0)", # Matches the value of this child tag - "Bacteria (None) (used: 0, children: 2)", # Does not match this tag but matches a descendant: - " Archaebacteria (Bacteria) (used: 0, children: 0)", # Matches the value of this child tag + "Archaea (None) (children: 3)", # Matches the value of this root tag, ARCHaea + " Euryarchaeida (Archaea) (children: 0)", # Matches the value of this child tag + " Proteoarchaeota (Archaea) (children: 0)", # Matches the value of this child tag + "Bacteria (None) (children: 2)", # Does not match this tag but matches a descendant: + " Archaebacteria (Bacteria) (children: 0)", # Matches the value of this child tag ] def test_search_2(self) -> None: @@ -374,16 +373,16 @@ def test_search_2(self) -> None: """ result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="chordata")) assert result == [ - "Eukaryota (None) (used: 0, children: 5)", - " Animalia (Eukaryota) (used: 0, children: 7)", - " Chordata (Animalia) (used: 0, children: 1)", # this is the matching tag. + "Eukaryota (None) (children: 5)", + " Animalia (Eukaryota) (children: 7)", + " Chordata (Animalia) (children: 1)", # this is the matching tag. ] def test_tags_deep(self) -> None: """ Test getting a deep tag in the taxonomy """ - result = list(self.taxonomy.get_filtered_tags(parent_tag_value="Chordata")) + result = list(self.taxonomy.get_filtered_tags(parent_tag_value="Chordata", include_counts=True)) assert result == [ { "value": "Mammalia", @@ -431,14 +430,16 @@ def test_usage_count(self) -> None: api.tag_object(object_id="obj03", taxonomy=self.taxonomy, tags=["Bacteria"]) api.tag_object(object_id="obj04", taxonomy=self.taxonomy, tags=["Eubacteria"]) # Now the API should reflect these usage counts: - result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="bacteria")) + result = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="bacteria", include_counts=True)) assert result == [ "Bacteria (None) (used: 3, children: 2)", " Archaebacteria (Bacteria) (used: 0, children: 0)", " Eubacteria (Bacteria) (used: 1, children: 0)", ] # Same with depth=1, which uses a different query internally: - result1 = pretty_format_tags(self.taxonomy.get_filtered_tags(search_term="bacteria", depth=1)) + result1 = pretty_format_tags( + self.taxonomy.get_filtered_tags(search_term="bacteria", include_counts=True, depth=1) + ) assert result1 == [ "Bacteria (None) (used: 3, children: 2)", ] @@ -461,15 +462,15 @@ def test_pathological_tree_sort(self) -> None: child2_2 = Tag.objects.create(taxonomy=taxonomy, value="123", parent=root2) result = pretty_format_tags(taxonomy.get_filtered_tags()) assert result == [ - "1 (None) (used: 0, children: 4)", - " 1 A (1) (used: 0, children: 0)", - " 11 (1) (used: 0, children: 0)", - " 11111 (1) (used: 0, children: 1)", - " 1111-grandchild (11111) (used: 0, children: 0)", - " 2 (1) (used: 0, children: 0)", - "111 (None) (used: 0, children: 2)", - " 11111111 (111) (used: 0, children: 0)", - " 123 (111) (used: 0, children: 0)", + "1 (None) (children: 4)", + " 1 A (1) (children: 0)", + " 11 (1) (children: 0)", + " 11111 (1) (children: 1)", + " 1111-grandchild (11111) (children: 0)", + " 2 (1) (children: 0)", + "111 (None) (children: 2)", + " 11111111 (111) (children: 0)", + " 123 (111) (children: 0)", ] def test_case_insensitive_sort(self) -> None: @@ -491,25 +492,25 @@ def test_case_insensitive_sort(self) -> None: result = pretty_format_tags(taxonomy.get_filtered_tags()) assert result == [ - "abstract (None) (used: 0, children: 2)", - " Andes (abstract) (used: 0, children: 0)", - " azores islands (abstract) (used: 0, children: 0)", - "ALPHABET (None) (used: 0, children: 5)", - " aardvark (ALPHABET) (used: 0, children: 0)", - " abacus (ALPHABET) (used: 0, children: 0)", - " Android (ALPHABET) (used: 0, children: 0)", - " ANVIL (ALPHABET) (used: 0, children: 0)", - " azure (ALPHABET) (used: 0, children: 0)", + "abstract (None) (children: 2)", + " Andes (abstract) (children: 0)", + " azores islands (abstract) (children: 0)", + "ALPHABET (None) (children: 5)", + " aardvark (ALPHABET) (children: 0)", + " abacus (ALPHABET) (children: 0)", + " Android (ALPHABET) (children: 0)", + " ANVIL (ALPHABET) (children: 0)", + " azure (ALPHABET) (children: 0)", ] # And it's case insensitive when getting only a single level: result = pretty_format_tags(taxonomy.get_filtered_tags(parent_tag_value="ALPHABET", depth=1)) assert result == [ - " aardvark (ALPHABET) (used: 0, children: 0)", - " abacus (ALPHABET) (used: 0, children: 0)", - " Android (ALPHABET) (used: 0, children: 0)", - " ANVIL (ALPHABET) (used: 0, children: 0)", - " azure (ALPHABET) (used: 0, children: 0)", + " aardvark (ALPHABET) (children: 0)", + " abacus (ALPHABET) (children: 0)", + " Android (ALPHABET) (children: 0)", + " ANVIL (ALPHABET) (children: 0)", + " azure (ALPHABET) (children: 0)", ] @@ -571,7 +572,7 @@ def test_get_filtered_tags_with_search(self) -> None: """ Test basic retrieval of only matching tags. """ - result1 = list(self.taxonomy.get_filtered_tags(search_term="le")) + result1 = list(self.taxonomy.get_filtered_tags(search_term="le", include_counts=True)) common_fields = {"child_count": 0, "depth": 0, "parent_value": None, "external_id": None, "_id": None} assert result1 == [ # These should appear in alphabetical order: @@ -579,7 +580,7 @@ def test_get_filtered_tags_with_search(self) -> None: {"value": "triple", "usage_count": 3, **common_fields}, ] # And it should be case insensitive: - result2 = list(self.taxonomy.get_filtered_tags(search_term="LE")) + result2 = list(self.taxonomy.get_filtered_tags(search_term="LE", include_counts=True)) assert result1 == result2 diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index 67b0b145..50ac663c 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -965,7 +965,7 @@ def test_small_taxonomy_root(self): Test explicitly requesting only the root tags of a small taxonomy. """ self.client.force_authenticate(user=self.staff) - response = self.client.get(self.small_taxonomy_url + "?root_only") + response = self.client.get(self.small_taxonomy_url + "?root_only&include_counts") assert response.status_code == status.HTTP_200_OK data = response.data @@ -1008,26 +1008,26 @@ def test_small_taxonomy(self): data = response.data results = data.get("results", []) assert pretty_format_tags(results) == [ - "Archaea (None) (used: 0, children: 3)", - " DPANN (Archaea) (used: 0, children: 0)", - " Euryarchaeida (Archaea) (used: 0, children: 0)", - " Proteoarchaeota (Archaea) (used: 0, children: 0)", - "Bacteria (None) (used: 0, children: 2)", - " Archaebacteria (Bacteria) (used: 0, children: 0)", - " Eubacteria (Bacteria) (used: 0, children: 0)", - "Eukaryota (None) (used: 0, children: 5)", - " Animalia (Eukaryota) (used: 0, children: 7)", - " Arthropoda (Animalia) (used: 0, children: 0)", - " Chordata (Animalia) (used: 0, children: 1)", - " Cnidaria (Animalia) (used: 0, children: 0)", - " Ctenophora (Animalia) (used: 0, children: 0)", - " Gastrotrich (Animalia) (used: 0, children: 0)", - " Placozoa (Animalia) (used: 0, children: 0)", - " Porifera (Animalia) (used: 0, children: 0)", - " Fungi (Eukaryota) (used: 0, children: 0)", - " Monera (Eukaryota) (used: 0, children: 0)", - " Plantae (Eukaryota) (used: 0, children: 0)", - " Protista (Eukaryota) (used: 0, children: 0)", + "Archaea (None) (children: 3)", + " DPANN (Archaea) (children: 0)", + " Euryarchaeida (Archaea) (children: 0)", + " Proteoarchaeota (Archaea) (children: 0)", + "Bacteria (None) (children: 2)", + " Archaebacteria (Bacteria) (children: 0)", + " Eubacteria (Bacteria) (children: 0)", + "Eukaryota (None) (children: 5)", + " Animalia (Eukaryota) (children: 7)", + " Arthropoda (Animalia) (children: 0)", + " Chordata (Animalia) (children: 1)", + " Cnidaria (Animalia) (children: 0)", + " Ctenophora (Animalia) (children: 0)", + " Gastrotrich (Animalia) (children: 0)", + " Placozoa (Animalia) (children: 0)", + " Porifera (Animalia) (children: 0)", + " Fungi (Eukaryota) (children: 0)", + " Monera (Eukaryota) (children: 0)", + " Plantae (Eukaryota) (children: 0)", + " Protista (Eukaryota) (children: 0)", ] # Checking pagination values @@ -1046,11 +1046,11 @@ def test_small_taxonomy_paged(self): assert response.status_code == status.HTTP_200_OK data = response.data assert pretty_format_tags(data["results"]) == [ - "Archaea (None) (used: 0, children: 3)", - " DPANN (Archaea) (used: 0, children: 0)", - " Euryarchaeida (Archaea) (used: 0, children: 0)", - " Proteoarchaeota (Archaea) (used: 0, children: 0)", - "Bacteria (None) (used: 0, children: 2)", + "Archaea (None) (children: 3)", + " DPANN (Archaea) (children: 0)", + " Euryarchaeida (Archaea) (children: 0)", + " Proteoarchaeota (Archaea) (children: 0)", + "Bacteria (None) (children: 2)", ] # Checking pagination values @@ -1065,11 +1065,11 @@ def test_small_taxonomy_paged(self): assert next_response.status_code == status.HTTP_200_OK next_data = next_response.data assert pretty_format_tags(next_data["results"]) == [ - " Archaebacteria (Bacteria) (used: 0, children: 0)", - " Eubacteria (Bacteria) (used: 0, children: 0)", - "Eukaryota (None) (used: 0, children: 5)", - " Animalia (Eukaryota) (used: 0, children: 7)", - " Arthropoda (Animalia) (used: 0, children: 0)", + " Archaebacteria (Bacteria) (children: 0)", + " Eubacteria (Bacteria) (children: 0)", + "Eukaryota (None) (children: 5)", + " Animalia (Eukaryota) (children: 7)", + " Arthropoda (Animalia) (children: 0)", ] assert next_data.get("current_page") == 2 @@ -1084,7 +1084,7 @@ def test_small_search(self): assert response.status_code == status.HTTP_200_OK data = response.data - assert pretty_format_tags(data["results"], parent=False, usage_count=False) == [ + assert pretty_format_tags(data["results"], parent=False) == [ "Archaea (children: 3)", # No match in this tag, but a child matches so it's included " Euryarchaeida (children: 0)", "Bacteria (children: 2)", # No match in this tag, but a child matches so it's included @@ -1105,7 +1105,7 @@ def test_large_taxonomy(self): """ self._build_large_taxonomy() self.client.force_authenticate(user=self.staff) - response = self.client.get(self.large_taxonomy_url) + response = self.client.get(self.large_taxonomy_url + "?include_counts") assert response.status_code == status.HTTP_200_OK data = response.data @@ -1142,7 +1142,7 @@ def test_large_taxonomy(self): # Checking pagination values assert data.get("next") == ( "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}/tags/?page=2" + f"rest_api/v1/taxonomies/{self.large_taxonomy.id}/tags/?include_counts=&page=2" ) assert data.get("previous") is None assert data.get("count") == self.root_tags_count @@ -1188,7 +1188,7 @@ def test_large_search(self): data = response.data results = data["results"] - assert pretty_format_tags(results, usage_count=None) == [ + assert pretty_format_tags(results) == [ "Tag 0 (None) (children: 12)", # First 2 results don't match but have children that match " Tag 1 (Tag 0) (children: 12)", " Tag 11 (Tag 1) (children: 0)", @@ -1206,7 +1206,7 @@ def test_large_search(self): # Get the next page: next_response = self.client.get(response.data.get("next")) assert next_response.status_code == status.HTTP_200_OK - assert pretty_format_tags(next_response.data["results"], usage_count=None) == [ + assert pretty_format_tags(next_response.data["results"]) == [ " Tag 116 (Tag 105) (children: 0)", " Tag 117 (Tag 105) (children: 0)", " Tag 118 (Tag 0) (children: 12)", @@ -1272,13 +1272,13 @@ def test_get_leaves(self): # Even though we didn't specify root_only, only the root tags will have # been returned, because of the taxonomy's size. assert pretty_format_tags(results) == [ - " Arthropoda (Animalia) (used: 0, children: 0)", - " Chordata (Animalia) (used: 0, children: 1)", - " Cnidaria (Animalia) (used: 0, children: 0)", - " Ctenophora (Animalia) (used: 0, children: 0)", - " Gastrotrich (Animalia) (used: 0, children: 0)", - " Placozoa (Animalia) (used: 0, children: 0)", - " Porifera (Animalia) (used: 0, children: 0)", + " Arthropoda (Animalia) (children: 0)", + " Chordata (Animalia) (children: 1)", + " Cnidaria (Animalia) (children: 0)", + " Ctenophora (Animalia) (children: 0)", + " Gastrotrich (Animalia) (children: 0)", + " Placozoa (Animalia) (children: 0)", + " Porifera (Animalia) (children: 0)", ] def test_next_children(self): @@ -1301,8 +1301,8 @@ def test_next_children(self): results = data["results"] assert pretty_format_tags(results) == [ # There are 12 child tags total, so on this second page, we see only 2 (10 were on the first page): - " Tag 79 (Tag 0) (used: 0, children: 12)", - " Tag 92 (Tag 0) (used: 0, children: 12)", + " Tag 79 (Tag 0) (children: 12)", + " Tag 92 (Tag 0) (children: 12)", ] # Checking pagination values diff --git a/tests/openedx_tagging/core/tagging/utils.py b/tests/openedx_tagging/core/tagging/utils.py index 11cbf485..0070f12c 100644 --- a/tests/openedx_tagging/core/tagging/utils.py +++ b/tests/openedx_tagging/core/tagging/utils.py @@ -4,7 +4,7 @@ from __future__ import annotations -def pretty_format_tags(result, parent=True, external_id=False, usage_count=True) -> list[str]: +def pretty_format_tags(result, parent=True, external_id=False) -> list[str]: """ Format the result of get_filtered_tags() to be more human readable. @@ -19,8 +19,8 @@ def pretty_format_tags(result, parent=True, external_id=False, usage_count=True) if parent: line += f"({t['parent_value']}) " line += "(" - if usage_count: - line += f"used: {t.get('usage_count', '?')}, " + if "usage_count" in t: + line += f"used: {t['usage_count']}, " line += f"children: {t['child_count']})" pretty_results.append(line) return pretty_results From 9b824e26ae1e54e8af5026e757ebeda124c1e819 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 25 Oct 2023 12:49:39 -0700 Subject: [PATCH 12/15] fix: mark TagData["usage_count"] using typing.NotRequired --- openedx_tagging/core/tagging/data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/openedx_tagging/core/tagging/data.py b/openedx_tagging/core/tagging/data.py index 02bbed41..20561d0c 100644 --- a/openedx_tagging/core/tagging/data.py +++ b/openedx_tagging/core/tagging/data.py @@ -3,7 +3,7 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING, Any, TypedDict +from typing import TYPE_CHECKING, Any, TypedDict, NotRequired from django.db.models import QuerySet from typing_extensions import TypeAlias @@ -23,8 +23,8 @@ class TagData(TypedDict): child_count: int depth: int parent_value: str | None - # Note: usage_count may not actually be present but there's no way to indicate that w/ python types at the moment - usage_count: int + # Note: usage_count may or may not be present, depending on the request. + usage_count: NotRequired[int] # Internal database ID, if any. Generally should not be used; prefer 'value' which is unique within each taxonomy. _id: int | None From 68f6f1995fcd5d0c5f334aee0f84b1d3d1192b16 Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 25 Oct 2023 12:54:09 -0700 Subject: [PATCH 13/15] fix: warn if excluded_values will be ignored --- openedx_tagging/core/tagging/models/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index b23893f3..1f3afd98 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -457,6 +457,11 @@ def _get_filtered_tags_deep( if pk is not None: matching_ids.append(pk) qs = qs.filter(pk__in=matching_ids) + elif excluded_values: + raise NotImplementedError("Using excluded_values without search_term is not currently supported.") + # We could implement this in the future but I'd prefer to get rid of the "excluded_values" API altogether. + # It remains to be seen if it's useful to do that on the backend, or if we can do it better/simpler on the + # frontend. qs = qs.annotate(child_count=models.Count("children")) # Add the "depth" to each tag: From 9e94ee4e93231cbd703098e75cb82bb5538ebd3f Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Wed, 25 Oct 2023 13:54:17 -0700 Subject: [PATCH 14/15] chore: updates for compatibility with master changes --- openedx_tagging/core/tagging/data.py | 4 +-- openedx_tagging/core/tagging/models/base.py | 22 +++++++++----- .../core/tagging/rest_api/v1/serializers.py | 24 +++++++++++---- .../core/tagging/rest_api/v1/views.py | 2 -- .../openedx_tagging/core/tagging/test_api.py | 1 - .../core/tagging/test_views.py | 29 ++++++++----------- 6 files changed, 48 insertions(+), 34 deletions(-) diff --git a/openedx_tagging/core/tagging/data.py b/openedx_tagging/core/tagging/data.py index 20561d0c..1ab8e987 100644 --- a/openedx_tagging/core/tagging/data.py +++ b/openedx_tagging/core/tagging/data.py @@ -3,10 +3,10 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING, Any, TypedDict, NotRequired +from typing import TYPE_CHECKING, Any, TypedDict from django.db.models import QuerySet -from typing_extensions import TypeAlias +from typing_extensions import NotRequired, TypeAlias class TagData(TypedDict): diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index 9a074a99..71318a13 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -113,17 +113,16 @@ def get_lineage(self) -> Lineage: return lineage @cached_property - def num_ancestors(self) -> int: + def depth(self) -> int: """ - How many ancestors this Tag has. Equivalent to its "depth" in the tree. - Zero for root tags. + How many ancestors this Tag has. Zero for root tags. """ - num_ancestors = 0 + depth = 0 tag = self while tag.parent: - num_ancestors += 1 + depth += 1 tag = tag.parent - return num_ancestors + return depth @staticmethod def annotate_depth(qs: models.QuerySet) -> models.QuerySet: @@ -141,6 +140,15 @@ def annotate_depth(qs: models.QuerySet) -> models.QuerySet: default=4, )) + @cached_property + def child_count(self) -> int: + """ + How many child tags this tag has in the taxonomy. + """ + if self.taxonomy and not self.taxonomy.allow_free_text: + return self.taxonomy.tag_set.filter(parent=self).count() + return 0 + class Taxonomy(models.Model): """ @@ -394,7 +402,7 @@ def _get_filtered_tags_one_level( if parent_tag_value: parent_tag = self.tag_for_value(parent_tag_value) qs: models.QuerySet = self.tag_set.filter(parent_id=parent_tag.pk) - qs = qs.annotate(depth=Value(parent_tag.num_ancestors + 1)) + qs = qs.annotate(depth=Value(parent_tag.depth + 1)) # Use parent_tag.value not parent_tag_value because they may differ in case qs = qs.annotate(parent_value=Value(parent_tag.value)) else: diff --git a/openedx_tagging/core/tagging/rest_api/v1/serializers.py b/openedx_tagging/core/tagging/rest_api/v1/serializers.py index ae9d58b5..4fe62c72 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/serializers.py +++ b/openedx_tagging/core/tagging/rest_api/v1/serializers.py @@ -1,11 +1,13 @@ """ API Serializers for taxonomies """ +from __future__ import annotations + from rest_framework import serializers from rest_framework.reverse import reverse from openedx_tagging.core.tagging.data import TagData -from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy +from openedx_tagging.core.tagging.models import ObjectTag, Tag, Taxonomy class TaxonomyListQueryParamsSerializer(serializers.Serializer): # pylint: disable=abstract-method @@ -95,7 +97,7 @@ class ObjectTagUpdateQueryParamsSerializer(serializers.Serializer): # pylint: d class TagDataSerializer(serializers.Serializer): """ - Serializer for TagData + Serializer for TagData dicts. Also can serialize Tag instances. Adds a link to get the sub tags """ @@ -111,12 +113,14 @@ class TagDataSerializer(serializers.Serializer): sub_tags_url = serializers.SerializerMethodField() - def get_sub_tags_url(self, obj: TagData): + def get_sub_tags_url(self, obj: TagData | Tag): """ Returns URL for the list of child tags of the current tag. """ - if obj["child_count"] > 0 and "taxonomy_id" in self.context: - query_params = f"?parent_tag={obj['value']}" + child_count = obj.child_count if isinstance(obj, Tag) else obj["child_count"] + if child_count > 0 and "taxonomy_id" in self.context: + value = obj.value if isinstance(obj, Tag) else obj["value"] + query_params = f"?parent_tag={value}" request = self.context["request"] url_namespace = request.resolver_match.namespace # get the namespace, usually "oel_tagging" url = ( @@ -126,6 +130,16 @@ def get_sub_tags_url(self, obj: TagData): return request.build_absolute_uri(url) return None + def to_representation(self, instance: TagData | Tag) -> dict: + """ + Convert this TagData (or Tag model instance) to the serialized dictionary + """ + data = super().to_representation(instance) + if isinstance(instance, Tag): + data["_id"] = instance.pk # The ID field won't otherwise be detected. + data["parent_value"] = instance.parent.value if instance.parent else None + return data + def update(self, instance, validated_data): raise RuntimeError('`update()` is not supported by the TagData serializer.') diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py index b6435e18..951c8e6f 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/views.py +++ b/openedx_tagging/core/tagging/rest_api/v1/views.py @@ -12,8 +12,6 @@ from rest_framework.response import Response from rest_framework.viewsets import GenericViewSet, ModelViewSet -from openedx_tagging.core.tagging.models.base import Tag - from ...api import ( TagDoesNotExist, add_tag_to_taxonomy, diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index 230c57cf..909495a4 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -10,7 +10,6 @@ from django.test import TestCase, override_settings import openedx_tagging.core.tagging.api as tagging_api -from openedx_tagging.core.tagging.data import TagDataQuerySet from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy from .test_models import TestTagTaxonomyMixin, get_tag diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index b4d34ec2..6a312546 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -1365,13 +1365,12 @@ def test_create_tag_in_taxonomy(self): data = response.data - self.assertIsNotNone(data.get("id")) + self.assertIsNotNone(data.get("_id")) self.assertEqual(data.get("value"), new_tag_value) - self.assertEqual(data.get("taxonomy_id"), self.small_taxonomy.pk) - self.assertIsNone(data.get("parent_id")) + self.assertIsNone(data.get("parent_value")) self.assertIsNone(data.get("external_id")) self.assertIsNone(data.get("sub_tags_link")) - self.assertEqual(data.get("children_count"), 0) + self.assertEqual(data.get("child_count"), 0) def test_create_tag_in_taxonomy_with_parent(self): self.client.force_authenticate(user=self.staff) @@ -1393,13 +1392,12 @@ def test_create_tag_in_taxonomy_with_parent(self): data = response.data - self.assertIsNotNone(data.get("id")) + self.assertIsNotNone(data.get("_id")) self.assertEqual(data.get("value"), new_tag_value) - self.assertEqual(data.get("taxonomy_id"), self.small_taxonomy.pk) - self.assertEqual(data.get("parent_id"), parent_tag.id) + self.assertEqual(data.get("parent_value"), parent_tag.value) self.assertEqual(data.get("external_id"), new_external_id) self.assertIsNone(data.get("sub_tags_link")) - self.assertEqual(data.get("children_count"), 0) + self.assertEqual(data.get("child_count"), 0) def test_create_tag_in_invalid_taxonomy(self): self.client.force_authenticate(user=self.staff) @@ -1565,10 +1563,9 @@ def test_update_tag_in_taxonomy_with_different_methods(self): data = response.data # Check that Tag value got updated - self.assertEqual(data.get("id"), existing_tag.id) + self.assertEqual(data.get("_id"), existing_tag.id) self.assertEqual(data.get("value"), updated_tag_value) - self.assertEqual(data.get("taxonomy_id"), self.small_taxonomy.pk) - self.assertEqual(data.get("parent_id"), existing_tag.parent) + self.assertEqual(data.get("parent_value"), existing_tag.parent) self.assertEqual(data.get("external_id"), existing_tag.external_id) # Test updating using the PATCH method @@ -1583,10 +1580,9 @@ def test_update_tag_in_taxonomy_with_different_methods(self): data = response.data # Check the Tag value got updated again - self.assertEqual(data.get("id"), existing_tag.id) + self.assertEqual(data.get("_id"), existing_tag.id) self.assertEqual(data.get("value"), updated_tag_value_2) - self.assertEqual(data.get("taxonomy_id"), self.small_taxonomy.pk) - self.assertEqual(data.get("parent_id"), existing_tag.parent) + self.assertEqual(data.get("parent_value"), existing_tag.parent) self.assertEqual(data.get("external_id"), existing_tag.external_id) def test_update_tag_in_taxonomy_reflects_changes_in_object_tags(self): @@ -1626,10 +1622,9 @@ def test_update_tag_in_taxonomy_reflects_changes_in_object_tags(self): data = response.data # Check that Tag value got updated - self.assertEqual(data.get("id"), existing_tag.id) + self.assertEqual(data.get("_id"), existing_tag.id) self.assertEqual(data.get("value"), updated_tag_value) - self.assertEqual(data.get("taxonomy_id"), self.small_taxonomy.pk) - self.assertEqual(data.get("parent_id"), existing_tag.parent) + self.assertEqual(data.get("parent_value"), None) self.assertEqual(data.get("external_id"), existing_tag.external_id) # Check that the ObjectTags got updated as well From 73487854fc48ce53e6c712d8c5e1cb1702d72f4a Mon Sep 17 00:00:00 2001 From: Braden MacDonald Date: Thu, 26 Oct 2023 10:20:24 -0700 Subject: [PATCH 15/15] chore: version bump --- openedx_learning/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py index b602ca30..8872149d 100644 --- a/openedx_learning/__init__.py +++ b/openedx_learning/__init__.py @@ -1,4 +1,4 @@ """ Open edX Learning ("Learning Core"). """ -__version__ = "0.2.6" +__version__ = "0.3.0"