diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py index 367fa91f..3b2b2ba9 100644 --- a/openedx_tagging/core/tagging/api.py +++ b/openedx_tagging/core/tagging/api.py @@ -16,7 +16,7 @@ from django.db.models import QuerySet from django.utils.translation import gettext as _ -from .data import TagData +from .data import TagDataQuerySet from .models import ObjectTag, Tag, Taxonomy # Export this as part of the API @@ -71,7 +71,7 @@ def get_taxonomies(enabled=True) -> QuerySet[Taxonomy]: return queryset.filter(enabled=enabled) -def get_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: +def get_tags(taxonomy: Taxonomy) -> TagDataQuerySet: """ Returns a QuerySet of all the tags in the given taxonomy. @@ -81,7 +81,7 @@ def get_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: return taxonomy.cast().get_filtered_tags() -def get_root_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: +def get_root_tags(taxonomy: Taxonomy) -> TagDataQuerySet: """ Returns a list of the root tags for the given taxonomy. @@ -90,7 +90,7 @@ def get_root_tags(taxonomy: Taxonomy) -> QuerySet[TagData]: return taxonomy.cast().get_filtered_tags(depth=1) -def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | None = None) -> QuerySet[TagData]: +def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | None = None) -> TagDataQuerySet: """ Returns a list of all tags that contains `search_term` of the given taxonomy, as well as their ancestors (so they can be displayed in a tree). @@ -115,7 +115,7 @@ def search_tags(taxonomy: Taxonomy, search_term: str, exclude_object_id: str | N def get_children_tags( taxonomy: Taxonomy, parent_tag_value: str, -) -> QuerySet[TagData]: +) -> TagDataQuerySet: """ Returns a QuerySet of children tags for the given parent tag. diff --git a/openedx_tagging/core/tagging/data.py b/openedx_tagging/core/tagging/data.py index a6644205..51884241 100644 --- a/openedx_tagging/core/tagging/data.py +++ b/openedx_tagging/core/tagging/data.py @@ -3,7 +3,10 @@ """ from __future__ import annotations -from typing import TypedDict +from typing import Any, TypedDict, TYPE_CHECKING +from typing_extensions import TypeAlias + +from django.db.models import QuerySet class TagData(TypedDict): @@ -24,3 +27,13 @@ class TagData(TypedDict): usage_count: int # Internal database ID, if any. Generally should not be used; prefer 'value' which is unique within each taxonomy. _id: int | None + + +if TYPE_CHECKING: + from django_stubs_ext import ValuesQuerySet + TagDataQuerySet: TypeAlias = ValuesQuerySet[Any, TagData] + # The following works better for pyright (provides proper VS Code autocompletions), + # but I can't find any way to specify different types for pyright vs mypy :/ + # TagDataQuerySet: TypeAlias = QuerySet[TagData] +else: + TagDataQuerySet = QuerySet[TagData] diff --git a/openedx_tagging/core/tagging/import_export/parsers.py b/openedx_tagging/core/tagging/import_export/parsers.py index 823a8a24..90268ff6 100644 --- a/openedx_tagging/core/tagging/import_export/parsers.py +++ b/openedx_tagging/core/tagging/import_export/parsers.py @@ -10,7 +10,6 @@ from django.utils.translation import gettext as _ -from ..api import get_tags from ..models import Taxonomy from .exceptions import EmptyCSVField, EmptyJSONField, FieldJSONError, InvalidFormat, TagParserError from .import_plan import TagItem diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py index 7900e440..963aa9b2 100644 --- a/openedx_tagging/core/tagging/models/base.py +++ b/openedx_tagging/core/tagging/models/base.py @@ -17,7 +17,8 @@ from openedx_learning.lib.fields import MultiCollationTextField, case_insensitive_char_field, case_sensitive_char_field -from ..data import TagData +from ..data import TagDataQuerySet +from .utils import ConcatNull log = logging.getLogger(__name__) @@ -110,7 +111,7 @@ def get_lineage(self) -> Lineage: tag = tag.parent depth -= 1 return lineage - + @cached_property def num_ancestors(self) -> int: """ @@ -123,7 +124,7 @@ def num_ancestors(self) -> int: num_ancestors += 1 tag = tag.parent return num_ancestors - + @staticmethod def annotate_depth(qs: models.QuerySet) -> models.QuerySet: """ @@ -301,7 +302,7 @@ def get_filtered_tags( search_term: str | None = None, include_counts: bool = True, excluded_values: list[str] | None = None, - ) -> models.QuerySet[TagData]: + ) -> TagDataQuerySet: """ Returns a filtered QuerySet of tag values. For free text or dynamic taxonomies, this will only return tag values @@ -354,7 +355,7 @@ def _get_filtered_tags_free_text( self, search_term: str | None, include_counts: bool, - ) -> models.QuerySet[TagData]: + ) -> TagDataQuerySet: """ Implementation of get_filtered_tags() for free text taxonomies. """ @@ -383,7 +384,7 @@ def _get_filtered_tags_one_level( parent_tag_value: str | None, search_term: str | None, include_counts: bool, - ) -> models.QuerySet[TagData]: + ) -> TagDataQuerySet: """ Implementation of get_filtered_tags() for closed taxonomies, where depth=1. When depth=1, we're only looking at a single "level" of the @@ -424,7 +425,7 @@ def _get_filtered_tags_deep( search_term: str | None, include_counts: bool, excluded_values: list[str] | None, - ) -> models.QuerySet[TagData]: + ) -> TagDataQuerySet: """ Implementation of get_filtered_tags() for closed taxonomies, where we're including tags from multiple levels of the hierarchy. @@ -462,9 +463,12 @@ def _get_filtered_tags_deep( qs = Tag.annotate_depth(qs) # Add the "lineage" field to sort them in order correctly: qs = qs.annotate(sort_key=Concat( - Coalesce(F("parent__parent__parent__value"), Value("")), - Coalesce(F("parent__parent__value"), Value("")), - Coalesce(F("parent__value"), Value("")), + # For a root tag, we want sort_key="RootValue" and for a depth=1 tag + # we want sort_key="RootValue\tValue". The following does that, since + # ConcatNull(...) returns NULL if any argument is NULL. + Coalesce(ConcatNull(F("parent__parent__parent__value"), Value("\t")), Value("")), + Coalesce(ConcatNull(F("parent__parent__value"), Value("\t")), Value("")), + Coalesce(ConcatNull(F("parent__value"), Value("\t")), Value("")), F("value"), output_field=models.CharField(), )) diff --git a/openedx_tagging/core/tagging/models/utils.py b/openedx_tagging/core/tagging/models/utils.py new file mode 100644 index 00000000..1f7dcb92 --- /dev/null +++ b/openedx_tagging/core/tagging/models/utils.py @@ -0,0 +1,24 @@ +""" +Utilities for tagging and taxonomy models +""" + +from django.db.models.expressions import Func + + +class ConcatNull(Func): # pylint: disable=abstract-method + """ + Concatenate two arguments together. Like normal SQL but unlike Django's + "Concat", if either argument is NULL, the result will be NULL. + """ + + function = "CONCAT" + + def as_sqlite(self, compiler, connection, **extra_context): + """ SQLite doesn't have CONCAT() but has a concatenation operator """ + return super().as_sql( + compiler, + connection, + template="%(expressions)s", + arg_joiner=" || ", + **extra_context, + ) diff --git a/openedx_tagging/core/tagging/rest_api/v1/serializers.py b/openedx_tagging/core/tagging/rest_api/v1/serializers.py index a4eb89ff..59bfd9ac 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/serializers.py +++ b/openedx_tagging/core/tagging/rest_api/v1/serializers.py @@ -5,7 +5,8 @@ from rest_framework import serializers from rest_framework.reverse import reverse -from openedx_tagging.core.tagging.models import ObjectTag, Tag, Taxonomy +from openedx_tagging.core.tagging.data import TagData +from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy class TaxonomyListQueryParamsSerializer(serializers.Serializer): # pylint: disable=abstract-method @@ -93,102 +94,41 @@ class ObjectTagUpdateQueryParamsSerializer(serializers.Serializer): # pylint: d ) -class TagsSerializer(serializers.ModelSerializer): +class TagDataSerializer(serializers.Serializer): """ - Serializer for Tags + Serializer for TagData Adds a link to get the sub tags """ + value = serializers.CharField() + external_id = serializers.CharField(allow_null=True) + child_count = serializers.IntegerField() + depth = serializers.IntegerField() + parent_value = serializers.CharField(allow_null=True) + usage_count = serializers.IntegerField(required=False) + # Internal database ID, if any. Generally should not be used; prefer 'value' which is unique within each taxonomy. + # Free text taxonomies never have '_id' for their tags. + _id = serializers.IntegerField(allow_null=True) - sub_tags_link = serializers.SerializerMethodField() - children_count = serializers.SerializerMethodField() + sub_tags_url = serializers.SerializerMethodField() - class Meta: - model = Tag - fields = ( - "id", - "value", - "taxonomy_id", - "parent_id", - "sub_tags_link", - "children_count", - ) - - def get_sub_tags_link(self, obj): + def get_sub_tags_url(self, obj: TagData): """ Returns URL for the list of child tags of the current tag. """ - if obj.children.count(): - query_params = f"?parent_tag_id={obj.id}" + if obj["child_count"] > 0 and "taxonomy_id" in self.context: + query_params = f"?parent_tag={obj['value']}" + request = self.context["request"] + url_namespace = request.resolver_match.namespace # get the namespace, usually "oel_tagging" url = ( - reverse("oel_tagging:taxonomy-tags", args=[str(obj.taxonomy_id)]) + reverse(f"{url_namespace}:taxonomy-tags", args=[str(self.context["taxonomy_id"])]) + query_params ) - request = self.context.get("request") return request.build_absolute_uri(url) return None - def get_children_count(self, obj): - """ - Returns the number of child tags of the given tag. - """ - return obj.children.count() + def update(self, instance, validated_data): + raise RuntimeError('`update()` is not supported by the TagData serializer.') - -class TagsWithSubTagsSerializer(serializers.ModelSerializer): - """ - Serializer for Tags. - - Represents a tree with a list of sub tags - """ - - sub_tags = serializers.SerializerMethodField() - children_count = serializers.SerializerMethodField() - - class Meta: - model = Tag - fields = ( - "id", - "value", - "taxonomy_id", - "sub_tags", - "children_count", - ) - - def get_sub_tags(self, obj): - """ - Returns a serialized list of child tags for the given tag. - """ - serializer = TagsWithSubTagsSerializer( - obj.children.all().order_by("value", "id"), - many=True, - read_only=True, - ) - return serializer.data - - def get_children_count(self, obj): - """ - Returns the number of child tags of the given tag. - """ - return obj.children.count() - - -class TagsForSearchSerializer(TagsWithSubTagsSerializer): - """ - Serializer for Tags - - Used to filter sub tags of a given tag - """ - - def get_sub_tags(self, obj): - """ - Returns a serialized list of child tags for the given tag. - """ - serializer = TagsWithSubTagsSerializer(obj.sub_tags, many=True, read_only=True) - return serializer.data - - def get_children_count(self, obj): - """ - Returns the number of child tags of the given tag. - """ - return len(obj.sub_tags) + def create(self, validated_data): + raise RuntimeError('`create()` is not supported by the TagData serializer.') diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py index 743f5bcd..6472ee46 100644 --- a/openedx_tagging/core/tagging/rest_api/v1/views.py +++ b/openedx_tagging/core/tagging/rest_api/v1/views.py @@ -14,30 +14,20 @@ from openedx_tagging.core.tagging.models.base import Tag -from ...api import ( - create_taxonomy, - get_children_tags, - get_object_tags, - get_root_tags, - get_taxonomies, - get_taxonomy, - search_tags, - tag_object, -) +from ...api import create_taxonomy, get_object_tags, get_taxonomies, get_taxonomy, tag_object +from ...data import TagDataQuerySet from ...import_export.api import export_tags from ...import_export.parsers import ParserFormat from ...models import Taxonomy from ...rules import ObjectTagPermissionItem -from ..paginators import SEARCH_TAGS_THRESHOLD, TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination +from ..paginators import TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination from .permissions import ObjectTagObjectPermissions, TagListPermissions, TaxonomyObjectPermissions from .serializers import ( ObjectTagListQueryParamsSerializer, ObjectTagSerializer, ObjectTagUpdateBodySerializer, ObjectTagUpdateQueryParamsSerializer, - TagsForSearchSerializer, - TagsSerializer, - TagsWithSubTagsSerializer, + TagDataSerializer, TaxonomyExportQueryParamsSerializer, TaxonomyListQueryParamsSerializer, TaxonomySerializer, @@ -408,15 +398,26 @@ class TaxonomyTagsView(ListAPIView): """ View to list tags of a taxonomy. + If you specify ?root_only or ?parent_tag_value=..., only one "level" of the + hierachy will be returned. Otherwise, several levels will be returned, in + tree order, up to the maximum supported depth. Additional levels/depth can + be retrieved by using ?parent_tag_value to load more data. + + Note: If the taxonomy is particularly large (> 1,000 tags), ?root_only is + automatically set true by default and cannot be disabled. This way, users + can more easily select which tags they want to expand in the tree, and load + just that subset of the tree as needed. This may be changed in the future. + **List Query Parameters** - * pk (required) - The pk of the taxonomy to retrieve tags. - * parent_tag_id (optional) - Id of the tag to retrieve children tags. + * id (required) - The ID of the taxonomy to retrieve tags. + * parent_tag (optional) - Retrieve children of the tag with this value. + * root_only (optional) - If specified, only root tags are returned. * page (optional) - Page number (default: 1) * page_size (optional) - Number of items per page (default: 10) **List Example Requests** - GET api/tagging/v1/taxonomy/:pk/tags - Get tags of taxonomy - GET api/tagging/v1/taxonomy/:pk/tags?parent_tag_id=30 - Get children tags of tag + GET api/tagging/v1/taxonomy/:id/tags - Get tags of taxonomy + GET api/tagging/v1/taxonomy/:id/tags?parent_tag=Physics - Get child tags of tag **List Query Returns** * 200 - Success @@ -426,22 +427,8 @@ class TaxonomyTagsView(ListAPIView): """ permission_classes = [TagListPermissions] - pagination_enabled = True - - def __init__(self): - # Initialized here to avoid errors on type hints - self.serializer_class = TagsSerializer - - def get_pagination_class(self): - """ - Get the corresponding class depending if the pagination is enabled. - - It is necessary to call this function before returning the data. - """ - if self.pagination_enabled: - return TagsPagination - else: - return DisabledTagsPagination + pagination_class = TagsPagination + serializer_class = TagDataSerializer def get_taxonomy(self, pk: int) -> Taxonomy: """ @@ -453,130 +440,44 @@ def get_taxonomy(self, pk: int) -> Taxonomy: self.check_object_permissions(self.request, taxonomy) return taxonomy - def _build_search_tree(self, tags: list[Tag]) -> list[Tag]: - """ - Builds a tree with the result tags for a search. - - The retult is a pruned tree that contains - the path from root tags to tags that match the search. - """ - tag_ids = [tag.id for tag in tags] - - # Get missing parents. - # Not all parents are in the search result. - # This occurs when a child tag is on the search result, but its parent not, - # we need to add the parent to show the tree from the root to the child. - for tag in tags: - if tag.parent and tag.parent_id and tag.parent_id not in tag_ids: - tag_ids.append(tag.parent_id) - tags.append(tag.parent) # Our loop will iterate over this new parent tag too. - - groups: dict[int, list[Tag]] = {} - roots: list[Tag] = [] - - # Group tags by parent - for tag in tags: - if tag.parent_id is not None: - if tag.parent_id not in groups: - groups[tag.parent_id] = [] - groups[tag.parent_id].append(tag) - else: - roots.append(tag) + def get_serializer_context(self): + context = super().get_serializer_context() + context.update({ + "request": self.request, + "taxonomy_id": int(self.kwargs["pk"]), + }) + return context - for tag in tags: - # Used to serialize searched childrens - tag.sub_tags = groups.get(tag.id, []) # type: ignore[attr-defined] - - return roots - - def get_matching_tags( - self, - taxonomy_id: int, - parent_tag_id: str | None = None, - search_term: str | None = None, - ) -> list[Tag]: + def get_queryset(self) -> TagDataQuerySet: """ - Returns a list of tags for the given taxonomy. - - The pagination can be enabled or disabled depending of the taxonomy size. - You can read the desicion '0014_*' to more info about this logic. - Also, determines the serializer to be used. - - Use `parent_tag_id` to get the children of the given tag. - - Use `search_term` to filter tags values that contains the given term. + Builds and returns the queryset to be paginated. """ + taxonomy_id = int(self.kwargs.get("pk")) taxonomy = self.get_taxonomy(taxonomy_id) - if parent_tag_id: - # Get children of a tag. - - # If you need to get the children, then the roots are - # paginated, so we need to paginate the childrens too. - self.pagination_enabled = True - - # Normal serializer, with children link. - self.serializer_class = TagsSerializer - return get_children_tags( - taxonomy, - int(parent_tag_id), - search_term=search_term, - ) + parent_tag_value = self.request.query_params.get("parent_tag", None) + root_only = "root_only" in self.request.query_params + search_term = self.request.query_params.get("search_term", None) + + if parent_tag_value: + # Fetching tags below a certain parent is always paginated and only returns the direct children + depth = 1 + if root_only: + raise ValidationError("?root_only and ?parent_tag cannot be used together") else: - if search_term: - # Search tags - result = search_tags( - taxonomy, - search_term, - ) - # Checks the result size to determine whether - # to turn pagination on or off. - self.pagination_enabled = len(result) > SEARCH_TAGS_THRESHOLD - - # Use the special serializer to only show the tree - # of the search result. - self.serializer_class = TagsForSearchSerializer - - result = self._build_search_tree(result) + if root_only: + depth = 1 # User Explicitly requested to load only the root tags for now + elif search_term: + depth = None # For search, default to maximum depth but use normal pagination + elif taxonomy.tag_set.count() > TAGS_THRESHOLD: + # This is a very large taxonomy. Only load the root tags at first, so users can choose what to load. + depth = 1 else: - # Get root tags of taxonomy - - # Checks the taxonomy size to determine whether - # to turn pagination on or off. - self.pagination_enabled = taxonomy.tag_set.count() > TAGS_THRESHOLD - - if self.pagination_enabled: - # If pagination is enabled, use the normal serializer - # with children link. - self.serializer_class = TagsSerializer - else: - # If pagination is disabled, use the special serializer - # to show children. In this case, we return all taxonomy tags - # in a tree structure. - self.serializer_class = TagsWithSubTagsSerializer - - result = get_root_tags(taxonomy) - - return result + # We can load and display all the tags in the taxonomy at once: + self.pagination_class = DisabledTagsPagination + depth = None # Maximum depth - def get_queryset(self) -> list[Tag]: # type: ignore[override] - """ - Builds and returns the queryset to be paginated. - - The return type is not a QuerySet because the tagging python api functions - return lists, and on this point convert the list to a query set - is an unnecesary operation. - """ - pk = self.kwargs.get("pk") - parent_tag_id = self.request.query_params.get("parent_tag_id", None) - search_term = self.request.query_params.get("search_term", None) - - result = self.get_matching_tags( - pk, - parent_tag_id=parent_tag_id, + return taxonomy.get_filtered_tags( + parent_tag_value=parent_tag_value, search_term=search_term, + depth=depth, ) - - # This function is not called automatically - self.pagination_class = self.get_pagination_class() - - return result diff --git a/tests/openedx_tagging/core/tagging/import_export/test_template.py b/tests/openedx_tagging/core/tagging/import_export/test_template.py index c25c4b28..dc987c1c 100644 --- a/tests/openedx_tagging/core/tagging/import_export/test_template.py +++ b/tests/openedx_tagging/core/tagging/import_export/test_template.py @@ -12,8 +12,8 @@ from openedx_tagging.core.tagging.import_export import ParserFormat from openedx_tagging.core.tagging.import_export import api as import_api -from .mixins import TestImportExportMixin from ..utils import pretty_format_tags +from .mixins import TestImportExportMixin @ddt.ddt diff --git a/tests/openedx_tagging/core/tagging/test_api.py b/tests/openedx_tagging/core/tagging/test_api.py index b488c81c..6cd9a3a9 100644 --- a/tests/openedx_tagging/core/tagging/test_api.py +++ b/tests/openedx_tagging/core/tagging/test_api.py @@ -7,10 +7,10 @@ import ddt # type: ignore[import] import pytest -from django.db.models import QuerySet from django.test import TestCase, override_settings import openedx_tagging.core.tagging.api as tagging_api +from openedx_tagging.core.tagging.data import TagDataQuerySet from openedx_tagging.core.tagging.models import ObjectTag, Taxonomy from .test_models import TestTagTaxonomyMixin, get_tag @@ -250,7 +250,7 @@ def test_get_children_tags_no_children(self) -> None: """ Trying to get children of a system tag that has no children yields an empty result: """ - assert list(tagging_api.get_children_tags(self.system_taxonomy, self.system_taxonomy_tag.value)) == [] + assert not list(tagging_api.get_children_tags(self.system_taxonomy, self.system_taxonomy_tag.value)) def test_resync_object_tags(self) -> None: self.taxonomy.allow_multiple = True @@ -572,9 +572,7 @@ def test_get_object_tags(self) -> None: # Fetch all the tags for a given object ID assert list( - tagging_api.get_object_tags( - object_id="abc", - ) + tagging_api.get_object_tags(object_id="abc") ) == [ alpha, beta, @@ -582,84 +580,11 @@ def test_get_object_tags(self) -> None: # Fetch all the tags for a given object ID + taxonomy assert list( - tagging_api.get_object_tags( - object_id="abc", - taxonomy_id=self.taxonomy.pk, - ) + tagging_api.get_object_tags(object_id="abc", taxonomy_id=self.taxonomy.pk) ) == [ beta, ] - @ddt.data( - ("ChA", ["Archaea", "Archaebacteria"], [2, 5]), - ("ar", ['Archaea', 'Archaebacteria', 'Arthropoda'], [2, 5, 14]), - ("aE", ['Archaea', 'Archaebacteria', 'Plantae'], [2, 5, 10]), - ( - "a", - [ - 'Animalia', - 'Archaea', - 'Archaebacteria', - 'Arthropoda', - 'Gastrotrich', - 'Monera', - 'Placozoa', - 'Plantae', - ], - [9, 2, 5, 14, 16, 13, 19, 10], - ), - ) - @ddt.unpack - def test_autocomplete_tags(self, search: str, expected_values: list[str], expected_ids: list[int | None]) -> None: - tags = [ - 'Archaea', - 'Archaebacteria', - 'Animalia', - 'Arthropoda', - 'Plantae', - 'Monera', - 'Gastrotrich', - 'Placozoa', - ] + expected_values # To create repeats - closed_taxonomy = self.taxonomy - open_taxonomy = tagging_api.create_taxonomy( - "Free_Text_Taxonomy", - allow_free_text=True, - ) - - for index, value in enumerate(tags): - # Creating ObjectTags for open taxonomy - ObjectTag( - object_id=f"object_id_{index}", - taxonomy=open_taxonomy, - _value=value, - ).save() - - # Creating ObjectTags for closed taxonomy - tag = get_tag(value) - ObjectTag( - object_id=f"object_id_{index}", - taxonomy=closed_taxonomy, - tag=tag, - _value=value, - ).save() - - # Test for open taxonomy - # self._validate_autocomplete_tags( - # open_taxonomy, - # search, - # expected_values, - # [None] * len(expected_ids), - # ) - - # # Test for closed taxonomy - # self._validate_autocomplete_tags( - # closed_taxonomy, - # search, - # expected_values, - # expected_ids, - # ) - @ddt.data( ("ChA", [ "Archaea (used: 1, children: 3)", @@ -741,11 +666,11 @@ def test_autocomplete_tags_closed_omit_object(self) -> None: " Euryarchaeida (used: 0, children: 0)", " Proteoarchaeota (used: 0, children: 0)", # These results are no longer included because of exclude_object_id: - #"Bacteria (used: 0, children: 2)", # does not contain "cha" but a child does - #" Archaebacteria (used: 1, children: 0)", + # "Bacteria (used: 0, children: 2)", # does not contain "cha" but a child does + # " Archaebacteria (used: 1, children: 0)", ] - def _get_tag_values(self, tags: QuerySet[tagging_api.TagData]) -> list[str]: + def _get_tag_values(self, tags: TagDataQuerySet) -> list[str]: """ Get tag values from tagging_api.autocomplete_tags() result """ @@ -756,37 +681,3 @@ def _get_tag_ids(self, tags) -> list[int]: Get tag ids from tagging_api.autocomplete_tags() result """ return [tag.get("tag_id") for tag in tags] - - def _validate_autocomplete_tags( - self, - taxonomy: Taxonomy, - search: str, - expected: list[str], - ) -> None: - """ - Validate autocomplete tags - """ - - # Normal search - result = tagging_api.search_tags(taxonomy, search) - - assert pretty_format_tags(result, parent=False) == expected - - # Create ObjectTag to simulate the content tagging - first_value = next(t for t in result if search.lower() in t["value"].lower()) - tag_model = None - if not taxonomy.allow_free_text: - tag_model = get_tag(first_value) - - object_id = 'new_object_id' - ObjectTag( - object_id=object_id, - taxonomy=taxonomy, - tag=tag_model, - _value=first_value, - ).save() - - # Search with object - result = tagging_api.search_tags(taxonomy, search, object_id) - assert self._get_tag_values(result) == expected_values[1:] - assert self._get_tag_ids(result) == expected_ids[1:] diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py index a47ec5f1..6bea627c 100644 --- a/tests/openedx_tagging/core/tagging/test_models.py +++ b/tests/openedx_tagging/core/tagging/test_models.py @@ -13,6 +13,7 @@ from openedx_tagging.core.tagging import api from openedx_tagging.core.tagging.models import LanguageTaxonomy, ObjectTag, Tag, Taxonomy + from .utils import pretty_format_tags @@ -210,7 +211,6 @@ def test_get_lineage(self, tag_attr, lineage): assert getattr(self, tag_attr).get_lineage() == lineage - @ddt.ddt class TestFilteredTagsClosedTaxonomy(TestTagTaxonomyMixin, TestCase): """ @@ -443,6 +443,35 @@ def test_usage_count(self) -> None: "Bacteria (None) (used: 3, children: 2)", ] + def test_pathological_tree_sort(self) -> None: + """ + Check for bugs in how tree sorting happens, if the tag names are very + similar. + """ + # pylint: disable=unused-variable + taxonomy = api.create_taxonomy("Sort Test") + root1 = Tag.objects.create(taxonomy=taxonomy, value="1") + child1_1 = Tag.objects.create(taxonomy=taxonomy, value="11", parent=root1) + child1_2 = Tag.objects.create(taxonomy=taxonomy, value="2", parent=root1) + child1_3 = Tag.objects.create(taxonomy=taxonomy, value="1 A", parent=root1) + child1_4 = Tag.objects.create(taxonomy=taxonomy, value="11111", parent=root1) + grandchild1_4_1 = Tag.objects.create(taxonomy=taxonomy, value="1111-grandchild", parent=child1_4) + root2 = Tag.objects.create(taxonomy=taxonomy, value="111") + child2_1 = Tag.objects.create(taxonomy=taxonomy, value="11111111", parent=root2) + child2_2 = Tag.objects.create(taxonomy=taxonomy, value="123", parent=root2) + result = pretty_format_tags(taxonomy.get_filtered_tags()) + assert result == [ + "1 (None) (used: 0, children: 4)", + " 1 A (1) (used: 0, children: 0)", + " 11 (1) (used: 0, children: 0)", + " 11111 (1) (used: 0, children: 1)", + " 1111-grandchild (11111) (used: 0, children: 0)", + " 2 (1) (used: 0, children: 0)", + "111 (None) (used: 0, children: 2)", + " 11111111 (111) (used: 0, children: 0)", + " 123 (111) (used: 0, children: 0)", + ] + class TestFilteredTagsFreeTextTaxonomy(TestCase): """ diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py index d97296ba..b8d471f2 100644 --- a/tests/openedx_tagging/core/tagging/test_views.py +++ b/tests/openedx_tagging/core/tagging/test_views.py @@ -3,7 +3,7 @@ """ from __future__ import annotations -from urllib.parse import parse_qs, urlparse +from urllib.parse import parse_qs, quote, quote_plus, urlparse import ddt # type: ignore[import] # typing support in rules depends on https://github.com/dfunckt/django-rules/pull/177 @@ -19,6 +19,8 @@ from openedx_tagging.core.tagging.rest_api.paginators import TagsPagination from openedx_tagging.core.tagging.rules import can_change_object_tag_objectid, can_view_object_tag_objectid +from .utils import pretty_format_tags + User = get_user_model() TAXONOMY_LIST_URL = "/tagging/rest_api/v1/taxonomies/" @@ -958,9 +960,12 @@ def test_not_authorized_user(self): assert response.status_code == status.HTTP_403_FORBIDDEN - def test_small_taxonomy(self): + def test_small_taxonomy_root(self): + """ + Test explicitly requesting only the root tags of a small taxonomy. + """ self.client.force_authenticate(user=self.staff) - response = self.client.get(self.small_taxonomy_url) + response = self.client.get(self.small_taxonomy_url + "?root_only") assert response.status_code == status.HTTP_200_OK data = response.data @@ -970,13 +975,20 @@ def test_small_taxonomy(self): root_count = self.small_taxonomy.tag_set.filter(parent=None).count() assert len(results) == root_count - # Checking tag fields - root_tag = self.small_taxonomy.tag_set.get(id=results[0].get("id")) - root_children_count = root_tag.children.count() + # Checking tag fields on the first tag returned: + root_tag = self.small_taxonomy.tag_set.get(id=results[0].get("_id")) assert results[0].get("value") == root_tag.value - assert results[0].get("taxonomy_id") == self.small_taxonomy.id - assert results[0].get("children_count") == root_children_count - assert len(results[0].get("sub_tags")) == root_children_count + assert results[0].get("child_count") == root_tag.children.count() + assert results[0].get("depth") == 0 # root tags always have depth 0 + assert results[0].get("parent_value") is None + assert results[0].get("usage_count") == 0 + + # Check that we can load sub-tags of that tag: + sub_tags_response = self.client.get(results[0]["sub_tags_url"]) + assert sub_tags_response.status_code == status.HTTP_200_OK + sub_tags_result = sub_tags_response.data["results"] + assert len(sub_tags_result) == root_tag.children.count() + assert set(t["value"] for t in sub_tags_result) == set(t.value for t in root_tag.children.all()) # Checking pagination values assert data.get("next") is None @@ -985,7 +997,86 @@ def test_small_taxonomy(self): assert data.get("num_pages") == 1 assert data.get("current_page") == 1 + def test_small_taxonomy(self): + """ + Test loading all the tags of a small taxonomy at once. + """ + self.client.force_authenticate(user=self.staff) + response = self.client.get(self.small_taxonomy_url) + assert response.status_code == status.HTTP_200_OK + + data = response.data + results = data.get("results", []) + assert pretty_format_tags(results) == [ + "Archaea (None) (used: 0, children: 3)", + " DPANN (Archaea) (used: 0, children: 0)", + " Euryarchaeida (Archaea) (used: 0, children: 0)", + " Proteoarchaeota (Archaea) (used: 0, children: 0)", + "Bacteria (None) (used: 0, children: 2)", + " Archaebacteria (Bacteria) (used: 0, children: 0)", + " Eubacteria (Bacteria) (used: 0, children: 0)", + "Eukaryota (None) (used: 0, children: 5)", + " Animalia (Eukaryota) (used: 0, children: 7)", + " Arthropoda (Animalia) (used: 0, children: 0)", + " Chordata (Animalia) (used: 0, children: 1)", + " Cnidaria (Animalia) (used: 0, children: 0)", + " Ctenophora (Animalia) (used: 0, children: 0)", + " Gastrotrich (Animalia) (used: 0, children: 0)", + " Placozoa (Animalia) (used: 0, children: 0)", + " Porifera (Animalia) (used: 0, children: 0)", + " Fungi (Eukaryota) (used: 0, children: 0)", + " Monera (Eukaryota) (used: 0, children: 0)", + " Plantae (Eukaryota) (used: 0, children: 0)", + " Protista (Eukaryota) (used: 0, children: 0)", + ] + + # Checking pagination values + assert data.get("next") is None + assert data.get("previous") is None + assert data.get("count") == len(results) + assert data.get("num_pages") == 1 + assert data.get("current_page") == 1 + + def test_small_taxonomy_paged(self): + """ + Test loading only the first few of the tags of a small taxonomy. + """ + self.client.force_authenticate(user=self.staff) + response = self.client.get(self.small_taxonomy_url + "?page_size=5") + assert response.status_code == status.HTTP_200_OK + data = response.data + assert pretty_format_tags(data["results"]) == [ + "Archaea (None) (used: 0, children: 3)", + " DPANN (Archaea) (used: 0, children: 0)", + " Euryarchaeida (Archaea) (used: 0, children: 0)", + " Proteoarchaeota (Archaea) (used: 0, children: 0)", + "Bacteria (None) (used: 0, children: 2)", + ] + + # Checking pagination values + assert data.get("next") is not None + assert data.get("previous") is None + assert data.get("count") == 20 + assert data.get("num_pages") == 4 + assert data.get("current_page") == 1 + + # Get the next page: + next_response = self.client.get(data.get("next")) + assert next_response.status_code == status.HTTP_200_OK + next_data = next_response.data + assert pretty_format_tags(next_data["results"]) == [ + " Archaebacteria (Bacteria) (used: 0, children: 0)", + " Eubacteria (Bacteria) (used: 0, children: 0)", + "Eukaryota (None) (used: 0, children: 5)", + " Animalia (Eukaryota) (used: 0, children: 7)", + " Arthropoda (Animalia) (used: 0, children: 0)", + ] + assert next_data.get("current_page") == 2 + def test_small_search(self): + """ + Test performing a search + """ search_term = 'eU' url = f"{self.small_taxonomy_url}?search_term={search_term}" self.client.force_authenticate(user=self.staff) @@ -993,39 +1084,59 @@ def test_small_search(self): assert response.status_code == status.HTTP_200_OK data = response.data - results = data.get("results", []) - - assert len(results) == 3 + assert pretty_format_tags(data["results"], parent=False, usage_count=False) == [ + "Archaea (children: 3)", # No match in this tag, but a child matches so it's included + " Euryarchaeida (children: 0)", + "Bacteria (children: 2)", # No match in this tag, but a child matches so it's included + " Eubacteria (children: 0)", + "Eukaryota (children: 5)", + ] # Checking pagination values assert data.get("next") is None assert data.get("previous") is None - assert data.get("count") == 3 + assert data.get("count") == 5 assert data.get("num_pages") == 1 assert data.get("current_page") == 1 def test_large_taxonomy(self): + """ + Test listing the tags in a large taxonomy (~7,000 tags). + """ self._build_large_taxonomy() self.client.force_authenticate(user=self.staff) response = self.client.get(self.large_taxonomy_url) assert response.status_code == status.HTTP_200_OK data = response.data - results = data.get("results", []) + results = data["results"] + + # Even though we didn't specify root_only, only the root tags will have + # been returned, because of the taxonomy's size. + assert pretty_format_tags(results) == [ + "Tag 0 (None) (used: 0, children: 12)", + "Tag 1099 (None) (used: 0, children: 12)", + "Tag 1256 (None) (used: 0, children: 12)", + "Tag 1413 (None) (used: 0, children: 12)", + "Tag 157 (None) (used: 0, children: 12)", + "Tag 1570 (None) (used: 0, children: 12)", + "Tag 1727 (None) (used: 0, children: 12)", + "Tag 1884 (None) (used: 0, children: 12)", + "Tag 2041 (None) (used: 0, children: 12)", + "Tag 2198 (None) (used: 0, children: 12)", + # ... there are 41 more root tags but they're excluded from this first result page. + ] # Count of paginated root tags assert len(results) == self.page_size - # Checking tag fields - root_tag = self.large_taxonomy.tag_set.get(id=results[0].get("id")) - assert results[0].get("value") == root_tag.value - assert results[0].get("taxonomy_id") == self.large_taxonomy.id - assert results[0].get("parent_id") == root_tag.parent_id - assert results[0].get("children_count") == root_tag.children.count() - assert results[0].get("sub_tags_link") == ( + # Checking some other tag fields not covered by the pretty-formatted string above: + root_tag = self.large_taxonomy.tag_set.get(value=results[0].get("value")) + assert results[0].get("_id") == root_tag.id + assert results[0].get("sub_tags_url") == ( "http://testserver/tagging/" f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?parent_tag_id={root_tag.id}" + f"/tags/?parent_tag={quote(results[0]['value'])}" ) # Checking pagination values @@ -1065,62 +1176,48 @@ def test_next_page_large_taxonomy(self): assert data.get("current_page") == 2 def test_large_search(self): + """ + Test searching in a large taxonomy + """ self._build_large_taxonomy() - search_term = '1' + search_term = '11' url = f"{self.large_taxonomy_url}?search_term={search_term}" self.client.force_authenticate(user=self.staff) response = self.client.get(url) assert response.status_code == status.HTTP_200_OK data = response.data - results = data.get("results", []) - - # Count of paginated root tags - assert len(results) == self.page_size - - # Checking pagination values - assert data.get("next") == ( - "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?page=2&search_term={search_term}" - ) - assert data.get("previous") is None - assert data.get("count") == 51 - assert data.get("num_pages") == 6 + results = data["results"] + assert pretty_format_tags(results, usage_count=None) == [ + "Tag 0 (None) (children: 12)", # First 3 results don't match but have children that match + " Tag 1 (Tag 0) (children: 12)", + " Tag 11 (Tag 1) (children: 0)", + " Tag 105 (Tag 0) (children: 12)", + " Tag 110 (Tag 105) (children: 0)", + " Tag 111 (Tag 105) (children: 0)", + " Tag 112 (Tag 105) (children: 0)", + " Tag 113 (Tag 105) (children: 0)", + " Tag 114 (Tag 105) (children: 0)", + " Tag 115 (Tag 105) (children: 0)", + ] + assert data.get("count") == 362 + assert data.get("num_pages") == 37 assert data.get("current_page") == 1 - - def test_next_large_search(self): - self._build_large_taxonomy() - search_term = '1' - url = f"{self.large_taxonomy_url}?search_term={search_term}" - - # Get first page of the search - self.client.force_authenticate(user=self.staff) - response = self.client.get(url) - - # Get next page - response = self.client.get(response.data.get("next")) - - data = response.data - results = data.get("results", []) - - # Count of paginated root tags - assert len(results) == self.page_size - - # Checking pagination values - assert data.get("next") == ( - "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?page=3&search_term={search_term}" - ) - assert data.get("previous") == ( - "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?search_term={search_term}" - ) - assert data.get("count") == 51 - assert data.get("num_pages") == 6 - assert data.get("current_page") == 2 + # Get the next page: + next_response = self.client.get(response.data.get("next")) + assert next_response.status_code == status.HTTP_200_OK + assert pretty_format_tags(next_response.data["results"], usage_count=None) == [ + " Tag 116 (Tag 105) (children: 0)", + " Tag 117 (Tag 105) (children: 0)", + " Tag 118 (Tag 0) (children: 12)", + " Tag 119 (Tag 118) (children: 0)", + "Tag 1099 (None) (children: 12)", + " Tag 1100 (Tag 1099) (children: 12)", + " Tag 1101 (Tag 1100) (children: 0)", + " Tag 1102 (Tag 1100) (children: 0)", + " Tag 1103 (Tag 1100) (children: 0)", + " Tag 1104 (Tag 1100) (children: 0)", + ] def test_get_children(self): self._build_large_taxonomy() @@ -1131,7 +1228,7 @@ def test_get_children(self): results = response.data.get("results", []) # Get children tags - response = self.client.get(results[0].get("sub_tags_link")) + response = self.client.get(results[0].get("sub_tags_url")) assert response.status_code == status.HTTP_200_OK data = response.data @@ -1141,22 +1238,21 @@ def test_get_children(self): assert len(results) == self.page_size # Checking tag fields - tag = self.large_taxonomy.tag_set.get(id=results[0].get("id")) + tag = self.large_taxonomy.tag_set.get(id=results[0].get("_id")) assert results[0].get("value") == tag.value - assert results[0].get("taxonomy_id") == self.large_taxonomy.id - assert results[0].get("parent_id") == tag.parent_id - assert results[0].get("children_count") == tag.children.count() - assert results[0].get("sub_tags_link") == ( + assert results[0].get("parent_value") == tag.parent.value + assert results[0].get("child_count") == tag.children.count() + assert results[0].get("sub_tags_url") == ( "http://testserver/tagging/" f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?parent_tag_id={tag.id}" + f"/tags/?parent_tag={quote(tag.value)}" ) # Checking pagination values assert data.get("next") == ( "http://testserver/tagging/" f"rest_api/v1/taxonomies/{self.large_taxonomy.id}" - f"/tags/?page=2&parent_tag_id={tag.parent_id}" + f"/tags/?page=2&parent_tag={quote_plus(tag.parent.value)}" ) assert data.get("previous") is None assert data.get("count") == self.children_tags_count[0] @@ -1169,17 +1265,21 @@ def test_get_leaves(self): parent_tag = Tag.objects.get(value="Animalia") # Build url to get tags depth=2 - url = f"{self.small_taxonomy_url}?parent_tag_id={parent_tag.id}" + url = f"{self.small_taxonomy_url}?parent_tag={parent_tag.value}" response = self.client.get(url) - results = response.data.get("results", []) - - # Checking tag fields - tag = self.small_taxonomy.tag_set.get(id=results[0].get("id")) - assert results[0].get("value") == tag.value - assert results[0].get("taxonomy_id") == self.small_taxonomy.id - assert results[0].get("parent_id") == tag.parent_id - assert results[0].get("children_count") == tag.children.count() - assert results[0].get("sub_tags_link") is None + results = response.data["results"] + + # Even though we didn't specify root_only, only the root tags will have + # been returned, because of the taxonomy's size. + assert pretty_format_tags(results) == [ + " Arthropoda (Animalia) (used: 0, children: 0)", + " Chordata (Animalia) (used: 0, children: 1)", + " Cnidaria (Animalia) (used: 0, children: 0)", + " Ctenophora (Animalia) (used: 0, children: 0)", + " Gastrotrich (Animalia) (used: 0, children: 0)", + " Placozoa (Animalia) (used: 0, children: 0)", + " Porifera (Animalia) (used: 0, children: 0)", + ] def test_next_children(self): self._build_large_taxonomy() @@ -1189,22 +1289,27 @@ def test_next_children(self): response = self.client.get(self.large_taxonomy_url) results = response.data.get("results", []) - # Get children to obtain next link - response = self.client.get(results[0].get("sub_tags_link")) + # Get the URL that gives us the children of the first root tag + first_root_tag = results[0] + response = self.client.get(first_root_tag.get("sub_tags_url")) - # Get next children + # Get next page of children response = self.client.get(response.data.get("next")) assert response.status_code == status.HTTP_200_OK data = response.data - results = data.get("results", []) - tag = self.large_taxonomy.tag_set.get(id=results[0].get("id")) + results = data["results"] + assert pretty_format_tags(results) == [ + # There are 12 child tags total, so on this second page, we see only 2 (10 were on the first page): + " Tag 79 (Tag 0) (used: 0, children: 12)", + " Tag 92 (Tag 0) (used: 0, children: 12)", + ] # Checking pagination values assert data.get("next") is None assert data.get("previous") == ( "http://testserver/tagging/" - f"rest_api/v1/taxonomies/{self.large_taxonomy.id}/tags/?parent_tag_id={tag.parent_id}" + f"rest_api/v1/taxonomies/{self.large_taxonomy.id}/tags/?parent_tag={quote_plus(first_root_tag['value'])}" ) assert data.get("count") == self.children_tags_count[0] assert data.get("num_pages") == 2 diff --git a/tests/openedx_tagging/core/tagging/utils.py b/tests/openedx_tagging/core/tagging/utils.py index fb24187d..3d6890e1 100644 --- a/tests/openedx_tagging/core/tagging/utils.py +++ b/tests/openedx_tagging/core/tagging/utils.py @@ -1,6 +1,7 @@ """ Useful utilities for testing tagging and taxonomy code. """ +from __future__ import annotations def pretty_format_tags(result, parent=True, external_id=False, usage_count=True) -> list[str]: