feat!: Improve the "get object tags" API (an unstable REST API) (#111)

openedx · Nov 2, 2023 · a248f35 · a248f35
1 parent 51e2724
commit a248f35
Show file tree

Hide file tree

Showing 10 changed files with 646 additions and 367 deletions.
diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py
@@ -1,4 +1,4 @@
 """
 Open edX Learning ("Learning Core").
 """
-__version__ = "0.3.1"
+__version__ = "0.3.2"
diff --git a/openedx_tagging/core/tagging/api.py b/openedx_tagging/core/tagging/api.py
@@ -12,12 +12,14 @@
 """
 from __future__ import annotations
 
-from django.db import transaction
-from django.db.models import QuerySet
+from django.db import models, transaction
+from django.db.models import F, QuerySet, Value
+from django.db.models.functions import Coalesce, Concat, Lower
 from django.utils.translation import gettext as _
 
 from .data import TagDataQuerySet
 from .models import ObjectTag, Tag, Taxonomy
+from .models.utils import ConcatNull
 
 # Export this as part of the API
 TagDoesNotExist = Tag.DoesNotExist
@@ -165,8 +167,20 @@ def get_object_tags(
     filters = {"taxonomy_id": taxonomy_id} if taxonomy_id else {}
     tags = (
         object_tag_class.objects.filter(object_id=object_id, **filters)
-        .select_related("tag", "taxonomy")
-        .order_by("id")
+        # Preload related objects, including data for the "get_lineage" method on ObjectTag/Tag:
+        .select_related("taxonomy", "tag", "tag__parent", "tag__parent__parent")
+        # Sort the tags within each taxonomy in "tree order". See Taxonomy._get_filtered_tags_deep for details on this:
+        .annotate(sort_key=Lower(Concat(
+            ConcatNull(F("tag__parent__parent__parent__value"), Value("\t")),
+            ConcatNull(F("tag__parent__parent__value"), Value("\t")),
+            ConcatNull(F("tag__parent__value"), Value("\t")),
+            Coalesce(F("tag__value"), F("_value")),
+            Value("\t"),
+            output_field=models.CharField(),
+        )))
+        .annotate(taxonomy_name=Coalesce(F("taxonomy__name"), F("_name")))
+        # Sort first by taxonomy name, then by tag value in tree order:
+        .order_by("taxonomy_name", "sort_key")
     )
     return tags
 

diff --git a/openedx_tagging/core/tagging/migrations/0013_tag_parent_blank.py b/openedx_tagging/core/tagging/migrations/0013_tag_parent_blank.py
@@ -0,0 +1,19 @@
+# Generated by Django 3.2.22 on 2023-10-30 21:51
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('oel_tagging', '0012_language_taxonomy'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='tag',
+            name='parent',
+            field=models.ForeignKey(blank=True, default=None, help_text='Tag that lives one level up from the current tag, forming a hierarchy.', null=True, on_delete=django.db.models.deletion.CASCADE, related_name='children', to='oel_tagging.tag'),
+        ),
+    ]
diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py
@@ -51,6 +51,7 @@ class Tag(models.Model):
     parent = models.ForeignKey(
         "self",
         null=True,
+        blank=True,
         default=None,
         on_delete=models.CASCADE,
         related_name="children",
@@ -100,18 +101,30 @@ def get_lineage(self) -> Lineage:
         Queries and returns the lineage of the current tag as a list of Tag.value strings.
 
         The root Tag.value is first, followed by its child.value, and on down to self.value.
-
-        Performance note: may perform as many as TAXONOMY_MAX_DEPTH select queries.
         """
-        lineage: Lineage = []
-        tag: Tag | None = self
-        depth = TAXONOMY_MAX_DEPTH
-        while tag and depth > 0:
-            lineage.insert(0, tag.value)
-            tag = tag.parent
-            depth -= 1
+        lineage: Lineage = [self.value]
+        next_ancestor = self.get_next_ancestor()
+        while next_ancestor:
+            lineage.insert(0, next_ancestor.value)
+            next_ancestor = next_ancestor.get_next_ancestor()
         return lineage
 
+    def get_next_ancestor(self) -> Tag | None:
+        """
+        Fetch the parent of this Tag.
+
+        While doing so, preload several ancestors at the same time, so we can
+        use fewer database queries than the basic approach of iterating through
+        parent.parent.parent...
+        """
+        if self.parent_id is None:
+            return None
+        if not Tag.parent.is_cached(self):  # pylint: disable=no-member
+            # Parent is not yet loaded. Retrieve our parent, grandparent, and great-grandparent in one query.
+            # This is not actually changing the parent, just loading it and caching it.
+            self.parent = Tag.objects.select_related("parent", "parent__parent").get(pk=self.parent_id)
+        return self.parent
+
     @cached_property
     def depth(self) -> int:
         """
@@ -149,6 +162,20 @@ def child_count(self) -> int:
             return self.taxonomy.tag_set.filter(parent=self).count()
         return 0
 
+    def clean(self):
+        """
+        Validate this tag before saving
+        """
+        # Don't allow leading or trailing whitespace:
+        self.value = self.value.strip()
+        if self.external_id:
+            self.external_id = self.external_id.strip()
+        # Don't allow \t (tab) character at all, as we use it for lineage in database queries
+        if "\t" in self.value:
+            raise ValidationError("Tags in a taxonomy cannot contain a TAB character.")
+        if self.external_id and "\t" in self.external_id:
+            raise ValidationError("Tag external ID cannot contain a TAB character.")
+
 
 class Taxonomy(models.Model):
     """
@@ -534,6 +561,7 @@ def add_tag(
         tag = Tag.objects.create(
             taxonomy=self, value=tag_value, parent=parent, external_id=external_id
         )
+        tag.full_clean()
 
         return tag
 
@@ -802,6 +830,9 @@ def clean(self):
                 raise ValidationError("Invalid _value - empty string")
         if self.taxonomy and self.taxonomy.name != self._name:
             raise ValidationError("ObjectTag's _name is out of sync with Taxonomy.name")
+        if "," in self.object_id or "*" in self.object_id:
+            # Some APIs may use these characters to allow wildcard matches or multiple matches in the future.
+            raise ValidationError("Object ID contains invalid characters")
 
     def get_lineage(self) -> Lineage:
         """

diff --git a/openedx_tagging/core/tagging/rest_api/v1/serializers.py b/openedx_tagging/core/tagging/rest_api/v1/serializers.py
@@ -3,6 +3,8 @@
 """
 from __future__ import annotations
 
+from typing import Any
+
 from rest_framework import serializers
 from rest_framework.reverse import reverse
 
@@ -61,22 +63,61 @@ class ObjectTagListQueryParamsSerializer(serializers.Serializer):  # pylint: dis
     )
 
 
-class ObjectTagSerializer(serializers.ModelSerializer):
+class ObjectTagMinimalSerializer(serializers.ModelSerializer):
     """
-    Serializer for the ObjectTag model.
+    Minimal serializer for the ObjectTag model.
     """
 
     class Meta:
         model = ObjectTag
-        fields = [
+        fields = ["value", "lineage"]
+
+    lineage = serializers.ListField(child=serializers.CharField(), source="get_lineage", read_only=True)
+
+
+class ObjectTagSerializer(ObjectTagMinimalSerializer):
+    """
+    Serializer for the ObjectTag model.
+    """
+    class Meta:
+        model = ObjectTag
+        fields = ObjectTagMinimalSerializer.Meta.fields + [
+            # The taxonomy name
             "name",
-            "value",
             "taxonomy_id",
             # If the Tag or Taxonomy has been deleted, this ObjectTag shouldn't be shown to users.
             "is_deleted",
         ]
 
 
+class ObjectTagsByTaxonomySerializer(serializers.ModelSerializer):
+    """
+    Serialize a group of ObjectTags, grouped by taxonomy
+    """
+    def to_representation(self, instance: list[ObjectTag]) -> dict:
+        """
+        Convert this list of ObjectTags to the serialized dictionary, grouped by Taxonomy
+        """
+        by_object: dict[str, dict[str, Any]] = {}
+        for obj_tag in instance:
+            if obj_tag.object_id not in by_object:
+                by_object[obj_tag.object_id] = {
+                    "taxonomies": []
+                }
+            taxonomies = by_object[obj_tag.object_id]["taxonomies"]
+            tax_entry = next((t for t in taxonomies if t["taxonomy_id"] == obj_tag.taxonomy_id), None)
+            if tax_entry is None:
+                tax_entry = {
+                    "name": obj_tag.name,
+                    "taxonomy_id": obj_tag.taxonomy_id,
+                    "editable": (not obj_tag.taxonomy.cast().system_defined) if obj_tag.taxonomy else False,
+                    "tags": []
+                }
+                taxonomies.append(tax_entry)
+            tax_entry["tags"].append(ObjectTagMinimalSerializer(obj_tag).data)
+        return by_object
+
+
 class ObjectTagUpdateBodySerializer(serializers.Serializer):  # pylint: disable=abstract-method
     """
     Serializer of the body for the ObjectTag UPDATE view

diff --git a/openedx_tagging/core/tagging/rest_api/v1/urls.py b/openedx_tagging/core/tagging/rest_api/v1/urls.py
@@ -10,6 +10,7 @@
 router = DefaultRouter()
 router.register("taxonomies", views.TaxonomyView, basename="taxonomy")
 router.register("object_tags", views.ObjectTagView, basename="object_tag")
+router.register("object_tag_counts", views.ObjectTagCountsView, basename="object_tag_counts")
 
 urlpatterns = [
     path("", include(router.urls)),

diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py
@@ -3,6 +3,8 @@
 """
 from __future__ import annotations
 
+from typing import Any
+
 from django.db import models
 from django.http import Http404, HttpResponse
 from rest_framework import mixins, status
@@ -26,12 +28,13 @@
 from ...data import TagDataQuerySet
 from ...import_export.api import export_tags
 from ...import_export.parsers import ParserFormat
-from ...models import Taxonomy
+from ...models import ObjectTag, Taxonomy
 from ...rules import ObjectTagPermissionItem
 from ..paginators import TAGS_THRESHOLD, DisabledTagsPagination, TagsPagination
 from .permissions import ObjectTagObjectPermissions, TagObjectPermissions, TaxonomyObjectPermissions
 from .serializers import (
     ObjectTagListQueryParamsSerializer,
+    ObjectTagsByTaxonomySerializer,
     ObjectTagSerializer,
     ObjectTagUpdateBodySerializer,
     ObjectTagUpdateQueryParamsSerializer,
@@ -265,10 +268,6 @@ class ObjectTagView(
         * 400 - Invalid query parameter
         * 403 - Permission denied
 
-    **Create Query Returns**
-        * 403 - Permission denied
-        * 405 - Method not allowed
-
     **Update Parameters**
         * object_id (required): - The Object ID to add ObjectTags for.
 
@@ -306,20 +305,20 @@ def get_queryset(self) -> models.QuerySet:
             taxonomy = taxonomy.cast()
             taxonomy_id = taxonomy.id
 
-        perm = "oel_tagging.view_objecttag"
-        perm_obj = ObjectTagPermissionItem(
-            taxonomy=taxonomy,
-            object_id=object_id,
-        )
-
-        if not self.request.user.has_perm(
-            perm,
-            # The obj arg expects a model, but we are passing an object
-            perm_obj,  # type: ignore[arg-type]
-        ):
-            raise PermissionDenied(
-                "You do not have permission to view object tags for this taxonomy or object_id."
-            )
+        if object_id.endswith("*") or "," in object_id:  # pylint: disable=no-else-raise
+            raise ValidationError("Retrieving tags from multiple objects is not yet supported.")
+            # Note: This API is actually designed so that in the future it can be extended to return tags for multiple
+            # objects, e.g. if object_id.endswith("*") then it results in a object_id__startswith query. However, for
+            # now we have no use case for that so we retrieve tags for one object at a time.
+        else:
+            if not self.request.user.has_perm(
+                "oel_tagging.view_objecttag",
+                # The obj arg expects a model, but we are passing an object
+                ObjectTagPermissionItem(taxonomy=taxonomy, object_id=object_id),  # type: ignore[arg-type]
+            ):
+                raise PermissionDenied(
+                    "You do not have permission to view object tags for this taxonomy or object_id."
+                )
 
         return get_object_tags(object_id, taxonomy_id)
 
@@ -335,8 +334,13 @@ def retrieve(self, request, *args, **kwargs) -> Response:
         behavior we want.
         """
         object_tags = self.filter_queryset(self.get_queryset())
-        serializer = ObjectTagSerializer(object_tags, many=True)
-        return Response(serializer.data)
+        serializer = ObjectTagsByTaxonomySerializer(list(object_tags))
+        response_data = serializer.data
+        if self.kwargs["object_id"] not in response_data:
+            # For consistency, the key with the object_id should always be present in the response, even if there
+            # are no tags at all applied to this object.
+            response_data[self.kwargs["object_id"]] = {"taxonomies": []}
+        return Response(response_data)
 
     def update(self, request, *args, **kwargs) -> Response:
         """
@@ -405,6 +409,52 @@ def update(self, request, *args, **kwargs) -> Response:
         return self.retrieve(request, object_id)
 
 
+@view_auth_classes
+class ObjectTagCountsView(
+    mixins.RetrieveModelMixin,
+    GenericViewSet,
+):
+    """
+    View to retrieve the count of ObjectTags for all matching object IDs.
+
+    This API does NOT bother doing any permission checks as the "# of tags" is not considered sensitive information.
+
+    **Retrieve Parameters**
+        * object_id_pattern (required): - The Object ID to retrieve ObjectTags for. Can contain '*' at the end
+          for wildcard matching, or use ',' to separate multiple object IDs.
+
+    **Retrieve Example Requests**
+        GET api/tagging/v1/object_tag_counts/:object_id_pattern
+
+    **Retrieve Query Returns**
+        * 200 - Success
+    """
+
+    serializer_class = ObjectTagSerializer
+    lookup_field = "object_id_pattern"
+
+    def retrieve(self, request, *args, **kwargs) -> Response:
+        """
+        Retrieve the counts of object tags that belong to a given object_id pattern
+
+        Note: We override `retrieve` here instead of `list` because we are
+        passing in the Object ID (object_id) in the path (as opposed to passing
+        it in as a query_param) to retrieve the ObjectTag counts.
+        """
+        # This API does NOT bother doing any permission checks as the # of tags is not considered sensitive information.
+        object_id_pattern = self.kwargs["object_id_pattern"]
+        qs: Any = ObjectTag.objects
+        if object_id_pattern.endswith("*"):
+            qs = qs.filter(object_id__startswith=object_id_pattern[0:len(object_id_pattern) - 1])
+        elif "*" in object_id_pattern:
+            raise ValidationError("Wildcard matches are only supported if the * is at the end.")
+        else:
+            qs = qs.filter(object_id__in=object_id_pattern.split(","))
+
+        qs = qs.values("object_id").annotate(num_tags=models.Count("id")).order_by("object_id")
+        return Response({row["object_id"]: row["num_tags"] for row in qs})
+
+
 @view_auth_classes
 class TaxonomyTagsView(ListAPIView, RetrieveUpdateDestroyAPIView):
     """