feat: WIP: refactor views to use new get_filtered_tags()

openedx · Oct 19, 2023 · 8b81b1b · 8b81b1b
1 parent c7c24e6
commit 8b81b1b
Show file tree

Hide file tree

Showing 5 changed files with 139 additions and 46 deletions.
diff --git a/openedx_tagging/core/tagging/models/base.py b/openedx_tagging/core/tagging/models/base.py
@@ -17,6 +17,7 @@
 
 from openedx_learning.lib.fields import MultiCollationTextField, case_insensitive_char_field, case_sensitive_char_field
 
+from .utils import ConcatNull
 from ..data import TagData
 
 log = logging.getLogger(__name__)
@@ -462,9 +463,12 @@ def _get_filtered_tags_deep(
         qs = Tag.annotate_depth(qs)
         # Add the "lineage" field to sort them in order correctly:
         qs = qs.annotate(sort_key=Concat(
-            Coalesce(F("parent__parent__parent__value"), Value("")),
-            Coalesce(F("parent__parent__value"), Value("")),
-            Coalesce(F("parent__value"), Value("")),
+            # For a root tag, we want sort_key="RootValue" and for a depth=1 tag
+            # we want sort_key="RootValue\tValue". The following does that, since
+            # ConcatNull(...) returns NULL if any argument is NULL.
+            Coalesce(ConcatNull(F("parent__parent__parent__value"), Value("\t")), Value("")),
+            Coalesce(ConcatNull(F("parent__parent__value"), Value("\t")), Value("")),
+            Coalesce(ConcatNull(F("parent__value"), Value("\t")), Value("")),
             F("value"),
             output_field=models.CharField(),
         ))

diff --git a/openedx_tagging/core/tagging/models/utils.py b/openedx_tagging/core/tagging/models/utils.py
@@ -0,0 +1,23 @@
+"""
+Utilities for tagging and taxonomy models
+"""
+
+from django.db.models.expressions import Func
+
+class ConcatNull(Func):
+    """
+    Concatenate two arguments together. Like normal SQL but unlike Django's
+    "Concat", if either argument is NULL, the result will be NULL.
+    """
+
+    function = "CONCAT"
+
+    def as_sqlite(self, compiler, connection, **extra_context):
+        """ SQLite doesn't have CONCAT() but has a concatenation operator """
+        return super().as_sql(
+            compiler,
+            connection,
+            template="%(expressions)s",
+            arg_joiner=" || ",
+            **extra_context,
+        )
diff --git a/openedx_tagging/core/tagging/rest_api/v1/views.py b/openedx_tagging/core/tagging/rest_api/v1/views.py
@@ -421,15 +421,15 @@ class TaxonomyTagsView(ListAPIView):
     just that subset of the tree as needed. This may be changed in the future.
 
     **List Query Parameters**
-        * pk (required) - The pk of the taxonomy to retrieve tags.
-        * parent_tag_value (optional) - Id of the tag to retrieve children tags.
+        * id (required) - The ID of the taxonomy to retrieve tags.
+        * parent_tag (optional) - Retrieve children of the tag with this value.
         * root_only (optional) - If specified, only root tags are returned.
         * page (optional) - Page number (default: 1)
         * page_size (optional) - Number of items per page (default: 10)
 
     **List Example Requests**
-        GET api/tagging/v1/taxonomy/:pk/tags                                        - Get tags of taxonomy
-        GET api/tagging/v1/taxonomy/:pk/tags?parent_tag_id=30                       - Get children tags of tag
+        GET api/tagging/v1/taxonomy/:id/tags                                        - Get tags of taxonomy
+        GET api/tagging/v1/taxonomy/:id/tags?parent_tag=Physics                     - Get child tags of tag
 
     **List Query Returns**
         * 200 - Success
@@ -472,13 +472,17 @@ def get_queryset(self) -> models.QuerySet[TagData]:
 
         if parent_tag_value:
             # Fetching tags below a certain parent is always paginated and only returns the direct children
-            self.pagination_enabled = True
             depth = 1
             if root_only:
-                raise ValidationError("?root_only and ?parent_tag_value cannot be used together")
+                raise ValidationError("?root_only and ?parent_tag cannot be used together")
         else:
-            if root_only or taxonomy.tag_set.count() > TAGS_THRESHOLD:
-                depth = 1  # Load only the root tags for now
+            if root_only:
+                depth = 1  # User Explicitly requested to load only the root tags for now
+            elif search_term:
+                depth = None  # For search, default to maximum depth but use normal pagination
+            elif taxonomy.tag_set.count() > TAGS_THRESHOLD:
+                # This is a very large taxonomy. Only load the root tags at first, so users can choose what to load.
+                depth = 1
             else:
                 # We can load and display all the tags in the taxonomy at once:
                 self.pagination_class = DisabledTagsPagination

diff --git a/tests/openedx_tagging/core/tagging/test_models.py b/tests/openedx_tagging/core/tagging/test_models.py
@@ -443,6 +443,33 @@ def test_usage_count(self) -> None:
             "Bacteria (None) (used: 3, children: 2)",
         ]
 
+    def test_pathological_tree_sort(self) -> None:
+        """
+        Check for bugs in how tree sorting happens, if the tag names are very
+        similar.
+        """
+        taxonomy = api.create_taxonomy("Sort Test")
+        root1 = Tag.objects.create(taxonomy=taxonomy, value="1")
+        child1_1 = Tag.objects.create(taxonomy=taxonomy, value="11", parent=root1)
+        child1_2 = Tag.objects.create(taxonomy=taxonomy, value="2", parent=root1)
+        child1_3 = Tag.objects.create(taxonomy=taxonomy, value="1 A", parent=root1)
+        child1_4 = Tag.objects.create(taxonomy=taxonomy, value="11111", parent=root1)
+        grandchild1_4_1 = Tag.objects.create(taxonomy=taxonomy, value="1111-grandchild", parent=child1_4)
+        root2 = Tag.objects.create(taxonomy=taxonomy, value="111")
+        child2_1 = Tag.objects.create(taxonomy=taxonomy, value="11111111", parent=root2)
+        child2_2 = Tag.objects.create(taxonomy=taxonomy, value="123", parent=root2)
+        result = pretty_format_tags(taxonomy.get_filtered_tags())
+        assert result == [
+            "1 (None) (used: 0, children: 4)",
+            "  1 A (1) (used: 0, children: 0)",
+            "  11 (1) (used: 0, children: 0)",
+            "  11111 (1) (used: 0, children: 1)",
+            "    1111-grandchild (11111) (used: 0, children: 0)",
+            "  2 (1) (used: 0, children: 0)",
+            "111 (None) (used: 0, children: 2)",
+            "  11111111 (111) (used: 0, children: 0)",
+            "  123 (111) (used: 0, children: 0)",
+        ]
 
 class TestFilteredTagsFreeTextTaxonomy(TestCase):
     """

diff --git a/tests/openedx_tagging/core/tagging/test_views.py b/tests/openedx_tagging/core/tagging/test_views.py
@@ -3,7 +3,7 @@
 """
 from __future__ import annotations
 
-from urllib.parse import parse_qs, urlparse, quote
+from urllib.parse import parse_qs, quote, quote_plus, urlparse
 
 import ddt  # type: ignore[import]
 # typing support in rules depends on https://github.com/dfunckt/django-rules/pull/177
@@ -1075,21 +1075,28 @@ def test_small_taxonomy_paged(self):
 
 
     def test_small_search(self):
+        """
+        Test performing a search
+        """
         search_term = 'eU'
         url = f"{self.small_taxonomy_url}?search_term={search_term}"
         self.client.force_authenticate(user=self.staff)
         response = self.client.get(url)
         assert response.status_code == status.HTTP_200_OK
 
         data = response.data
-        results = data.get("results", [])
-
-        assert len(results) == 3
+        assert pretty_format_tags(data["results"], parent=False, usage_count=False) == [
+            "Archaea (children: 3)",  # No match in this tag, but a child matches so it's included
+            "  Euryarchaeida (children: 0)",
+            "Bacteria (children: 2)",  # No match in this tag, but a child matches so it's included
+            "  Eubacteria (children: 0)",
+            "Eukaryota (children: 5)",
+        ]
 
         # Checking pagination values
         assert data.get("next") is None
         assert data.get("previous") is None
-        assert data.get("count") == 3
+        assert data.get("count") == 5
         assert data.get("num_pages") == 1
         assert data.get("current_page") == 1
 
@@ -1170,29 +1177,48 @@ def test_next_page_large_taxonomy(self):
         assert data.get("current_page") == 2
 
     def test_large_search(self):
+        """
+        Test searching in a large taxonomy
+        """
         self._build_large_taxonomy()
-        search_term = '1'
+        search_term = '11'
         url = f"{self.large_taxonomy_url}?search_term={search_term}"
         self.client.force_authenticate(user=self.staff)
         response = self.client.get(url)
         assert response.status_code == status.HTTP_200_OK
 
         data = response.data
-        results = data.get("results", [])
-
-        # Count of paginated root tags
-        assert len(results) == self.page_size
-
-        # Checking pagination values
-        assert data.get("next") == (
-            "http://testserver/tagging/"
-            f"rest_api/v1/taxonomies/{self.large_taxonomy.id}"
-            f"/tags/?page=2&search_term={search_term}"
-        )
-        assert data.get("previous") is None
-        assert data.get("count") == 51
-        assert data.get("num_pages") == 6
+        results = data["results"]
+        assert pretty_format_tags(results, usage_count=None) == [
+            "Tag 0 (None) (children: 12)",  # First 3 results don't match but have children that match
+            "  Tag 1 (Tag 0) (children: 12)",
+            "    Tag 11 (Tag 1) (children: 0)",
+            "  Tag 105 (Tag 0) (children: 12)",
+            "    Tag 110 (Tag 105) (children: 0)",
+            "    Tag 111 (Tag 105) (children: 0)",
+            "    Tag 112 (Tag 105) (children: 0)",
+            "    Tag 113 (Tag 105) (children: 0)",
+            "    Tag 114 (Tag 105) (children: 0)",
+            "    Tag 115 (Tag 105) (children: 0)",
+        ]
+        assert data.get("count") == 362
+        assert data.get("num_pages") == 37
         assert data.get("current_page") == 1
+        # Get the next page:
+        next_response = self.client.get(response.data.get("next"))
+        assert next_response.status_code == status.HTTP_200_OK
+        assert pretty_format_tags(next_response.data["results"], usage_count=None) == [
+            "    Tag 116 (Tag 105) (children: 0)",
+            "    Tag 117 (Tag 105) (children: 0)",
+            "  Tag 118 (Tag 0) (children: 12)",
+            "    Tag 119 (Tag 118) (children: 0)",
+            "Tag 1099 (None) (children: 12)",
+            "  Tag 1100 (Tag 1099) (children: 12)",
+            "    Tag 1101 (Tag 1100) (children: 0)",
+            "    Tag 1102 (Tag 1100) (children: 0)",
+            "    Tag 1103 (Tag 1100) (children: 0)",
+            "    Tag 1104 (Tag 1100) (children: 0)",
+        ]
 
     def test_next_large_search(self):
         self._build_large_taxonomy()
@@ -1274,17 +1300,21 @@ def test_get_leaves(self):
         parent_tag = Tag.objects.get(value="Animalia")
 
         # Build url to get tags depth=2
-        url = f"{self.small_taxonomy_url}?parent_tag_id={parent_tag.id}"
+        url = f"{self.small_taxonomy_url}?parent_tag={parent_tag.value}"
         response = self.client.get(url)
-        results = response.data.get("results", [])
+        results = response.data["results"]
 
-        # Checking tag fields
-        tag = self.small_taxonomy.tag_set.get(id=results[0].get("id"))
-        assert results[0].get("value") == tag.value
-        assert results[0].get("taxonomy_id") == self.small_taxonomy.id
-        assert results[0].get("parent_id") == tag.parent_id
-        assert results[0].get("children_count") == tag.children.count()
-        assert results[0].get("sub_tags_link") is None
+        # Even though we didn't specify root_only, only the root tags will have
+        # been returned, because of the taxonomy's size.
+        assert pretty_format_tags(results) == [
+            "    Arthropoda (Animalia) (used: 0, children: 0)",
+            "    Chordata (Animalia) (used: 0, children: 1)",
+            "    Cnidaria (Animalia) (used: 0, children: 0)",
+            "    Ctenophora (Animalia) (used: 0, children: 0)",
+            "    Gastrotrich (Animalia) (used: 0, children: 0)",
+            "    Placozoa (Animalia) (used: 0, children: 0)",
+            "    Porifera (Animalia) (used: 0, children: 0)",
+        ]
 
     def test_next_children(self):
         self._build_large_taxonomy()
@@ -1294,22 +1324,27 @@ def test_next_children(self):
         response = self.client.get(self.large_taxonomy_url)
         results = response.data.get("results", [])
 
-        # Get children to obtain next link
-        response = self.client.get(results[0].get("sub_tags_link"))
+        # Get the URL that gives us the children of the first root tag
+        first_root_tag = results[0]
+        response = self.client.get(first_root_tag.get("sub_tags_url"))
 
-        # Get next children
+        # Get next page of children
         response = self.client.get(response.data.get("next"))
         assert response.status_code == status.HTTP_200_OK
 
         data = response.data
-        results = data.get("results", [])
-        tag = self.large_taxonomy.tag_set.get(id=results[0].get("id"))
+        results = data["results"]
+        assert pretty_format_tags(results) == [
+            # There are 12 child tags total, so on this second page, we see only 2 (10 were on the first page):
+            "  Tag 79 (Tag 0) (used: 0, children: 12)",
+            "  Tag 92 (Tag 0) (used: 0, children: 12)",
+        ]
 
         # Checking pagination values
         assert data.get("next") is None
         assert data.get("previous") == (
             "http://testserver/tagging/"
-            f"rest_api/v1/taxonomies/{self.large_taxonomy.id}/tags/?parent_tag_id={tag.parent_id}"
+            f"rest_api/v1/taxonomies/{self.large_taxonomy.id}/tags/?parent_tag={quote_plus(first_root_tag['value'])}"
         )
         assert data.get("count") == self.children_tags_count[0]
         assert data.get("num_pages") == 2