Skip to content

Commit

Permalink
Address PR review, convert exclude(~Q) magic into regular filters
Browse files Browse the repository at this point in the history
  • Loading branch information
voneiden committed Aug 7, 2023
1 parent a51e1a1 commit ebd6239
Showing 1 changed file with 78 additions and 41 deletions.
119 changes: 78 additions & 41 deletions events/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from django.core.cache import cache
from django.core.exceptions import PermissionDenied
from django.db import transaction
from django.db.models import Count, F, Prefetch, Q
from django.db.models import Count, Exists, F, OuterRef, Prefetch, Q, QuerySet
from django.db.models.functions import Greatest
from django.db.utils import IntegrityError
from django.http import Http404, HttpResponsePermanentRedirect
Expand Down Expand Up @@ -2525,15 +2525,23 @@ def _find_keyword_replacements(keyword_ids: list[str]) -> tuple[list[Keyword], b
return list(set(replaced_keywords)), found_all_keywords


def _filter_keyword_or(queryset, param):
keywords, __ = _find_keyword_replacements(param.split(","))
def _filter_events_keyword_or(queryset: QuerySet, keyword_ids: list[str]) -> QuerySet:
"""
Given an Event queryset, apply OR filter on keyword ids
:param queryset:
:param keyword_ids:
:return:
"""
keywords, __ = _find_keyword_replacements(keyword_ids)
keyword_ids = [keyword.pk for keyword in keywords]

# Use negated exclude to avoid slower filter+distinct
queryset = queryset.exclude(
~(Q(keywords__pk__in=keyword_ids) | Q(audience__pk__in=keyword_ids))
kw_qs = Event.keywords.through.objects.filter(
event=OuterRef("pk"), keyword__in=keyword_ids
)
return queryset
audience_qs = Event.audience.through.objects.filter(
event=OuterRef("pk"), keyword__in=keyword_ids
)
return queryset.filter(Exists(kw_qs) | Exists(audience_qs))


def _filter_event_queryset(queryset, params, srs=None): # noqa: C901
Expand Down Expand Up @@ -2642,26 +2650,27 @@ def _filter_event_queryset(queryset, params, srs=None): # noqa: C901
vals = params.get("keyword_set_AND", None)
if vals:
vals = vals.split(",")
keyword_sets = KeywordSet.objects.filter(id__in=vals)
q = Q()
for keyword_set in keyword_sets:
keywords = keyword_set.keywords.all()
q &= Q(keywords__in=keywords)

# Use negated exclude to avoid slower filter+distinct
queryset = queryset.exclude(~q)
kw_sets = KeywordSet.objects.filter(id__in=vals).prefetch_related(
Prefetch("keywords", Keyword.objects.all().only("id"))
)
for kw_set in kw_sets:
kw_ids = [kw.id for kw in kw_set.keywords.all()]
subqs = Event.objects.filter(id=OuterRef("pk"), keywords__in=kw_ids)
queryset = queryset.filter(Exists(subqs))

vals = params.get("keyword_set_OR", None)
if vals:
vals = vals.split(",")
keyword_sets = KeywordSet.objects.filter(id__in=vals)
keyword_sets = KeywordSet.objects.filter(id__in=vals).prefetch_related(
Prefetch("keywords", Keyword.objects.all().only("id"))
)
all_keywords = set()
for keyword_set in keyword_sets:
keywords = keyword_set.keywords.all()
all_keywords.update(keywords)

# Use negated exclude to avoid slower filter+distinct
queryset = queryset.exclude(~Q(keywords__in=all_keywords))
kw_qs = Event.keywords.through.objects.filter(keyword__in=all_keywords)
queryset = queryset.filter(Exists(kw_qs))

if "local_ongoing_OR_set" in "".join(params):
count = 1
Expand Down Expand Up @@ -2724,9 +2733,18 @@ def _filter_event_queryset(queryset, params, srs=None): # noqa: C901
for i in all_sets:
val = params.get(i, None)
if val:
val = val.split(",")
# Use negated exclude to avoid slower filter+distinct
queryset = queryset.exclude(~Q(keywords__pk__in=val))
vals = val.split(",")

keyword_sets = KeywordSet.objects.filter(id__in=vals).prefetch_related(
Prefetch("keywords", Keyword.objects.all().only("id"))
)
all_keywords = set()
for keyword_set in keyword_sets:
keywords = keyword_set.keywords.all()
all_keywords.update(keywords)

kw_qs = Event.keywords.through.objects.filter(keyword__in=all_keywords)
queryset = queryset.filter(Exists(kw_qs))

val = params.get("internet_based", None)
if val and parse_bool(val, "internet_based"):
Expand Down Expand Up @@ -2762,20 +2780,20 @@ def _filter_event_queryset(queryset, params, srs=None): # noqa: C901
else ["fi", "sv", "en"]
)
tri = [TrigramSimilarity(f"name_{i}", val) for i in langs]
keywords = (
keywords = list(
Keyword.objects.annotate(simile=Greatest(*tri))
.filter(simile__gt=0.2)
.order_by("-simile")[:3]
)
if keywords:
val_q |= Q(keywords__in=keywords)

val_q |= Q(keywords__in=keywords)

combined_q &= val_q

# Use negated exclude to avoid slower filter+distinct
# In this case filter+distinct is so slow it gets a
# 504 gateway timeout in prod!
queryset = queryset.exclude(~combined_q)
# FYI: Biggest slowdown in this filter is the ordering of keywords simile
queryset = queryset.filter(
Exists(Event.objects.filter(combined_q, id=OuterRef("pk")).only("id"))
)

val = params.get("text", None)
if val:
Expand Down Expand Up @@ -2903,11 +2921,11 @@ def _filter_event_queryset(queryset, params, srs=None): # noqa: C901
queryset = queryset.filter(location_id__in=val)

if val := params.get("keyword", None):
queryset = _filter_keyword_or(queryset, val)
queryset = _filter_events_keyword_or(queryset, val.split(","))

# 'keyword_OR' behaves the same way as 'keyword'
if val := params.get("keyword_OR", None):
queryset = _filter_keyword_or(queryset, val)
queryset = _filter_events_keyword_or(queryset, val.split(","))

# Filter by keyword ids requiring all keywords to be present in event
val = params.get("keyword_AND", None)
Expand All @@ -2918,12 +2936,18 @@ def _filter_event_queryset(queryset, params, srs=None): # noqa: C901
if not found_all_keywords:
return queryset.none()

q = Q(keywords__pk=keywords[0].pk) | Q(audience__pk=keywords[0].pk)
for keyword in keywords[1:]:
q = Q()
for keyword in keywords:
q &= Q(keywords__pk=keyword.pk) | Q(audience__pk=keyword.pk)

# Use negated exclude to avoid slower filter+distinct
queryset = queryset.exclude(~q)
for keyword in keywords:
kw_qs = Event.keywords.through.objects.filter(
event=OuterRef("pk"), keyword=keyword
)
audience_qs = Event.audience.through.objects.filter(
event=OuterRef("pk"), keyword=keyword
)
queryset = queryset.filter(Exists(kw_qs) | Exists(audience_qs))

# Negative filter for keyword ids
val = params.get("keyword!", None)
Expand Down Expand Up @@ -3019,27 +3043,37 @@ def _filter_event_queryset(queryset, params, srs=None): # noqa: C901
short_desc_arg = {"short_description_" + lang + "__isnull": False}
q = (
q
| Q(in_language__id=lang)
| Exists(
Event.in_language.through.objects.filter(
event=OuterRef("pk"), language=lang
)
)
| Q(**name_arg)
| Q(**desc_arg)
| Q(**short_desc_arg)
)
else:
q = q | Q(in_language__id=lang)
q = q | Exists(
Event.in_language.through.objects.filter(
event=OuterRef("pk"), language=lang
)
)

# Use negated exclude to avoid filter+distinct
queryset = queryset.exclude(~q)
queryset = queryset.filter(q)

# Filter by in_language field only
val = params.get("in_language", None)
if val:
val = val.split(",")
q = Q()
for lang in val:
q = q | Q(in_language__id=lang)
q = q | Exists(
Event.in_language.through.objects.filter(
event=OuterRef("pk"), language=lang
)
)

# Use negated exclude to avoid filter+distinct
queryset = queryset.exclude(~q)
queryset = queryset.filter(q)

val = params.get("starts_after", None)
param = "starts_after"
Expand Down Expand Up @@ -3127,6 +3161,9 @@ def _filter_event_queryset(queryset, params, srs=None): # noqa: C901
# Include events that have at least one free offer
if val.lower() == "true":
# Use negated exclude to avoid slower filter+distinct
queryset = queryset.filter(
Exists(Offer.objects.filter(event=OuterRef("pk"), is_free=True))
)
queryset = queryset.exclude(~Q(offers__is_free=True))
# Include events that have no free offers
elif val.lower() == "false":
Expand Down

0 comments on commit ebd6239

Please sign in to comment.