Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete events not found in Elis in kulke importer #625

Merged
merged 6 commits into from
Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 72 additions & 38 deletions events/importer/kulke.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
from datetime import datetime, time, timedelta
from posixpath import join as urljoin
from textwrap import dedent
from typing import Iterator, Union
from typing import Iterator, Sequence, Union

import dateutil
import requests
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.db.models import Count, Q
from django_orghierarchy.models import Organization
from lxml import etree
from pytz import timezone
Expand Down Expand Up @@ -167,11 +168,6 @@ def _query_courses():
)


def _delete_courses():
    """Delete everything selected by ``_query_courses()``."""
    _query_courses().delete()


# NOTE(review): presumably keyword ids grouped by topic (sports, gyms, movies);
# confirm against the code that consumes these lists.
SPORTS = ["p965"]
GYMS = ["p8504"]
MOVIES = ["p1235"]
Expand Down Expand Up @@ -504,9 +500,8 @@ def text_content(k):
# Skip courses when importing events
return False

if self.options["single"]:
if str(eid) != self.options["single"]:
return False
if (single := self.options.get("single")) and single != str(eid):
return False

event = events[eid]
if is_course:
Expand Down Expand Up @@ -838,33 +833,31 @@ def simple(field):
else:
setattr(super_event, fieldname, value)

# The name may vary within a recurring event; hence, take the common part
if expand_model_fields(super_event, ["headline"])[0] not in common_fields:
words = first_event.headline.split(" ")
name = ""
while words and all(
headline.startswith(name + words[0])
for headline in [event.name for event in member_events]
):
name += words.pop(0) + " "
logger.warning(words)
logger.warning(name)
if name:
super_event.name = name
else:
# If a common part was not found, default to the first event's name
super_event.name = first_event.name
# The name may vary within a recurring event; hence, take the common part in each language
for lang in self.languages:
name_attr = f"name_{lang}"
first_name = getattr(first_event, name_attr)
words = first_name.split(" ") if first_name else []

for lang in self.languages.keys():
headline = getattr(super_event, "headline_{}".format(lang))
secondary_headline = getattr(
super_event, "secondary_headline_{}".format(lang)
)
setattr(
super_event,
"name_{}".format(lang),
make_event_name(headline, secondary_headline),
)
if name_attr not in common_fields:
name = ""
member_event_names = [
getattr(event, name_attr) for event in member_events
]

# Try to find the common part of the names
for word in words:
if all(
member_event_name and member_event_name.startswith(name + word)
for member_event_name in member_event_names
):
name += word + " "
else:
name = name.rstrip()
break

# If a common part was not found, default to the first event's name
setattr(super_event, name_attr, name or getattr(first_event, name_attr))

# Gather common keywords present in *all* subevents
common_keywords = functools.reduce(
Expand All @@ -882,7 +875,7 @@ def simple(field):
super_event.audience.add(k)

@transaction.atomic
def _save_recurring_superevent(self, recurring_group):
def _save_super_event(self, recurring_group):
kulke_ids = set(make_kulke_id(event) for event in recurring_group)
superevent_aggregates = EventAggregate.objects.filter(
members__event__id__in=kulke_ids
Expand Down Expand Up @@ -932,19 +925,58 @@ def _save_recurring_superevent(self, recurring_group):
# The imported event is not part of an aggregate, but one was found for it in the db.
# Remove the super event. This is the only case when an event is removed from
# a recurring aggregate.
aggregate.super_event.delete()
aggregate.super_event.soft_delete()
aggregate.super_event.sub_events.all().update(super_event=None)
return False
else:
for event in events:
EventAggregateMember.objects.get_or_create(
event=event, event_aggregate=aggregate
)
# Remove any extra event aggregate members
EventAggregateMember.objects.filter(event_aggregate=aggregate).exclude(
event__in=events
).delete()
for event in events:
event.super_event = aggregate.super_event
Event.objects.bulk_update(events, ("super_event",))

return True

def _handle_removed_events(
    self, elis_event_ids: Sequence[int], begin_date: datetime
) -> None:
    """Soft-delete events missing from the latest Elis data, then prune super events.

    Two passes are made:

    1. Non-super events of this importer's data source that start on or after
       ``begin_date`` and whose ``origin_id`` is not in ``elis_event_ids`` are
       detached from their super event and soft-deleted.
    2. Non-deleted super events of this importer's data source that are left
       with fewer than two non-deleted aggregate members are soft-deleted.

    :param elis_event_ids: Origin ids present in the latest Elis data. Any
        iterable is accepted (the caller passes a ``dict`` key view).
    :param begin_date: Only events starting at or after this moment are
        considered for deletion.
    """
    # Materialise once: the queryset below is executed twice, and the caller may
    # hand in a live dict view rather than a real sequence.
    known_origin_ids = list(elis_event_ids)

    # Find Kulke events that are not referenced in the latest data from Elis and delete them.
    unreferenced_events = Event.objects.filter(
        data_source=self.data_source,
        start_time__gte=begin_date,
        super_event_type__isnull=True,
        deleted=False,
    ).exclude(origin_id__in=known_origin_ids)
    # Detach first so a soft-deleted event no longer points at a super event.
    unreferenced_events.update(super_event=None)
    count = unreferenced_events.soft_delete()

    if count:
        logger.debug("Deleted %d events", count)

    # Find super events that no longer contain at least two events and delete them.
    # The query is limited to this importer's data source: without that limit,
    # super events owned by other data sources (which may have no aggregate
    # members at all and therefore count as 0) would be soft-deleted as well.
    count = (
        Event.objects.exclude(super_event_type__isnull=True)
        .annotate(
            aggregate_member_count=Count(
                "aggregate__members",
                filter=Q(aggregate__members__event__deleted=False),
            )
        )
        .filter(
            data_source=self.data_source,
            aggregate_member_count__lt=2,
            deleted=False,
        )
        .soft_delete()
    )
    if count:
        logger.debug(
            "Deleted %d empty super events",
            count,
        )

def import_events(self):
logger.info("Importing Kulke events")
self._import_events()
Expand Down Expand Up @@ -1007,7 +1039,9 @@ def _import_events(self, importing_courses=False):
self._verify_recurs(recurring_groups)
for group in recurring_groups.values():
if group:
self._save_recurring_superevent(group)
self._save_super_event(group)

self._handle_removed_events(events.keys(), begin_date)

def import_keywords(self):
logger.info("Importing Kulke categories as keywords")
Expand Down
5 changes: 4 additions & 1 deletion events/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,10 @@ def can_be_edited_by(self, user):


class BaseTreeQuerySet(TreeQuerySet, BaseQuerySet):
    """Queryset combining ``TreeQuerySet`` and ``BaseQuerySet``, with soft deletion."""

    def soft_delete(self):
        """Flag every row of this queryset as deleted instead of removing it.

        Returns the result of ``update()``; for a Django queryset that is the
        number of rows matched.
        """
        flagged = self.update(deleted=True)
        return flagged

    # Marks the method as data-altering (Django's template engine refuses to
    # call callables carrying this flag).
    soft_delete.alters_data = True


class ReplacedByMixin:
Expand Down
2 changes: 2 additions & 0 deletions events/tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def data_source(self):


class KeywordFactory(factory.django.DjangoModelFactory):
name = factory.Faker("bs")

data_source = factory.SubFactory(DataSourceFactory)
publisher = factory.SubFactory(OrganizationFactory)

Expand Down
Loading