diff --git a/events/importer/kulke.py b/events/importer/kulke.py index ccd8f1731..13c6ef928 100644 --- a/events/importer/kulke.py +++ b/events/importer/kulke.py @@ -6,13 +6,14 @@ from datetime import datetime, time, timedelta from posixpath import join as urljoin from textwrap import dedent -from typing import Iterator, Union +from typing import Iterator, Sequence, Union import dateutil import requests from django.conf import settings from django.core.exceptions import ObjectDoesNotExist from django.db import transaction +from django.db.models import Count, Q from django_orghierarchy.models import Organization from lxml import etree from pytz import timezone @@ -167,11 +168,6 @@ def _query_courses(): ) -def _delete_courses(): - courses_q = _query_courses() - courses_q.delete() - - SPORTS = ["p965"] GYMS = ["p8504"] MOVIES = ["p1235"] @@ -504,9 +500,8 @@ def text_content(k): # Skip courses when importing events return False - if self.options["single"]: - if str(eid) != self.options["single"]: - return False + if (single := self.options.get("single")) and single != str(eid): + return False event = events[eid] if is_course: @@ -838,33 +833,31 @@ def simple(field): else: setattr(super_event, fieldname, value) - # The name may vary within a recurring event; hence, take the common part - if expand_model_fields(super_event, ["headline"])[0] not in common_fields: - words = first_event.headline.split(" ") - name = "" - while words and all( - headline.startswith(name + words[0]) - for headline in [event.name for event in member_events] - ): - name += words.pop(0) + " " - logger.warning(words) - logger.warning(name) - if name: - super_event.name = name - else: - # If a common part was not found, default to the first event's name - super_event.name = first_event.name + # The name may vary within a recurring event; hence, take the common part in each language + for lang in self.languages: + name_attr = f"name_{lang}" + first_name = getattr(first_event, name_attr) + words = 
first_name.split(" ") if first_name else [] - for lang in self.languages.keys(): - headline = getattr(super_event, "headline_{}".format(lang)) - secondary_headline = getattr( - super_event, "secondary_headline_{}".format(lang) - ) - setattr( - super_event, - "name_{}".format(lang), - make_event_name(headline, secondary_headline), - ) + if name_attr not in common_fields: + name = "" + member_event_names = [ + getattr(event, name_attr) for event in member_events + ] + + # Try to find the common part of the names + for word in words: + if all( + member_event_name and member_event_name.startswith(name + word) + for member_event_name in member_event_names + ): + name += word + " " + else: + name = name.rstrip() + break + + # If a common part was not found, default to the first event's name + setattr(super_event, name_attr, name or getattr(first_event, name_attr)) # Gather common keywords present in *all* subevents common_keywords = functools.reduce( @@ -882,7 +875,7 @@ def simple(field): super_event.audience.add(k) @transaction.atomic - def _save_recurring_superevent(self, recurring_group): + def _save_super_event(self, recurring_group): kulke_ids = set(make_kulke_id(event) for event in recurring_group) superevent_aggregates = EventAggregate.objects.filter( members__event__id__in=kulke_ids @@ -932,19 +925,58 @@ def _save_recurring_superevent(self, recurring_group): # The imported event is not part of an aggregate but one was found it in the db. # Remove the super event. This is the only case when an event is removed from # a recurring aggregate. 
- aggregate.super_event.delete() + aggregate.super_event.soft_delete() + aggregate.super_event.sub_events.all().update(super_event=None) return False else: for event in events: EventAggregateMember.objects.get_or_create( event=event, event_aggregate=aggregate ) + # Remove any extra event aggregate members + EventAggregateMember.objects.filter(event_aggregate=aggregate).exclude( + event__in=events + ).delete() for event in events: event.super_event = aggregate.super_event Event.objects.bulk_update(events, ("super_event",)) return True + def _handle_removed_events( + self, elis_event_ids: Sequence[int], begin_date: datetime + ) -> None: + # Find Kulke events that are not referenced in the latest data from Elis and delete them. + unreferenced_events = Event.objects.filter( + data_source=self.data_source, + start_time__gte=begin_date, + super_event_type__isnull=True, + deleted=False, + ).exclude(origin_id__in=elis_event_ids) + unreferenced_events.update(super_event=None) + count = unreferenced_events.soft_delete() + + if count: + logger.debug("Deleted %d events", count) + + # Find super events that no longer contain at least two events and delete them + count = ( + Event.objects.exclude(super_event_type__isnull=True) + .annotate( + aggregate_member_count=Count( + "aggregate__members", + filter=Q(aggregate__members__event__deleted=False), + ) + ) + .filter(aggregate_member_count__lt=2, deleted=False) + .soft_delete() + ) + if count: + logger.debug( + "Deleted %d empty super events", + count, + ) + def import_events(self): logger.info("Importing Kulke events") self._import_events() @@ -1007,7 +1039,9 @@ def _import_events(self, importing_courses=False): self._verify_recurs(recurring_groups) for group in recurring_groups.values(): if group: - self._save_recurring_superevent(group) + self._save_super_event(group) + + self._handle_removed_events(events.keys(), begin_date) def import_keywords(self): logger.info("Importing Kulke categories as keywords") diff --git 
a/events/models.py b/events/models.py index d583918f9..8b963be97 100644 --- a/events/models.py +++ b/events/models.py @@ -144,7 +144,10 @@ def can_be_edited_by(self, user): class BaseTreeQuerySet(TreeQuerySet, BaseQuerySet): - pass + def soft_delete(self): + return self.update(deleted=True) + + soft_delete.alters_data = True class ReplacedByMixin: diff --git a/events/tests/factories.py b/events/tests/factories.py index 1e047b2d3..b454d3e06 100644 --- a/events/tests/factories.py +++ b/events/tests/factories.py @@ -43,6 +43,8 @@ def data_source(self): class KeywordFactory(factory.django.DjangoModelFactory): + name = factory.Faker("bs") + data_source = factory.SubFactory(DataSourceFactory) publisher = factory.SubFactory(OrganizationFactory) diff --git a/events/tests/importers/test_kulke.py b/events/tests/importers/test_kulke.py index e90255d5c..474b62cf8 100644 --- a/events/tests/importers/test_kulke.py +++ b/events/tests/importers/test_kulke.py @@ -1,10 +1,14 @@ import os -from datetime import time +from datetime import time, timedelta +from unittest.mock import patch import pytest from django.test import TestCase +from django.utils import timezone from events.importer.kulke import KulkeImporter, parse_age_range, parse_course_time +from events.models import Event, EventAggregate, EventAggregateMember +from events.tests.factories import EventFactory, KeywordFactory @pytest.mark.django_db @@ -59,6 +63,44 @@ def test_parse_course_time_returns_correct_result(test_input, expected): class TestKulkeImporter(TestCase): + def setUp(self) -> None: + with patch.object(KulkeImporter, "fetch_kulke_categories", return_value={}): + self.importer = KulkeImporter(options={}) + self.data_source = self.importer.data_source + + def _create_super_event(self, events: list[Event]) -> Event: + aggregate = EventAggregate.objects.create() + super_event = EventFactory( + super_event_type=Event.SuperEventType.RECURRING, + data_source=self.data_source, + 
id="linkedevents:agg-{}".format(aggregate.id), + ) + super_event.save() + aggregate.super_event = super_event + aggregate.save() + event_aggregates = [ + EventAggregateMember(event=event, event_aggregate=aggregate) + for event in events + ] + EventAggregateMember.objects.bulk_create(event_aggregates) + return super_event + + def assert_event_soft_deleted(self, event_id: str, deleted: bool): + """ + Assert that the event with the given ID has the given deleted status, + i.e. it has been soft-deleted if `deleted` is True, and it has not been + soft-deleted if `deleted` is False. + + If the event does not exist (e.g. due to being actually deleted), the + test fails. + """ + try: + event = Event.objects.get(id=event_id) + except Event.DoesNotExist: + self.fail(f"Event with ID {event_id} does not exist") + self.assertEqual(event.deleted, deleted) + + @pytest.mark.django_db def test_html_format(self): text = ( "Lorem ipsum dolor sit amet, consectetur adipiscing elit.{0}" @@ -79,3 +121,173 @@ def test_html_format(self): "
Vestibulum lacinia interdum nisi eu vehicula.
" ) self.assertEqual(html_text, expected_text) + + @pytest.mark.django_db + def test__update_super_event(self): + now = timezone.now() + kw1, kw2, kw3 = KeywordFactory.create_batch(3, data_source=self.data_source) + event_1 = EventFactory( + name="Toistuva tapahtuma 1", + name_en="Recurring Event 1", + description="Long description", + short_description="Short description", + start_time=now - timedelta(hours=24), + end_time=now - timedelta(hours=23), + data_source=self.data_source, + ) + event_1.keywords.add(kw1, kw2) + event_1.save() + event_2 = EventFactory( + name="Toistuva tapahtuma 2", + name_en="Recurring Event 2", + description=event_1.description, + short_description="A different short description", + start_time=now, + end_time=now + timedelta(hours=1), + data_source=self.data_source, + ) + event_2.keywords.add(kw2, kw3) + event_2.save() + super_event = self._create_super_event([event_1, event_2]) + + self.importer._update_super_event(super_event, [event_1, event_2]) + # The super event should have the common part for the name + self.assertEqual(super_event.name, "Toistuva tapahtuma") + self.assertEqual(super_event.name_en, "Recurring Event") + # The start/end time should be the start/end time of the first/last event + self.assertEqual(super_event.start_time, event_1.start_time) + self.assertEqual(super_event.end_time, event_2.end_time) + # The super event should have the common subset of keywords + self.assertEqual( + set(super_event.keywords.all().values_list("id", flat=True)), {str(kw2.id)} + ) + # A field that's the same for all member events should be populated in the super event + self.assertEqual(super_event.description, event_1.description) + # No common value => field should be empty in the super event + self.assertIsNone(super_event.short_description) + + @pytest.mark.django_db + def test__update_super_event_default_name(self): + now = timezone.now() + event_1 = EventFactory( + name="Joku tapahtuma", + name_en="Some Event", + start_time=now - 
timedelta(hours=24), + end_time=now - timedelta(hours=23), + data_source=self.data_source, + ) + event_2 = EventFactory( + name="Ei yhteistä osaa nimessä", + name_en="No common part in the name", + start_time=now, + end_time=now + timedelta(hours=1), + data_source=self.data_source, + ) + super_event = self._create_super_event([event_1, event_2]) + + self.importer._update_super_event(super_event, [event_1, event_2]) + # If the name does not have a common part, default to the first event's name + self.assertEqual(super_event.name, "Joku tapahtuma") + self.assertEqual(super_event.name_en, "Some Event") + + @pytest.mark.django_db + def test__save_super_event(self): + event_1 = EventFactory(id="kulke:1", data_source=self.data_source, origin_id=1) + event_2 = EventFactory(id="kulke:2", data_source=self.data_source, origin_id=2) + event_3 = EventFactory(id="kulke:3", data_source=self.data_source, origin_id=3) + + # Create a super event with all three events + self.importer._save_super_event( + {event_1.origin_id, event_2.origin_id, event_3.origin_id} + ) + + event_1.refresh_from_db() + super_event = event_1.super_event + self.assertEqual( + set(member.event_id for member in super_event.aggregate.members.all()), + {event_1.id, event_2.id, event_3.id}, + ) + + # Simulate a situation where one of the events is no longer associated with the super event in Elis + self.importer._save_super_event({event_1.origin_id, event_2.origin_id}) + + event_1.refresh_from_db() + super_event = event_1.super_event + self.assertEqual( + set(member.event_id for member in super_event.aggregate.members.all()), + {event_1.id, event_2.id}, + ) + self.assertTrue(Event.objects.filter(id=event_3.id).exists()) + + # If there is only one event left in the super event, the super event should be deleted + # Deleting the event itself is not the responsibility of `_save_super_event` + self.importer._save_super_event({event_1.origin_id}) + event_1.refresh_from_db() + self.assertIsNone(event_1.super_event) + 
self.assertTrue(Event.objects.filter(id=event_2.id).exists()) + self.assertTrue(Event.objects.filter(id=event_3.id).exists()) + + @pytest.mark.django_db + def test__handle_removed_events(self): + """Test that events missing from Elis are soft-deleted, while events still in Elis or older than the begin date are kept.""" + now = timezone.now() + # Event that exists in the DB but not in Elis -- will be removed + event_1 = EventFactory( + data_source=self.data_source, origin_id=1, start_time=now + ) + # Event that exists in Elis -- won't be removed + event_2 = EventFactory( + data_source=self.data_source, origin_id=2, start_time=now + ) + # Old event, outside the date range of the Elis search -- won't be removed + event_3 = EventFactory( + data_source=self.data_source, + origin_id=3, + start_time=now - timedelta(days=90), + ) + + self.importer._handle_removed_events( + elis_event_ids=[event_2.origin_id], + begin_date=now - timedelta(days=60), + ) + + self.assert_event_soft_deleted(event_1.id, True) + self.assert_event_soft_deleted(event_2.id, False) + self.assert_event_soft_deleted(event_3.id, False) + + @pytest.mark.django_db + def test__handle_removed_events_superevent(self): + now = timezone.now() + # This super event is not in Elis. The super event with all its member events should be removed. + super_1_event_1 = EventFactory( + data_source=self.data_source, origin_id=1, start_time=now + ) + super_1_event_2 = EventFactory( + data_source=self.data_source, origin_id=2, start_time=now + ) + super_1 = self._create_super_event([super_1_event_1, super_1_event_2]) + + # This super event is in Elis. It should not be removed. 
+ super_2_event_1 = EventFactory( + data_source=self.data_source, origin_id=3, start_time=now + ) + super_2_event_2 = EventFactory( + data_source=self.data_source, origin_id=4, start_time=now + ) + super_2 = self._create_super_event([super_2_event_1, super_2_event_2]) + + # This super event is empty to begin with -- it should be removed + super_3 = self._create_super_event([]) + + self.importer._handle_removed_events( + elis_event_ids=[super_2_event_1.origin_id, super_2_event_2.origin_id], + begin_date=now - timedelta(days=60), + ) + + self.assert_event_soft_deleted(super_1_event_1.id, True) + self.assert_event_soft_deleted(super_1_event_2.id, True) + self.assert_event_soft_deleted(super_1.id, True) + self.assert_event_soft_deleted(super_2_event_1.id, False) + self.assert_event_soft_deleted(super_2_event_2.id, False) + self.assert_event_soft_deleted(super_2.id, False) + self.assert_event_soft_deleted(super_3.id, True) diff --git a/requirements-dev.in b/requirements-dev.in index 5b92661bd..a4b6f7d32 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -17,3 +17,4 @@ pytest-django pytest-factoryboy python-jose requests-mock +snakemd diff --git a/requirements-dev.txt b/requirements-dev.txt index 92c93f0f9..eee82d6e7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -149,6 +149,8 @@ six==1.16.0 # freezegun # python-dateutil # requests-mock +snakemd==2.1.0 + # via -r requirements-dev.in sqlparse==0.4.4 # via # -c requirements.txt