diff --git a/events/importer/kulke.py b/events/importer/kulke.py index ccd8f1731..a3b80202a 100644 --- a/events/importer/kulke.py +++ b/events/importer/kulke.py @@ -6,13 +6,14 @@ from datetime import datetime, time, timedelta from posixpath import join as urljoin from textwrap import dedent -from typing import Iterator, Union +from typing import Iterator, Sequence, Union import dateutil import requests from django.conf import settings from django.core.exceptions import ObjectDoesNotExist from django.db import transaction +from django.db.models import Count from django_orghierarchy.models import Organization from lxml import etree from pytz import timezone @@ -167,11 +168,6 @@ def _query_courses(): ) -def _delete_courses(): - courses_q = _query_courses() - courses_q.delete() - - SPORTS = ["p965"] GYMS = ["p8504"] MOVIES = ["p1235"] @@ -504,7 +500,7 @@ def text_content(k): # Skip courses when importing events return False - if self.options["single"]: + if self.options.get("single", False): if str(eid) != self.options["single"]: return False @@ -838,33 +834,34 @@ def simple(field): else: setattr(super_event, fieldname, value) - # The name may vary within a recurring event; hence, take the common part - if expand_model_fields(super_event, ["headline"])[0] not in common_fields: - words = first_event.headline.split(" ") - name = "" - while words and all( - headline.startswith(name + words[0]) - for headline in [event.name for event in member_events] - ): - name += words.pop(0) + " " - logger.warning(words) - logger.warning(name) - if name: - super_event.name = name - else: - # If a common part was not found, default to the first event's name - super_event.name = first_event.name + # The name may vary within a recurring event; hence, take the common part in each language + for lang in self.languages: + name_attr = f"name_{lang}" + first_name = getattr(first_event, name_attr) + words = first_name.split(" ") if first_name else None - for lang in self.languages.keys(): - headline = getattr(super_event, "headline_{}".format(lang)) - secondary_headline = getattr( - super_event, "secondary_headline_{}".format(lang) - ) - setattr( - super_event, - "name_{}".format(lang), - make_event_name(headline, secondary_headline), - ) + if name_attr not in common_fields: + name = "" + member_event_names = [ + getattr(event, name_attr) for event in member_events + ] + while words and all( + member_event_name.startswith(name + words[0]) + if member_event_name + else False + for member_event_name in member_event_names + ): + name += words.pop(0) + " " + + if name: + setattr(super_event, name_attr, name.rstrip()) + else: + # If a common part was not found, default to the first event's name + setattr( + super_event, + name_attr, + getattr(first_event, name_attr), + ) # Gather common keywords present in *all* subevents common_keywords = functools.reduce( @@ -882,7 +879,7 @@ def simple(field): super_event.audience.add(k) @transaction.atomic - def _save_recurring_superevent(self, recurring_group): + def _save_super_event(self, recurring_group): kulke_ids = set(make_kulke_id(event) for event in recurring_group) superevent_aggregates = EventAggregate.objects.filter( members__event__id__in=kulke_ids @@ -939,12 +936,46 @@ def _save_recurring_superevent(self, recurring_group): EventAggregateMember.objects.get_or_create( event=event, event_aggregate=aggregate ) + # Remove any extra event aggregate members + EventAggregateMember.objects.filter(event_aggregate=aggregate).exclude( + event__in=events + ).delete() for event in events: event.super_event = aggregate.super_event Event.objects.bulk_update(events, ("super_event",)) return True + def _handle_removed_events( + self, elis_event_ids: Sequence[int], begin_date: datetime + ) -> None: + # Find Kulke events that are not referenced in the latest data from Elis and delete them. + count, deleted = ( + Event.objects.filter( + data_source=self.data_source, + start_time__gte=begin_date, + super_event_type__isnull=True, + ) + .exclude(origin_id__in=elis_event_ids) + .delete() + ) + if count: + logger.debug("Deleted %d events and associated objects: %s", count, deleted) + + # Find super events that no longer contain at least two events and delete them + count, deleted = ( + Event.objects.exclude(super_event_type__isnull=True) + .annotate(aggregate_member_count=Count("aggregate__members")) + .filter(aggregate_member_count__lt=2) + .delete() + ) + if count: + logger.debug( + "Deleted %d empty super events and associated objects: %s", + count, + deleted, + ) + def import_events(self): logger.info("Importing Kulke events") self._import_events() @@ -1007,7 +1038,9 @@ def _import_events(self, importing_courses=False): self._verify_recurs(recurring_groups) for group in recurring_groups.values(): if group: - self._save_recurring_superevent(group) + self._save_super_event(group) + + self._handle_removed_events(events.keys(), begin_date) def import_keywords(self): logger.info("Importing Kulke categories as keywords") diff --git a/events/tests/importers/test_kulke.py b/events/tests/importers/test_kulke.py index e90255d5c..5d6d61578 100644 --- a/events/tests/importers/test_kulke.py +++ b/events/tests/importers/test_kulke.py @@ -1,10 +1,14 @@ import os -from datetime import time +from datetime import time, timedelta +from unittest.mock import patch import pytest from django.test import TestCase +from django.utils import timezone from events.importer.kulke import KulkeImporter, parse_age_range, parse_course_time +from events.models import Event, EventAggregate, EventAggregateMember +from events.tests.factories import EventFactory @pytest.mark.django_db @@ -59,6 +63,29 @@ def test_parse_course_time_returns_correct_result(test_input, expected): class TestKulkeImporter(TestCase): + def setUp(self) -> None: + with patch.object(KulkeImporter, "fetch_kulke_categories", return_value={}): + self.importer = KulkeImporter(options={}) + self.data_source = self.importer.data_source + + def _create_super_event(self, events: list[Event]) -> Event: + aggregate = EventAggregate.objects.create() + super_event = EventFactory( + super_event_type=Event.SuperEventType.RECURRING, + data_source=self.data_source, + id="linkedevents:agg-{}".format(aggregate.id), + ) + super_event.save() + aggregate.super_event = super_event + aggregate.save() + event_aggregates = [ + EventAggregateMember(event=event, event_aggregate=aggregate) + for event in events + ] + EventAggregateMember.objects.bulk_create(event_aggregates) + return super_event + + @pytest.mark.django_db def test_html_format(self): text = ( "Lorem ipsum dolor sit amet, consectetur adipiscing elit.{0}" @@ -79,3 +106,156 @@ def test_html_format(self): "

Vestibulum lacinia interdum nisi eu vehicula.

" ) self.assertEqual(html_text, expected_text) + + @pytest.mark.django_db + def test__update_super_event(self): + now = timezone.now() + event_1 = EventFactory( + name="Toistuva tapahtuma 1", + name_en="Recurring Event 1", + start_time=now - timedelta(hours=24), + end_time=now - timedelta(hours=23), + data_source=self.data_source, + ) + event_2 = EventFactory( + name="Toistuva tapahtuma 2", + name_en="Recurring Event 2", + start_time=now, + end_time=now + timedelta(hours=1), + data_source=self.data_source, + ) + super_event = self._create_super_event([event_1, event_2]) + + self.importer._update_super_event(super_event, [event_1, event_2]) + # The super event should have the common part for the name + self.assertEqual(super_event.name, "Toistuva tapahtuma") + self.assertEqual(super_event.name_en, "Recurring Event") + # The start/end time should be the start/end time of the first/last event + self.assertEqual(super_event.start_time, event_1.start_time) + self.assertEqual(super_event.end_time, event_2.end_time) + + @pytest.mark.django_db + def test__update_super_event_default_name(self): + now = timezone.now() + event_1 = EventFactory( + name="Joku tapahtuma", + name_en="Some Event", + start_time=now - timedelta(hours=24), + end_time=now - timedelta(hours=23), + data_source=self.data_source, + ) + event_2 = EventFactory( + name="Ei yhteistä osaa nimessä", + name_en="No common part in the name", + start_time=now, + end_time=now + timedelta(hours=1), + data_source=self.data_source, + ) + super_event = self._create_super_event([event_1, event_2]) + + self.importer._update_super_event(super_event, [event_1, event_2]) + # If the name does not have a common part, default to the first event's name + self.assertEqual(super_event.name, "Joku tapahtuma") + self.assertEqual(super_event.name_en, "Some Event") + + @pytest.mark.django_db + def test__save_super_event(self): + event_1 = EventFactory(id="kulke:1", data_source=self.data_source, origin_id=1) + event_2 = EventFactory(id="kulke:2", data_source=self.data_source, origin_id=2) + event_3 = EventFactory(id="kulke:3", data_source=self.data_source, origin_id=3) + + # Create a super event with all three events + self.importer._save_super_event( + {event_1.origin_id, event_2.origin_id, event_3.origin_id} + ) + + event_1.refresh_from_db() + super_event = event_1.super_event + self.assertEqual( + set(member.event_id for member in super_event.aggregate.members.all()), + {event_1.id, event_2.id, event_3.id}, + ) + + # Simulate a situation where one of the events is no longer associated with the super event in Elis + self.importer._save_super_event({event_1.origin_id, event_2.origin_id}) + + event_1.refresh_from_db() + super_event = event_1.super_event + self.assertEqual( + set(member.event_id for member in super_event.aggregate.members.all()), + {event_1.id, event_2.id}, + ) + self.assertTrue(Event.objects.filter(id=event_3.id).exists()) + + # If there is only one event left in the super event, the super event should be deleted + # Deleting the event itself is not the responsibility of `_save_super_event` + self.importer._save_super_event({event_1.origin_id}) + event_1.refresh_from_db() + self.assertIsNone(event_1.super_event) + self.assertTrue(Event.objects.filter(id=event_2.id).exists()) + self.assertTrue(Event.objects.filter(id=event_3.id).exists()) + + @pytest.mark.django_db + def test__handle_removed_events(self): + """Test that removing""" + now = timezone.now() + # Event that exists in the DB but not in Elis -- will be removed + event_1 = EventFactory( + data_source=self.data_source, origin_id=1, start_time=now + ) + # Event that exists in Elis -- won't be removed + event_2 = EventFactory( + data_source=self.data_source, origin_id=2, start_time=now + ) + # Old event, outside of the date range of the Elis search -- won't be removed + event_3 = EventFactory( + data_source=self.data_source, + origin_id=3, + start_time=now - timedelta(days=90), + ) + + self.importer._handle_removed_events( + elis_event_ids=[event_2.origin_id], + begin_date=now - timedelta(days=60), + ) + + self.assertFalse(Event.objects.filter(id=event_1.id).exists()) + self.assertTrue(Event.objects.filter(id=event_2.id).exists()) + self.assertTrue(Event.objects.filter(id=event_3.id).exists()) + + @pytest.mark.django_db + def test__handle_removed_events_superevent(self): + now = timezone.now() + # This super event is not in Elis. The super event with all its member events should be removed. + super_1_event_1 = EventFactory( + data_source=self.data_source, origin_id=1, start_time=now + ) + super_1_event_2 = EventFactory( + data_source=self.data_source, origin_id=2, start_time=now + ) + super_1 = self._create_super_event([super_1_event_1, super_1_event_2]) + + # This super event is in Elis. It should not be removed. + super_2_event_1 = EventFactory( + data_source=self.data_source, origin_id=3, start_time=now + ) + super_2_event_2 = EventFactory( + data_source=self.data_source, origin_id=4, start_time=now + ) + super_2 = self._create_super_event([super_2_event_1, super_2_event_2]) + + # This super event is empty to begin with -- it should be removed + super_3 = self._create_super_event([]) + + self.importer._handle_removed_events( + elis_event_ids=[super_2_event_1.origin_id, super_2_event_2.origin_id], + begin_date=now - timedelta(days=60), + ) + + self.assertFalse(Event.objects.filter(id=super_1_event_1.id).exists()) + self.assertFalse(Event.objects.filter(id=super_1_event_2.id).exists()) + self.assertFalse(Event.objects.filter(id=super_1.id).exists()) + self.assertTrue(Event.objects.filter(id=super_2_event_1.id).exists()) + self.assertTrue(Event.objects.filter(id=super_2_event_2.id).exists()) + self.assertTrue(Event.objects.filter(id=super_2.id).exists()) + self.assertFalse(Event.objects.filter(id=super_3.id).exists())