Skip to content

Commit

Permalink
Delete events not found in Elis in kulke importer
Browse files Browse the repository at this point in the history
  • Loading branch information
tuhola committed Jun 30, 2023
1 parent d96e160 commit b19d2d1
Show file tree
Hide file tree
Showing 2 changed files with 249 additions and 36 deletions.
103 changes: 68 additions & 35 deletions events/importer/kulke.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
from datetime import datetime, time, timedelta
from posixpath import join as urljoin
from textwrap import dedent
from typing import Iterator, Union
from typing import Iterator, Sequence, Union

import dateutil
import requests
from django.conf import settings
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.db.models import Count
from django_orghierarchy.models import Organization
from lxml import etree
from pytz import timezone
Expand Down Expand Up @@ -167,11 +168,6 @@ def _query_courses():
)


def _delete_courses():
    """Delete every course event matched by ``_query_courses``."""
    _query_courses().delete()


SPORTS = ["p965"]
GYMS = ["p8504"]
MOVIES = ["p1235"]
Expand Down Expand Up @@ -504,7 +500,7 @@ def text_content(k):
# Skip courses when importing events
return False

if self.options["single"]:
if self.options.get("single", False):
if str(eid) != self.options["single"]:
return False

Expand Down Expand Up @@ -838,33 +834,34 @@ def simple(field):
else:
setattr(super_event, fieldname, value)

# The name may vary within a recurring event; hence, take the common part
if expand_model_fields(super_event, ["headline"])[0] not in common_fields:
words = first_event.headline.split(" ")
name = ""
while words and all(
headline.startswith(name + words[0])
for headline in [event.name for event in member_events]
):
name += words.pop(0) + " "
logger.warning(words)
logger.warning(name)
if name:
super_event.name = name
else:
# If a common part was not found, default to the first event's name
super_event.name = first_event.name
# The name may vary within a recurring event; hence, take the common part in each language
for lang in self.languages:
name_attr = f"name_{lang}"
first_name = getattr(first_event, name_attr)
words = first_name.split(" ") if first_name else None

for lang in self.languages.keys():
headline = getattr(super_event, "headline_{}".format(lang))
secondary_headline = getattr(
super_event, "secondary_headline_{}".format(lang)
)
setattr(
super_event,
"name_{}".format(lang),
make_event_name(headline, secondary_headline),
)
if name_attr not in common_fields:
name = ""
member_event_names = [
getattr(event, name_attr) for event in member_events
]
while words and all(
member_event_name.startswith(name + words[0])
if member_event_name
else False
for member_event_name in member_event_names
):
name += words.pop(0) + " "

if name:
setattr(super_event, name_attr, name.rstrip())
else:
# If a common part was not found, default to the first event's name
setattr(
super_event,
name_attr,
getattr(first_event, name_attr),
)

# Gather common keywords present in *all* subevents
common_keywords = functools.reduce(
Expand All @@ -882,7 +879,7 @@ def simple(field):
super_event.audience.add(k)

@transaction.atomic
def _save_recurring_superevent(self, recurring_group):
def _save_super_event(self, recurring_group):
kulke_ids = set(make_kulke_id(event) for event in recurring_group)
superevent_aggregates = EventAggregate.objects.filter(
members__event__id__in=kulke_ids
Expand Down Expand Up @@ -939,12 +936,46 @@ def _save_recurring_superevent(self, recurring_group):
EventAggregateMember.objects.get_or_create(
event=event, event_aggregate=aggregate
)
# Remove any extra event aggregate members
EventAggregateMember.objects.filter(event_aggregate=aggregate).exclude(
event__in=events
).delete()
for event in events:
event.super_event = aggregate.super_event
Event.objects.bulk_update(events, ("super_event",))

return True

def _handle_removed_events(
    self, elis_event_ids: Sequence[int], begin_date: datetime
) -> None:
    """Delete stale Kulke events that no longer exist in Elis.

    :param elis_event_ids: origin ids of every event present in the
        latest data fetched from Elis.
    :param begin_date: start of the date range that was queried from
        Elis; events starting before this date are left untouched.
    """
    # Find Kulke events that are not referenced in the latest data from
    # Elis and delete them. Super events are skipped here; they are
    # cleaned up separately below.
    count, deleted = (
        Event.objects.filter(
            data_source=self.data_source,
            start_time__gte=begin_date,
            super_event_type__isnull=True,
        )
        .exclude(origin_id__in=elis_event_ids)
        .delete()
    )
    if count:
        logger.debug("Deleted %d events and associated objects: %s", count, deleted)

    # Find super events that no longer contain at least two events and
    # delete them. Restrict the query to this importer's data source so
    # that super events owned by other importers (which may not use
    # EventAggregate at all and would have a member count of 0) are
    # never touched.
    count, deleted = (
        Event.objects.filter(data_source=self.data_source)
        .exclude(super_event_type__isnull=True)
        .annotate(aggregate_member_count=Count("aggregate__members"))
        .filter(aggregate_member_count__lt=2)
        .delete()
    )
    if count:
        logger.debug(
            "Deleted %d empty super events and associated objects: %s",
            count,
            deleted,
        )

def import_events(self):
logger.info("Importing Kulke events")
self._import_events()
Expand Down Expand Up @@ -1007,7 +1038,9 @@ def _import_events(self, importing_courses=False):
self._verify_recurs(recurring_groups)
for group in recurring_groups.values():
if group:
self._save_recurring_superevent(group)
self._save_super_event(group)

self._handle_removed_events(events.keys(), begin_date)

def import_keywords(self):
logger.info("Importing Kulke categories as keywords")
Expand Down
182 changes: 181 additions & 1 deletion events/tests/importers/test_kulke.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import os
from datetime import time
from datetime import time, timedelta
from unittest.mock import patch

import pytest
from django.test import TestCase
from django.utils import timezone

from events.importer.kulke import KulkeImporter, parse_age_range, parse_course_time
from events.models import Event, EventAggregate, EventAggregateMember
from events.tests.factories import EventFactory


@pytest.mark.django_db
Expand Down Expand Up @@ -59,6 +63,29 @@ def test_parse_course_time_returns_correct_result(test_input, expected):


class TestKulkeImporter(TestCase):
def setUp(self) -> None:
    """Create an importer instance without hitting the Elis category API."""
    # Stub out category fetching so constructing the importer stays offline.
    category_patch = patch.object(
        KulkeImporter, "fetch_kulke_categories", return_value={}
    )
    with category_patch:
        self.importer = KulkeImporter(options={})
    self.data_source = self.importer.data_source

def _create_super_event(self, events: list[Event]) -> Event:
    """Create a recurring super event aggregating *events* and return it."""
    aggregate = EventAggregate.objects.create()
    super_event = EventFactory(
        super_event_type=Event.SuperEventType.RECURRING,
        data_source=self.data_source,
        id="linkedevents:agg-{}".format(aggregate.id),
    )
    super_event.save()
    aggregate.super_event = super_event
    aggregate.save()
    EventAggregateMember.objects.bulk_create(
        EventAggregateMember(event=member, event_aggregate=aggregate)
        for member in events
    )
    return super_event

@pytest.mark.django_db
def test_html_format(self):
text = (
"Lorem ipsum dolor sit amet, consectetur adipiscing elit.{0}"
Expand All @@ -79,3 +106,156 @@ def test_html_format(self):
"<p>Vestibulum lacinia interdum nisi eu vehicula.</p>"
)
self.assertEqual(html_text, expected_text)

@pytest.mark.django_db
def test__update_super_event(self):
    """The super event gets the common name part and the spanning times."""
    current = timezone.now()
    first = EventFactory(
        name="Toistuva tapahtuma 1",
        name_en="Recurring Event 1",
        start_time=current - timedelta(hours=24),
        end_time=current - timedelta(hours=23),
        data_source=self.data_source,
    )
    second = EventFactory(
        name="Toistuva tapahtuma 2",
        name_en="Recurring Event 2",
        start_time=current,
        end_time=current + timedelta(hours=1),
        data_source=self.data_source,
    )
    super_event = self._create_super_event([first, second])

    self.importer._update_super_event(super_event, [first, second])
    # The common prefix of the member names becomes the super event name.
    self.assertEqual(super_event.name, "Toistuva tapahtuma")
    self.assertEqual(super_event.name_en, "Recurring Event")
    # The super event spans from the earliest start to the latest end.
    self.assertEqual(super_event.start_time, first.start_time)
    self.assertEqual(super_event.end_time, second.end_time)

@pytest.mark.django_db
def test__update_super_event_default_name(self):
    """Without a common name part the first member's name is used as-is."""
    current = timezone.now()
    first = EventFactory(
        name="Joku tapahtuma",
        name_en="Some Event",
        start_time=current - timedelta(hours=24),
        end_time=current - timedelta(hours=23),
        data_source=self.data_source,
    )
    second = EventFactory(
        name="Ei yhteistä osaa nimessä",
        name_en="No common part in the name",
        start_time=current,
        end_time=current + timedelta(hours=1),
        data_source=self.data_source,
    )
    super_event = self._create_super_event([first, second])

    self.importer._update_super_event(super_event, [first, second])
    # No common prefix exists, so the first event's name wins.
    self.assertEqual(super_event.name, "Joku tapahtuma")
    self.assertEqual(super_event.name_en, "Some Event")

@pytest.mark.django_db
def test__save_super_event(self):
    """Super event membership follows the ids passed to `_save_super_event`."""
    event_1 = EventFactory(id="kulke:1", data_source=self.data_source, origin_id=1)
    event_2 = EventFactory(id="kulke:2", data_source=self.data_source, origin_id=2)
    event_3 = EventFactory(id="kulke:3", data_source=self.data_source, origin_id=3)

    def member_ids(event):
        # Reload and collect the ids of all members of the event's aggregate.
        event.refresh_from_db()
        return {
            member.event_id
            for member in event.super_event.aggregate.members.all()
        }

    # Create a super event with all three events.
    self.importer._save_super_event(
        {event_1.origin_id, event_2.origin_id, event_3.origin_id}
    )
    self.assertEqual(member_ids(event_1), {event_1.id, event_2.id, event_3.id})

    # Simulate one event dropping out of the super event in Elis: the
    # membership shrinks but the event itself survives.
    self.importer._save_super_event({event_1.origin_id, event_2.origin_id})
    self.assertEqual(member_ids(event_1), {event_1.id, event_2.id})
    self.assertTrue(Event.objects.filter(id=event_3.id).exists())

    # With only one event left, the super event itself is deleted.
    # Deleting the orphaned event is not `_save_super_event`'s responsibility.
    self.importer._save_super_event({event_1.origin_id})
    event_1.refresh_from_db()
    self.assertIsNone(event_1.super_event)
    self.assertTrue(Event.objects.filter(id=event_2.id).exists())
    self.assertTrue(Event.objects.filter(id=event_3.id).exists())

@pytest.mark.django_db
def test__handle_removed_events(self):
    """Events missing from Elis are deleted within the queried date range."""
    now = timezone.now()
    # Event that exists in the DB but not in Elis -- will be removed
    event_1 = EventFactory(
        data_source=self.data_source, origin_id=1, start_time=now
    )
    # Event that exists in Elis -- won't be removed
    event_2 = EventFactory(
        data_source=self.data_source, origin_id=2, start_time=now
    )
    # Old event, outside of the date range of the Elis search -- won't be removed
    event_3 = EventFactory(
        data_source=self.data_source,
        origin_id=3,
        start_time=now - timedelta(days=90),
    )

    self.importer._handle_removed_events(
        elis_event_ids=[event_2.origin_id],
        begin_date=now - timedelta(days=60),
    )

    self.assertFalse(Event.objects.filter(id=event_1.id).exists())
    self.assertTrue(Event.objects.filter(id=event_2.id).exists())
    self.assertTrue(Event.objects.filter(id=event_3.id).exists())

@pytest.mark.django_db
def test__handle_removed_events_superevent(self):
    """Super events are removed once their members disappear from Elis."""
    now = timezone.now()

    # This super event is not in Elis: it and all its members should go away.
    removed_members = [
        EventFactory(data_source=self.data_source, origin_id=1, start_time=now),
        EventFactory(data_source=self.data_source, origin_id=2, start_time=now),
    ]
    removed_super = self._create_super_event(removed_members)

    # This super event is in Elis: it must survive untouched.
    kept_members = [
        EventFactory(data_source=self.data_source, origin_id=3, start_time=now),
        EventFactory(data_source=self.data_source, origin_id=4, start_time=now),
    ]
    kept_super = self._create_super_event(kept_members)

    # This super event is empty to begin with -- it should be removed.
    empty_super = self._create_super_event([])

    self.importer._handle_removed_events(
        elis_event_ids=[member.origin_id for member in kept_members],
        begin_date=now - timedelta(days=60),
    )

    for event in removed_members + [removed_super, empty_super]:
        self.assertFalse(Event.objects.filter(id=event.id).exists())
    for event in kept_members + [kept_super]:
        self.assertTrue(Event.objects.filter(id=event.id).exists())

0 comments on commit b19d2d1

Please sign in to comment.