-
Notifications
You must be signed in to change notification settings - Fork 0
/
importer.py
69 lines (62 loc) · 2.44 KB
/
importer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pytz
import requests
import calendar
from datetime import timedelta
import dateutil.parser
import defusedxml.ElementTree as ET
def get_schedule(url, timeout=30):
    """Download a schedule XML document and return its events as dicts.

    The parser looks for ``day`` elements directly below the XML root,
    ``room`` elements below each day and ``event`` elements below each room.

    Args:
        url: Location of the schedule XML, fetched with ``requests.get``.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Pass ``None`` to wait indefinitely (the behaviour before this
            parameter existed).

    Returns:
        A list with one dict per event, in document order. Keys:

        * ``start`` -- start time converted to UTC (aware datetime).
        * ``start_date`` -- day and month without zero padding, e.g. ``"7.3."``.
        * ``start_str`` / ``end_str`` -- ``HH:MM`` in the offset the schedule
          itself specified for the event.
        * ``start_unix`` / ``end_unix`` -- integer seconds since the epoch.
        * ``start_weekday`` -- ``datetime.weekday()`` of the start time.
        * ``duration`` -- length in whole minutes.
        * ``title``, ``subtitle``, ``track``, ``place``, ``abstract``,
          ``lang`` -- text of the matching child element, ``u""`` if the
          element or its text is absent (``place`` comes from ``room``,
          ``lang`` from ``language``).
        * ``speakers`` -- stripped text of every ``persons/person`` element.
        * ``id`` -- the event's ``id`` attribute.

    Raises:
        requests.exceptions.RequestException: The download failed, timed out
            or returned an HTTP error status.
        AttributeError: An event has no ``date`` or ``duration`` element.
        ValueError: A duration is not two colon separated integers.
        KeyError: An event has no ``id`` attribute.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3; avoids relying on the
    # Python 2-only builtin name.
    text_type = type(u"")

    def load_events(xml):
        """Parse the schedule document *xml* into a list of event dicts."""

        def to_unixtimestamp(dt):
            """Return the aware datetime *dt* as integer epoch seconds."""
            # timegm() interprets the struct_time as UTC, so normalise first.
            return int(calendar.timegm(dt.astimezone(pytz.utc).timetuple()))

        def text_or_empty(node, child_name):
            """Return the text of *node*'s child, or ``u""`` if unavailable."""
            child = node.find(child_name)
            if child is None or child.text is None:
                return u""
            return text_type(child.text)

        def parse_duration(value):
            """Convert an ``H:M`` string into a ``timedelta``."""
            hours, minutes = map(int, value.split(':'))
            return timedelta(hours=hours, minutes=minutes)

        def all_events():
            """Yield every ``day/room/event`` element in document order."""
            schedule = ET.fromstring(xml)
            for day in schedule.findall('day'):
                for room in day.findall('room'):
                    for event in room.findall('event'):
                        yield event

        parsed_events = []
        for event in all_events():
            start = dateutil.parser.parse(event.find('date').text)
            duration = parse_duration(event.find('duration').text)
            end = start + duration

            persons = event.find('persons')
            if persons is None:
                person_nodes = []
            else:
                person_nodes = persons.findall('person')

            parsed_events.append(dict(
                start=start.astimezone(pytz.utc),
                # Built by hand: the '%-d' / '%-m' strftime flags are a
                # platform extension and not available everywhere.
                start_date='%d.%d.' % (start.day, start.month),
                start_str=start.strftime('%H:%M'),
                start_unix=to_unixtimestamp(start),
                start_weekday=start.weekday(),
                end_str=end.strftime('%H:%M'),
                end_unix=to_unixtimestamp(end),
                duration=int(duration.total_seconds() / 60),
                title=text_or_empty(event, 'title'),
                subtitle=text_or_empty(event, 'subtitle'),
                track=text_or_empty(event, 'track'),
                place=text_or_empty(event, 'room'),
                abstract=text_or_empty(event, 'abstract'),
                speakers=[
                    # An empty <person/> has text None; treat it like
                    # text_or_empty does instead of raising.
                    text_type((person.text or u"").strip())
                    for person in person_nodes
                ],
                lang=text_or_empty(event, 'language'),
                id=event.attrib["id"],
            ))
        return parsed_events

    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return load_events(response.content)