Skip to content

Commit

Permalink
rodin events
Browse files Browse the repository at this point in the history
  • Loading branch information
ashleyzhang01 committed Mar 6, 2024
1 parent 77d5c64 commit 714a839
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@

class Command(BaseCommand):
def handle(self, *args, **kwargs):
now = timezone.localtime()

try:
resp = requests.get(ENGINEERING_EVENTS_WEBSITE)
except ConnectionError:
Expand Down
86 changes: 86 additions & 0 deletions backend/penndata/management/commands/get_rodin_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import datetime
import requests
from django.core.management.base import BaseCommand
from django.utils import timezone
from bs4 import BeautifulSoup

from penndata.models import Event


# Base URL of the Rodin College House site; calendar and event paths are
# appended to it by the scraper.
RODIN_EVENTS_WEBSITE = "https://rodin.house.upenn.edu"

# Headers passed to every ``requests.get`` call made by the scraper. The
# User-Agent mimics a desktop Chrome browser (presumably because the site
# rejects the default client User-Agent -- TODO confirm).
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
        " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    ),
}

class Command(BaseCommand):
    """Scrape Rodin College House calendar events and upsert them as ``Event`` rows."""

    # Seconds to wait on each HTTP request so a stalled server cannot hang the job.
    request_timeout = 10

    # Formats tried, in order, for the ``content`` attribute of the date elements.
    _TIMESTAMP_FORMATS = ('%Y-%m-%dT%H:%M:%S%z', '%Y-%m-%dT%H:%M:%S')

    def handle(self, *args, **kwargs):
        """Scrape the current calendar page and, after the 25th, next month's page too."""
        self.scrape_calendar_page(RODIN_EVENTS_WEBSITE + '/calendar')

        now = timezone.localtime()
        if now.day > 25:
            # Roll over December -> January of the following year.
            if now.month < 12:
                next_month_year, next_month = now.year, now.month + 1
            else:
                next_month_year, next_month = now.year + 1, 1
            next_month_url = (
                f'{RODIN_EVENTS_WEBSITE}/calendar/{next_month_year}-{next_month:02d}'
            )
            self.scrape_calendar_page(next_month_url)

        self.stdout.write("Uploaded Rodin College House Events!")

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed, or ``None`` if the request failed."""
        try:
            resp = requests.get(url, headers=HEADERS, timeout=self.request_timeout)
            resp.raise_for_status()
        except requests.exceptions.RequestException as exc:
            # requests' ConnectionError/Timeout/HTTPError all derive from
            # RequestException; the builtin ConnectionError would not catch them.
            self.stderr.write(f"Could not fetch {url}: {exc}")
            return None
        return BeautifulSoup(resp.text, 'html.parser')

    @staticmethod
    def _select_text(soup, selector):
        """Return the stripped text of the first match for ``selector``, or ``""``."""
        node = soup.select_one(selector)
        return node.text.strip() if node is not None else ""

    @classmethod
    def _parse_timestamp(cls, raw):
        """Parse an ISO-like timestamp string; return ``None`` if empty or unparsable."""
        if not raw:
            return None
        for fmt in cls._TIMESTAMP_FORMATS:
            try:
                return datetime.datetime.strptime(raw, fmt)
            except ValueError:
                continue
        return None

    @staticmethod
    def _ensure_aware(value):
        """Return ``value`` as an aware datetime; ``None`` passes through unchanged."""
        if value is None:
            return None
        # make_aware() raises ValueError when handed an already-aware datetime,
        # which is what strptime() produces when the string carries a UTC offset.
        return timezone.make_aware(value) if timezone.is_naive(value) else value

    def scrape_details(self, event_url):
        """Fetch one event page and extract its details.

        Returns a ``(location, start_time, end_time, description)`` tuple, where
        the strings default to ``""`` and the datetimes to ``None`` when the
        corresponding element is missing, or ``None`` if the page could not be
        fetched.
        """
        soup = self._fetch_soup(event_url)
        if soup is None:
            return None

        location = self._select_text(soup, '.field-name-field-room')
        description = self._select_text(soup, '.field-name-body')

        start_node = soup.select_one('.date-display-start')
        end_node = soup.select_one('.date-display-end')
        start_time_str = start_node.get('content') if start_node is not None else ""
        end_time_str = end_node.get('content') if end_node is not None else ""

        start_time = self._parse_timestamp(start_time_str)
        end_time = self._parse_timestamp(end_time_str)
        return location, start_time, end_time, description

    def scrape_calendar_page(self, calendar_url):
        """Upsert an ``Event`` for the first linked item of each future day cell.

        Events are keyed by name. Scraping of the page stops after the first
        event that starts more than 30 days from now (that event is still saved).
        Events whose page cannot be fetched or that have no start time are skipped.
        """
        soup = self._fetch_soup(calendar_url)
        if soup is None:
            return None

        cutoff = timezone.localtime() + datetime.timedelta(days=30)

        for cell in soup.find_all('td', class_='single-day future'):
            item = cell.find('div', class_='item')
            event_link = item.find('a', href=True) if item is not None else None
            if event_link is None:
                continue

            name = event_link.text.strip()
            url = RODIN_EVENTS_WEBSITE + event_link['href']

            details = self.scrape_details(url)
            if details is None:
                # Detail page unreachable; try the remaining events.
                continue
            location, start_time, end_time, description = details

            if start_time is None:
                # Without a start time the event cannot be scheduled or
                # compared against the cutoff.
                self.stderr.write(f"Skipping {url}: no start time found")
                continue
            start_time = self._ensure_aware(start_time)
            # NOTE(review): end may be None here -- assumes Event.end is nullable; confirm.
            end_time = self._ensure_aware(end_time)

            self.stdout.write(f"Location: {location}")
            self.stdout.write(f"Start Time: {start_time}")
            self.stdout.write(f"End Time: {end_time}")
            self.stdout.write(f"Description: {description}")

            Event.objects.update_or_create(
                name=name,
                defaults={
                    "event_type": "Rodin College House",
                    "image_url": "",
                    "start": start_time,
                    "end": end_time,
                    "location": location,
                    "website": url,
                    "description": description,
                    "email": "",
                },
            )
            if start_time > cutoff:
                break
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,10 @@ def handle(self, *args, **kwargs):
event_start_str = event_date_parts[1].split(' - ')[0].strip()
event_end_str = event_date_parts[1].split(' - ')[1].strip()

event_start_datetime = datetime.strptime(event_date_parts[0] + ' ' + event_start_str, '%B %d, %Y %I:%M%p')
event_end_datetime = datetime.strptime(event_date_parts[0] + ' ' + event_end_str, '%B %d, %Y %I:%M%p')
event_start_datetime = datetime.strptime(
event_date_parts[0] + ' ' + event_start_str, '%B %d, %Y %I:%M%p')
event_end_datetime = datetime.strptime(
event_date_parts[0] + ' ' + event_end_str, '%B %d, %Y %I:%M%p')
last_start_datetime = event_start_datetime
else: # if no year given
event_month_elem = event.find('div', class_='PromoSearchResultEvent-month')
Expand All @@ -64,7 +66,8 @@ def handle(self, *args, **kwargs):
else:
start_year = current_year

event_start_datetime = datetime(start_year, datetime.strptime(event_month, '%B').month, event_day)
event_start_datetime = datetime(start_year, datetime.strptime(
event_month, '%B').month, event_day)

# events are ordered from future to past, so break once we find a past event
if event_start_datetime < now:
Expand Down
8 changes: 8 additions & 0 deletions k8s/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,14 @@ export class MyChart extends PennLabsChart {
cmd: ["python", "manage.py", "get_venture_events"],
env: [{ name: "DJANGO_SETTINGS_MODULE", value: "pennmobile.settings.production" }]
});

new CronJob(this, 'get-rodin-events', {
schedule:'0 17 * * *', // Every day at 5 PM
image: backendImage,
secret,
cmd: ["python", "manage.py", "get_rodin_events"],
env: [{ name: "DJANGO_SETTINGS_MODULE", value: "pennmobile.settings.production" }]
});
}
}

Expand Down

0 comments on commit 714a839

Please sign in to comment.