-
Notifications
You must be signed in to change notification settings - Fork 0
/
band_parser.py
56 lines (42 loc) · 1.41 KB
/
band_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import requests
from bs4 import BeautifulSoup
from log import get_logger
# Module-level logger, requested from the project's ``log`` helper by passing
# this module's name.
logger = get_logger(__name__)
# Per-festival selector for the page elements that hold line-up entries.
# ``BandParser.parse_entries`` unpacks each value into ``soup.find_all``, so
# the two items are its first two positional arguments: the tag name and the
# attribute filter (BeautifulSoup treats a plain string there as a CSS class).
TAGS = {
    'haldern': ["h4", "list-title"],
    'maifeld': ["a", "wk-position-cover wk-position-z-index"],
    'obs': ["h2", "entry-title fusion-post-title"],
}
class BandParser:
    """Scrape band names from a festival line-up page.

    The page at ``url`` is downloaded and parsed when the instance is
    created.  ``parse()`` then selects the elements configured for
    ``festival`` in the module-level ``TAGS`` table and extracts one band
    name per element.
    """

    def __init__(self, url, festival='haldern', timeout=30):
        """Download ``url`` and build the parse tree.

        Args:
            url: Address of the line-up page.
            festival: Key into ``TAGS`` selecting which elements hold the
                band entries.
            timeout: Seconds to wait for the server before ``requests``
                gives up.  Without a timeout a stalled server would block
                this call indefinitely.

        Note:
            The HTTP status code is not checked; an error page is parsed
            like any other document.
        """
        self.html_doc = requests.get(url, timeout=timeout).content
        self.soup = BeautifulSoup(self.html_doc, 'html.parser')
        self.festival = festival
        # Start empty so get_bands()/print_bands()/parse_samples() work even
        # when they are called before parse().
        self.samples = []
        self.bands = []

    def parse_entries(self):
        """Collect the elements holding band entries into ``self.samples``.

        Raises:
            KeyError: If ``self.festival`` has no entry in ``TAGS``.
        """
        self.samples = self.soup.find_all(*TAGS[self.festival])

    def _parse_a(self):
        """Append the text of each sample's first ``<a>`` to ``self.bands``.

        Everything from the first ``' ('`` onwards is dropped.  Samples
        without a link, or whose link has no text, are skipped so that one
        malformed entry does not abort the whole run.
        """
        for sample in self.samples:
            link = sample.find('a')
            if link is None:
                continue
            # get_text() also covers links with nested markup, for which
            # ``.string`` is None.
            band = link.get_text().strip().split(' (')[0].strip()
            if band:
                self.bands.append(band)

    def _parse_maifeld(self):
        """Append the ``title`` attribute of each sample to ``self.bands``.

        Everything from the first ``'('`` onwards is dropped.  Samples
        without a usable title are skipped.
        """
        for sample in self.samples:
            title = sample.attrs.get('title')
            if not title:
                continue
            band = title.split('(')[0].strip()
            if band:
                self.bands.append(band)

    def parse_samples(self):
        """Rebuild ``self.bands`` from ``self.samples``.

        The ``'maifeld'`` festival keeps the name in a ``title`` attribute;
        every other festival keeps it in the text of a link.
        """
        self.bands = []
        if self.festival == 'maifeld':
            self._parse_maifeld()
        else:
            self._parse_a()

    def print_bands(self):
        """Log every collected band name at INFO level."""
        for band in self.bands:
            logger.info(band)

    def get_bands(self):
        """Return the list of band names collected so far."""
        return self.bands

    def parse(self):
        """Select the entries, extract the names and return them as a list."""
        self.parse_entries()
        self.parse_samples()
        return self.get_bands()