1 from BeautifulSoup import BeautifulSoup, NavigableString
3 from datetime import time, datetime
4 from textwrap import wrap
def __init__(self, html):
    """Parse the supplied markup and keep the resulting tree on the instance.

    :param html: markup passed unchanged to ``BeautifulSoup``.
    """
    parsed_tree = BeautifulSoup(html)
    # Every parsing helper on this object reads the tree through self.soup.
    self.soup = parsed_tree
15 for detail in self.details():
def _parse_details(self):
    """Collect the stop-by-stop details of every route found in the page.

    Each ``div`` with class ``data_table tourdetail`` is treated as one
    route; each ``tr`` inside that div's ``tbody`` becomes one dict with
    the keys:

    * ``'time'``    -- ``datetime.time`` values parsed from the text of the
      ``col_time`` cell.
    * ``'station'`` -- plain text children of the ``col_station`` cell,
      each with its first two characters removed and then stripped.
    * ``'info'``    -- plain text children of the ``col_info`` cell,
      stripped.

    The result (one sequence of row dicts per route) is bound to the local
    name ``trips``.
    """
    def to_time(stamp):
        # "H:M"-style text -> datetime.time(H, M)
        return time(*map(int, stamp.split(':')))

    def is_plain_text(node):
        # Exact type match on purpose: instances of NavigableString
        # subclasses do not pass this check.
        return type(node) == NavigableString

    def text_children(row, css_class):
        # Direct children of the requested cell that are bare strings.
        cell = row.find('td', {'class': css_class})
        return filter(is_plain_text, cell.contents)

    def drop_prefix(text):
        # Discards the first two characters before stripping
        # (presumably a marker/bullet in the markup -- TODO confirm).
        return text[2:].strip()

    def clean(text):
        return text.strip()

    def parse_row(row):
        # The time cell's text is cut by textwrap.wrap into pieces of at
        # most five characters (presumably one HH:MM stamp per piece --
        # confirm against the real markup).
        stamps = wrap(row.find('td', {'class': 'col_time'}).text, 5)
        return {
            'time': map(to_time, stamps),
            'station': map(drop_prefix, text_children(row, 'col_station')),
            'info': map(clean, text_children(row, 'col_info')),
        }

    def parse_route(route):
        return map(parse_row, route.find('tbody').findAll('tr'))

    routes = self.soup.findAll('div', {'class': 'data_table tourdetail'})
    trips = map(parse_route, routes)
    # NOTE(review): whatever follows this assignment (presumably handing
    # ``trips`` back to the caller) is outside the visible excerpt -- confirm.
33 self._details = self._parse_details()
37 def _parse_overview(self):
39 Returns dict containing
47 table = self.soup.find('table', {'id': 'tbl_fahrten'})
49 rows = table.findAll('tr')[1:]
50 overview = map(lambda x: {
51 'date': datetime.strptime(x.find('td', {'class': 'col_date'}).text, '%d.%m.%Y') # grab date
52 if x.find('td', {'class': 'col_date'}).text else None, # if date is empty set to None
53 'time': map(lambda x: time(*map(lambda x: int(x), x.split(':'))) if x else None, # extract times or set to None if empty
54 x.find('td', {'class': 'col_time'}).text.split(' - ')),
55 'duration': time(*map(lambda x: int(x), x.find('td', {'class': 'col_duration'}).text.split(':'))), # grab duration
56 'change': int(x.find('td', {'class': 'col_change'}).text) # grab changes
57 if x.find('td', {'class': 'col_change'}).text else 0, # if change is empty set to 0
58 'price': float(x.find('td', {'class': 'col_price'}).text.replace(',', '.')) # grab price
59 if x.find('td', {'class': 'col_price'}).text.find(',') >= 0 else 0.0, # if price is empty set to 0.0
67 if not self._overview:
68 self._overview = self._parse_overview()
76 raise NotImplementedError