1 from BeautifulSoup import BeautifulSoup, NavigableString
3 from datetime import time, datetime
4 from textwrap import wrap
12 def __init__(self, html):
# Parse the raw HTML passed in as `html` with BeautifulSoup and keep the
# resulting tree on self.soup; the _parse_* methods query this tree.
13 self.soup = BeautifulSoup(html)
16 for detail in self.details():
20 def _parse_details(self):
# Build `trips`: one entry per <div class="data_table tourdetail"> found in
# self.soup, each entry being a sequence with one dict per <tr> of that
# div's <tbody>. The dict keys visible here are 'time', 'station' and 'info'.
# NOTE: all nested lambdas reuse the parameter name `x`; the innermost
# binding is the one in effect at each use.
21 trips = map(lambda x: map(lambda x: {
# 'time': textwrap.wrap(..., 5) cuts the col_time cell text into chunks of
# at most 5 characters (presumably one "HH:MM" value each -- confirm against
# the page markup); each chunk is split on ':' and turned into datetime.time.
23 'time': map(lambda x: (time(*map(lambda x: int(x), x.split(':')))), wrap(x.find('td', {'class': 'col_time'}).text, 5)), # one time object per 5-character chunk
# 'station': text nodes of the col_station cell, with the first two
# characters of each node dropped and surrounding whitespace stripped.
24 'station': map(lambda x: x[2:].strip(),
25 filter(lambda x: type(x) == NavigableString, x.find('td', {'class': 'col_station'}).contents)), # keep only NavigableString children (nested tags are dropped)
# 'info': text nodes of the col_info cell, whitespace-stripped.
26 'info': map(lambda x: x.strip(),
27 filter(lambda x: type(x) == NavigableString, x.find('td', {'class': 'col_info'}).contents)),
28 }, x.find('tbody').findAll('tr')),
29 self.soup.findAll('div', {'class': 'data_table tourdetail'})) # all routes
35 self._details = self._parse_details()
39 def _parse_overview(self):
41 Returns dict containing
# The overview lives in the table with id "tbl_fahrten".
49 table = self.soup.find('table', {'id': 'tbl_fahrten'})
# Skip the first <tr> (presumably the header row -- confirm against the page).
51 rows = table.findAll('tr')[1:]
# Each row is mapped to a dict; the keys visible here are 'date', 'time',
# 'duration', 'change' and 'price'.
52 overview = map(lambda x: {
53 'date': datetime.strptime(x.find('td', {'class': 'col_date'}).text, '%d.%m.%Y') # parse the col_date cell as day.month.year
54 if x.find('td', {'class': 'col_date'}).text else None, # if date is empty set to None
55 'time': map(lambda x: time(*map(lambda x: int(x), x.split(':'))) if x else None, # one datetime.time per part, or None for an empty part
56 x.find('td', {'class': 'col_time'}).text.split(' - ')), # the col_time text is split on ' - ' into its parts
# NOTE(review): unlike the other fields, duration has no empty-cell fallback;
# an empty col_duration cell would make int('') raise ValueError.
57 'duration': time(*map(lambda x: int(x), x.find('td', {'class': 'col_duration'}).text.split(':'))), # grab duration
58 'change': int(x.find('td', {'class': 'col_change'}).text) # grab changes
59 if x.find('td', {'class': 'col_change'}).text else 0, # if change is empty set to 0
60 'price': float(x.find('td', {'class': 'col_price'}).text.replace(',', '.')) # decimal comma -> decimal point, then parse
61 if x.find('td', {'class': 'col_price'}).text.find(',') >= 0 else 0.0, # any cell text without a comma (not only an empty one) yields 0.0
69 if not self._overview:
70 self._overview = self._parse_overview()
def get_stations(self, letter):
    """Return the station values listed for ``letter``, fetching them once.

    Results are cached in ``self._stations`` keyed by ``letter``, so the
    remote page is only requested on the first call for a given letter.

    :param letter: value substituted into ``settings.stations`` to build
        the URL of the station list page.
    :returns: list of the ``value`` attributes of every ``<option>`` inside
        the ``<select id="letter">`` element of that page.
    :raises AttributeError: if the fetched page has no such ``<select>``.
    """
    # `in` replaces the deprecated dict.has_key(), which no longer exists
    # on Python 3 dictionaries.
    if letter not in self._stations:
        response = urllib2.urlopen(settings.stations % letter)
        try:
            markup = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
        select = BeautifulSoup(markup).find('select', {'id': 'letter'})
        self._stations[letter] = [option['value']
                                  for option in select.findAll('option')]

    return self._stations[letter]
91 bs = BeautifulSoup(urllib2.urlopen(settings.line_overview).read())
93 lines = bs.findAll('table', {'class': 'linie'})
94 # cut line parameter out of href
95 self._lines = map(lambda x: map(lambda x: x['href'][x['href'].find('=') + 1:], x.findAll('a')), lines)