1 from BeautifulSoup import BeautifulSoup, NavigableString
3 from datetime import time, datetime
4 from textwrap import wrap
7 class ParserError(Exception):
8 def __init__(self, value='', code=0):
13 return repr(self.value)
24 def __init__(self, html):
25 self.soup = BeautifulSoup(html)
28 for detail in self.details():
32 def _parse_details(self):
33 if self._current_state < 0:
34 raise ParserError('Unable to parse details while in error state')
36 trips = map(lambda x: map(lambda x: {
38 'time': map(lambda x: (time(*map(int, x.split(':')))), wrap(x.find('td', {'class': 'col_time'}).text, 5)), # black magic appears
39 'station': map(lambda x: x[2:].strip(),
40 filter(lambda x: type(x) == NavigableString, x.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
41 'info': map(lambda x: x.strip(),
42 filter(lambda x: type(x) == NavigableString, x.find('td', {'class': 'col_info'}).contents)),
43 }, x.find('tbody').findAll('tr')),
44 self.soup.findAll('div', {'class': 'data_table tourdetail'})) # all routes
49 """returns list of trip details
50 [ [ { 'time': [datetime.time, datetime.time] if time else [],
51 'station': [u'start', u'end'] if station else [],
52 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
53 }, ... # next trip step
54 ], ... # next trip possibility
58 self._details = self._parse_details()
62 def _parse_overview(self):
63 def get_tdtext(x, cl):
64 return x.find('td', {'class': cl}).text
67 y = get_tdtext(x, 'col_change')
74 y = get_tdtext(x, 'col_price')
76 return float(y.replace(',', '.'))
81 y = get_tdtext(x, 'col_date')
83 return datetime.strptime(y, '%d.%m.%Y').date()
88 table = self.soup.find('table', {'id': 'tbl_fahrten'})
90 # check if there is an overview table
91 if table and table.findAll('tr'):
93 rows = table.findAll('tr')[1:] # cut off headline
94 overview = map(lambda x: {
96 'time': map(lambda x: time(*map(int, x.strip().split(':'))) if x else None, # extract times or set to None if empty
97 x.find('td', {'class': 'col_time'}).text.split('-')) if x.find('td', {'class': 'col_time'}) else [],
98 'duration': time(*map(int, x.find('td', {'class': 'col_duration'}).text.split(':'))), # grab duration
99 'change': get_change(x),
100 'price': get_price(x),
104 self._current_state = self.STATE_ERROR
105 raise ParserError('Unable to parse details while in error state')
118 if not self._overview:
120 self._overview = self._parse_overview()
121 except AttributeError:
122 f = open('DEBUG', 'w')
123 f.write(str(self.soup))
126 return self._overview
def _check_request_state(self):
    """Placeholder for checking the request state; not implemented here.

    Raises:
        NotImplementedError: always, on every call.
    """
    raise NotImplementedError()
def request_state(self):
    """Return the current request state held by this object.

    Returns:
        The value stored in ``self._current_state``, passed through
        unchanged.
    """
    return self._current_state
def get_stations(self, letter):
    """Return the station values listed for ``letter``, fetching them once.

    Results are cached in ``self._stations`` under ``letter``; when an entry
    already exists it is returned as-is and nothing is fetched.

    Args:
        letter: Cache key; also substituted into ``settings.stations`` with
            the ``%`` operator to build the URL that is opened.

    Returns:
        list: The ``value`` attribute of every ``option`` element found
        inside the ``select`` element whose ``id`` is ``letter`` on the
        fetched page.
    """
    # ``in`` replaces dict.has_key(), which does not exist on Python 3 and
    # has been the discouraged spelling on Python 2 for a long time.
    if letter not in self._stations:
        page = urllib2.urlopen(settings.stations % letter).read()
        select = BeautifulSoup(page).find('select', {'id': 'letter'})
        # Store a real list (not a lazy map object) so the cached value can
        # be read again on every later call.
        self._stations[letter] = [
            option['value'] for option in select.findAll('option')
        ]

    return self._stations[letter]
152 bs = BeautifulSoup(urllib2.urlopen(settings.line_overview).read())
154 lines = bs.findAll('table', {'class': 'linie'})
155 # cut line parameter out of href
156 self._lines = map(lambda x: map(lambda x: x['href'][x['href'].find('=') + 1:], x.findAll('a')), lines)