1 from BeautifulSoup import BeautifulSoup, NavigableString
2 from urllib2 import urlopen
3 from urllib import urlencode
5 from datetime import datetime, time
6 from textwrap import wrap
# Position kinds that search() accepts as the second element of its
# origin/destination tuples; any other value is rejected there.
POSITION_TYPES = ('stop', 'address', 'poi')
11 class ParserException(Exception):
13 def __init__(self, msg = 'Parser error'):
# Page classification codes (0, 1, 2); referenced elsewhere in this file as
# PageType.UNKNOWN, PageType.CORRECTION and PageType.RESULT.
UNKNOWN, CORRECTION, RESULT = range(3)
def search(origin_tuple, destination_tuple, dtime=None):
    """Build a route request and fetch the result page.

    Args:
        origin_tuple: ``(name, position_type)`` pair describing the origin.
        destination_tuple: ``(name, position_type)`` pair describing the
            destination.
        dtime: ``datetime`` used to fill the request's date and time
            fields; ``datetime.now()`` is used when it is omitted.

    Returns:
        The html result, as the response object returned by ``urlopen``.

    Raises:
        ParserException: if either position type is not listed in
            ``POSITION_TYPES``.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    destination, destination_type = destination_tuple
    if (origin_type not in POSITION_TYPES
            or destination_type not in POSITION_TYPES):
        raise ParserException('Invalid position type')

    # Copy the template first: assigning into settings.search_post directly
    # would modify the shared module-level mapping on every request.
    post = dict(settings.search_post)
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')

    params = urlencode(post)
    return urlopen('%s?%s' % (settings.action, params))
45 """ Parser for search response
def __init__(self, html):
    """Parse *html* with BeautifulSoup and keep the tree in ``self.bs``."""
    soup = BeautifulSoup(html)
    self.bs = soup
52 if self.bs.find('form', {'id': 'form_efaresults'}):
53 return PageType.RESULT
55 if self.bs.find('div', {'class':'form_error'}):
56 return PageType.CORRECTION
58 return PageType.UNKNOWN
def get_correction(self):
    """Return the options offered for the origin and the destination.

    Returns:
        Tuple ``(origin, destination)``; each element is the result of
        ``findAll('option')`` on the ``nameList_origin`` respectively
        ``nameList_destination`` select element.

    Raises:
        ParserException: if either select element is not found.
    """
    origin_select = self.bs.find('select', {'id': 'nameList_origin'})
    destination_select = self.bs.find('select',
                                      {'id': 'nameList_destination'})

    if not origin_select or not destination_select:
        # ParserException is the exception class this module defines; the
        # name ParserError is not defined and would surface as a NameError.
        raise ParserException('Unable to parse html')

    origin = origin_select.findAll('option')
    destination = destination_select.findAll('option')

    return (origin, destination)
78 return rParser(str(self.bs))
83 """ Parser for routing results
def __init__(self, html):
    """Parse *html* into ``self.soup`` and reset the lazy result caches."""
    self.soup = BeautifulSoup(html)
    # The lazy getters test ``self._details`` / ``self._overview`` before
    # they ever assign them, so both must exist (and be falsy) up front.
    self._details = None
    self._overview = None
def get_tdtext(cls, x, cl):
    """Return the ``text`` of the ``td`` that ``x.find`` yields for class *cl*."""
    cell = x.find('td', {'class': cl})
    return cell.text
96 def get_change(cls, x):
97 y = rParser.get_tdtext(x, 'col_change')
def get_price(cls, x):
    """Return the ``col_price`` cell of *x* as a float.

    Commas in the cell text are replaced by points before the conversion.
    """
    raw_price = rParser.get_tdtext(x, 'col_price')
    normalized = raw_price.replace(',', '.')
    return float(normalized)
def get_date(cls, x):
    """Return the ``col_date`` cell of *x* as a ``datetime.date``.

    The cell text is parsed with the format ``%d.%m.%Y``.
    """
    raw_date = rParser.get_tdtext(x, 'col_date')
    parsed = datetime.strptime(raw_date, '%d.%m.%Y')
    return parsed.date()
def get_time(cls, x):
    """Return the times in the ``col_time`` cell of *x* as a list.

    If the cell text contains a ``-`` anywhere after its first character
    it is split on ``-``; otherwise it is broken up by
    ``textwrap.wrap(text, 5)``.  Every piece is split on ``:`` and the
    resulting integers are passed positionally to ``datetime.time``.
    """
    text = rParser.get_tdtext(x, 'col_time')

    if text.find("-") > 0:
        pieces = text.split('-')
    else:
        pieces = wrap(text, 5)

    return [time(*[int(part) for part in piece.split(':')])
            for piece in pieces]
def get_duration(cls, x):
    """Return the ``col_duration`` cell of *x* as a ``datetime.time``.

    The cell text is split on ``:`` and the integers are passed
    positionally to ``time``.
    """
    raw_duration = rParser.get_tdtext(x, 'col_duration')
    fields = [int(field) for field in raw_duration.split(":")]
    return time(*fields)
139 for detail in self.details():
142 def _parse_details(self):
143 tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
145 trips = map(lambda x: map(lambda y: {
146 'time': rParser.get_time(y),
147 'station': map(lambda z: z[2:].strip(),
148 filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
149 'info': map(lambda x: x.strip(),
150 filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
151 }, x.find('tbody').findAll('tr')),
157 """returns list of trip details
158 [ [ { 'time': [datetime.time, datetime.time] if time else [],
159 'station': [u'start', u'end'] if station else [],
160 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
161 }, ... # next trip step
162 ], ... # next trip possibility
165 if not self._details:
166 self._details = self._parse_details()
170 def _parse_overview(self):
173 table = self.soup.find('table', {'id': 'tbl_fahrten'})
175 # check if there is an overview table
176 if table and table.findAll('tr'):
178 rows = table.findAll('tr')[1:] # cut off headline
180 overview = map(lambda x: {
181 'date': rParser.get_date(x),
182 'time': rParser.get_time(x),
183 'duration': rParser.get_duration(x), # grab duration
184 'change': rParser.get_change(x),
185 'price': rParser.get_price(x),
189 raise ParserError('Unable to parse details')
202 if not self._overview:
204 self._overview = self._parse_overview()
205 except AttributeError:
206 f = open('DEBUG', 'w')
207 f.write(str(self.soup))
210 return self._overview
212 if __name__ == '__main__':