1 from BeautifulSoup import BeautifulSoup, NavigableString
2 from urllib2 import urlopen
3 from urllib import urlencode
5 from datetime import datetime, time
6 from textwrap import wrap
# Position kinds accepted for an origin or a destination. search() raises
# ParserError for any other type, and the command-line options -ot / -dt
# use this tuple as their allowed choices.
10 POSITION_TYPES = ('stop', 'address', 'poi')
# Exception raised by this module: for an invalid position type in search()
# and when the expected markup cannot be found in a response page.
12 class ParserError(Exception):
# msg defaults to 'Parser error'.
# NOTE(review): the constructor body is not visible in this excerpt, so how
# msg is stored cannot be confirmed from here.
14 def __init__(self, msg='Parser error'):
# Page kinds numbered 0, 1, 2. The rest of this file refers to them as
# PageType.UNKNOWN, PageType.CORRECTION and PageType.RESULT.
# NOTE(review): the enclosing class header is not visible in this excerpt.
18 UNKNOWN, CORRECTION, RESULT = range(3)
def search(origin_tuple, destination_tuple, dtime=None):
    """Build a route request and send it to the routing service.

    origin_tuple      -- (name, type) pair describing the start point
    destination_tuple -- (name, type) pair describing the end point
    dtime             -- datetime used to fill the request's date and time
                         fields; datetime.now() is used when omitted

    Both types must be members of POSITION_TYPES, otherwise ParserError
    is raised.

    Returns the html result (as urllib response).
    """
    # Only fall back to "now" when the caller gave no time of its own.
    if dtime is None:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    destination, destination_type = destination_tuple
    if (origin_type not in POSITION_TYPES
            or destination_type not in POSITION_TYPES):
        raise ParserError('Invalid position type')

    # Work on a copy: settings.search_post is a module-level template and
    # must not keep the values of this particular request afterwards.
    post = settings.search_post.copy()
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')

    params = urlencode(post)
    url = '%s?%s' % (settings.action, params)

    # Debug trace of the request URL, framed by two rows of '~'. A single
    # parenthesised argument prints the same text under Python 2.
    print("\nurl %s url\n\n%s\n\nurl %s url\n" % ('~'*100, url, '~'*100))

    return urlopen(url)
50 """ Parser for search response
def __init__(self, html):
    """Parse the raw HTML of a search response and keep the tree.

    html -- markup of the response page; it is handed to BeautifulSoup
            and the resulting document is stored as self.soup.
    """
    document = BeautifulSoup(html)
    self.soup = document
# Classify the parsed page; the checks run in this order and the first
# match wins.
# A form with id 'form_efaresults' marks a result page.
57 if self.soup.find('form', {'id': 'form_efaresults'}):
58 return PageType.RESULT
# A div with class 'form_error' marks a correction page.
60 if self.soup.find('div', {'class':'form_error'}):
61 return PageType.CORRECTION
# Neither marker was found.
63 return PageType.UNKNOWN
def get_correction(self):
    """Return the suggestions offered on a correction page.

    Returns a tuple (origin, destination). Each element is the list of
    option texts taken from the matching 'nameList_origin' /
    'nameList_destination' select box, or an empty list when the page
    has no such box.

    Raises ParserError when neither select box is present.
    """
    nlo = self.soup.find('select', {'id': 'nameList_origin'})
    nld = self.soup.find('select', {'id': 'nameList_destination'})

    if nlo is None and nld is None:
        raise ParserError('Unable to parse html')

    # The guard above lets a page through when only one select exists, so
    # each side is read on its own; a missing box means "no suggestions".
    if nlo is not None:
        origin = [option.text for option in nlo.findAll('option')]
    else:
        origin = []

    if nld is not None:
        destination = [option.text for option in nld.findAll('option')]
    else:
        destination = []

    return (origin, destination)
# Serialise the already-parsed document back to a string and wrap it in the
# routing-result parser.
# NOTE(review): the def line this return belongs to is not visible in this
# excerpt.
84 return rParser(str(self.soup))
89 """ Parser for routing results
# Parse the raw HTML once; the extraction code in this class queries the
# resulting tree through self.soup.
92 def __init__(self, html):
93 self.soup = BeautifulSoup(html)
# NOTE(review): self._details and self._overview are read later in this file
# but no assignment to them is visible here -- presumably they are
# initialised on lines missing from this excerpt; confirm.
def get_tdtext(cls, x, cl):
    """Return the text of the <td> inside x that carries CSS class cl.

    x  -- element to search in (an object offering find())
    cl -- value of the cell's class attribute

    When find() yields None for a missing cell, the attribute access
    raises AttributeError, which is left to the caller.
    """
    cell = x.find('td', {'class': cl})
    return cell.text
# Read the 'col_change' cell of x (called with an overview table row).
# NOTE(review): how the cell text is converted and returned is not visible
# in this excerpt.
102 def get_change(cls, x):
103 y = rParser.get_tdtext(x, 'col_change')
# Price from the 'col_price' cell of x, as a float.
# NOTE(review): a line between reading the cell and the return is missing
# from this excerpt (possibly a guard for an empty cell) -- confirm.
110 def get_price(cls, x):
111 y = rParser.get_tdtext(x, 'col_price')
# Replace ',' with '.' so float() accepts text written with a decimal comma.
113 return float(y.replace(',', '.'))
# Date from the 'col_date' cell of x, as a datetime.date.
# NOTE(review): a line between reading the cell and the return is missing
# from this excerpt (possibly a guard for an empty cell) -- confirm.
118 def get_date(cls, x):
119 y = rParser.get_tdtext(x, 'col_date')
# The cell text is parsed as day.month.year with a four-digit year.
121 return datetime.strptime(y, '%d.%m.%Y').date()
# Times from the 'col_time' cell of x, as datetime.time objects built from
# colon-separated integer fields.
# NOTE(review): lines are missing from this excerpt around the two returns
# (likely a guard for an empty cell and an else:) -- confirm.
126 def get_time(cls, x):
127 y = rParser.get_tdtext(x, 'col_time')
# A dash after the first character: split the text on '-' and parse each
# piece. find() > 0 means a dash at index 0 does not take this branch.
129 if (y.find("-") > 0):
130 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
# Otherwise cut the text into chunks of at most 5 characters with
# textwrap.wrap and parse each chunk (presumably one "HH:MM" per chunk).
132 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
# Duration from the 'col_duration' cell of x.
# NOTE(review): a line between reading the cell and the return is missing
# from this excerpt (possibly a guard for an empty cell) -- confirm.
137 def get_duration(cls, x):
138 y = rParser.get_tdtext(x, 'col_duration')
# The colon-separated fields are passed positionally to datetime.time, so
# the duration is represented as a time-of-day value.
140 return time(*map(int, y.split(":")))
# Walk over the trip details returned by self.details().
# NOTE(review): the enclosing def and the loop body are not visible in this
# excerpt.
145 for detail in self.details():
# Extract the step-by-step details of every trip from the page.
148 def _parse_details(self):
# Every div with class 'data_table tourdetail' is treated as one tour.
149 tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
# For each tour (outer lambda, x) every <tr> of its <tbody> (inner lambda, y)
# becomes a dict with the keys 'time', 'station' and 'info':
#   'time'    -- result of rParser.get_time for the row
#   'station' -- children of the 'col_station' cell whose type is exactly
#                NavigableString, with the first two characters dropped
#                and the rest stripped
#   'info'    -- children of the 'col_info' cell whose type is exactly
#                NavigableString, stripped
# NOTE(review): the end of this expression and the return statement are not
# visible in this excerpt.
151 trips = map(lambda x: map(lambda y: {
152 'time': rParser.get_time(y),
153 'station': map(lambda z: z[2:].strip(),
154 filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
155 'info': map(lambda x: x.strip(),
156 filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
157 }, x.find('tbody').findAll('tr')),
163 """returns list of trip details
164 [ [ { 'time': [datetime.time, datetime.time] if time else [],
165 'station': [u'start', u'end'] if station else [],
166 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
167 }, ... # next trip step
168 ], ... # next trip possibility
# Parse on first use and keep the result in self._details. The check is a
# truthiness test, so an empty result is parsed again on the next call.
# NOTE(review): the def line, the end of the docstring and the return
# statement are not visible in this excerpt.
171 if not self._details:
172 self._details = self._parse_details()
# Extract the trip overview from the table with id 'tbl_fahrten'.
176 def _parse_overview(self):
179 table = self.soup.find('table', {'id': 'tbl_fahrten'})
181 # check if there is an overview table
182 if table and table.findAll('tr'):
184 rows = table.findAll('tr')[1:] # cut off headline
# One dict per remaining row with the keys 'date', 'time', 'duration',
# 'change' and 'price', each filled by the matching rParser.get_* helper.
186 overview = map(lambda x: {
187 'date': rParser.get_date(x),
188 'time': rParser.get_time(x),
189 'duration': rParser.get_duration(x), # grab duration
190 'change': rParser.get_change(x),
191 'price': rParser.get_price(x),
# NOTE(review): the end of the map() call, the return and the branch that
# leads to this raise are not visible in this excerpt; presumably it is
# reached when the table check above fails -- confirm.
195 raise ParserError('Unable to parse overview')
# Parse the overview on first use and keep it in self._overview. The check
# is a truthiness test, so an empty result is parsed again on the next call.
# NOTE(review): the def line and the try: matching the except below are not
# visible in this excerpt; the script reads this as an attribute
# (parser.overview), so it is presumably a property -- confirm.
208 if not self._overview:
210 self._overview = self._parse_overview()
# On AttributeError the whole document is written to a file named 'DEBUG'
# in the current working directory.
211 except AttributeError:
212 f = open('DEBUG', 'w')
213 f.write(str(self.soup))
# NOTE(review): no close() for f is visible in this excerpt -- confirm the
# handle is closed.
216 return self._overview
# Command-line entry point: look up a route between -o and -d, let the user
# resolve ambiguous names interactively, then print one line per trip.
218 if __name__ == '__main__':
219 parser = argparse.ArgumentParser(description='Get public transport route for Vienna')
220 parser.add_argument('-o', metavar='name', type=str, help='origin', required=True)
221 parser.add_argument('-d', metavar='name', type=str, help='destination', required=True)
222 parser.add_argument('-ot', metavar='type', type=str, help='origin type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
223 parser.add_argument('-dt', metavar='type', type=str, help='destination type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
225 args = parser.parse_args()
# First request with the names exactly as typed.
# NOTE(review): under Python 2, calling .encode('UTF-8') on a byte string
# first decodes it as ASCII, so a non-ASCII name given on the command line
# would raise UnicodeDecodeError here -- confirm.
227 html = search((args.o.encode('UTF-8'), args.ot), (args.d.encode('UTF-8'), args.dt)).read()
# From here on the name 'parser' no longer refers to the argparse parser;
# it is rebound to the page parsers.
229 parser = sParser(html)
230 state = parser.check_page()
# Correction page: at least one of the two names has to be picked from a
# list of suggestions.
232 if state == PageType.CORRECTION:
234 cor = parser.get_correction()
237 print '* Origin ambiguous:'
# Keep asking until the answer is a digit string not larger than the number
# of origin suggestions.
# NOTE(review): '0' passes this test, and index int(lo)-1 == -1 then selects
# the last suggestion; the lines that initialise lo and i and the loop over
# the suggestions are not visible in this excerpt.
240 while not lo or not lo.isdigit() or int(lo) > len(cor[0]):
243 print '%d. %s' % (i, c)
245 lo = sys.stdin.readline().strip()
# The answer is 1-based; replace the origin with the chosen suggestion.
247 args.o = cor[0][int(lo)-1]
251 print '* Destination ambiguous:'
# Same selection loop for the destination (same '0' caveat as above).
252 while not ld or not ld.isdigit() or int(ld) > len(cor[1]):
255 print '%d. %s' % (j, c)
257 ld = sys.stdin.readline().strip()
259 args.d = cor[1][int(ld)-1]
# Second request with the corrected names, classified again.
261 html = search((args.o.encode('UTF-8'), args.ot), (args.d.encode('UTF-8'), args.dt)).read()
263 parser = sParser(html)
264 state = parser.check_page()
# NOTE(review): the condition guarding this message is not visible in this
# excerpt.
267 print 'PANIC at correction page'
# Result page: print date, first and second time entry, and duration for
# every trip in the overview.
269 if state == PageType.RESULT:
270 parser = rParser(html)
272 overviews = parser.overview
273 for overview in overviews:
274 print '[%s] %s-%s (%s)' % (overview['date'], overview['time'][0], overview['time'][1], overview['duration'])
278 elif state == PageType.UNKNOWN:
279 print 'PANIC unknown result'