1 from BeautifulSoup import BeautifulSoup, NavigableString
2 from urllib2 import urlopen
3 from urllib import urlencode
5 from datetime import datetime, time
6 from textwrap import wrap
# Position kinds a caller may give for an origin or destination; search() raises
# ParserError for any other value.
11 POSITION_TYPES = ('stop', 'address', 'poi')
# Debug file in the user's home directory: search() opens it for appending and
# the overview code dumps the parsed page into it when parsing fails.
13 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
# Exception raised in this file for invalid position types and for HTML that
# cannot be parsed.
15 class ParserError(Exception):
# NOTE(review): only the signature is visible in this listing; what __init__
# does with msg cannot be confirmed from here.
17 def __init__(self, msg='Parser error'):
# Page-type codes (UNKNOWN=0, CORRECTION=1, RESULT=2); the rest of the file
# refers to them as PageType.UNKNOWN / PageType.CORRECTION / PageType.RESULT.
# NOTE(review): the enclosing class header is not shown - presumably
# `class PageType`; confirm against the full file.
21 UNKNOWN, CORRECTION, RESULT = range(3)
# Build the route request for one origin/destination pair.
# origin_tuple and destination_tuple are (name, type) pairs; each type must be
# one of POSITION_TYPES, otherwise ParserError('Invalid position type') is
# raised. dtime provides the date and time fields of the request.
24 def search(origin_tuple, destination_tuple, dtime=None):
25 """ build route request
26 returns html result (as urllib response)
# NOTE(review): the statement guarding this default is not shown in this
# listing - presumably "now" is only used when no dtime was passed; confirm.
29 dtime = datetime.now()
31 origin, origin_type = origin_tuple
32 destination, destination_type = destination_tuple
33 if not origin_type in POSITION_TYPES or\
34 not destination_type in POSITION_TYPES:
35 raise ParserError('Invalid position type')
# NOTE(review): post is the settings.search_post object itself, not a copy, so
# the assignments below modify the shared settings mapping and persist across
# calls. Copying first (e.g. dict(settings.search_post)) would avoid that.
37 post = settings.search_post
38 post['name_origin'] = origin
39 post['type_origin'] = origin_type
40 post['name_destination'] = destination
41 post['type_destination'] = destination_type
# Date is sent as DD.MM.YYYY and time as HH:MM.
42 post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
43 post['itdTime'] = dtime.strftime('%H:%M')
# The fields travel as a query string appended to settings.action.
44 params = urlencode(post)
45 url = '%s?%s' % (settings.action, params)
# Debug logging to DEBUGLOG in append mode.
# NOTE(review): what is written, whether f is closed, and the condition under
# which the message below is printed are not visible in this listing -
# presumably a try/except around the write; confirm.
48 f = open(DEBUGLOG, 'a')
52 print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
58 """ Parser for search response
# The response HTML is parsed once here; the other methods query self.soup.
61 def __init__(self, html):
62 self.soup = BeautifulSoup(html)
# Page classification (the def line of this method is not shown in this
# listing; the script at the bottom calls it as check_page()):
#   - a <form id="form_efaresults"> marks a result page,
#   - otherwise a <div class="form_error"> marks a correction page,
#   - anything else is reported as unknown.
65 if self.soup.find('form', {'id': 'form_efaresults'}):
66 return PageType.RESULT
68 if self.soup.find('div', {'class':'form_error'}):
69 return PageType.CORRECTION
71 return PageType.UNKNOWN
# Collect the alternatives offered on a correction page.
# Returns (origin, destination), built from the option texts of the
# <select id="nameList_origin"> and <select id="nameList_destination"> elements.
# Raises ParserError('Unable to parse html') when neither select is present.
73 def get_correction(self):
74 nlo = self.soup.find('select', {'id': 'nameList_origin'})
75 nld = self.soup.find('select', {'id': 'nameList_destination'})
77 if not nlo and not nld:
78 raise ParserError('Unable to parse html')
# NOTE(review): the check above only rejects the case where both selects are
# missing. If exactly one is missing, the matching findAll below would be
# called on None unless the lines omitted from this listing guard it - confirm.
81 origin = map(lambda x: x.text, nlo.findAll('option'))
85 destination = map(lambda x: x.text, nld.findAll('option'))
89 return (origin, destination)
# Fragment of a separate method (its def line is not shown in this listing):
# the parsed document is serialised with str() and handed to rParser, which
# parses it again.
92 return rParser(str(self.soup))
97 """ Parser for routing results
# Parse the result page. _overview starts as None and is filled on first use
# by the overview code further down.
100 def __init__(self, html):
101 self.soup = BeautifulSoup(html)
102 self._overview = None
def get_tdtext(cls, x, cl):
    """ Return the text of the <td> found in x whose class attribute is cl
    """
    cell = x.find('td', {'class': cl})
    return cell.text
# Reads the text of the 'col_change' cell of row x.
# NOTE(review): the remainder of this method (how y is converted and what is
# returned) is not part of this listing.
110 def get_change(cls, x):
111 y = rParser.get_tdtext(x, 'col_change')
# Parse the 'col_price' cell of row x as a float; a decimal comma is converted
# to a decimal point first.
# NOTE(review): a line between the lookup and the return is omitted from this
# listing, so any guard for an empty or non-numeric cell cannot be confirmed.
118 def get_price(cls, x):
119 y = rParser.get_tdtext(x, 'col_price')
121 return float(y.replace(',', '.'))
# Parse the 'col_date' cell of row x (format DD.MM.YYYY) into a datetime.date.
# NOTE(review): a line between the lookup and the return is omitted from this
# listing, so any guard for an empty cell cannot be confirmed.
126 def get_date(cls, x):
127 y = rParser.get_tdtext(x, 'col_date')
129 return datetime.strptime(y, '%d.%m.%Y').date()
# Parse the 'col_time' cell of row x into a list of datetime.time objects.
# Two layouts are handled:
#   - times separated by '-' (only recognised when the '-' is not the first
#     character, because find() must return an index greater than 0),
#   - otherwise the text is cut with textwrap.wrap(y, 5); this assumes each
#     time occupies exactly five characters ("HH:MM") - TODO confirm.
# Each piece is split on ':' and the integer parts are passed to time().
# NOTE(review): the branch lines between these statements are omitted from
# this listing, so the handling of an empty cell cannot be confirmed.
134 def get_time(cls, x):
135 y = rParser.get_tdtext(x, 'col_time')
137 if (y.find("-") > 0):
138 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
140 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
# Parse the 'col_duration' cell of row x (colon-separated integers) into a
# datetime.time, i.e. the duration is represented as a time of day.
# NOTE(review): datetime.time only accepts hours 0-23, so a duration of 24
# hours or more would raise ValueError here.
145 def get_duration(cls, x):
146 y = rParser.get_tdtext(x, 'col_duration')
148 return time(*map(int, y.split(":")))
# Fragment of an iteration helper (its def line and loop body are not shown).
# NOTE(review): details is called here, but the script at the bottom of the
# file reads parser.details without calling it. If details is a property, this
# call would fail because the returned list is not callable - confirm.
153 for detail in self.details():
# Extract the step-by-step details of every suggested trip.
# For each <div class="data_table tourdetail">, every <tr> of its <tbody>
# becomes a dict with:
#   'time'    - result of rParser.get_time for the row,
#   'station' - the NavigableString children of the 'col_station' cell, each
#               with its first two characters dropped and then stripped,
#   'info'    - the NavigableString children of the 'col_info' cell, stripped.
# The type(...) == NavigableString comparison is exact, so child tags and
# instances of NavigableString subclasses are filtered out.
# NOTE(review): the end of this expression and the return statement are not
# part of this listing.
156 def _parse_details(self):
157 tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
159 trips = map(lambda x: map(lambda y: {
160 'time': rParser.get_time(y),
161 'station': map(lambda z: z[2:].strip(),
162 filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
163 'info': map(lambda x: x.strip(),
164 filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
165 }, x.find('tbody').findAll('tr')),
171 """returns list of trip details
172 [ [ { 'time': [datetime.time, datetime.time] if time else [],
173 'station': [u'start', u'end'] if station else [],
174 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
175 }, ... # next trip step
176 ], ... # next trip possibility
# Lazy parsing: _parse_details() runs only while no truthy result is cached.
# An empty result is falsy, so it is parsed again on every access.
# NOTE(review): the initialisation of self._details and the return statement
# are not part of this listing - confirm that __init__ sets _details.
179 if not self._details:
180 self._details = self._parse_details()
# Build the overview of suggested connections from <table id="tbl_fahrten">.
# The first <tr> is treated as the headline and skipped; each remaining row
# becomes a dict with the keys 'date', 'time', 'duration', 'change', 'price',
# filled by the matching rParser.get_* helper.
# Raises ParserError('Unable to parse overview') - presumably when the table
# is missing or has no rows; the branch lines are omitted from this listing.
184 def _parse_overview(self):
187 table = self.soup.find('table', {'id': 'tbl_fahrten'})
189 # check if there is an overview table
190 if table and table.findAll('tr'):
192 rows = table.findAll('tr')[1:] # cut off headline
194 overview = map(lambda x: {
195 'date': rParser.get_date(x),
196 'time': rParser.get_time(x),
197 'duration': rParser.get_duration(x), # grab duration
198 'change': rParser.get_change(x),
199 'price': rParser.get_price(x),
203 raise ParserError('Unable to parse overview')
# Lazy accessor body (the def line is not shown in this listing): the overview
# is parsed only while no truthy result is cached in self._overview.
216 if not self._overview:
218 self._overview = self._parse_overview()
# An AttributeError during parsing (e.g. a helper calling .text on a missing
# cell) leads to the current document being dumped to DEBUGLOG; mode 'w'
# replaces whatever the file held before.
# NOTE(review): no close() for f is visible here; a with-block would guarantee
# the handle is released - confirm against the omitted lines.
219 except AttributeError:
220 f = open(DEBUGLOG, 'w')
221 f.write(str(self.soup))
224 return self._overview
# Command-line entry point: request a route, let the user resolve ambiguous
# origin/destination names, then print the connection the user selects.
226 if __name__ == '__main__':
227 parser = argparse.ArgumentParser(description='Get public transport route for Vienna')
228 parser.add_argument('-o', metavar='name', type=str, help='origin', required=True)
229 parser.add_argument('-d', metavar='name', type=str, help='destination', required=True)
230 parser.add_argument('-ot', metavar='type', type=str, help='origin type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
231 parser.add_argument('-dt', metavar='type', type=str, help='destination type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
233 args = parser.parse_args()
234 html = search((args.o, args.ot), (args.d, args.dt)).read()
# From here on the name parser refers to the HTML parser, no longer to the
# argparse parser created above.
236 parser = sParser(html)
237 state = parser.check_page()
239 if state == PageType.CORRECTION:
# cor is (origin alternatives, destination alternatives).
241 cor = parser.get_correction()
244 print '* Origin ambiguous:'
# Keep asking until a number is entered.
# NOTE(review): "0" passes this check (it is a digit string and not greater
# than the list length) and then selects cor[0][-1], the last alternative;
# a lower bound such as int(lo) < 1 appears to be missing.
246 while not lo or not lo.isdigit() or int(lo) > len(cor[0]):
249 print '%d. %s' % (i, c)
251 lo = sys.stdin.readline().strip()
253 args.o = cor[0][int(lo) - 1]
257 print '* Destination ambiguous:'
# NOTE(review): same lower-bound gap as for the origin: "0" selects cor[1][-1].
259 while not ld or not ld.isdigit() or int(ld) > len(cor[1]):
262 print '%d. %s' % (j, c)
264 ld = sys.stdin.readline().strip()
266 args.d = cor[1][int(ld) - 1]
# Repeat the search with the chosen names encoded as UTF-8.
# NOTE(review): if one of the names was not replaced above it is still the
# plain str from argparse; under Python 2, .encode('UTF-8') on a byte string
# containing non-ASCII characters raises UnicodeDecodeError - confirm.
268 html = search((args.o.encode('UTF-8'), args.ot), (args.d.encode('UTF-8'), args.dt)).read()
270 parser = sParser(html)
271 state = parser.check_page()
274 print 'PANIC at correction page'
276 if state == PageType.RESULT:
# The result page is parsed from the raw html by rParser.
277 parser = rParser(html)
279 overviews = parser.overview
280 details = parser.details
# List the connections, numbered from 1.
283 for r in range(len(overviews)):
284 print '%d. [%s] %s-%s (%s)' % (r + 1, overviews[r]['date'], overviews[r]['time'][0], overviews[r]['time'][1], overviews[r]['duration'])
286 l = sys.stdin.readline().strip()
# NOTE(review): "0" is accepted here as well and prints details[-1], the last
# connection; a check for int(l) >= 1 appears to be missing.
290 if l.isdigit() and int(l) <= len(details):
291 for detail in details[int(l) - 1]:
# Steps with both times and stations are printed as
# "[start - end] from -> to" followed by the info lines.
292 if detail['time'] and detail['station']:
# NOTE(review): this assignment rebinds the module-level name time, which is
# the datetime.time class used by rParser.get_time and get_duration. Parsing
# has already happened by this point, but any later parsing would break;
# a different variable name would avoid that.
293 time = '%s - %s' % (detail['time'][0].strftime(TIMEFORMAT), detail['time'][1].strftime(TIMEFORMAT))
294 print '[%s] %s\n%s' % (time, ' -> '.join(detail['station']), '\n'.join(detail['info']))
# Steps lacking times or stations print only their info lines.
296 print '\n'.join(detail['info'])
303 elif state == PageType.UNKNOWN:
304 print 'PANIC unknown result'