2 # -*- coding: UTF-8 -*-
import os
import re
from datetime import datetime, time, timedelta
from textwrap import wrap
from urllib import urlencode
from urllib2 import urlopen

from gotovienna import defaults
from gotovienna.BeautifulSoup import BeautifulSoup, NavigableString
# Location types accepted by the routing form (see search()/guess_location_type())
POSITION_TYPES = ('stop', 'address', 'poi')

# Debug dump target: search() appends request URLs, the overview accessor
# writes failing pages here
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
class ParserError(Exception):
    """Raised when a fetched page cannot be parsed into the expected structure."""

    def __init__(self, msg='Parser error'):
        # Forward to Exception so str(err) and err.args carry the message.
        Exception.__init__(self, msg)
        self.msg = msg
25 UNKNOWN, CORRECTION, RESULT = range(3)
def extract_city(station):
    """ Extract city from string if present,
        else return default city

        >>> extract_city('Karlsplatz, Wien')
        'Wien'
        >>> extract_city('Karlsplatz')
        'Wien'
    """
    if ',' in station:
        # city is everything after the last comma
        return station.rsplit(',', 1)[1].strip()
    # no city given: fall back to the default city
    return 'Wien'
def extract_station(station):
    """ Remove city from string

        >>> extract_station('Karlsplatz, Wien')
        'Karlsplatz'
        >>> extract_station('Karlsplatz')
        'Karlsplatz'
    """
    if ',' in station:
        # keep everything before the last comma
        return station[:station.rindex(',')].strip()
    # no city part present: the whole string is the station
    return station
def split_station(station, default_city='Wien'):
    """ Split 'station, city' into a (station, city) tuple.

        `default_city` is used when no city part is present
        (kept backward-compatible: defaults to 'Wien').

        >>> split_station('Karlsplatz, Wien')
        ('Karlsplatz', 'Wien')
        >>> split_station('Karlsplatz')
        ('Karlsplatz', 'Wien')
    """
    if ',' in station:
        # split only at the last comma so station names containing
        # commas stay intact
        name, city = station.rsplit(',', 1)
        return (name.strip(), city.strip())
    return (station, default_city)
def guess_location_type(location):
    """Guess type (stop, address, poi) of a location

    >>> guess_location_type('pilgramgasse')
    'stop'
    >>> guess_location_type('karlsplatz 14')
    'address'
    >>> guess_location_type('reumannplatz 12/34')
    'address'
    """
    parts = location.split()

    # Assume all single-word locations are stops
    # (also guards against empty input, which would crash on parts[-1])
    if len(parts) < 2:
        return 'stop'

    last_part = parts[-1]

    # If the last part is numeric, assume address
    if last_part.isdigit():
        return 'address'

    # Addresses with door number (e.g. "12/34")
    if all(x.isdigit() or x == '/' for x in last_part):
        return 'address'

    # Sane default - assume it's a stop/station name
    return 'stop'
93 def search(origin_tuple, destination_tuple, dtime=None):
94 """ build route request
95 returns html result (as urllib response)
98 dtime = datetime.now()
100 origin, origin_type = origin_tuple
101 origin, origin_city = split_station(origin)
103 destination, destination_type = destination_tuple
104 destination, destination_city = split_station(destination)
107 if origin_type is None:
108 origin_type = guess_location_type(origin)
109 print 'Guessed origin type:', origin_type
111 if destination_type is None:
112 destination_type = guess_location_type(destination)
113 print 'Guessed destination type:', destination_type
115 if (origin_type not in POSITION_TYPES or
116 destination_type not in POSITION_TYPES):
117 raise ParserError('Invalid position type')
119 post = defaults.search_post
120 post['name_origin'] = origin
121 post['type_origin'] = origin_type
122 post['name_destination'] = destination
123 post['type_destination'] = destination_type
124 post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
125 post['itdTime'] = dtime.strftime('%H:%M')
126 post['place_origin'] = origin_city
127 post['place_destination'] = destination_city
128 params = urlencode(post)
129 url = '%s?%s' % (defaults.action, params)
132 f = open(DEBUGLOG, 'a')
136 print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
142 """ Parser for search response
    def __init__(self, html):
        # html: raw HTML body of the routing search response
        self.soup = BeautifulSoup(html)
148 def check_page(self):
149 if self.soup.find('form', {'id': 'form_efaresults'}):
150 return PageType.RESULT
152 if self.soup.find('div', {'class':'form_error'}):
153 return PageType.CORRECTION
155 return PageType.UNKNOWN
157 state = property(check_page)
159 def get_correction(self):
160 names_origin = self.soup.find('select', {'id': 'nameList_origin'})
161 names_destination = self.soup.find('select', {'id': 'nameList_destination'})
162 places_origin = self.soup.find('select', {'id': 'placeList_origin'})
163 places_destination = self.soup.find('select', {'id': 'placeList_destination'})
166 if any([names_origin, names_destination, places_origin, places_destination]):
170 dict['origin'] = map(lambda x: x.text,
171 names_origin.findAll('option'))
172 if names_destination:
173 dict['destination'] = map(lambda x: x.text,
174 names_destination.findAll('option'))
177 dict['place_origin'] = map(lambda x: x.text,
178 names_origin.findAll('option'))
179 if names_destination:
180 dict['place_destination'] = map(lambda x: x.text,
181 names_destination.findAll('option'))
186 raise ParserError('Unable to parse html')
    def get_result(self):
        """Wrap the current page in an rParser for extracting routes."""
        return rParser(str(self.soup))
194 """ Parser for routing results
197 def __init__(self, html):
198 self.soup = BeautifulSoup(html)
199 self._overview = None
203 def get_tdtext(cls, x, cl):
204 return x.find('td', {'class': cl}).text
207 def get_change(cls, x):
208 y = rParser.get_tdtext(x, 'col_change')
215 def get_price(cls, x):
216 y = rParser.get_tdtext(x, 'col_price')
220 return float(y.replace(',', '.'))
225 def get_date(cls, x):
226 y = rParser.get_tdtext(x, 'col_date')
228 return datetime.strptime(y, '%d.%m.%Y').date()
233 def get_datetime(cls, x):
234 y = rParser.get_tdtext(x, 'col_time')
236 if (y.find("-") > 0):
238 times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
239 d = rParser.get_date(x)
240 from_dtime = datetime.combine(d, times[0])
241 if times[0] > times[1]:
243 to_dtime = datetime.combine(d + timedelta(1), times[1])
245 to_dtime = datetime.combine(d, times[1])
247 return [from_dtime, to_dtime]
250 dtregex = {'date' : '\d\d\.\d\d',
253 regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
254 ma = re.match(regex, y)
261 def extract_datetime(gr, n):
262 if 'date%d' % n in gr and gr['date%d' % n]:
263 from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
265 t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
266 d = datetime.today().date()
267 return datetime.combine(d, t)
270 from_dtime = extract_datetime(gr, 1)
271 to_dtime = extract_datetime(gr, 2)
273 return [from_dtime, to_dtime]
279 for detail in self.details():
282 def _parse_details(self):
283 tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
285 trips = map(lambda x: map(lambda y: {
286 'timespan': rParser.get_datetime(y),
287 'station': map(lambda z: z[2:].strip(),
288 filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
289 'info': map(lambda x: x.strip(),
290 filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
291 }, x.find('tbody').findAll('tr')),
297 """returns list of trip details
298 [ [ { 'time': [datetime.time, datetime.time] if time else [],
299 'station': [u'start', u'end'] if station else [],
300 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
301 }, ... # next trip step
302 ], ... # next trip possibility
305 if not self._details:
306 self._details = self._parse_details()
310 def _parse_overview(self):
313 table = self.soup.find('table', {'id': 'tbl_fahrten'})
315 # check if there is an overview table
316 if table and table.findAll('tr'):
318 rows = table.findAll('tr')[1:] # cut off headline
320 overview = map(lambda x: {
321 'timespan': rParser.get_datetime(x),
322 'change': rParser.get_change(x),
323 'price': rParser.get_price(x),
327 raise ParserError('Unable to parse overview')
340 if not self._overview:
342 self._overview = self._parse_overview()
343 except AttributeError:
344 f = open(DEBUGLOG, 'w')
345 f.write(str(self.soup))
348 return self._overview