# -*- coding: UTF-8 -*-
4 from BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time, combine, timedelta
8 from textwrap import wrap
13 from gotovienna import defaults
# Position types accepted by search() for origin and destination.
POSITION_TYPES = ('stop', 'address', 'poi')

# Debug log file in the user's home directory; opened by search() and by
# the result parser.
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
class ParserError(Exception):
    """Raised for invalid search input and for pages that cannot be parsed."""

    def __init__(self, msg='Parser error'):
        # NOTE(review): the original constructor body was missing from the
        # source; storing the text on ``message`` is assumed - confirm.
        # Initialising the base class makes str(exc) and exc.args carry msg.
        Exception.__init__(self, msg)
        self.message = msg
class PageType:
    """Page kinds reported by ``check_page``."""
    # NOTE(review): the class header was missing from the source; the name
    # is taken from the PageType.RESULT / CORRECTION / UNKNOWN references.
    UNKNOWN, CORRECTION, RESULT = range(3)
def extract_city(station):
    """ Extract city from string if present,
    else return default city

    >>> extract_city('Karlsplatz, Wien')
    'Wien'
    >>> extract_city('Hauptplatz, Linz')
    'Linz'
    >>> extract_city('Karlsplatz')
    'Wien'
    """
    if ',' in station:
        # The city is whatever follows the last comma.
        return station.rsplit(',', 1)[1].strip()
    # Default city, the same one split_station() falls back to.
    return 'Wien'
def extract_station(station):
    """ Remove city from string

    >>> extract_station('Karlsplatz, Wien')
    'Karlsplatz'
    >>> extract_station('Karlsplatz')
    'Karlsplatz'
    """
    if ',' in station:
        # Everything before the last comma is the station name.
        return station.rsplit(',', 1)[0].strip()
    # NOTE(review): the fallback branch was missing from the source;
    # returning the input unchanged mirrors split_station().
    return station
def split_station(station):
    """ Split a station string into a (station, city) tuple

    The city is the part after the last comma; without a comma the
    station is returned unchanged together with the default city 'Wien'.

    >>> split_station('Karlsplatz, Wien')
    ('Karlsplatz', 'Wien')
    >>> split_station('Karlsplatz')
    ('Karlsplatz', 'Wien')
    """
    if ',' in station:
        name, city = station.rsplit(',', 1)
        return (name.strip(), city.strip())

    return (station, 'Wien')
def search(origin_tuple, destination_tuple, dtime=None):
    """ build route request
    returns html result (as urllib response)

    origin_tuple, destination_tuple: (name, position_type) pairs. The name
        may end in ', City' (see split_station); position_type has to be
        one of POSITION_TYPES.
    dtime: datetime used for the request date and time, defaults to now.

    Raises ParserError if a position type is not in POSITION_TYPES.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    origin, origin_city = split_station(origin)

    destination, destination_type = destination_tuple
    destination, destination_city = split_station(destination)

    if origin_type not in POSITION_TYPES or \
            destination_type not in POSITION_TYPES:
        raise ParserError('Invalid position type')

    # Fill in a copy so the shared defaults are not modified by a request.
    post = defaults.search_post.copy()
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')
    post['place_origin'] = origin_city
    post['place_destination'] = destination_city
    params = urlencode(post)
    url = '%s?%s' % (defaults.action, params)

    # Best-effort debug trace: a log failure must not abort the search.
    # NOTE(review): the statement writing to the log was missing from the
    # source; appending the request URL is assumed - confirm.
    try:
        f = open(DEBUGLOG, 'a')
        try:
            f.write(url + '\n')
        finally:
            f.close()
    except (IOError, OSError):
        print('Unable to write to DEBUGLOG: %s' % DEBUGLOG)

    # NOTE(review): the final statement was missing; the docstring and the
    # urlopen import indicate that the opened response is returned.
    return urlopen(url)
103 """ Parser for search response
def __init__(self, html):
    """Parse the search response ``html`` and keep the tree in self.soup."""
    self.soup = BeautifulSoup(html)
def check_page(self):
    """Classify the parsed page.

    Returns PageType.RESULT when the 'form_efaresults' form is present,
    PageType.CORRECTION when a 'form_error' div is present, and
    PageType.UNKNOWN otherwise. The result form is checked first.
    """
    if self.soup.find('form', {'id': 'form_efaresults'}):
        return PageType.RESULT

    if self.soup.find('div', {'class': 'form_error'}):
        return PageType.CORRECTION

    return PageType.UNKNOWN
def get_correction(self):
    """Collect the alternatives offered on a correction page.

    Returns a dict holding, for every select list found on the page, the
    texts of its options under one of the keys 'origin', 'destination',
    'place_origin' and 'place_destination'.

    Raises ParserError if none of the four select lists is present.
    """
    # result key -> id of the select element providing the options
    select_ids = (
        ('origin', 'nameList_origin'),
        ('destination', 'nameList_destination'),
        ('place_origin', 'placeList_origin'),
        ('place_destination', 'placeList_destination'),
    )

    corrections = {}
    for key, select_id in select_ids:
        select = self.soup.find('select', {'id': select_id})
        if select is not None:
            # Each key reads its own list. The place_* entries used to be
            # filled from the name lists, which returned the wrong options
            # and failed when only a place list was present.
            corrections[key] = [option.text
                                for option in select.findAll('option')]

    if not corrections:
        raise ParserError('Unable to parse html')

    return corrections
def get_result(self):
    """Return an rParser built from the string form of the parsed page."""
    return rParser(str(self.soup))
149 """ Parser for routing results
def __init__(self, html):
    """Parse the routing result ``html``; parsed views are built lazily."""
    self.soup = BeautifulSoup(html)
    # Caches for the parsed overview and the parsed trip details.
    self._overview = None
    # The details accessor tests self._details before parsing, so it has
    # to exist from the start.
    self._details = None
@classmethod
def get_tdtext(cls, x, cl):
    """Return the text of the td with class ``cl`` found inside ``x``.

    If find() yields no cell, the attribute access fails with
    AttributeError (presumably because find() returns None then); the
    overview accessor handles that error.
    """
    return x.find('td', {'class': cl}).text
@classmethod
def get_change(cls, x):
    """Return the content of the 'col_change' cell of row ``x`` as int."""
    y = cls.get_tdtext(x, 'col_change')
    # NOTE(review): everything after the cell lookup was missing from the
    # source; converting the text to int and treating an empty cell as 0
    # is assumed - confirm.
    return int(y) if y else 0
@classmethod
def get_price(cls, x):
    """Return the content of the 'col_price' cell of row ``x`` as float.

    A decimal comma is converted to a decimal point before parsing.
    """
    y = cls.get_tdtext(x, 'col_price')
    # NOTE(review): the conditions around the conversion were missing from
    # the source; returning 0.0 for text that is not a number (e.g. an
    # empty cell) is assumed - confirm.
    try:
        return float(y.replace(',', '.'))
    except ValueError:
        return 0.0
@classmethod
def get_date(cls, x):
    """Return the 'col_date' cell of row ``x`` (dd.mm.YYYY) as a date."""
    y = cls.get_tdtext(x, 'col_date')
    # NOTE(review): the guard around the conversion was missing from the
    # source; returning None for an empty cell is assumed - confirm.
    if not y:
        return None
    return datetime.strptime(y, '%d.%m.%Y').date()
@classmethod
def get_datetime(cls, x):
    """Parse the 'col_time' cell of row ``x``.

    'HH:MM-HH:MM' (a dash after the first character) is combined with the
    row's date into two datetimes; an arrival time earlier than the
    departure time is placed on the following day. Any other non-empty
    text is cut into 5-character 'HH:MM' pieces and returned as a list of
    time objects.
    """
    def to_time(text):
        return time(*[int(part) for part in text.split(':')])

    y = cls.get_tdtext(x, 'col_time')
    # NOTE(review): the empty-cell branch was missing from the source;
    # returning an empty list is assumed - confirm.
    if not y:
        return []

    if y.find('-') > 0:
        times = [to_time(part) for part in y.split('-')]
        d = cls.get_date(x)
        # datetime.combine is a classmethod of datetime; the datetime
        # module has no importable top-level 'combine'.
        from_dtime = datetime.combine(d, times[0])
        if times[0] > times[1]:
            # arrival is on the next day
            to_dtime = datetime.combine(d + timedelta(1), times[1])
        else:
            to_dtime = datetime.combine(d, times[1])
        # NOTE(review): the return statement of this branch was missing
        # from the source; returning both datetimes is assumed - confirm.
        return [from_dtime, to_dtime]

    return [to_time(chunk) for chunk in wrap(y, 5)]
@classmethod
def get_duration(cls, x):
    """Return the 'col_duration' cell of row ``x`` (HH:MM) as a time."""
    y = cls.get_tdtext(x, 'col_duration')
    # NOTE(review): the guard around the conversion was missing from the
    # source; returning None for an empty cell is assumed - confirm.
    if not y:
        return None
    return time(*[int(part) for part in y.split(':')])
217 for detail in self.details():
def _parse_details(self):
    """Parse every 'data_table tourdetail' div into a list of trip steps.

    Returns one list per div; each contains one dict per table body row
    with the keys 'time', 'station' and 'info'.
    """
    def plain_strings(cell):
        # Exact type comparison kept from the original: only nodes whose
        # type is NavigableString itself are used, not subclasses.
        return [node for node in cell.contents
                if type(node) == NavigableString]

    tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})

    trips = []
    for tour in tours:
        steps = []
        for row in tour.find('tbody').findAll('tr'):
            station_cell = row.find('td', {'class': 'col_station'})
            info_cell = row.find('td', {'class': 'col_info'})
            steps.append({
                # NOTE(review): the original called rParser.get_time,
                # which is not defined in the visible source;
                # get_datetime yields the list of times - confirm.
                'time': self.get_datetime(row),
                # the first two characters of each station string are
                # cut off
                'station': [text[2:].strip()
                            for text in plain_strings(station_cell)],
                'info': [text.strip()
                         for text in plain_strings(info_cell)],
            })
        trips.append(steps)

    # NOTE(review): the end of the original statement and the return were
    # missing from the source; mapping over ``tours`` and returning the
    # result is what the visible structure requires.
    return trips
def details(self):
    """returns list of trip details
    [ [ { 'time': [datetime.time, datetime.time] if time else [],
          'station': [u'start', u'end'] if station else [],
          'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
        }, ... # next trip step
      ], ... # next trip possibility
    ]

    The page is parsed on first use and the result is cached.
    """
    # NOTE(review): the def line was missing from the source; the name and
    # the plain-method form follow the self.details() call elsewhere in
    # the file - confirm that no decorator was attached.
    if not self._details:
        self._details = self._parse_details()

    return self._details
def _parse_overview(self):
    """Parse the 'tbl_fahrten' table into one dict per connection.

    Returns a list of dicts with the keys 'time', 'duration', 'change'
    and 'price', one for every table row after the headline row.

    Raises ParserError if the table is missing or has no rows.
    """
    # get overview table
    table = self.soup.find('table', {'id': 'tbl_fahrten'})

    # check if there is an overview table
    all_rows = table.findAll('tr') if table else []
    if not all_rows:
        raise ParserError('Unable to parse overview')

    rows = all_rows[1:]  # cut off headline

    # NOTE(review): the end of the original statement and the return were
    # missing from the source; building one entry per row and returning
    # the list is assumed - confirm.
    return [{
        'time': self.get_datetime(row),
        'duration': self.get_duration(row),
        'change': self.get_change(row),
        'price': self.get_price(row),
    } for row in rows]
def overview(self):
    """Return the parsed overview, parsing the page on first use.

    If parsing fails with AttributeError the page is dumped to DEBUGLOG
    and the cached value (None before the first successful parse) is
    returned.
    """
    # NOTE(review): the def line (and any decorator or docstring) was
    # missing from the source; the name follows the self._overview /
    # _parse_overview naming - confirm name and whether it is a property.
    if not self._overview:
        try:
            self._overview = self._parse_overview()
        except AttributeError:
            # Keep the page for inspection; close the file even when the
            # write fails (the visible code never closed it).
            f = open(DEBUGLOG, 'w')
            try:
                f.write(str(self.soup))
            finally:
                f.close()

    return self._overview