2 # -*- coding: UTF-8 -*-
4 from BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time
8 from textwrap import wrap
13 from gotovienna import defaults
# Position kinds accepted for the origin and the destination of a search;
# search() rejects any other value with a ParserError.
15 POSITION_TYPES = ('stop', 'address', 'poi')
# Debug log file inside the user's home directory.
# NOTE(review): relies on `os`, whose import is not among the visible
# lines of this listing - confirm it is imported.
17 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
# Exception raised by this module when a request is invalid or a page cannot
# be parsed (raised below with 'Invalid position type',
# 'Unable to parse html' and 'Unable to parse overview').
19 class ParserError(Exception):
# msg: description of the problem; defaults to 'Parser error'.
# NOTE(review): the constructor body is not visible in this listing.
21 def __init__(self, msg='Parser error'):
# Page state constants with the values 0, 1 and 2; used further down as
# PageType.UNKNOWN, PageType.CORRECTION and PageType.RESULT.
# NOTE(review): the enclosing class header is not visible in this listing -
# presumably `class PageType`.
25 UNKNOWN, CORRECTION, RESULT = range(3)
# Build the route request for a trip between two positions.
#
# origin_tuple / destination_tuple: (name, position type) pairs. The type
#   has to be one of POSITION_TYPES; a destination type of None falls back
#   to 'stop'.
# dtime: datetime of the trip, sent as '%d.%m.%Y' and '%H:%M'. The
#   datetime.now() fallback is presumably applied only when dtime is not
#   given - the condition is not visible in this listing.
# Raises ParserError('Invalid position type') for an unknown position type.
#
# The request fields are url-encoded and appended to defaults.action as a
# query string. DEBUGLOG is opened in append mode for debugging, and a
# message is printed when it cannot be written.
#
# NOTE(review): `post` is defaults.search_post itself, not a copy, so the
#   assignments modify the shared defaults - confirm this is intended.
# NOTE(review): no close() of the debug file is visible - confirm the
#   handle is closed.
# NOTE(review): several lines (the origin type fallback, the try/except
#   around the debug log, the return) are not visible in this listing.
28 def search(origin_tuple, destination_tuple, dtime=None):
29 """ build route request
30 returns html result (as urllib response)
33 dtime = datetime.now()
35 origin, origin_type = origin_tuple
36 destination, destination_type = destination_tuple
38 if origin_type is None:
41 if destination_type is None:
42 destination_type = 'stop'
44 if (origin_type not in POSITION_TYPES or
45 destination_type not in POSITION_TYPES):
46 raise ParserError('Invalid position type')
48 post = defaults.search_post
49 post['name_origin'] = origin
50 post['type_origin'] = origin_type
51 post['name_destination'] = destination
52 post['type_destination'] = destination_type
53 post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
54 post['itdTime'] = dtime.strftime('%H:%M')
55 params = urlencode(post)
56 url = '%s?%s' % (defaults.action, params)
59 f = open(DEBUGLOG, 'a')
63 print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
69 """ Parser for search response
def __init__(self, html):
    """ Parse the html of a search response and keep the tree in self.soup
    """
    tree = BeautifulSoup(html)
    self.soup = tree
# Body of check_page (its `def` line is not visible in this listing):
# classify the parsed page as one of the PageType constants.
# A form with the id 'form_efaresults' is reported as a result page.
76 if self.soup.find('form', {'id': 'form_efaresults'}):
77 return PageType.RESULT
# A div with the class 'form_error' is reported as a correction page.
79 if self.soup.find('div', {'class':'form_error'}):
80 return PageType.CORRECTION
# Neither marker was found.
82 return PageType.UNKNOWN
# Expose the classification as the read-only attribute `state`.
84 state = property(check_page)
# Collect the alternatives offered by the page for origin and destination.
# Returns the tuple (origin, destination); the visible assignments fill both
# with the option texts of the select boxes 'nameList_origin' and
# 'nameList_destination'.
# Raises ParserError('Unable to parse html') when neither select box exists.
# NOTE(review): lines appear to be elided between the statements below, so
# the handling of a single missing select box cannot be seen here.
86 def get_correction(self):
87 nlo = self.soup.find('select', {'id': 'nameList_origin'})
88 nld = self.soup.find('select', {'id': 'nameList_destination'})
# Only fail when both lists are absent.
90 if not nlo and not nld:
91 raise ParserError('Unable to parse html')
94 origin = map(lambda x: x.text, nlo.findAll('option'))
98 destination = map(lambda x: x.text, nld.findAll('option'))
102 return (origin, destination)
def get_result(self):
    """ Hand the serialized page over to a new rParser and return it
    """
    markup = str(self.soup)
    return rParser(markup)
110 """ Parser for routing results
# Parse the html of a routing result page. self._overview starts as None and
# is filled by the overview accessor further down.
# NOTE(review): self._details is read by the details code further down but
# is not initialised in the visible lines - confirm it is set here.
113 def __init__(self, html):
114 self.soup = BeautifulSoup(html)
115 self._overview = None
def get_tdtext(cls, x, cl):
    """ Text of the first td element below x that carries the class cl
    """
    cell = x.find('td', {'class': cl})
    return cell.text
# Read the text of the 'col_change' cell of element x.
# NOTE(review): the rest of this method, including its return value, is not
# visible in this listing.
123 def get_change(cls, x):
124 y = rParser.get_tdtext(x, 'col_change')
# Read the 'col_price' cell of element x and return it as a float; the
# decimal comma is replaced by a decimal point before the conversion.
# NOTE(review): lines appear to be elided around the return, so any handling
# of cells without a price cannot be seen here.
131 def get_price(cls, x):
132 y = rParser.get_tdtext(x, 'col_price')
136 return float(y.replace(',', '.'))
# Read the 'col_date' cell of element x, parse it with the format
# '%d.%m.%Y' and return the result as a datetime.date.
# NOTE(review): lines appear to be elided around the return, so any handling
# of an empty cell cannot be seen here.
141 def get_date(cls, x):
142 y = rParser.get_tdtext(x, 'col_date')
144 return datetime.strptime(y, '%d.%m.%Y').date()
# Read the 'col_time' cell of element x and turn it into datetime.time
# objects, one per 'H:M' piece of the cell text.
# NOTE(review): lines appear to be elided between the statements below
# (presumably an `else:` and possibly an emptiness check) - confirm.
149 def get_time(cls, x):
150 y = rParser.get_tdtext(x, 'col_time')
# A '-' after the first character: the pieces are separated by dashes.
152 if (y.find("-") > 0):
153 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
# Otherwise the text is cut into pieces with textwrap.wrap at width 5.
155 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
# Read the 'col_duration' cell of element x and return it as a
# datetime.time: the text is split at ':' and the integer parts are passed
# to time() in order.
# NOTE(review): a line appears to be elided before the return, so any
# handling of an empty cell cannot be seen here.
160 def get_duration(cls, x):
161 y = rParser.get_tdtext(x, 'col_duration')
163 return time(*map(int, y.split(":")))
# Loop over the result of self.details().
# NOTE(review): the enclosing `def` line and the loop body are not visible
# in this listing - presumably an iterator method; confirm.
168 for detail in self.details():
# Parse the trip detail tables of the page. The inner map() turns every
# <tr> of a <tbody> into a dict with the keys 'time', 'station' and 'info'.
# NOTE(review): the end of the outer map() call (presumably applied to
# `tours`) and the return statement are not visible in this listing.
171 def _parse_details(self):
# One div with the class 'data_table tourdetail' per tour.
172 tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
174 trips = map(lambda x: map(lambda y: {
175 'time': rParser.get_time(y),
# station: plain text nodes of the 'col_station' cell, with the first two
# characters dropped and surrounding whitespace stripped.
176 'station': map(lambda z: z[2:].strip(),
177 filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
# info: plain text nodes of the 'col_info' cell, whitespace stripped.
178 'info': map(lambda x: x.strip(),
179 filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
180 }, x.find('tbody').findAll('tr')),
# Accessor for the trip details: self._details is filled from
# _parse_details() whenever it is still empty, so an empty parse result is
# parsed again on the next access.
# NOTE(review): the `def` line, the end of the docstring and the return
# statement are not visible in this listing.
186 """returns list of trip details
187 [ [ { 'time': [datetime.time, datetime.time] if time else [],
188 'station': [u'start', u'end'] if station else [],
189 'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
190 }, ... # next trip step
191 ], ... # next trip possibility
194 if not self._details:
195 self._details = self._parse_details()
# Parse the overview table (id 'tbl_fahrten') into dicts with the keys
# 'date', 'time', 'duration', 'change' and 'price', each read through the
# matching rParser.get_* helper.
# Raises ParserError('Unable to parse overview'); presumably when the table
# is missing or has no rows - the branch structure is not visible here.
# NOTE(review): the end of the map() call (presumably applied to `rows`)
# and the return statement are not visible in this listing.
199 def _parse_overview(self):
202 table = self.soup.find('table', {'id': 'tbl_fahrten'})
204 # check if there is an overview table
205 if table and table.findAll('tr'):
207 rows = table.findAll('tr')[1:] # cut off headline
209 overview = map(lambda x: {
210 'date': rParser.get_date(x),
211 'time': rParser.get_time(x),
212 'duration': rParser.get_duration(x), # grab duration
213 'change': rParser.get_change(x),
214 'price': rParser.get_price(x),
218 raise ParserError('Unable to parse overview')
# Body of the overview accessor (its `def` line and the `try:` line are not
# visible in this listing): parse the overview whenever self._overview is
# still empty, keep it in self._overview and return it.
231 if not self._overview:
233 self._overview = self._parse_overview()
# An AttributeError dumps the page source to DEBUGLOG; mode 'w' replaces
# the previous content of that file.
# NOTE(review): no f.close() is visible - confirm the handle is closed.
234 except AttributeError:
235 f = open(DEBUGLOG, 'w')
236 f.write(str(self.soup))
239 return self._overview