just small format changes
[pywienerlinien] / gotovienna / routing.py
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3
4 from gotovienna.BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time, timedelta
8 from textwrap import wrap
9 import sys
10 import os.path
11 import re
12
13 from gotovienna import defaults
14
15 POSITION_TYPES = ('stop', 'address', 'poi')
16 TIMEFORMAT = '%H:%M'
17 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
18
class ParserError(Exception):
    """Raised when a request is invalid or a result page cannot be parsed.

    The text is available as ``message`` and is also passed on to
    ``Exception`` so that ``str(err)`` and tracebacks show it.
    """

    def __init__(self, msg='Parser error'):
        # Hand the message to the base class; otherwise the default
        # message never reaches ``args`` and str(ParserError()) is empty.
        Exception.__init__(self, msg)
        self.message = msg
23
class PageType:
    """Integer codes returned by sParser.check_page for the kind of page."""
    UNKNOWN = 0
    CORRECTION = 1
    RESULT = 2
26
27
def extract_city(station):
    """Return the city part of a 'station, city' string.

    The city is whatever follows the last comma, with surrounding
    whitespace removed. Strings without a comma yield the default 'Wien'.

    >>> extract_city('Karlsplatz, Wien')
    'Wien'
    """
    _, comma, city = station.rpartition(',')
    if not comma:
        return 'Wien'
    return city.strip()
39         
def extract_station(station):
    """Return the station part of a 'station, city' string.

    Everything before the last comma is returned with surrounding
    whitespace removed. A string without a comma is returned unchanged.

    >>> extract_station('Karlsplatz, Wien')
    'Karlsplatz'
    """
    name, comma, _ = station.rpartition(',')
    return name.strip() if comma else station
50     
def split_station(station):
    """Split a 'station, city' string into a (station, city) tuple.

    The split happens at the last comma and both halves are stripped.
    Without a comma the input is returned untouched, paired with 'Wien'.

    >>> split_station('Karlsplatz, Wien')
    ('Karlsplatz', 'Wien')
    >>> split_station('Karlsplatz')
    ('Karlsplatz', 'Wien')
    """
    name, comma, city = station.rpartition(',')
    if comma:
        return (name.strip(), city.strip())
    return (station, 'Wien')
61
def guess_location_type(location):
    """Guess the type ('stop' or 'address') of a location string.

    The guess looks only at the last whitespace-separated word: a location
    of two or more words whose last word is a house number ("14") or a
    house/door number ("12/34") is treated as an address. Everything else,
    including empty input, is treated as a stop.

    >>> guess_location_type('pilgramgasse')
    'stop'

    >>> guess_location_type('karlsplatz 14')
    'address'

    >>> guess_location_type('reumannplatz 12/34')
    'address'

    >>> guess_location_type('')
    'stop'
    """
    parts = location.split()

    # Empty input and single-word locations are assumed to be stops.
    # (Checking this first also avoids indexing into an empty list.)
    if len(parts) <= 1:
        return 'stop'

    last_part = parts[-1]

    # If the last part is numeric, assume address
    if last_part.isdigit():
        return 'address'

    # Addresses with door number (e.g. "12/34")
    if all(x.isdigit() or x == '/' for x in last_part):
        return 'address'

    # Sane default - assume it's a stop/station name
    return 'stop'
92
def search(origin_tuple, destination_tuple, dtime=None):
    """Build a route request, send it and return the response.

    origin_tuple, destination_tuple -- (name, type) pairs. ``name`` may
        carry a ', city' suffix (see split_station). ``type`` is one of
        POSITION_TYPES, or None to have it guessed from the name.
    dtime -- datetime of the trip; defaults to the current time.

    Returns the html result as the response object of ``urlopen``.
    Raises ParserError if a position type is not in POSITION_TYPES.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    origin, origin_city = split_station(origin)

    destination, destination_type = destination_tuple
    destination, destination_city = split_station(destination)

    if origin_type is None:
        origin_type = guess_location_type(origin)
        print('Guessed origin type: %s' % origin_type)

    if destination_type is None:
        destination_type = guess_location_type(destination)
        print('Guessed destination type: %s' % destination_type)

    if (origin_type not in POSITION_TYPES or
            destination_type not in POSITION_TYPES):
        raise ParserError('Invalid position type')

    # Work on a copy: assigning into defaults.search_post directly would
    # leave this request's values behind in the shared module-level defaults.
    post = dict(defaults.search_post)
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')
    post['place_origin'] = origin_city
    post['place_destination'] = destination_city
    params = urlencode(post)
    url = '%s?%s' % (defaults.action, params)

    # Logging the url is best effort: a failure here must not prevent the
    # request, but KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        with open(DEBUGLOG, 'a') as f:
            f.write(url + '\n')
    except Exception:
        print('Unable to write to DEBUGLOG: %s' % DEBUGLOG)

    return urlopen(url)
139
140
class sParser:
    """ Parser for search response
    """

    # Key in the get_correction() result -> id of the <select> element that
    # lists the alternatives for it.
    _CORRECTION_SELECTS = (
        ('origin', 'nameList_origin'),
        ('destination', 'nameList_destination'),
        ('place_origin', 'placeList_origin'),
        ('place_destination', 'placeList_destination'),
    )

    def __init__(self, html):
        self.soup = BeautifulSoup(html)

    def check_page(self):
        """Classify the parsed page and return a PageType value.

        A page with the 'form_efaresults' form is a RESULT, a page with a
        'form_error' div is a CORRECTION, anything else is UNKNOWN.
        """
        if self.soup.find('form', {'id': 'form_efaresults'}):
            return PageType.RESULT

        if self.soup.find('div', {'class': 'form_error'}):
            return PageType.CORRECTION

        return PageType.UNKNOWN

    state = property(check_page)

    def get_correction(self):
        """Return the alternatives offered on a correction page.

        The result is a dict with any of the keys 'origin', 'destination',
        'place_origin' and 'place_destination'. Each present key maps to
        the list of option texts of the corresponding <select> element.

        Raises ParserError if none of the four select elements is found.
        """
        corrections = {}

        for key, select_id in self._CORRECTION_SELECTS:
            select = self.soup.find('select', {'id': select_id})
            if select:
                # Each key reads from its own select element; the place
                # lists must not be filled from the name lists.
                corrections[key] = [option.text
                                    for option in select.findAll('option')]

        if not corrections:
            raise ParserError('Unable to parse html')

        return corrections

    def get_result(self):
        """Return an rParser built from the html of this page."""
        return rParser(str(self.soup))
190
191
192
class rParser:
    """ Parser for routing results
    """

    # Detail rows: "[dd.mm] HH:MM [dd.mm] HH:MM", both dates optional.
    _DETAIL_TIME_RE = re.compile(
        r'\s*(?P<date1>\d\d\.\d\d)?\s*(?P<time1>\d\d:\d\d)'
        r'\s*(?P<date2>\d\d\.\d\d)?\s*(?P<time2>\d\d:\d\d)\s*')

    def __init__(self, html):
        self.soup = BeautifulSoup(html)
        self._overview = None  # cache filled by the overview property
        self._details = None   # cache filled by the details property

    @classmethod
    def get_tdtext(cls, x, cl):
        """Return the text of the <td> with css class `cl` inside row `x`."""
        return x.find('td', {'class': cl}).text

    @classmethod
    def get_change(cls, x):
        """Return the 'col_change' cell of row `x` as int, 0 if it is empty."""
        y = cls.get_tdtext(x, 'col_change')
        if y:
            return int(y)
        return 0

    @classmethod
    def get_price(cls, x):
        """Return the 'col_price' cell of row `x` as float.

        A decimal comma is accepted. An empty cell or '*' yields 0.0.
        Raises ValueError for any other text that is not a number.
        """
        y = cls.get_tdtext(x, 'col_price').strip()
        if not y or y == '*':
            return 0.0
        return float(y.replace(',', '.'))

    @classmethod
    def get_date(cls, x):
        """Return the 'col_date' cell (dd.mm.YYYY) of row `x` as a date.

        Returns None if the cell is empty.
        """
        y = cls.get_tdtext(x, 'col_date')
        if y:
            return datetime.strptime(y, '%d.%m.%Y').date()
        return None

    @classmethod
    def get_datetime(cls, x):
        """Return [from, to] datetimes parsed from the 'col_time' cell of `x`.

        Two layouts are understood:
        * overview mode, 'HH:MM-HH:MM': the date comes from the 'col_date'
          cell of the same row; an arrival time earlier than the departure
          time is placed on the following day.
        * detail mode, '[dd.mm] HH:MM [dd.mm] HH:MM': a missing date means
          today, a given date is completed with the current year.

        Returns [] if the cell is empty or matches neither layout.
        """
        y = cls.get_tdtext(x, 'col_time')
        if not y:
            return []

        if y.find('-') > 0:
            # overview mode
            times = [time(*[int(part) for part in z.split(':')])
                     for z in y.split('-')]
            d = cls.get_date(x)
            from_dtime = datetime.combine(d, times[0])
            if times[0] > times[1]:
                # dateline crossing
                to_dtime = datetime.combine(d + timedelta(1), times[1])
            else:
                to_dtime = datetime.combine(d, times[1])

            return [from_dtime, to_dtime]

        # detail mode
        ma = cls._DETAIL_TIME_RE.match(y)
        if not ma:
            return []

        gr = ma.groupdict()
        today = datetime.today()

        def extract_datetime(n):
            date_text = gr['date%d' % n]
            time_text = gr['time%d' % n]
            if date_text:
                # The page gives only day and month; use the current year.
                return datetime.strptime(
                    '%s.%d %s' % (date_text, today.year, time_text),
                    '%d.%m.%Y %H:%M')
            t = datetime.strptime(time_text, '%H:%M').time()
            return datetime.combine(today.date(), t)

        return [extract_datetime(1), extract_datetime(2)]

    def __iter__(self):
        # `details` is a property, so it is read, not called.
        for detail in self.details:
            yield detail

    def _parse_details(self):
        """Parse every 'data_table tourdetail' div into a list of step dicts."""

        def plain_strings(cell):
            # Keep only direct children that are exactly NavigableString
            # (exact type match, so subclasses and tags are skipped).
            return [c for c in cell.contents if type(c) == NavigableString]

        trips = []
        for tour in self.soup.findAll('div', {'class': 'data_table tourdetail'}):
            steps = []
            for row in tour.find('tbody').findAll('tr'):
                station_cell = row.find('td', {'class': 'col_station'})
                info_cell = row.find('td', {'class': 'col_info'})
                steps.append({
                    'timespan': rParser.get_datetime(row),
                    # The first two characters of each station string are
                    # dropped (presumably a leading marker - confirm
                    # against the page markup).
                    'station': [s[2:].strip()
                                for s in plain_strings(station_cell)],
                    'info': [s.strip() for s in plain_strings(info_cell)],
                })
            trips.append(steps)  # one list of steps per route
        return trips

    @property
    def details(self):
        """returns list of trip details
        [ [ { 'timespan': [datetime, datetime] if time else [],
              'station': [u'start', u'end'] if station else [],
              'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
            }, ... # next trip step
          ], ... # next trip possibility
        ]
        """
        if self._details is None:
            self._details = self._parse_details()

        return self._details

    def _parse_overview(self):
        """Parse the 'tbl_fahrten' table into a list of overview dicts.

        Raises ParserError if the table is missing or has no rows.
        """
        # get overview table
        table = self.soup.find('table', {'id': 'tbl_fahrten'})

        # check if there is an overview table
        if not (table and table.findAll('tr')):
            raise ParserError('Unable to parse overview')

        rows = table.findAll('tr')[1:]  # cut off headline

        return [{
                    'timespan': rParser.get_datetime(row),
                    'change': rParser.get_change(row),
                    'price': rParser.get_price(row),
                } for row in rows]

    @property
    def overview(self):
        """list with one dict per connection, containing
        timespan: [datetime, datetime] (departure, arrival)
        change: int
        price: float

        If parsing fails with an AttributeError the page is dumped to
        DEBUGLOG and None is returned.
        """
        if self._overview is None:
            try:
                self._overview = self._parse_overview()
            except AttributeError:
                # Keep the unparseable page for debugging.
                with open(DEBUGLOG, 'w') as f:
                    f.write(str(self.soup))

        return self._overview
349