changed date, time to datetime in overview part
[pywienerlinien] / gotovienna / routing.py
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3
4 from BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time, combine, timedelta
8 from textwrap import wrap
9 import argparse
10 import sys
11 import os.path
12
13 from gotovienna import defaults
14
# Location kinds accepted by search(); any other origin/destination type
# makes search() raise ParserError.
POSITION_TYPES = ('stop', 'address', 'poi')
# strftime pattern for hours and minutes.
TIMEFORMAT = '%H:%M'
# Debug file in the user's home directory: search() appends every request URL
# to it, and rParser.overview overwrites it with the page HTML when parsing
# the overview fails with an AttributeError.
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
18
class ParserError(Exception):
    """Raised when a request is invalid or a response page cannot be parsed.

    The text is available both as ``str(error)`` and as ``error.message``.
    """

    def __init__(self, msg='Parser error'):
        # Hand the text to Exception as well; otherwise str(error) and
        # error.args stay empty and the message never shows up in tracebacks.
        super(ParserError, self).__init__(msg)
        self.message = msg
23
class PageType:
    """Page kinds reported by sParser.check_page()."""
    UNKNOWN = 0
    CORRECTION = 1
    RESULT = 2
26
27
def extract_city(station):
    """ Return the city part of 'station, city'.

    The city is whatever follows the last comma, stripped of surrounding
    whitespace; without a comma the default city 'Wien' is returned.

    >>> extract_city('Karlsplatz, Wien')
    'Wien'
    """
    _, comma, city = station.rpartition(',')
    return city.strip() if comma else 'Wien'
39         
def extract_station(station):
    """ Return the station part of 'station, city'.

    Everything before the last comma is returned with surrounding whitespace
    stripped; a string without a comma is returned unchanged.

    >>> extract_station('Karlsplatz, Wien')
    'Karlsplatz'
    """
    name, comma, _ = station.rpartition(',')
    return name.strip() if comma else station
50     
def split_station(station):
    """ Split 'station, city' at the last comma into (station, city).

    Both parts are stripped of surrounding whitespace. Without a comma the
    input is returned unchanged, paired with the default city 'Wien'.

    >>> split_station('Karlsplatz, Wien')
    ('Karlsplatz', 'Wien')
    >>> split_station('Karlsplatz')
    ('Karlsplatz', 'Wien')
    """
    name, comma, city = station.rpartition(',')
    if not comma:
        return (station, 'Wien')
    return (name.strip(), city.strip())
61
def search(origin_tuple, destination_tuple, dtime=None):
    """ Build a route request and send it.

    origin_tuple / destination_tuple -- (name, type) pairs. The name may end
        in ', city' (see split_station); the type must be one of
        POSITION_TYPES.
    dtime -- datetime used for the request's date and time fields; defaults
        to the current time.

    Returns the html result (as urllib response).
    Raises ParserError if either position type is not in POSITION_TYPES.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    destination, destination_type = destination_tuple

    if origin_type not in POSITION_TYPES or \
            destination_type not in POSITION_TYPES:
        raise ParserError('Invalid position type')

    origin, origin_city = split_station(origin)
    destination, destination_city = split_station(destination)

    # Work on a copy so the shared module-level defaults are not modified
    # by every request.
    post = dict(defaults.search_post)
    post.update({
        'name_origin': origin,
        'type_origin': origin_type,
        'name_destination': destination,
        'type_destination': destination_type,
        'itdDateDayMonthYear': dtime.strftime('%d.%m.%Y'),
        'itdTime': dtime.strftime(TIMEFORMAT),
        'place_origin': origin_city,
        'place_destination': destination_city,
    })
    url = '%s?%s' % (defaults.action, urlencode(post))

    # Logging the URL is best effort: a failure is reported but must not
    # prevent the request itself.
    try:
        with open(DEBUGLOG, 'a') as log:
            log.write(url + '\n')
    except (IOError, OSError):
        print('Unable to write to DEBUGLOG: %s' % DEBUGLOG)

    return urlopen(url)
100
101
class sParser:
    """ Parser for search response
    """

    # (result key, id of the <select> element holding the suggestions)
    _CORRECTION_SELECTS = (
        ('origin', 'nameList_origin'),
        ('destination', 'nameList_destination'),
        ('place_origin', 'placeList_origin'),
        ('place_destination', 'placeList_destination'),
    )

    def __init__(self, html):
        self.soup = BeautifulSoup(html)

    def check_page(self):
        """ Classify the page.

        Returns PageType.RESULT if the results form is present,
        PageType.CORRECTION if an error block is present, and
        PageType.UNKNOWN otherwise. The results form is checked first.
        """
        if self.soup.find('form', {'id': 'form_efaresults'}):
            return PageType.RESULT

        if self.soup.find('div', {'class': 'form_error'}):
            return PageType.CORRECTION

        return PageType.UNKNOWN

    def get_correction(self):
        """ Collect the suggestions offered on a correction page.

        Returns a dict with a list of option texts for each suggestion list
        found on the page; possible keys are 'origin', 'destination',
        'place_origin' and 'place_destination'.

        Raises ParserError if the page contains none of these lists.
        """
        corrections = {}
        for key, select_id in self._CORRECTION_SELECTS:
            select = self.soup.find('select', {'id': select_id})
            if select:
                # Each key reads its own <select>: the place lists must not
                # be filled from (or guarded by) the name lists.
                corrections[key] = [option.text
                                    for option in select.findAll('option')]

        if not corrections:
            raise ParserError('Unable to parse html')

        return corrections

    def get_result(self):
        """ Return an rParser built from this page's markup. """
        return rParser(str(self.soup))
145
146
147
class rParser:
    """ Parser for routing results
    """

    def __init__(self, html):
        self.soup = BeautifulSoup(html)
        # Filled lazily by the `overview` and `details` properties.
        self._overview = None
        self._details = None

    @classmethod
    def get_tdtext(cls, x, cl):
        """ Return the text of the td with class `cl` inside row `x`.

        Raises AttributeError if find() yields None for that cell.
        """
        return x.find('td', {'class': cl}).text

    @staticmethod
    def _parse_clock(text):
        """ Turn 'HH:MM' into a datetime.time. """
        return time(*[int(part) for part in text.split(':')])

    @classmethod
    def get_change(cls, x):
        """ Return the 'col_change' cell as int, 0 if the cell is empty. """
        text = cls.get_tdtext(x, 'col_change')
        return int(text) if text else 0

    @classmethod
    def get_price(cls, x):
        """ Return the 'col_price' cell as float.

        A decimal comma is accepted. Anything that is not a number (the '*'
        placeholder, an empty cell) yields 0.0.
        """
        text = cls.get_tdtext(x, 'col_price')
        try:
            return float(text.replace(',', '.'))
        except ValueError:
            return 0.0

    @classmethod
    def get_date(cls, x):
        """ Return the 'col_date' cell (dd.mm.YYYY) as date, None if empty. """
        text = cls.get_tdtext(x, 'col_date')
        if not text:
            return None
        return datetime.strptime(text, '%d.%m.%Y').date()

    @classmethod
    def get_datetime(cls, x):
        """ Parse the 'col_time' cell of row `x`.

        Overview rows ('HH:MM-HH:MM') yield [from, to] as datetime objects,
        combined with the row's 'col_date' cell. Detail rows (times written
        back to back, five characters each) yield a list of datetime.time.
        An empty cell yields [].
        """
        text = cls.get_tdtext(x, 'col_time')
        if not text:
            return []

        if text.find('-') > 0:
            # overview mode
            clocks = [cls._parse_clock(part) for part in text.split('-')]
            day = cls.get_date(x)
            # combine() is a classmethod of datetime; the datetime module
            # has no module-level function of that name.
            from_dtime = datetime.combine(day, clocks[0])
            if clocks[0] > clocks[1]:
                # dateline crossing: the end time belongs to the next day
                to_dtime = datetime.combine(day + timedelta(1), clocks[1])
            else:
                to_dtime = datetime.combine(day, clocks[1])
            return [from_dtime, to_dtime]

        # detail mode
        return [cls._parse_clock(chunk) for chunk in wrap(text, 5)]

    @classmethod
    def get_duration(cls, x):
        """ Return the 'col_duration' cell (HH:MM) as time, None if empty. """
        text = cls.get_tdtext(x, 'col_duration')
        if not text:
            return None
        return cls._parse_clock(text)

    def __iter__(self):
        # `details` is a property, so it is read, not called.
        return iter(self.details)

    @staticmethod
    def _plain_strings(cell):
        """ Return the direct children of `cell` that are NavigableStrings.

        The exact type comparison is kept on purpose: isinstance() would
        also let NavigableString subclasses through.
        """
        return [node for node in cell.contents
                if type(node) == NavigableString]

    def _parse_details(self):
        """ Build the structure documented on the `details` property. """
        tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})

        trips = []
        for tour in tours:  # all routes
            steps = []
            for row in tour.find('tbody').findAll('tr'):
                stations = self._plain_strings(
                    row.find('td', {'class': 'col_station'}))
                infos = self._plain_strings(
                    row.find('td', {'class': 'col_info'}))
                steps.append({
                    'time': rParser.get_datetime(row),
                    # The first two characters of each station string are
                    # dropped (presumably a leading marker -- TODO confirm).
                    'station': [text[2:].strip() for text in stations],
                    'info': [text.strip() for text in infos],
                })
            trips.append(steps)
        return trips

    @property
    def details(self):
        """returns list of trip details
        [ [ { 'time': [datetime.time, datetime.time] if time else [],
              'station': [u'start', u'end'] if station else [],
              'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
            }, ... # next trip step
          ], ... # next trip possibility
        ]
        """
        # `is None` so that an empty result is cached too.
        if self._details is None:
            self._details = self._parse_details()

        return self._details

    def _parse_overview(self):
        """ Parse the rows of the overview table.

        Raises ParserError if the table is missing or has no rows.
        """
        # get overview table
        table = self.soup.find('table', {'id': 'tbl_fahrten'})

        rows = table.findAll('tr') if table else []
        if not rows:
            raise ParserError('Unable to parse overview')

        return [{
                    'time': rParser.get_datetime(row),
                    'duration': rParser.get_duration(row),
                    'change': rParser.get_change(row),
                    'price': rParser.get_price(row),
                }
                for row in rows[1:]]  # cut off headline

    @property
    def overview(self):
        """list of dicts, one per overview row, each containing
        time: [datetime, datetime] (from, to)
        duration: time
        change: int
        price: float

        If parsing fails with an AttributeError, the page is dumped to
        DEBUGLOG and None is returned.
        """
        if self._overview is None:
            try:
                self._overview = self._parse_overview()
            except AttributeError:
                # Keep the offending page for later inspection.
                with open(DEBUGLOG, 'w') as dump:
                    dump.write(str(self.soup))

        return self._overview
288