just comments, fixme
[pywienerlinien] / gotovienna / routing.py
1 #!/usr/bin/env python
2 # -*- coding: UTF-8 -*-
3
4 from BeautifulSoup import BeautifulSoup, NavigableString
5 from urllib2 import urlopen
6 from urllib import urlencode
7 from datetime import datetime, time
8 from textwrap import wrap
9 import argparse
10 import sys
11 import os.path
12
13 from gotovienna import defaults
14
# Position types accepted for the origin and the destination of a search.
POSITION_TYPES = ('stop', 'address', 'poi')
# strftime()-style pattern for hours and minutes.
TIMEFORMAT = '%H:%M'
# Debug file in the user's home directory; request URLs and result pages
# that could not be parsed are written to it.
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
18
class ParserError(Exception):
    """Raised for an invalid request or a page that cannot be parsed.

    The text is stored on ``message`` and also handed to ``Exception`` so
    that ``str(error)`` and tracebacks show it, including when the default
    text is used.
    """

    def __init__(self, msg='Parser error'):
        # Without this call the default message never reaches ``args``.
        Exception.__init__(self, msg)
        self.message = msg
23
class PageType:
    """Integer constants naming the kinds of response page."""
    UNKNOWN = 0
    CORRECTION = 1
    RESULT = 2
26
27
def extract_city(station):
    """ Return the city named after the last comma of the string,
    stripped of surrounding whitespace; 'Wien' if there is no comma

    >>> extract_city('Karlsplatz, Wien')
    'Wien'
    """
    _name, comma, city = station.rpartition(',')
    if not comma:
        return 'Wien'
    return city.strip()
39         
def extract_station(station):
    """ Return everything before the last comma, stripped of surrounding
    whitespace; a string without a comma is returned unchanged

    >>> extract_station('Karlsplatz, Wien')
    'Karlsplatz'
    """
    name, comma, _city = station.rpartition(',')
    return name.strip() if comma else station
50     
def split_station(station):
    """ Split at the last comma into a (station, city) tuple, both parts
    stripped; without a comma the input is paired with 'Wien' as it is

    >>> split_station('Karlsplatz, Wien')
    ('Karlsplatz', 'Wien')
    >>> split_station('Karlsplatz')
    ('Karlsplatz', 'Wien')
    """
    name, comma, city = station.rpartition(',')
    if comma:
        return (name.strip(), city.strip())
    return (station, 'Wien')
61
def search(origin_tuple, destination_tuple, dtime=None):
    """ build route request
    returns html result (as urllib response)

    origin_tuple, destination_tuple: (name, position type) pairs. The name
        may end in ', <city>'; without a comma 'Wien' is used as the city.
        The position type has to be one of POSITION_TYPES.
    dtime: datetime of the trip, the current time if not given.

    Raises ParserError if one of the position types is not known.
    """
    if not dtime:
        dtime = datetime.now()

    origin, origin_type = origin_tuple
    origin, origin_city = split_station(origin)

    destination, destination_type = destination_tuple
    destination, destination_city = split_station(destination)

    if origin_type not in POSITION_TYPES or \
            destination_type not in POSITION_TYPES:
        raise ParserError('Invalid position type')

    # Fill in a copy: defaults.search_post is shared module state and must
    # not keep the values of an earlier request.
    # (assumes search_post is a dict -- confirm in gotovienna.defaults)
    post = defaults.search_post.copy()
    post['name_origin'] = origin
    post['type_origin'] = origin_type
    post['name_destination'] = destination
    post['type_destination'] = destination_type
    post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
    post['itdTime'] = dtime.strftime('%H:%M')
    post['place_origin'] = origin_city
    post['place_destination'] = destination_city
    params = urlencode(post)
    url = '%s?%s' % (defaults.action, params)

    # Logging the URL is best effort only; a failure must not stop the
    # request. Only file errors are caught so Ctrl-C still gets through.
    try:
        with open(DEBUGLOG, 'a') as f:
            f.write(url + '\n')
    except (IOError, OSError):
        print('Unable to write to DEBUGLOG: %s' % DEBUGLOG)

    return urlopen(url)
100
101
class sParser:
    """ Parser for search response
    """

    def __init__(self, html):
        """ html: markup of the response page, handed to BeautifulSoup
        """
        self.soup = BeautifulSoup(html)

    def check_page(self):
        """ returns the PageType of the parsed page

        RESULT if the page has a form with id 'form_efaresults',
        CORRECTION if it has a div with class 'form_error',
        UNKNOWN otherwise. The result form is checked first.
        """
        if self.soup.find('form', {'id': 'form_efaresults'}):
            return PageType.RESULT

        if self.soup.find('div', {'class': 'form_error'}):
            return PageType.CORRECTION

        return PageType.UNKNOWN

    def get_correction(self):
        """ returns the alternatives offered on a correction page

        The result is a dict with one entry per <select> found on the page,
        each a list of the option texts:
          'origin'            from select 'nameList_origin'
          'destination'       from select 'nameList_destination'
          'place_origin'      from select 'placeList_origin'
          'place_destination' from select 'placeList_destination'

        Raises ParserError if none of these selects is on the page.
        """
        select_ids = (
            ('origin', 'nameList_origin'),
            ('destination', 'nameList_destination'),
            ('place_origin', 'placeList_origin'),
            ('place_destination', 'placeList_destination'),
        )

        corrections = {}
        for key, select_id in select_ids:
            select = self.soup.find('select', {'id': select_id})
            if select:
                # every key reads the options of its own select
                corrections[key] = [option.text
                                    for option in select.findAll('option')]

        if not corrections:
            raise ParserError('Unable to parse html')

        return corrections

    def get_result(self):
        """ returns an rParser built from the markup of this page
        """
        return rParser(str(self.soup))
145
146
147
class rParser:
    """ Parser for routing results

    The parsed page is available through the properties `overview` and
    `details`, both computed on first access and cached afterwards.
    Iterating the parser yields the entries of `details`.
    """

    def __init__(self, html):
        """ html: markup of the result page, handed to BeautifulSoup
        """
        self.soup = BeautifulSoup(html)
        self._overview = None
        self._details = None

    @classmethod
    def get_tdtext(cls, x, cl):
        """ returns the text of the first <td> with class `cl` below `x`

        Fails with AttributeError if find() comes back without a cell.
        """
        return x.find('td', {'class': cl}).text

    @classmethod
    def get_change(cls, x):
        """ returns the 'col_change' cell as int, 0 if the cell is empty
        """
        y = cls.get_tdtext(x, 'col_change')
        if y:
            return int(y)
        return 0

    @classmethod
    def get_price(cls, x):
        """ returns the 'col_price' cell as float

        A decimal comma is accepted. '*', an empty cell or any other text
        that is not a number gives 0.0.
        """
        y = cls.get_tdtext(x, 'col_price')
        if y == '*':
            return 0.0
        # str.find() is no truth test (-1 is true, 0 is false), so just try
        # the conversion and fall back to 0.0.
        try:
            return float(y.replace(',', '.'))
        except ValueError:
            return 0.0

    @classmethod
    def get_date(cls, x):
        """ returns the 'col_date' cell (dd.mm.yyyy) as datetime.date,
        None if the cell is empty
        """
        y = cls.get_tdtext(x, 'col_date')
        if y:
            return datetime.strptime(y, '%d.%m.%Y').date()
        return None

    @classmethod
    def get_time(cls, x):
        """ returns the times of the 'col_time' cell as list of datetime.time

        Text with a '-' after the first character is split at the dashes,
        any other text is cut into pieces of at most five characters by
        textwrap.wrap(). An empty cell gives [].
        """
        y = cls.get_tdtext(x, 'col_time')
        if not y:
            return []

        if y.find("-") > 0:
            parts = y.split('-')
        else:
            # FIXME Error if date in line (dateLineCross)
            parts = wrap(y, 5)

        return [time(*[int(n) for n in part.split(':')]) for part in parts]

    @classmethod
    def get_duration(cls, x):
        """ returns the 'col_duration' cell as datetime.time,
        None if the cell is empty
        """
        y = cls.get_tdtext(x, 'col_duration')
        if y:
            return time(*[int(n) for n in y.split(":")])
        return None

    def __iter__(self):
        # details is a property, it must not be called
        for detail in self.details:
            yield detail

    @staticmethod
    def _plain_strings(cell):
        """ returns the direct children of `cell` whose type is exactly
        NavigableString (subclasses are left out, as before)
        """
        return [node for node in cell.contents
                if type(node) is NavigableString]

    def _parse_details(self):
        """ returns one list per 'data_table tourdetail' div, holding one
        dict per row of its <tbody> (see `details` for the layout)
        """
        tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})

        trips = []
        for tour in tours:  # all routes
            steps = []
            for row in tour.find('tbody').findAll('tr'):
                steps.append({
                    'time': self.get_time(row),
                    # The first two characters of each station string are
                    # cut off (presumably a leading marker -- confirm
                    # against the page markup).
                    'station': [text[2:].strip() for text in
                                self._plain_strings(
                                    row.find('td', {'class': 'col_station'}))],
                    'info': [text.strip() for text in
                             self._plain_strings(
                                 row.find('td', {'class': 'col_info'}))],
                })
            trips.append(steps)
        return trips

    @property
    def details(self):
        """returns list of trip details
        [ [ { 'time': [datetime.time, datetime.time] if time else [],
              'station': [u'start', u'end'] if station else [],
              'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
            }, ... # next trip step
          ], ... # next trip possibility
        ]
        """
        # compare with None so that an empty result is cached as well
        if self._details is None:
            self._details = self._parse_details()

        return self._details

    def _parse_overview(self):
        """ returns one dict per row of table 'tbl_fahrten', header row
        left out (see `overview` for the keys)

        Raises ParserError if the table is missing or has no rows.
        """
        # get overview table
        table = self.soup.find('table', {'id': 'tbl_fahrten'})

        # check if there is an overview table
        if not (table and table.findAll('tr')):
            raise ParserError('Unable to parse overview')

        # get rows
        rows = table.findAll('tr')[1:]  # cut off headline

        return [{
                    'date': self.get_date(row),
                    'time': self.get_time(row),
                    'duration': self.get_duration(row),  # grab duration
                    'change': self.get_change(row),
                    'price': self.get_price(row),
                } for row in rows]

    @property
    def overview(self):
        """list of dicts, one per connection, each containing
        date: datetime.date
        time: [time, time]
        duration: time
        change: int
        price: float

        If parsing fails with AttributeError the page is written to
        DEBUGLOG (replacing its content) and None is returned.
        A ParserError from parsing is passed on to the caller.
        """
        if self._overview is None:
            try:
                self._overview = self._parse_overview()
            except AttributeError:
                # a cell was missing in a row; keep the page for debugging
                with open(DEBUGLOG, 'w') as f:
                    f.write(str(self.soup))

        return self._overview
280