1 # -*- coding: utf-8 -*-
3 from gotovienna.BeautifulSoup import BeautifulSoup
4 #from urllib2 import urlopen
5 from urllib import quote_plus
6 # Use urlopen proxy for fake user agent
7 from UrlOpener import urlopen
8 from datetime import time, datetime, timedelta
9 import datetime as date
12 from errors import LineNotFoundError, StationNotFoundError
14 from cache import Stations
15 from time import sleep
16 from utils import sort_departures
18 from gotovienna import defaults
class Departure(dict):
    """Dictionary describing one departure of a line at a station.

    Keys set by the constructor:
        'line', 'station', 'direction', 'lowfloor' -- copied from the arguments
        'realtime'  -- True when `time` was given as a countdown in minutes
        'time'      -- whole minutes until departure (negative if in the past)
        'departure' -- datetime of the departure
        'ftime'     -- str() of 'time'
    """

    def __init__(self, line, station, direction, time, lowfloor):
        """Build the departure record.

        `time` may be
          * a datetime.time   -- turned into a datetime via make_datetime(),
          * a datetime        -- timetable value, 'realtime' is False,
          * an int            -- countdown in minutes, 'realtime' is True.

        Raises ValueError for any other type of `time`.
        """
        self['line'] = line
        self['station'] = station
        self['direction'] = direction

        now = datetime.now()
        # `date` is the datetime module (imported under that alias).
        if isinstance(time, date.time):
            time = make_datetime(now, time)

        if isinstance(time, datetime):
            # FIXME convert in ModelList
            self['realtime'] = False
            # timedelta.seconds alone drops whole days, so fold them in.
            if time >= now:
                delta = time - now
                self['time'] = (delta.days * 86400 + delta.seconds) // 60
            else:
                delta = now - time
                # Floor division of the negated value, as before.
                self['time'] = -(delta.days * 86400 + delta.seconds) // 60
            self['departure'] = time
        elif isinstance(time, int) and not isinstance(time, bool):
            # FIXME convert in ModelList
            self['realtime'] = True
            self['time'] = time
            self['departure'] = now + timedelta(minutes=self['time'])
        else:
            raise ValueError('Wrong type: time')

        # FIXME convert in ModelList
        self['ftime'] = str(self['time'])
        self['lowfloor'] = lowfloor
50 self._lines = cache.lines
# Parse a line's station page (`html`) into per-direction station lists.
#
# The page is parsed with BeautifulSoup and every <table> whose class is
# 'text_10pix' is collected.  For table `i` the direction name is taken from
# the last content node of the table's first <div>: it is stripped and the
# first and last six characters are cut off.  Every <tr> of that table that
# carries onmouseout="obj_unhighlight(this);" adds one tuple to `sta`:
#   (link text, defaults.line_overview + link href)   or
#   (row text with the strip() characters removed, None)
# NOTE(review): `dir` shadows the builtin of the same name.
# NOTE(review): the loop over `i`, the condition that selects between the two
# append calls and the return statement are not visible here -- presumably a
# dict mapping direction name to `sta` is returned; confirm.
52 def parse_stations(self, html):
53 bs = BeautifulSoup(html)
54 tables = bs.findAll('table', {'class': 'text_10pix'})
58 dir = tables[i].div.contents[-1].strip()[6:-6]
61 for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
63 sta.append((tr.a.text, defaults.line_overview + tr.a['href']))
65 sta.append((tr.text.strip(' '), None))
# Look up the stations of the line called `name`, grouped by direction.
#
# `name` is first checked against the keys of self.lines; the body of that
# check is not visible here (presumably an early return for unknown lines --
# confirm).  The station data is produced by downloading the URL stored in
# self.lines[name] with urlopen and handing the page to parse_stations.
# NOTE(review): whether a cached result is consulted before the download
# cannot be seen from the visible lines.
70 def get_stations(self, name):
71 """ Get station by direction
72 {'Directionname': [('Station name', 'url')]}
74 if not name in self.lines:
80 st = self.parse_stations(urlopen(self.lines[name]).read())
# Extract line names and their URLs from the line overview page (`html`).
#
# Every <td class="linie"> cell of the page is collected.  For a cell, `href`
# is defaults.line_overview followed by the href of the cell's link, and the
# visible assignment stores it in `l` under the alt text of the cell's <img>.
# NOTE(review): the loop header, any other keys written to `l` and the return
# statement are not visible here -- presumably `l` (name -> URL) is returned;
# confirm.
84 def parse_lines(self, html):
85 """ Parse lines from html
87 bs = BeautifulSoup(html)
89 lines = bs.findAll('td', {'class': 'linie'})
95 href = defaults.line_overview + line.a['href']
99 l[line.img['alt']] = href
105 """ Dictionary of Line names with url as value
108 self._lines = self.parse_lines(urlopen(defaults.line_overview).read())
# Find the entry of `station` on `line` for the given `direction`.
#
# Fetches the per-direction station lists with get_stations(line) and scans
# the (station name, url) pairs stored under `direction` for an exact name
# match; a direction that is not present is scanned as an empty list.
# NOTE(review): the statements executed on a match and after the loop are not
# visible here -- presumably the matching url is returned, else None; confirm.
112 def get_url_from_direction(self, line, direction, station):
113 stations = self.get_stations(line)
115 for stationname, url in stations.get(direction, []):
116 if stationname == station:
# Parse a departures-by-station page (`html`) into Departure objects.
#
# Visible behaviour:
#   * the <li> items of the page's first <ul> are the entries to parse;
#   * the station name is the text of the first <strong> element up to the
#     first comma;
#   * the direction is the text `d` with the guillemet removed and stripped;
#     directions starting with 'NICHT EINSTEIGEN' are singled out;
#   * inside an entry, a <span> whose text is all digits or contains ':'
#     carries the departure time (for the ':' form a datetime.time is built
#     from the integer parts); any other span text is printed as a warning;
#   * a span of class 'departureBarrierFree' is checked for (presumably it
#     sets `lowfloor` -- confirm);
#   * each parsed entry is appended to `dep` as
#     Departure(line, station, direction, tim, lowfloor).
# Two handlers are visible: a generic one that prints the exception message
# as a warning and one for AttributeError that prints an error naming the
# station.
# NOTE(review): `e.message` is deprecated since Python 2.6; str(e) is the
# portable spelling.
# NOTE(review): the loop headers, the extent of the try blocks and the return
# statement are not visible here.
121 def parse_departures_by_station(self, html):
122 """ Parse departure page
123 precondition: html is correct departure page
124 handle select station page before calling this method
126 bs = BeautifulSoup(html)
130 li = bs.ul.findAll('li')
132 station = bs.strong.text.split(',')[0]
137 if d.find('»') == -1:
140 direction = d.replace('»', '').strip()
141 if direction.startswith('NICHT EINSTEIGEN'):
145 for span in l.findAll('span'):
146 if span.text.isdigit():
148 elif span.text.find(':') >= 0:
149 tim = time(*map(int, span.text.split(':')))
151 print 'Warning: %s' % span.text
154 if span['class'] == 'departureBarrierFree':
159 dep.append(Departure(line, station, direction, tim, lowfloor))
161 except Exception as e:
162 print 'Warning: %s' % e.message
165 except AttributeError:
166 print 'Error while getting station %s' % station
# Download and parse the departures page for `station`.
#
# The station name is encoded as UTF-8, URL-quoted and substituted into
# defaults.departures_by_station; the resulting page is downloaded.  The <li>
# items of its first <ul> are inspected and, as a workaround for ambiguous
# station names, the link of the first item (prefixed with defaults.qando) is
# downloaded instead; the condition guarding this step is not visible here.
# The page is then handed to parse_departures_by_station.
# NOTE(review): parse_departures_by_station(html) appears twice in the visible
# lines, once assigned to `dep` and once with its result discarded; the
# control flow between them is not visible -- check whether the second call
# is redundant.
171 def get_departures_by_station(self, station):
172 """ Get list of Departures for one station
175 # TODO 1. Error handling
176 # TODO 2. more error handling
177 # TODO 3. ultimative error handling
179 html = urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))).read()
181 li = BeautifulSoup(html).ul.findAll('li')
184 # Dirty workaround for ambiguous station
185 html = urlopen(defaults.qando + li[0].a['href']).read()
187 dep = self.parse_departures_by_station(html)
189 self.parse_departures_by_station(html)
# Parse a departure page (`html`) into a list of Departure objects.
#
# Steps visible in this block:
#   * <span class="rot fett"> elements are treated as error messages; when
#     the first one contains u'technischen St' their texts are printed.
#   * Station and line name are read from the second <tr> of the table inside
#     <form name="mainform">; two page layouts are told apart by whether that
#     row contains a <td class="info"> cell.
#   * Every row after the first one of the last <table> on the page is turned
#     into a dict `d` (station, line, direction, lowfloor, time) which is
#     passed to Departure(**d) and collected in `dep`.
#   * A time cell holds either a countdown or an 'HH:MM' clock time; the
#     latter is combined with the current datetime through make_datetime.
# NOTE(review): several statements of this method (branch headers, creation
# of `dep`, the return) are not visible here; confirm details against the
# complete method before relying on this summary.
192 def parse_departures(self, html):
193 bs = BeautifulSoup(html)
195 # Check for error messages
196 msg = bs.findAll('span', {'class': 'rot fett'})
# NOTE(review): find() returns 0 when the text starts with the marker, so the
# "> 0" test below is False in that case; ">= 0" would cover it as well.
197 if msg and len(msg) > 0 and unicode(msg[0].text).find(u'technischen St') > 0:
198 print '\n'.join(map(lambda x: x.text.replace(' ', ''), msg))
201 mainform = bs.find('form', {'name': 'mainform'})
205 lines = mainform.table.findAll('tr')[1]
207 if len(lines.findAll('td', {'class': 'info'})) > 0:
208 station = lines.span.text.replace(' ', '')
209 line = lines.findAll('span')[-1].text.replace(' ', '')
211 station = lines.td.span.text.replace(' ', '')
212 line = lines.find('td', {'align': 'right'}).span.text.replace(' ', '')
214 result_lines = bs.findAll('table')[-1].findAll('tr')
217 for tr in result_lines[1:]:
218 d = {'station': station}
219 th = tr.findAll('th')
222 #TODO replace with logger
223 print "[DEBUG] Unable to find th in:\n%s" % str(tr)
225 # underground site looks different -.-
228 d['direction'] = th[0].text.replace(' ', '')
# Low-floor flag: truthy when the last header cell has an <img> with an alt
# attribute.
232 d['lowfloor'] = th[-1].find('img') and th[-1].img.has_key('alt')
233 d['line'] = th[0].text.replace(' ', '')
234 d['direction'] = th[1].text.replace(' ', '')
237 tim = t.text.split(' ')
239 # print '[WARNING] Invalid time: %s' % time
240 # TODO: Issue a warning OR convert "HH:MM" format to countdown
245 if tim.find('rze...') >= 0:
248 # if time to next departure in cell convert to int
251 # check if time of next departure in cell
252 t = tim.strip(' ').split(':')
253 if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
255 d['time'] = make_datetime(datetime.now(), time(*t))
258 #TODO replace with logger
# NOTE(review): this formats the name `time` (datetime.time at file level
# unless rebound in lines not visible here), not the cell text `tim` -- check
# which one was meant.
259 print "[DEBUG] Invalid data:\n%s" % time
261 dep.append(Departure(**d))
# Download the departure page at `url` and parse it into Departure objects.
#
# An empty `url` is reported by printing "ERROR empty url" (see the FIXME
# below).  The page is requested with "&departureSizeTimeSlot=90" appended to
# `url` -- per the comment, a 90 minute time slot -- and the response is
# passed to parse_departures.
# NOTE(review): `tries = 2` and its comment indicate that the download is
# attempted a second time before an empty list is returned; the loop and the
# return statements themselves are not visible here -- confirm.
265 def get_departures(self, url):
266 """ Get list of next departures as Departure objects
269 #TODO parse line name and direction for station site parsing
272 # FIXME prevent from calling this method with None
273 print "ERROR empty url"
276 # open url for 90 min timeslot / get departure for next 90 min
278 tries = 2 # try a second time before return empty list
281 html = urlopen(url + "&departureSizeTimeSlot=90").read()
282 dep = self.parse_departures(html)
def get_departures_test(self, line, station):
    """Placeholder for a replacement of get_departure.

    Meant to hide the URL handling from higher levels by taking a line and
    a station instead; it has no implementation yet.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
# Numeric identifiers for the kinds of lines.  Each value is also the index
# of the matching display name in LINE_TYPE_NAMES.
UBAHN = 0
TRAM = 1
BUS = 2
NIGHTLINE = 3
OTHER = 4

# Display names, ordered by the identifiers above.
LINE_TYPE_NAMES = [
    'U-Bahn',
    'Strassenbahn',
    'Bus',
    'Nightline',
    'Andere',
]
def get_line_sort_key(name):
    """Build a sort key that orders line names by letters, then by number.

    The non-digit characters of `name` form the first element of the key;
    the digits, read as one integer (0 when there are none), form the second.

    >>> get_line_sort_key('U6')
    ('U', 6)
    >>> get_line_sort_key('D')
    ('D', 0)
    >>> get_line_sort_key('59A')
    ('A', 59)
    """
    letters = []
    digits = []
    for char in name:
        if char.isdigit():
            digits.append(char)
        else:
            letters.append(char)

    number = int(''.join(digits)) if digits else 0
    return (''.join(letters), number)
def get_line_type(name):
    """Get the type of line for the given name

    Returns one of the module constants UBAHN, TRAM, BUS, NIGHTLINE or OTHER.
    The checks are applied in order, so an earlier rule wins:

      * purely numeric names            -> TRAM
      * names ending in 'A', or ending
        in 'B' with a digit as second
        character (this keeps 'WLB'
        out of the bus category)        -> BUS
      * names starting with 'U'         -> UBAHN
      * names starting with 'N'         -> NIGHTLINE
      * 'D', 'O', 'VRT', 'WLB'          -> TRAM
      * anything else                   -> OTHER

    >>> get_line_type('U1') == UBAHN
    True
    >>> get_line_type('59A') == BUS
    True
    >>> get_line_type('B') == OTHER
    True
    """
    if name.isdigit():
        return TRAM
    # `and` binds tighter than `or`; the parentheses spell that out.  The
    # slice name[1:2] is empty for one-character names, so 'B' no longer
    # raises IndexError the way name[1] did.
    if name.endswith('A') or (name.endswith('B') and name[1:2].isdigit()):
        return BUS
    if name.startswith('U'):
        return UBAHN
    if name.startswith('N'):
        return NIGHTLINE
    if name in ('D', 'O', 'VRT', 'WLB'):
        return TRAM
    return OTHER
def categorize_lines(lines):
    """Group line names by line type and order each group.

    Returns a list of (type display name, [line names]) tuples.  The groups
    appear in ascending order of their type constant and the names inside a
    group are ordered by get_line_sort_key.

    >>> categorize_lines(['U4', 'U3', '59A'])
    [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
    """
    buckets = collections.defaultdict(list)

    # Feed the names in plain sorted order so that names with equal sort
    # keys keep that order after the stable per-group sort below.
    for name in sorted(lines):
        buckets[get_line_type(name)].append(name)

    for members in buckets.values():
        members.sort(key=get_line_sort_key)

    result = []
    for type_id in sorted(buckets):
        result.append((LINE_TYPE_NAMES[type_id], buckets[type_id]))
    return result
359 def make_datetime(date, time):
360 """ Ugly workaround, immutable datetime ftw -.-
363 if date.hour > time.hour:
364 date = date + timedelta(1)
365 return datetime(year=date.year,