git.maemo.org Git - pywienerlinien/blob - gotovienna/realtime.py

   1 # -*- coding: utf-8 -*-
   2
   3 from gotovienna.BeautifulSoup import BeautifulSoup
   4 #from urllib2 import urlopen
   5 from urllib import quote_plus
   6 # Use urlopen proxy for fake user agent
   7 from UrlOpener import urlopen
   8 from datetime import time, datetime, timedelta
   9 import datetime as date
  10 import re
  11 import collections
  12 from errors import LineNotFoundError, StationNotFoundError
  13 import cache
  14 from cache import Stations
  15 from time import sleep
  16 from utils import sort_departures
  17
  18 from gotovienna import defaults
  19
  20 class Departure(dict):
  21     def __init__(self, line, station, direction, time, lowfloor):
  22         self['line'] = line
  23         self['station'] = station
  24         self['direction'] = direction
  25         now = datetime.now()
  26         if type(time) == date.time:
  27             time = make_datetime(now, time)
  28         if type(time) == datetime:
  29             # FIXME convert in ModelList
  30             self['realtime'] = False
  31             if time >= now:
  32                 self['time'] = (time - now).seconds/60
  33             else:
  34                 self['time'] = -1 * (now - time).seconds/60
  35             self['departure'] = time
  36         elif type(time) == int:
  37             # FIXME convert in ModelList
  38             self['realtime'] = True
  39             self['time'] = time
  40             self['departure'] = now + timedelta(minutes=self['time'])
  41         else:
  42             raise ValueError('Wrong type: time')
  43
  44         # FIXME convert in ModelList
  45         self['ftime'] = str(self['time'])
  46         self['lowfloor'] = lowfloor
  47
  48 class ITipParser:
  49     def __init__(self):
  50         self._lines = cache.lines
  51
  52     def parse_stations(self, html):
  53         bs = BeautifulSoup(html)
  54         tables = bs.findAll('table', {'class': 'text_10pix'})
  55         st = {}
  56
  57         for i in range(2):
  58             dir = tables[i].div.contents[-1].strip()[6:-6]
  59
  60             sta = []
  61             for tr in tables[i].findAll('tr', {'onmouseout': 'obj_unhighlight(this);'}):
  62                 if tr.a:
  63                     sta.append((tr.a.text, defaults.line_overview + tr.a['href']))
  64                 else:
  65                     sta.append((tr.text.strip('&nbsp;'), None))
  66
  67             st[dir] = sta
  68         return st
  69
  70     def get_stations(self, name):
  71         """ Get station by direction
  72         {'Directionname': [('Station name', 'url')]}
  73         """
  74         if not name in self.lines:
  75             return {}
  76
  77         st = Stations(name)
  78
  79         if not st:
  80             st = self.parse_stations(urlopen(self.lines[name]).read())
  81
  82         return st
  83
  84     def parse_lines(self, html):
  85         """ Parse lines from html
  86         """
  87         bs = BeautifulSoup(html)
  88         # get tables
  89         lines = bs.findAll('td', {'class': 'linie'})
  90
  91         l = {}
  92
  93         for line in lines:
  94             if line.a:
  95                 href = defaults.line_overview + line.a['href']
  96                 if line.text:
  97                     l[line.text] = href
  98                 elif line.img:
  99                     l[line.img['alt']] = href
 100
 101         return l
 102
 103     @property
 104     def lines(self):
 105         """ Dictionary of Line names with url as value
 106         """
 107         if not self._lines:
 108             self._lines = self.parse_lines(urlopen(defaults.line_overview).read())
 109
 110         return self._lines
 111
 112     def get_url_from_direction(self, line, direction, station):
 113         stations = self.get_stations(line)
 114
 115         for stationname, url in stations.get(direction, []):
 116             if stationname == station:
 117                 return url
 118
 119         return None
 120
 121     def parse_departures_by_station(self, html):
 122         """ Parse departure page
 123         precondition: html is correct departure page
 124         handle select station page before calling this method
 125         """
 126         bs = BeautifulSoup(html)
 127         dep = []
 128
 129         try:
 130             li = bs.ul.findAll('li')
 131
 132             station = bs.strong.text.split(',')[0]
 133
 134             for l in li:
 135                 try:
 136                     d = l.div.next
 137                     if d.find('&raquo;') == -1:
 138                         d = d.next.next
 139
 140                     direction = d.replace('&raquo;', '').strip()
 141                     if direction.startswith('NICHT EINSTEIGEN'):
 142                         continue
 143
 144                     line = l.img['alt']
 145                     for span in l.findAll('span'):
 146                         if span.text.isdigit():
 147                             tim = int(span.text)
 148                         elif span.text.find(':') >= 0:
 149                             tim = time(*map(int, span.text.split(':')))
 150                         else:
 151                             print 'Warning: %s' % span.text
 152                             continue
 153
 154                         if span['class'] == 'departureBarrierFree':
 155                             lowfloor = True
 156                         else:
 157                             lowfloor = False
 158
 159                         dep.append(Departure(line, station, direction, tim, lowfloor))
 160
 161                 except Exception as e:
 162                     print 'Warning: %s' % e.message
 163                     continue
 164
 165         except AttributeError:
 166             print 'Error while getting station %s' % station
 167
 168         finally:
 169             return dep
 170
 171     def get_departures_by_station(self, station):
 172         """ Get list of Departures for one station
 173         """
 174
 175         # TODO 1. Error handling
 176         # TODO 2. more error handling
 177         # TODO 3. ultimative error handling
 178
 179         html = urlopen(defaults.departures_by_station % quote_plus(station.encode('UTF-8'))).read()
 180
 181         li = BeautifulSoup(html).ul.findAll('li')
 182
 183         if li[0].a:
 184             # Dirty workaround for ambiguous station
 185             html = urlopen(defaults.qando + li[0].a['href']).read()
 186
 187         dep = self.parse_departures_by_station(html)
 188
 189         self.parse_departures_by_station(html)
 190         return dep
 191
 192     def parse_departures(self, html):
 193         bs = BeautifulSoup(html)
 194
 195         # Check for error messages
 196         msg = bs.findAll('span', {'class': 'rot fett'})
 197         if msg and len(msg) > 0 and unicode(msg[0].text).find(u'technischen St') > 0:
 198             print '\n'.join(map(lambda x: x.text.replace('&nbsp;', ''), msg))
 199             return []
 200
 201         mainform = bs.find('form', {'name': 'mainform'})
 202         if not mainform:
 203             return []
 204
 205         lines = mainform.table.findAll('tr')[1]
 206
 207         if len(lines.findAll('td', {'class': 'info'})) > 0:
 208             station = lines.span.text.replace('&nbsp;', '')
 209             line = lines.findAll('span')[-1].text.replace('&nbsp;', '')
 210         else:
 211             station = lines.td.span.text.replace('&nbsp;', '')
 212             line = lines.find('td', {'align': 'right'}).span.text.replace('&nbsp;', '')
 213
 214         result_lines = bs.findAll('table')[-1].findAll('tr')
 215
 216         dep = []
 217         for tr in result_lines[1:]:
 218             d = {'station': station}
 219             th = tr.findAll('th')
 220
 221             if len(th) < 2:
 222                 #TODO replace with logger
 223                 print "[DEBUG] Unable to find th in:\n%s" % str(tr)
 224             elif len(th) == 2:
 225                 # underground site looks different -.-
 226                 d['lowfloor'] = True
 227                 d['line'] = line
 228                 d['direction'] = th[0].text.replace('&nbsp;', '')
 229                 t = th[-1]
 230             else:
 231                 # all other lines
 232                 d['lowfloor'] = th[-1].find('img') and th[-1].img.has_key('alt')
 233                 d['line'] = th[0].text.replace('&nbsp;', '')
 234                 d['direction'] = th[1].text.replace('&nbsp;', '')
 235                 t = th[-2]
 236             # parse time
 237             tim = t.text.split(' ')
 238             if len(tim) < 2:
 239                 # print '[WARNING] Invalid time: %s' % time
 240                 # TODO: Issue a warning OR convert "HH:MM" format to countdown
 241                 tim = tim[0]
 242             else:
 243                 tim = tim[1]
 244
 245             if tim.find('rze...') >= 0:
 246                     d['time'] = 0
 247             elif tim.isdigit():
 248                 # if time to next departure in cell convert to int
 249                 d['time'] = int(tim)
 250             else:
 251                 # check if time of next departue in cell
 252                 t = tim.strip('&nbsp;').split(':')
 253                 if len(t) == 2 and all(map(lambda x: x.isdigit(), t)):
 254                     t = map(int, t)
 255                     d['time'] = make_datetime(datetime.now(), time(*t))
 256                 else:
 257                     # Unexpected content
 258                     #TODO replace with logger
 259                     print "[DEBUG] Invalid data:\n%s" % time
 260
 261             dep.append(Departure(**d))
 262
 263         return dep
 264
 265     def get_departures(self, url):
 266         """ Get list of next departures as Departure objects
 267         """
 268
 269         #TODO parse line name and direction for station site parsing
 270
 271         if not url:
 272             # FIXME prevent from calling this method with None
 273             print "ERROR empty url"
 274             return []
 275
 276         # open url for 90 min timeslot / get departure for next 90 min
 277         retry = 0
 278         tries = 2 # try a second time before return empty list
 279
 280         while retry < tries:
 281             html = urlopen(url + "&departureSizeTimeSlot=90").read()
 282             dep = self.parse_departures(html)
 283
 284             if dep:
 285                 return dep
 286
 287             retry += 1
 288             if retry == tries:
 289                 return []
 290
 291             sleep(0.5)
 292
 293     def get_departures_test(self, line, station):
 294         """ replacement for get_departure
 295             hide url in higher levels :)
 296         """
 297         raise NotImplementedError
 298
 299
 300 UBAHN, TRAM, BUS, NIGHTLINE, OTHER = range(5)
 301 LINE_TYPE_NAMES = ['U-Bahn', 'Strassenbahn', 'Bus', 'Nightline', 'Andere']
 302
 303 def get_line_sort_key(name):
 304     """Return a sort key for a line name
 305
 306     >>> get_line_sort_key('U6')
 307     ('U', 6)
 308
 309     >>> get_line_sort_key('D')
 310     ('D', 0)
 311
 312     >>> get_line_sort_key('59A')
 313     ('A', 59)
 314     """
 315     txt = ''.join(x for x in name if not x.isdigit())
 316     num = ''.join(x for x in name if x.isdigit()) or '0'
 317
 318     return (txt, int(num))
 319
 320 def get_line_type(name):
 321     """Get the type of line for the given name
 322
 323     >>> get_line_type('U1')
 324     UBAHN
 325     >>> get_line_type('59A')
 326     BUS
 327     """
 328     if name.isdigit():
 329         return TRAM
 330     elif name.endswith('A') or name.endswith('B') and name[1].isdigit():
 331         return BUS
 332     elif name.startswith('U'):
 333         return UBAHN
 334     elif name.startswith('N'):
 335         return NIGHTLINE
 336     elif name in ('D', 'O', 'VRT', 'WLB'):
 337         return TRAM
 338
 339     return OTHER
 340
 341 def categorize_lines(lines):
 342     """Return a categorized version of a list of line names
 343
 344     >>> categorize_lines(['U4', 'U3', '59A'])
 345     [('U-Bahn', ['U3', 'U4']), ('Bus', ['59A'])]
 346     """
 347     categorized_lines = collections.defaultdict(list)
 348
 349     for line in sorted(lines):
 350         line_type = get_line_type(line)
 351         categorized_lines[line_type].append(line)
 352
 353     for lines in categorized_lines.values():
 354         lines.sort(key=get_line_sort_key)
 355
 356     return [(LINE_TYPE_NAMES[key], categorized_lines[key])
 357         for key in sorted(categorized_lines)]
 358
 359 def make_datetime(date, time):
 360     """ Ugly workaround, immutable datetime ftw -.-
 361         If
 362     """
 363     if date.hour > time.hour:
 364         date = date + timedelta(1)
 365     return datetime(year=date.year,
 366                     month=date.month,
 367                     day=date.day,
 368                     hour=time.hour,
 369                     minute=time.minute,
 370                     second=time.second)