git.maemo.org Git - pywienerlinien/blob - gotovienna/routing.py

   1 #!/usr/bin/env python
   2 # -*- coding: UTF-8 -*-
   3
   4 from gotovienna.BeautifulSoup import BeautifulSoup, NavigableString
   5 from urllib2 import urlopen
   6 from urllib import urlencode
   7 from datetime import datetime, time, timedelta
   8 from textwrap import wrap
   9 import sys
  10 import os.path
  11 import re
  12
  13 from gotovienna import defaults
  14
# Location types accepted for the origin and destination of a search request
POSITION_TYPES = ('stop', 'address', 'poi')
# strftime pattern for clock times (hours:minutes)
TIMEFORMAT = '%H:%M'
# Debug file in the user's home directory; request URLs and pages that could
# not be parsed are written to it
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
  18
  19 class ParserError(Exception):
  20
  21     def __init__(self, msg='Parser error'):
  22         self.message = msg
  23
  24 class PageType:
  25     UNKNOWN, CORRECTION, RESULT = range(3)
  26
  27
  28 def extract_city(station):
  29     """ Extract city from string if present,
  30     else return default city
  31
  32     >>> extract_city('Karlsplatz, Wien')
  33     'Wien'
  34     """
  35     if len(station.split(',')) > 1:
  36         return station.split(',')[-1].strip()
  37     else:
  38         return 'Wien'
  39
  40 def extract_station(station):
  41     """ Remove city from string
  42
  43     >>> extract_station('Karlsplatz, Wien')
  44     'Karlsplatz'
  45     """
  46     if len(station.split(',')) > 1:
  47         return station[:station.rindex(',')].strip()
  48     else:
  49         return station
  50
  51 def split_station(station):
  52     """ >>> split_station('Karlsplatz, Wien')
  53     ('Karlsplatz', 'Wien')
  54     >>> split_station('Karlsplatz')
  55     ('Karlsplatz', 'Wien')
  56     """
  57     if len(station.split(',')) > 1:
  58         return (station[:station.rindex(',')].strip(), station.split(',')[-1].strip())
  59     else:
  60         return (station, 'Wien')
  61
  62 def guess_location_type(location):
  63     """Guess type (stop, address, poi) of a location
  64
  65     >>> guess_location_type('pilgramgasse')
  66     'stop'
  67
  68     >>> guess_location_type('karlsplatz 14')
  69     'address'
  70
  71     >>> guess_location_type('reumannplatz 12/34')
  72     'address'
  73     """
  74     parts = location.split()
  75     first_part = parts[0]
  76     last_part = parts[-1]
  77
  78     # Assume all single-word locations are stops
  79     if len(parts) == 1:
  80         return 'stop'
  81
  82     # If the last part is numeric, assume address
  83     if last_part.isdigit() and len(parts) > 1:
  84         return 'address'
  85
  86     # Addresses with door number (e.g. "12/34")
  87     if all(x.isdigit() or x == '/' for x in last_part):
  88         return 'address'
  89
  90     # Sane default - assume it's a stop/station name
  91     return 'stop'
  92
  93 def search(origin_tuple, destination_tuple, dtime=None):
  94     """ build route request
  95     returns html result (as urllib response)
  96     """
  97     if not dtime:
  98         dtime = datetime.now()
  99
 100     origin, origin_type = origin_tuple
 101     origin, origin_city = split_station(origin)
 102
 103     destination, destination_type = destination_tuple
 104     destination, destination_city = split_station(destination)
 105
 106
 107     if origin_type is None:
 108         origin_type = guess_location_type(origin)
 109         print 'Guessed origin type:', origin_type
 110
 111     if destination_type is None:
 112         destination_type = guess_location_type(destination)
 113         print 'Guessed destination type:', destination_type
 114
 115     if (origin_type not in POSITION_TYPES or
 116             destination_type not in POSITION_TYPES):
 117         raise ParserError('Invalid position type')
 118
 119     post = defaults.search_post
 120     post['name_origin'] = origin
 121     post['type_origin'] = origin_type
 122     post['name_destination'] = destination
 123     post['type_destination'] = destination_type
 124     post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
 125     post['itdTime'] = dtime.strftime('%H:%M')
 126     post['place_origin'] = origin_city
 127     post['place_destination'] = destination_city
 128     params = urlencode(post)
 129     url = '%s?%s' % (defaults.action, params)
 130
 131     try:
 132         f = open(DEBUGLOG, 'a')
 133         f.write(url + '\n')
 134         f.close()
 135     except:
 136         print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
 137
 138     return urlopen(url)
 139
 140
 141 class sParser:
 142     """ Parser for search response
 143     """
 144
 145     def __init__(self, html):
 146         self.soup = BeautifulSoup(html)
 147
 148     def check_page(self):
 149         if self.soup.find('form', {'id': 'form_efaresults'}):
 150             return PageType.RESULT
 151
 152         if self.soup.find('div', {'class':'form_error'}):
 153             return PageType.CORRECTION
 154
 155         return PageType.UNKNOWN
 156
 157     state = property(check_page)
 158
 159     def get_correction(self):
 160         names_origin = self.soup.find('select', {'id': 'nameList_origin'})
 161         names_destination = self.soup.find('select', {'id': 'nameList_destination'})
 162         places_origin = self.soup.find('select', {'id': 'placeList_origin'})
 163         places_destination = self.soup.find('select', {'id': 'placeList_destination'})
 164
 165
 166         if any([names_origin, names_destination, places_origin, places_destination]):
 167             dict = {}
 168
 169             if names_origin:
 170                 dict['origin'] = map(lambda x: x.text,
 171                                      names_origin.findAll('option'))
 172             if names_destination:
 173                 dict['destination'] = map(lambda x: x.text,
 174                                           names_destination.findAll('option'))
 175
 176             if places_origin:
 177                 dict['place_origin'] = map(lambda x: x.text,
 178                                            names_origin.findAll('option'))
 179             if names_destination:
 180                 dict['place_destination'] = map(lambda x: x.text,
 181                                                 names_destination.findAll('option'))
 182
 183             return dict
 184
 185         else:
 186             raise ParserError('Unable to parse html')
 187
 188     def get_result(self):
 189         return rParser(str(self.soup))
 190
 191
 192
 193 class rParser:
 194     """ Parser for routing results
 195     """
 196
 197     def __init__(self, html):
 198         self.soup = BeautifulSoup(html)
 199         self._overview = None
 200         self._details = None
 201
 202     @classmethod
 203     def get_tdtext(cls, x, cl):
 204             return x.find('td', {'class': cl}).text
 205
 206     @classmethod
 207     def get_change(cls, x):
 208         y = rParser.get_tdtext(x, 'col_change')
 209         if y:
 210             return int(y)
 211         else:
 212             return 0
 213
 214     @classmethod
 215     def get_price(cls, x):
 216         y = rParser.get_tdtext(x, 'col_price')
 217         if y == '*':
 218             return 0.0
 219         if y.find(','):
 220             return float(y.replace(',', '.'))
 221         else:
 222             return 0.0
 223
 224     @classmethod
 225     def get_date(cls, x):
 226         y = rParser.get_tdtext(x, 'col_date')
 227         if y:
 228             return datetime.strptime(y, '%d.%m.%Y').date()
 229         else:
 230             return None
 231
 232     @classmethod
 233     def get_datetime(cls, x):
 234         y = rParser.get_tdtext(x, 'col_time')
 235         if y:
 236             if (y.find("-") > 0):
 237                 # overview mode
 238                 times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
 239                 d = rParser.get_date(x)
 240                 from_dtime = datetime.combine(d, times[0])
 241                 if times[0] > times[1]:
 242                     # dateline crossing
 243                     to_dtime = datetime.combine(d + timedelta(1), times[1])
 244                 else:
 245                     to_dtime = datetime.combine(d, times[1])
 246
 247                 return [from_dtime, to_dtime]
 248
 249             else:
 250                 dtregex = {'date' : '\d\d\.\d\d',
 251                            'time': '\d\d:\d\d'}
 252
 253                 regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
 254                 ma = re.match(regex, y)
 255
 256                 if not ma:
 257                     return []
 258
 259                 gr = ma.groupdict()
 260
 261                 def extract_datetime(gr, n):
 262                     if 'date%d' % n in gr and gr['date%d' % n]:
 263                         from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
 264                     else:
 265                         t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
 266                         d = datetime.today().date()
 267                         return datetime.combine(d, t)
 268
 269                 # detail mode
 270                 from_dtime = extract_datetime(gr, 1)
 271                 to_dtime = extract_datetime(gr, 2)
 272
 273                 return [from_dtime, to_dtime]
 274
 275         else:
 276             return []
 277
 278     def __iter__(self):
 279         for detail in self.details():
 280             yield detail
 281
 282     def _parse_details(self):
 283         tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
 284
 285         trips = map(lambda x: map(lambda y: {
 286                         'timespan': rParser.get_datetime(y),
 287                         'station': map(lambda z: z[2:].strip(),
 288                                        filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
 289                         'info': map(lambda x: x.strip(),
 290                                     filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
 291                     }, x.find('tbody').findAll('tr')),
 292                     tours) # all routes
 293         return trips
 294
 295     @property
 296     def details(self):
 297         """returns list of trip details
 298         [ [ { 'time': [datetime.time, datetime.time] if time else [],
 299               'station': [u'start', u'end'] if station else [],
 300               'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
 301             }, ... # next trip step
 302           ], ... # next trip possibility
 303         ]
 304         """
 305         if not self._details:
 306             self._details = self._parse_details()
 307
 308         return self._details
 309
 310     def _parse_overview(self):
 311
 312         # get overview table
 313         table = self.soup.find('table', {'id': 'tbl_fahrten'})
 314
 315         # check if there is an overview table
 316         if table and table.findAll('tr'):
 317             # get rows
 318             rows = table.findAll('tr')[1:] # cut off headline
 319
 320             overview = map(lambda x: {
 321                                'timespan': rParser.get_datetime(x),
 322                                'change': rParser.get_change(x),
 323                                'price': rParser.get_price(x),
 324                            },
 325                            rows)
 326         else:
 327             raise ParserError('Unable to parse overview')
 328
 329         return overview
 330
 331     @property
 332     def overview(self):
 333         """dict containing
 334         date: datetime
 335         time: [time, time]
 336         duration: time
 337         change: int
 338         price: float
 339         """
 340         if not self._overview:
 341             try:
 342                 self._overview = self._parse_overview()
 343             except AttributeError:
 344                 f = open(DEBUGLOG, 'w')
 345                 f.write(str(self.soup))
 346                 f.close()
 347
 348         return self._overview
 349