git.maemo.org Git - pywienerlinien/blob - scotty.py

   1 #!/usr/bin/env python
   2 # -*- coding: UTF-8 -*-
   3
   4 from BeautifulSoup import BeautifulSoup, NavigableString
   5 from urllib2 import urlopen
   6 from urllib import urlencode
   7 import settings
   8 from datetime import datetime, time
   9 from textwrap import wrap
  10 import argparse
  11 import sys
  12 import os.path
  13
# Location kinds that search() accepts for the origin and the destination.
POSITION_TYPES = ('stop', 'address', 'poi')
# strftime pattern used when printing the times of a trip step.
TIMEFORMAT = '%H:%M'
# Debug file in the user's home directory: search() appends request URLs to
# it and rParser.overview dumps pages it could not parse into it.
DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
  17
  18 class ParserError(Exception):
  19
  20     def __init__(self, msg='Parser error'):
  21         self.message = msg
  22
  23 class PageType:
  24     UNKNOWN, CORRECTION, RESULT = range(3)
  25
  26
  27 def search(origin_tuple, destination_tuple, dtime=None):
  28     """ build route request
  29     returns html result (as urllib response)
  30     """
  31     if not dtime:
  32         dtime = datetime.now()
  33
  34     origin, origin_type = origin_tuple
  35     destination, destination_type = destination_tuple
  36     if not origin_type in POSITION_TYPES or\
  37         not destination_type in POSITION_TYPES:
  38         raise ParserError('Invalid position type')
  39
  40     post = settings.search_post
  41     post['name_origin'] = origin
  42     post['type_origin'] = origin_type
  43     post['name_destination'] = destination
  44     post['type_destination'] = destination_type
  45     post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
  46     post['itdTime'] = dtime.strftime('%H:%M')
  47     params = urlencode(post)
  48     url = '%s?%s' % (settings.action, params)
  49
  50     try:
  51         f = open(DEBUGLOG, 'a')
  52         f.write(url + '\n')
  53         f.close()
  54     except:
  55         print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
  56
  57     return urlopen(url)
  58
  59
  60 class sParser:
  61     """ Parser for search response
  62     """
  63
  64     def __init__(self, html):
  65         self.soup = BeautifulSoup(html)
  66
  67     def check_page(self):
  68         if self.soup.find('form', {'id': 'form_efaresults'}):
  69             return PageType.RESULT
  70
  71         if self.soup.find('div', {'class':'form_error'}):
  72             return PageType.CORRECTION
  73
  74         return PageType.UNKNOWN
  75
  76     def get_correction(self):
  77         nlo = self.soup.find('select', {'id': 'nameList_origin'})
  78         nld = self.soup.find('select', {'id': 'nameList_destination'})
  79
  80         if not nlo and not nld:
  81             raise ParserError('Unable to parse html')
  82
  83         if nlo:
  84             origin = map(lambda x: x.text, nlo.findAll('option'))
  85         else:
  86             origin = []
  87         if nld:
  88             destination = map(lambda x: x.text, nld.findAll('option'))
  89         else:
  90             destination = []
  91
  92         return (origin, destination)
  93
  94     def get_result(self):
  95         return rParser(str(self.soup))
  96
  97
  98
  99 class rParser:
 100     """ Parser for routing results
 101     """
 102
 103     def __init__(self, html):
 104         self.soup = BeautifulSoup(html)
 105         self._overview = None
 106         self._details = None
 107
 108     @classmethod
 109     def get_tdtext(cls, x, cl):
 110             return x.find('td', {'class': cl}).text
 111
 112     @classmethod
 113     def get_change(cls, x):
 114         y = rParser.get_tdtext(x, 'col_change')
 115         if y:
 116             return int(y)
 117         else:
 118             return 0
 119
 120     @classmethod
 121     def get_price(cls, x):
 122         y = rParser.get_tdtext(x, 'col_price')
 123         if y == '*':
 124             return 0.0
 125         if y.find(','):
 126             return float(y.replace(',', '.'))
 127         else:
 128             return 0.0
 129
 130     @classmethod
 131     def get_date(cls, x):
 132         y = rParser.get_tdtext(x, 'col_date')
 133         if y:
 134             return datetime.strptime(y, '%d.%m.%Y').date()
 135         else:
 136             return None
 137
 138     @classmethod
 139     def get_time(cls, x):
 140         y = rParser.get_tdtext(x, 'col_time')
 141         if y:
 142             if (y.find("-") > 0):
 143                 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
 144             else:
 145                 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
 146         else:
 147             return []
 148
 149     @classmethod
 150     def get_duration(cls, x):
 151         y = rParser.get_tdtext(x, 'col_duration')
 152         if y:
 153             return time(*map(int, y.split(":")))
 154         else:
 155             return None
 156
 157     def __iter__(self):
 158         for detail in self.details():
 159             yield detail
 160
 161     def _parse_details(self):
 162         tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
 163
 164         trips = map(lambda x: map(lambda y: {
 165                         'time': rParser.get_time(y),
 166                         'station': map(lambda z: z[2:].strip(),
 167                                        filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
 168                         'info': map(lambda x: x.strip(),
 169                                     filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
 170                     }, x.find('tbody').findAll('tr')),
 171                     tours) # all routes
 172         return trips
 173
 174     @property
 175     def details(self):
 176         """returns list of trip details
 177         [ [ { 'time': [datetime.time, datetime.time] if time else [],
 178               'station': [u'start', u'end'] if station else [],
 179               'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
 180             }, ... # next trip step
 181           ], ... # next trip possibility
 182         ]
 183         """
 184         if not self._details:
 185             self._details = self._parse_details()
 186
 187         return self._details
 188
 189     def _parse_overview(self):
 190
 191         # get overview table
 192         table = self.soup.find('table', {'id': 'tbl_fahrten'})
 193
 194         # check if there is an overview table
 195         if table and table.findAll('tr'):
 196             # get rows
 197             rows = table.findAll('tr')[1:] # cut off headline
 198
 199             overview = map(lambda x: {
 200                                'date': rParser.get_date(x),
 201                                'time': rParser.get_time(x),
 202                                'duration': rParser.get_duration(x), # grab duration
 203                                'change': rParser.get_change(x),
 204                                'price': rParser.get_price(x),
 205                            },
 206                            rows)
 207         else:
 208             raise ParserError('Unable to parse overview')
 209
 210         return overview
 211
 212     @property
 213     def overview(self):
 214         """dict containing
 215         date: datetime
 216         time: [time, time]
 217         duration: time
 218         change: int
 219         price: float
 220         """
 221         if not self._overview:
 222             try:
 223                 self._overview = self._parse_overview()
 224             except AttributeError:
 225                 f = open(DEBUGLOG, 'w')
 226                 f.write(str(self.soup))
 227                 f.close()
 228
 229         return self._overview
 230
 231 if __name__ == '__main__':
 232     parser = argparse.ArgumentParser(description='Get public transport route for Vienna')
 233     parser.add_argument('-o', metavar='name', type=str, help='origin', required=True)
 234     parser.add_argument('-d', metavar='name', type=str, help='destination', required=True)
 235     parser.add_argument('-ot', metavar='type', type=str, help='origin type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
 236     parser.add_argument('-dt', metavar='type', type=str, help='destination type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
 237
 238     args = parser.parse_args()
 239     html = search((args.o, args.ot), (args.d, args.dt)).read()
 240
 241     parser = sParser(html)
 242     state = parser.check_page()
 243
 244     if state == PageType.CORRECTION:
 245         try:
 246             cor = parser.get_correction()
 247             if cor[0]:
 248                 print
 249                 print '* Origin ambiguous:'
 250                 lo = None
 251                 while not lo or not lo.isdigit() or int(lo) > len(cor[0]):
 252                     i = 1
 253                     for c in cor[0]:
 254                         print '%d. %s' % (i, c)
 255                         i += 1
 256                     lo = sys.stdin.readline().strip()
 257
 258                 args.o = cor[0][int(lo) - 1]
 259
 260             if cor[1]:
 261                 print
 262                 print '* Destination ambiguous:'
 263                 ld = None
 264                 while not ld or not ld.isdigit() or int(ld) > len(cor[1]):
 265                     j = 1
 266                     for c in cor[1]:
 267                         print '%d. %s' % (j, c)
 268                         j += 1
 269                     ld = sys.stdin.readline().strip()
 270
 271                 args.d = cor[1][int(ld) - 1]
 272
 273             html = search((args.o.encode('UTF-8'), args.ot), (args.d.encode('UTF-8'), args.dt)).read()
 274
 275             parser = sParser(html)
 276             state = parser.check_page()
 277
 278         except ParserError:
 279             print 'PANIC at correction page'
 280
 281     if state == PageType.RESULT:
 282         parser = rParser(html)
 283         try:
 284             overviews = parser.overview
 285             details = parser.details
 286             l = ''
 287             while not l == 'q':
 288                 for idx, overview in enumerate(overviews):
 289                     if not overview['date'] or not overview['time']:
 290                         # XXX: Bogus data for e.g. Pilgramgasse->Karlsplatz?!
 291                         continue
 292
 293                     print '%d. [%s] %s-%s (%s)' % (idx + 1,
 294                             overview['date'],
 295                             overview['time'][0],
 296                             overview['time'][1],
 297                             overview['duration'])
 298                 print 'q. Quit'
 299                 l = sys.stdin.readline().strip()
 300                 print
 301                 print '~' * 100
 302
 303                 if l.isdigit() and int(l) <= len(details):
 304                     for detail in details[int(l) - 1]:
 305                         if detail['time'] and detail['station']:
 306                             time = '%s - %s' % (detail['time'][0].strftime(TIMEFORMAT), detail['time'][1].strftime(TIMEFORMAT))
 307                             print '[%s] %s\n%s' % (time, ' -> '.join(detail['station']), '\n'.join(detail['info']))
 308                         else:
 309                             print '\n'.join(detail['info'])
 310                         print '-' * 100
 311                 print
 312
 313         except ParserError:
 314             print 'parsererror'
 315
 316     elif state == PageType.UNKNOWN:
 317         print 'PANIC unknown result'