git.maemo.org Git - pywienerlinien/blob - scotty.py

   1 from BeautifulSoup import BeautifulSoup, NavigableString
   2 from urllib2 import urlopen
   3 from urllib import urlencode
   4 import settings
   5 from datetime import datetime, time
   6 from textwrap import wrap
   7 import argparse
   8 import sys
   9 import os.path
  10
  11 POSITION_TYPES = ('stop', 'address', 'poi')
  12 TIMEFORMAT = '%H:%M'
  13 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
  14
  15 class ParserError(Exception):
  16
  17     def __init__(self, msg='Parser error'):
  18         self.message = msg
  19
  20 class PageType:
  21     UNKNOWN, CORRECTION, RESULT = range(3)
  22
  23
  24 def search(origin_tuple, destination_tuple, dtime=None):
  25     """ build route request
  26     returns html result (as urllib response)
  27     """
  28     if not dtime:
  29         dtime = datetime.now()
  30
  31     origin, origin_type = origin_tuple
  32     destination, destination_type = destination_tuple
  33     if not origin_type in POSITION_TYPES or\
  34         not destination_type in POSITION_TYPES:
  35         raise ParserError('Invalid position type')
  36
  37     post = settings.search_post
  38     post['name_origin'] = origin
  39     post['type_origin'] = origin_type
  40     post['name_destination'] = destination
  41     post['type_destination'] = destination_type
  42     post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
  43     post['itdTime'] = dtime.strftime('%H:%M')
  44     params = urlencode(post)
  45     url = '%s?%s' % (settings.action, params)
  46
  47     try:
  48         f = open(DEBUGLOG, 'a')
  49         f.write(url + '\n')
  50         f.close()
  51     except:
  52         print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
  53
  54     return urlopen(url)
  55
  56
  57 class sParser:
  58     """ Parser for search response
  59     """
  60
  61     def __init__(self, html):
  62         self.soup = BeautifulSoup(html)
  63
  64     def check_page(self):
  65         if self.soup.find('form', {'id': 'form_efaresults'}):
  66             return PageType.RESULT
  67
  68         if self.soup.find('div', {'class':'form_error'}):
  69             return PageType.CORRECTION
  70
  71         return PageType.UNKNOWN
  72
  73     def get_correction(self):
  74         nlo = self.soup.find('select', {'id': 'nameList_origin'})
  75         nld = self.soup.find('select', {'id': 'nameList_destination'})
  76
  77         if not nlo and not nld:
  78             raise ParserError('Unable to parse html')
  79
  80         if nlo:
  81             origin = map(lambda x: x.text, nlo.findAll('option'))
  82         else:
  83             origin = []
  84         if nld:
  85             destination = map(lambda x: x.text, nld.findAll('option'))
  86         else:
  87             destination = []
  88
  89         return (origin, destination)
  90
  91     def get_result(self):
  92         return rParser(str(self.soup))
  93
  94
  95
  96 class rParser:
  97     """ Parser for routing results
  98     """
  99
 100     def __init__(self, html):
 101         self.soup = BeautifulSoup(html)
 102         self._overview = None
 103         self._details = None
 104
 105     @classmethod
 106     def get_tdtext(cls, x, cl):
 107             return x.find('td', {'class': cl}).text
 108
 109     @classmethod
 110     def get_change(cls, x):
 111         y = rParser.get_tdtext(x, 'col_change')
 112         if y:
 113             return int(y)
 114         else:
 115             return 0
 116
 117     @classmethod
 118     def get_price(cls, x):
 119         y = rParser.get_tdtext(x, 'col_price')
 120         if y.find(','):
 121             return float(y.replace(',', '.'))
 122         else:
 123             return 0.0
 124
 125     @classmethod
 126     def get_date(cls, x):
 127         y = rParser.get_tdtext(x, 'col_date')
 128         if y:
 129             return datetime.strptime(y, '%d.%m.%Y').date()
 130         else:
 131             return None
 132
 133     @classmethod
 134     def get_time(cls, x):
 135         y = rParser.get_tdtext(x, 'col_time')
 136         if y:
 137             if (y.find("-") > 0):
 138                 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
 139             else:
 140                 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
 141         else:
 142             return []
 143
 144     @classmethod
 145     def get_duration(cls, x):
 146         y = rParser.get_tdtext(x, 'col_duration')
 147         if y:
 148             return time(*map(int, y.split(":")))
 149         else:
 150             return None
 151
 152     def __iter__(self):
 153         for detail in self.details():
 154             yield detail
 155
 156     def _parse_details(self):
 157         tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
 158
 159         trips = map(lambda x: map(lambda y: {
 160                         'time': rParser.get_time(y),
 161                         'station': map(lambda z: z[2:].strip(),
 162                                        filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
 163                         'info': map(lambda x: x.strip(),
 164                                     filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
 165                     }, x.find('tbody').findAll('tr')),
 166                     tours) # all routes
 167         return trips
 168
 169     @property
 170     def details(self):
 171         """returns list of trip details
 172         [ [ { 'time': [datetime.time, datetime.time] if time else [],
 173               'station': [u'start', u'end'] if station else [],
 174               'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
 175             }, ... # next trip step
 176           ], ... # next trip possibility
 177         ]
 178         """
 179         if not self._details:
 180             self._details = self._parse_details()
 181
 182         return self._details
 183
 184     def _parse_overview(self):
 185
 186         # get overview table
 187         table = self.soup.find('table', {'id': 'tbl_fahrten'})
 188
 189         # check if there is an overview table
 190         if table and table.findAll('tr'):
 191             # get rows
 192             rows = table.findAll('tr')[1:] # cut off headline
 193
 194             overview = map(lambda x: {
 195                                'date': rParser.get_date(x),
 196                                'time': rParser.get_time(x),
 197                                'duration': rParser.get_duration(x), # grab duration
 198                                'change': rParser.get_change(x),
 199                                'price': rParser.get_price(x),
 200                            },
 201                            rows)
 202         else:
 203             raise ParserError('Unable to parse overview')
 204
 205         return overview
 206
 207     @property
 208     def overview(self):
 209         """dict containing
 210         date: datetime
 211         time: [time, time]
 212         duration: time
 213         change: int
 214         price: float
 215         """
 216         if not self._overview:
 217             try:
 218                 self._overview = self._parse_overview()
 219             except AttributeError:
 220                 f = open(DEBUGLOG, 'w')
 221                 f.write(str(self.soup))
 222                 f.close()
 223
 224         return self._overview
 225
 226 if __name__ == '__main__':
 227     parser = argparse.ArgumentParser(description='Get public transport route for Vienna')
 228     parser.add_argument('-o', metavar='name', type=str, help='origin', required=True)
 229     parser.add_argument('-d', metavar='name', type=str, help='destination', required=True)
 230     parser.add_argument('-ot', metavar='type', type=str, help='origin type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
 231     parser.add_argument('-dt', metavar='type', type=str, help='destination type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
 232
 233     args = parser.parse_args()
 234     html = search((args.o, args.ot), (args.d, args.dt)).read()
 235
 236     parser = sParser(html)
 237     state = parser.check_page()
 238
 239     if state == PageType.CORRECTION:
 240         try:
 241             cor = parser.get_correction()
 242             if cor[0]:
 243                 print
 244                 print '* Origin ambiguous:'
 245                 lo = None
 246                 while not lo or not lo.isdigit() or int(lo) > len(cor[0]):
 247                     i = 1
 248                     for c in cor[0]:
 249                         print '%d. %s' % (i, c)
 250                         i += 1
 251                     lo = sys.stdin.readline().strip()
 252
 253                 args.o = cor[0][int(lo) - 1]
 254
 255             if cor[1]:
 256                 print
 257                 print '* Destination ambiguous:'
 258                 ld = None
 259                 while not ld or not ld.isdigit() or int(ld) > len(cor[1]):
 260                     j = 1
 261                     for c in cor[1]:
 262                         print '%d. %s' % (j, c)
 263                         j += 1
 264                     ld = sys.stdin.readline().strip()
 265
 266                 args.d = cor[1][int(ld) - 1]
 267
 268             html = search((args.o.encode('UTF-8'), args.ot), (args.d.encode('UTF-8'), args.dt)).read()
 269
 270             parser = sParser(html)
 271             state = parser.check_page()
 272
 273         except ParserError:
 274             print 'PANIC at correction page'
 275
 276     if state == PageType.RESULT:
 277         parser = rParser(html)
 278         try:
 279             overviews = parser.overview
 280             details = parser.details
 281             l = ''
 282             while not l == 'q':
 283                 for r in range(len(overviews)):
 284                     print '%d. [%s] %s-%s (%s)' % (r + 1, overviews[r]['date'], overviews[r]['time'][0], overviews[r]['time'][1], overviews[r]['duration'])
 285                 print 'q. Quit'
 286                 l = sys.stdin.readline().strip()
 287                 print
 288                 print '~' * 100
 289
 290                 if l.isdigit() and int(l) <= len(details):
 291                     for detail in details[int(l) - 1]:
 292                         if detail['time'] and detail['station']:
 293                             time = '%s - %s' % (detail['time'][0].strftime(TIMEFORMAT), detail['time'][1].strftime(TIMEFORMAT))
 294                             print '[%s] %s\n%s' % (time, ' -> '.join(detail['station']), '\n'.join(detail['info']))
 295                         else:
 296                             print '\n'.join(detail['info'])
 297                         print '-' * 100
 298                 print
 299
 300         except ParserError:
 301             print 'parsererror'
 302
 303     elif state == PageType.UNKNOWN:
 304         print 'PANIC unknown result'