git.maemo.org Git - pywienerlinien/blob - scotty.py

   1 from BeautifulSoup import BeautifulSoup, NavigableString
   2 from urllib2 import urlopen
   3 from urllib import urlencode
   4 import settings
   5 from datetime import datetime, time
   6 from textwrap import wrap
   7 import argparse
   8 import sys
   9
# Location kinds accepted for the origin and destination of a route request.
# search() raises ParserError for any other value, and the command line
# interface offers exactly these as choices.
POSITION_TYPES = ('stop', 'address', 'poi')
  11
  12 class ParserError(Exception):
  13
  14     def __init__(self, msg='Parser error'):
  15         self.message = msg
  16
  17 class PageType:
  18     UNKNOWN, CORRECTION, RESULT = range(3)
  19
  20
  21 def search(origin_tuple, destination_tuple, dtime=None):
  22     """ build route request
  23     returns html result (as urllib response)
  24     """
  25     if not dtime:
  26         dtime = datetime.now()
  27
  28     origin, origin_type = origin_tuple
  29     destination, destination_type = destination_tuple
  30     if not origin_type in POSITION_TYPES or\
  31         not destination_type in POSITION_TYPES:
  32         raise ParserError('Invalid position type')
  33
  34     post = settings.search_post
  35     post['name_origin'] = origin
  36     post['type_origin'] = origin_type
  37     post['name_destination'] = destination
  38     post['type_destination'] = destination_type
  39     post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
  40     post['itdTime'] = dtime.strftime('%H:%M')
  41     params = urlencode(post)
  42     url = '%s?%s' % (settings.action, params)
  43
  44     print "\nurl %s url\n\n%s\n\nurl %s url\n" % ('~'*100, url, '~'*100)
  45
  46     return urlopen(url)
  47
  48
  49 class sParser:
  50     """ Parser for search response
  51     """
  52
  53     def __init__(self, html):
  54         self.soup = BeautifulSoup(html)
  55
  56     def check_page(self):
  57         if self.soup.find('form', {'id': 'form_efaresults'}):
  58             return PageType.RESULT
  59
  60         if self.soup.find('div', {'class':'form_error'}):
  61             return PageType.CORRECTION
  62
  63         return PageType.UNKNOWN
  64
  65     def get_correction(self):
  66         nlo = self.soup.find('select', {'id': 'nameList_origin'})
  67         nld = self.soup.find('select', {'id': 'nameList_destination'})
  68
  69         if not nlo and not nld:
  70             raise ParserError('Unable to parse html')
  71
  72         if nlo:
  73             origin = map(lambda x: x.text, nlo.findAll('option'))
  74         else:
  75             origin = []
  76         if nld:
  77             destination = map(lambda x: x.text, nld.findAll('option'))
  78         else:
  79             destination = []
  80
  81         return (origin, destination)
  82
  83     def get_result(self):
  84         return rParser(str(self.soup))
  85
  86
  87
  88 class rParser:
  89     """ Parser for routing results
  90     """
  91
  92     def __init__(self, html):
  93         self.soup = BeautifulSoup(html)
  94         self._overview = None
  95         self._details = None
  96
  97     @classmethod
  98     def get_tdtext(cls, x, cl):
  99             return x.find('td', {'class': cl}).text
 100
 101     @classmethod
 102     def get_change(cls, x):
 103         y = rParser.get_tdtext(x, 'col_change')
 104         if y:
 105             return int(y)
 106         else:
 107             return 0
 108
 109     @classmethod
 110     def get_price(cls, x):
 111         y = rParser.get_tdtext(x, 'col_price')
 112         if y.find(','):
 113             return float(y.replace(',', '.'))
 114         else:
 115             return 0.0
 116
 117     @classmethod
 118     def get_date(cls, x):
 119         y = rParser.get_tdtext(x, 'col_date')
 120         if y:
 121             return datetime.strptime(y, '%d.%m.%Y').date()
 122         else:
 123             return None
 124
 125     @classmethod
 126     def get_time(cls, x):
 127         y = rParser.get_tdtext(x, 'col_time')
 128         if y:
 129             if (y.find("-") > 0):
 130                 return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
 131             else:
 132                 return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
 133         else:
 134             return []
 135
 136     @classmethod
 137     def get_duration(cls, x):
 138         y = rParser.get_tdtext(x, 'col_duration')
 139         if y:
 140             return time(*map(int, y.split(":")))
 141         else:
 142             return None
 143
 144     def __iter__(self):
 145         for detail in self.details():
 146             yield detail
 147
 148     def _parse_details(self):
 149         tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
 150
 151         trips = map(lambda x: map(lambda y: {
 152                         'time': rParser.get_time(y),
 153                         'station': map(lambda z: z[2:].strip(),
 154                                        filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
 155                         'info': map(lambda x: x.strip(),
 156                                     filter(lambda z: type(z) == NavigableString, y.find('td', {'class': 'col_info'}).contents)),
 157                     }, x.find('tbody').findAll('tr')),
 158                     tours) # all routes
 159         return trips
 160
 161     @property
 162     def details(self):
 163         """returns list of trip details
 164         [ [ { 'time': [datetime.time, datetime.time] if time else [],
 165               'station': [u'start', u'end'] if station else [],
 166               'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
 167             }, ... # next trip step
 168           ], ... # next trip possibility
 169         ]
 170         """
 171         if not self._details:
 172             self._details = self._parse_details()
 173
 174         return self._details
 175
 176     def _parse_overview(self):
 177
 178         # get overview table
 179         table = self.soup.find('table', {'id': 'tbl_fahrten'})
 180
 181         # check if there is an overview table
 182         if table and table.findAll('tr'):
 183             # get rows
 184             rows = table.findAll('tr')[1:] # cut off headline
 185
 186             overview = map(lambda x: {
 187                                'date': rParser.get_date(x),
 188                                'time': rParser.get_time(x),
 189                                'duration': rParser.get_duration(x), # grab duration
 190                                'change': rParser.get_change(x),
 191                                'price': rParser.get_price(x),
 192                            },
 193                            rows)
 194         else:
 195             raise ParserError('Unable to parse overview')
 196
 197         return overview
 198
 199     @property
 200     def overview(self):
 201         """dict containing
 202         date: datetime
 203         time: [time, time]
 204         duration: time
 205         change: int
 206         price: float
 207         """
 208         if not self._overview:
 209             try:
 210                 self._overview = self._parse_overview()
 211             except AttributeError:
 212                 f = open('DEBUG', 'w')
 213                 f.write(str(self.soup))
 214                 f.close()
 215
 216         return self._overview
 217
 218 if __name__ == '__main__':
 219     parser = argparse.ArgumentParser(description='Get public transport route for Vienna')
 220     parser.add_argument('-o', metavar='name', type=str, help='origin', required=True)
 221     parser.add_argument('-d', metavar='name', type=str, help='destination', required=True)
 222     parser.add_argument('-ot', metavar='type', type=str, help='origin type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
 223     parser.add_argument('-dt', metavar='type', type=str, help='destination type: %s' % ' | '.join(POSITION_TYPES), default='stop', choices=POSITION_TYPES)
 224
 225     args = parser.parse_args()
 226
 227     html = search((args.o.encode('UTF-8'), args.ot), (args.d.encode('UTF-8'), args.dt)).read()
 228
 229     parser = sParser(html)
 230     state = parser.check_page()
 231
 232     if state == PageType.CORRECTION:
 233         try:
 234             cor = parser.get_correction()
 235             if cor[0]:
 236                 print
 237                 print '* Origin ambiguous:'
 238                 lo = None
 239                 ld = None
 240                 while not lo or not lo.isdigit() or int(lo) > len(cor[0]):
 241                     i = 1
 242                     for c in cor[0]:
 243                         print '%d. %s' % (i, c)
 244                         i += 1
 245                     lo = sys.stdin.readline().strip()
 246
 247                 args.o = cor[0][int(lo)-1]
 248
 249             if cor[1]:
 250                 print
 251                 print '* Destination ambiguous:'
 252                 while not ld or not ld.isdigit() or int(ld) > len(cor[1]):
 253                     j = 1
 254                     for c in cor[1]:
 255                         print '%d. %s' % (j, c)
 256                         j += 1
 257                     ld = sys.stdin.readline().strip()
 258
 259                 args.d = cor[1][int(ld)-1]
 260
 261             html = search((args.o.encode('UTF-8'), args.ot), (args.d.encode('UTF-8'), args.dt)).read()
 262
 263             parser = sParser(html)
 264             state = parser.check_page()
 265
 266         except ParserError:
 267             print 'PANIC at correction page'
 268
 269     if state == PageType.RESULT:
 270         parser = rParser(html)
 271         try:
 272             overviews = parser.overview
 273             for overview in overviews:
 274                 print '[%s] %s-%s (%s)' % (overview['date'], overview['time'][0], overview['time'][1], overview['duration'])
 275         except ParserError:
 276             print 'parsererror'
 277
 278     elif state == PageType.UNKNOWN:
 279         print 'PANIC unknown result'