Fix routing.py bug when the time is 24:00
[pywienerlinien] / gotovienna / routing.py
index da89ba3..d82fb3f 100644 (file)
@@ -1,28 +1,95 @@
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
 
-from BeautifulSoup import BeautifulSoup, NavigableString
-from urllib2 import urlopen
+from gotovienna.BeautifulSoup import BeautifulSoup, NavigableString
+#from urllib2 import urlopen
+from UrlOpener import urlopen
 from urllib import urlencode
-import settings
-from datetime import datetime, time
+from datetime import datetime, time, timedelta
 from textwrap import wrap
-import argparse
 import sys
 import os.path
+import re
+
+from gotovienna import defaults
 
 POSITION_TYPES = ('stop', 'address', 'poi')
 TIMEFORMAT = '%H:%M'
 DEBUGLOG = os.path.expanduser('~/gotoVienna.debug')
 
 class ParserError(Exception):
-    
+    """ Raised e.g. for an invalid position type or an unparseable page """
     def __init__(self, msg='Parser error'):
         self.message = msg
 
 class PageType:
     UNKNOWN, CORRECTION, RESULT = range(3)
+
+
+def extract_city(station):
+    """ Return the text after the last comma, stripped of whitespace;
+    return 'Wien' when the string contains no comma
     
+    >>> extract_city('Karlsplatz, Wien')
+    'Wien'
+    """
+    _name, comma, city = station.rpartition(',')
+    if comma:
+        return city.strip()
+    return 'Wien'
+
+def extract_station(station):
+    """ Return the text before the last comma (the whole string if none)
+
+    >>> extract_station('Karlsplatz, Wien')
+    'Karlsplatz'
+    """
+    name, comma, _city = station.rpartition(',')
+    if comma:
+        return name.strip()
+    return station
+
+def split_station(station):
+    """ Split 'station, city' at the last comma (city defaults to 'Wien')
+
+    >>> split_station('Karlsplatz, Wien')
+    ('Karlsplatz', 'Wien')
+    >>> split_station('Karlsplatz')
+    ('Karlsplatz', 'Wien')
+    """
+    name, comma, city = station.rpartition(',')
+    return (name.strip(), city.strip()) if comma else (station, 'Wien')
+
+def guess_location_type(location):
+    """Guess type (stop, address, poi) of a location
+
+    Single words and anything without a trailing house number are
+    treated as stops; empty input falls back to 'stop' as well.
+
+    >>> guess_location_type('pilgramgasse')
+    'stop'
+
+    >>> guess_location_type('karlsplatz 14')
+    'address'
+
+    >>> guess_location_type('reumannplatz 12/34')
+    'address'
+
+    >>> guess_location_type('')
+    'stop'
+    """
+    parts = location.split()
+    # Empty input and single-word locations are assumed to be stops
+    if len(parts) <= 1:
+        return 'stop'
+
+    # A last part made only of digits and slashes is a house number,
+    # optionally with a door number (e.g. "14" or "12/34")
+    if all(x.isdigit() or x == '/' for x in parts[-1]):
+        return 'address'
+
+    # Sane default - assume it's a stop/station name
+    return 'stop'
 
 def search(origin_tuple, destination_tuple, dtime=None):
     """ build route request
@@ -30,30 +97,39 @@ def search(origin_tuple, destination_tuple, dtime=None):
     """
     if not dtime:
         dtime = datetime.now()
-    
+
     origin, origin_type = origin_tuple
+    origin, origin_city = split_station(origin)
+
     destination, destination_type = destination_tuple
-    if not origin_type in POSITION_TYPES or\
-        not destination_type in POSITION_TYPES:
+    destination, destination_city = split_station(destination)
+
+
+    if origin_type is None:
+        origin_type = guess_location_type(origin)
+        print 'Guessed origin type:', origin_type
+
+    if destination_type is None:
+        destination_type = guess_location_type(destination)
+        print 'Guessed destination type:', destination_type
+
+    if (origin_type not in POSITION_TYPES or
+            destination_type not in POSITION_TYPES):
         raise ParserError('Invalid position type')
-        
-    post = settings.search_post
+
+    post = defaults.search_post
     post['name_origin'] = origin
     post['type_origin'] = origin_type
     post['name_destination'] = destination
     post['type_destination'] = destination_type
     post['itdDateDayMonthYear'] = dtime.strftime('%d.%m.%Y')
     post['itdTime'] = dtime.strftime('%H:%M')
+    post['place_origin'] = origin_city
+    post['place_destination'] = destination_city
     params = urlencode(post)
-    url = '%s?%s' % (settings.action, params)
-    
-    try:
-        f = open(DEBUGLOG, 'a')
-        f.write(url + '\n')
-        f.close()
-    except:
-        print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
-    
+    url = '%s?%s' % (defaults.action, params)
+    #print url
+
     return urlopen(url)
 
 
@@ -63,39 +139,52 @@ class sParser:
 
     def __init__(self, html):
         self.soup = BeautifulSoup(html)
-    
+
     def check_page(self):
         if self.soup.find('form', {'id': 'form_efaresults'}):
             return PageType.RESULT
-        
+        # Without a result form, a 'form_error' div identifies a correction page
         if self.soup.find('div', {'class':'form_error'}):
             return PageType.CORRECTION
-        
+        # Neither marker was found in the page
         return PageType.UNKNOWN
-    
+
+    state = property(check_page)
+
     def get_correction(self):
-        nlo = self.soup.find('select', {'id': 'nameList_origin'})
-        nld = self.soup.find('select', {'id': 'nameList_destination'})
-        
-        if not nlo and not nld:
-            raise ParserError('Unable to parse html')
-        
-        if nlo:
-            origin = map(lambda x: x.text, nlo.findAll('option'))
-        else:
-            origin = []
-        if nld:
-            destination = map(lambda x: x.text, nld.findAll('option'))
+        names_origin = self.soup.find('select', {'id': 'nameList_origin'})
+        names_destination = self.soup.find('select', {'id': 'nameList_destination'})
+        places_origin = self.soup.find('select', {'id': 'placeList_origin'})
+        places_destination = self.soup.find('select', {'id': 'placeList_destination'})
+
+
+        if any([names_origin, names_destination, places_origin, places_destination]):
+            dict = {}
+
+            if names_origin:
+                dict['origin'] = map(lambda x: x.text,
+                                     names_origin.findAll('option'))
+            if names_destination:
+                dict['destination'] = map(lambda x: x.text,
+                                          names_destination.findAll('option'))
+
+            if places_origin:
+                dict['place_origin'] = map(lambda x: x.text,
+                                           names_origin.findAll('option'))
+            if names_destination:
+                dict['place_destination'] = map(lambda x: x.text,
+                                                names_destination.findAll('option'))
+
+            return dict
+
         else:
-            destination = []
-        
-        return (origin, destination)
-    
+            raise ParserError('Unable to parse html')
+
     def get_result(self):
         return rParser(str(self.soup))
-        
-        
-        
+
+
+
 class rParser:
     """ Parser for routing results
     """
@@ -108,7 +197,7 @@ class rParser:
     @classmethod
     def get_tdtext(cls, x, cl):
             return x.find('td', {'class': cl}).text
-    
+
     @classmethod
     def get_change(cls, x):
         y = rParser.get_tdtext(x, 'col_change')
@@ -134,25 +223,59 @@ class rParser:
             return datetime.strptime(y, '%d.%m.%Y').date()
         else:
             return None
-        
+
     @classmethod
-    def get_time(cls, x):
+    def get_datetime(cls, x):
         y = rParser.get_tdtext(x, 'col_time')
         if y:
             if (y.find("-") > 0):
-                return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+                # overview mode
+                times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+                d = rParser.get_date(x)
+                from_dtime = datetime.combine(d, times[0])
+                if times[0] > times[1]:
+                    # dateline crossing
+                    to_dtime = datetime.combine(d + timedelta(1), times[1])
+                else:
+                    to_dtime = datetime.combine(d, times[1])
+
+                return [from_dtime, to_dtime]
+
             else:
-                return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
+                dtregex = {'date' : '\d\d\.\d\d',
+                           'time': '\d\d:\d\d'}
+
+                regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
+                ma = re.match(regex, y)
+
+                if not ma:
+                    return []
+
+                gr = ma.groupdict()
+
+                def extract_datetime(gr, n):
+                    if 'date%d' % n in gr and gr['date%d' % n]:
+                        if gr['time%d' % n] == '24:00':
+                            gr['time%d' % n] = '0:00'
+                        from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
+                    else:
+                        d = datetime.today().date()
+                        # Strange times possible at wienerlinien
+                        if gr['time%d' % n] == '24:00':
+                            gr['time%d' % n] = '0:00'
+                            d += timedelta(days=1)
+                        t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
+                        
+                        return datetime.combine(d, t)
+
+                # detail mode
+                from_dtime = extract_datetime(gr, 1)
+                to_dtime = extract_datetime(gr, 2)
+
+                return [from_dtime, to_dtime]
+
         else:
             return []
-        
-    @classmethod
-    def get_duration(cls, x):
-        y = rParser.get_tdtext(x, 'col_duration')
-        if y:
-            return time(*map(int, y.split(":")))
-        else:
-            return None
 
     def __iter__(self):
         for detail in self.details():
@@ -162,7 +285,7 @@ class rParser:
         tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
 
         trips = map(lambda x: map(lambda y: {
-                        'time': rParser.get_time(y),
+                        'timespan': rParser.get_datetime(y),
                         'station': map(lambda z: z[2:].strip(),
                                        filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
                         'info': map(lambda x: x.strip(),
@@ -177,7 +300,7 @@ class rParser:
         [ [ { 'time': [datetime.time, datetime.time] if time else [],
               'station': [u'start', u'end'] if station else [],
               'info': [u'start station' if station else u'details for walking', u'end station' if station else u'walking duration']
-            }, ... # next trip step 
+            }, ... # next trip step
           ], ... # next trip possibility
         ]
         """
@@ -195,11 +318,9 @@ class rParser:
         if table and table.findAll('tr'):
             # get rows
             rows = table.findAll('tr')[1:] # cut off headline
-            
+
             overview = map(lambda x: {
-                               'date': rParser.get_date(x),
-                               'time': rParser.get_time(x),
-                               'duration': rParser.get_duration(x), # grab duration
+                               'timespan': rParser.get_datetime(x),
                                'change': rParser.get_change(x),
                                'price': rParser.get_price(x),
                            },