X-Git-Url: http://git.maemo.org/git/?a=blobdiff_plain;f=gotovienna%2Frouting.py;h=d82fb3fa29c1cbd4b6ad11b9c587127d36ad549b;hb=0b35f6c10bb65f9acb2559ed385abf0c7701a042;hp=98d057356967d176182e49cc8ed74f4984a5e2e1;hpb=1a61d38aec0b6cdd8dbee7ca385d38fa52355b07;p=pywienerlinien diff --git a/gotovienna/routing.py b/gotovienna/routing.py index 98d0573..d82fb3f 100644 --- a/gotovienna/routing.py +++ b/gotovienna/routing.py @@ -1,14 +1,15 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- -from BeautifulSoup import BeautifulSoup, NavigableString -from urllib2 import urlopen +from gotovienna.BeautifulSoup import BeautifulSoup, NavigableString +#from urllib2 import urlopen +from UrlOpener import urlopen from urllib import urlencode -from datetime import datetime, time +from datetime import datetime, time, timedelta from textwrap import wrap -import argparse import sys import os.path +import re from gotovienna import defaults @@ -36,7 +37,7 @@ def extract_city(station): return station.split(',')[-1].strip() else: return 'Wien' - + def extract_station(station): """ Remove city from string @@ -47,7 +48,7 @@ def extract_station(station): return station[:station.rindex(',')].strip() else: return station - + def split_station(station): """ >>> split_station('Karlsplatz, Wien') ('Karlsplatz', 'Wien') @@ -59,6 +60,37 @@ def split_station(station): else: return (station, 'Wien') +def guess_location_type(location): + """Guess type (stop, address, poi) of a location + + >>> guess_location_type('pilgramgasse') + 'stop' + + >>> guess_location_type('karlsplatz 14') + 'address' + + >>> guess_location_type('reumannplatz 12/34') + 'address' + """ + parts = location.split() + first_part = parts[0] + last_part = parts[-1] + + # Assume all single-word locations are stops + if len(parts) == 1: + return 'stop' + + # If the last part is numeric, assume address + if last_part.isdigit() and len(parts) > 1: + return 'address' + + # Addresses with door number (e.g. "12/34") + if all(x.isdigit() or x == '/' for x in last_part): + return 'address' + + # Sane default - assume it's a stop/station name + return 'stop' + def search(origin_tuple, destination_tuple, dtime=None): """ build route request returns html result (as urllib response) @@ -68,13 +100,21 @@ def search(origin_tuple, destination_tuple, dtime=None): origin, origin_type = origin_tuple origin, origin_city = split_station(origin) - + destination, destination_type = destination_tuple destination, destination_city = split_station(destination) - if not origin_type in POSITION_TYPES or\ - not destination_type in POSITION_TYPES: + if origin_type is None: + origin_type = guess_location_type(origin) + print 'Guessed origin type:', origin_type + + if destination_type is None: + destination_type = guess_location_type(destination) + print 'Guessed destination type:', destination_type + + if (origin_type not in POSITION_TYPES or + destination_type not in POSITION_TYPES): raise ParserError('Invalid position type') post = defaults.search_post @@ -88,13 +128,7 @@ def search(origin_tuple, destination_tuple, dtime=None): post['place_destination'] = destination_city params = urlencode(post) url = '%s?%s' % (defaults.action, params) - - try: - f = open(DEBUGLOG, 'a') - f.write(url + '\n') - f.close() - except: - print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG + #print url return urlopen(url) @@ -115,28 +149,34 @@ class sParser: return PageType.UNKNOWN + state = property(check_page) + def get_correction(self): names_origin = self.soup.find('select', {'id': 'nameList_origin'}) names_destination = self.soup.find('select', {'id': 'nameList_destination'}) places_origin = self.soup.find('select', {'id': 'placeList_origin'}) places_destination = self.soup.find('select', {'id': 'placeList_destination'}) - - if names_origin or names_destination or places_origin or places_destination: + + if any([names_origin, names_destination, places_origin, places_destination]): dict = {} - + if names_origin: - dict['origin'] = map(lambda x: x.text, names_origin.findAll('option')) + dict['origin'] = map(lambda x: x.text, + names_origin.findAll('option')) if names_destination: - dict['destination'] = map(lambda x: x.text, names_destination.findAll('option')) - + dict['destination'] = map(lambda x: x.text, + names_destination.findAll('option')) + if places_origin: - dict['place_origin'] = map(lambda x: x.text, names_origin.findAll('option')) + dict['place_origin'] = map(lambda x: x.text, + names_origin.findAll('option')) if names_destination: - dict['place_destination'] = map(lambda x: x.text, names_destination.findAll('option')) - + dict['place_destination'] = map(lambda x: x.text, + names_destination.findAll('option')) + return dict - + else: raise ParserError('Unable to parse html') @@ -185,24 +225,57 @@ class rParser: return None @classmethod - def get_time(cls, x): + def get_datetime(cls, x): y = rParser.get_tdtext(x, 'col_time') if y: if (y.find("-") > 0): - return map(lambda z: time(*map(int, z.split(':'))), y.split('-')) + # overview mode + times = map(lambda z: time(*map(int, z.split(':'))), y.split('-')) + d = rParser.get_date(x) + from_dtime = datetime.combine(d, times[0]) + if times[0] > times[1]: + # dateline crossing + to_dtime = datetime.combine(d + timedelta(1), times[1]) + else: + to_dtime = datetime.combine(d, times[1]) + + return [from_dtime, to_dtime] + else: - # FIXME Error if date in line (dateLineCross) - return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5)) - else: - return [] + dtregex = {'date' : '\d\d\.\d\d', + 'time': '\d\d:\d\d'} + + regex = "\s*(?P{date})?\s*(?P{time})\s*(?P{date})?\s*(?P{time})\s*".format(**dtregex) + ma = re.match(regex, y) + + if not ma: + return [] + + gr = ma.groupdict() + + def extract_datetime(gr, n): + if 'date%d' % n in gr and gr['date%d' % n]: + if gr['time%d' % n] == '24:00': + gr['time%d' % n] = '0:00' + from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M') + else: + d = datetime.today().date() + # Strange times possible at wienerlinien + if gr['time%d' % n] == '24:00': + gr['time%d' % n] = '0:00' + d += timedelta(days=1) + t = datetime.strptime(gr['time%d' % n], '%H:%M').time() + + return datetime.combine(d, t) + + # detail mode + from_dtime = extract_datetime(gr, 1) + to_dtime = extract_datetime(gr, 2) + + return [from_dtime, to_dtime] - @classmethod - def get_duration(cls, x): - y = rParser.get_tdtext(x, 'col_duration') - if y: - return time(*map(int, y.split(":"))) else: - return None + return [] def __iter__(self): for detail in self.details(): @@ -212,7 +285,7 @@ class rParser: tours = self.soup.findAll('div', {'class': 'data_table tourdetail'}) trips = map(lambda x: map(lambda y: { - 'time': rParser.get_time(y), + 'timespan': rParser.get_datetime(y), 'station': map(lambda z: z[2:].strip(), filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings 'info': map(lambda x: x.strip(), @@ -247,9 +320,7 @@ class rParser: rows = table.findAll('tr')[1:] # cut off headline overview = map(lambda x: { - 'date': rParser.get_date(x), - 'time': rParser.get_time(x), - 'duration': rParser.get_duration(x), # grab duration + 'timespan': rParser.get_datetime(x), 'change': rParser.get_change(x), 'price': rParser.get_price(x), },