#!/usr/bin/env python
# -*- coding: UTF-8 -*-
-from BeautifulSoup import BeautifulSoup, NavigableString
-from urllib2 import urlopen
+from gotovienna.BeautifulSoup import BeautifulSoup, NavigableString
+#from urllib2 import urlopen
+from UrlOpener import urlopen
from urllib import urlencode
-from datetime import datetime, time
+from datetime import datetime, time, timedelta
from textwrap import wrap
-import argparse
import sys
import os.path
+import re
from gotovienna import defaults
return station.split(',')[-1].strip()
else:
return 'Wien'
-
+
def extract_station(station):
""" Remove city from string
return station[:station.rindex(',')].strip()
else:
return station
-
+
def split_station(station):
""" >>> split_station('Karlsplatz, Wien')
('Karlsplatz', 'Wien')
else:
return (station, 'Wien')
+def guess_location_type(location):
+ """Guess type (stop, address, poi) of a location
+
+ >>> guess_location_type('pilgramgasse')
+ 'stop'
+
+ >>> guess_location_type('karlsplatz 14')
+ 'address'
+
+ >>> guess_location_type('reumannplatz 12/34')
+ 'address'
+ """
+ parts = location.split()
+ first_part = parts[0]
+ last_part = parts[-1]
+
+ # Assume all single-word locations are stops
+ if len(parts) == 1:
+ return 'stop'
+
+ # If the last part is numeric, assume address
+ if last_part.isdigit() and len(parts) > 1:
+ return 'address'
+
+ # Addresses with door number (e.g. "12/34")
+ if all(x.isdigit() or x == '/' for x in last_part):
+ return 'address'
+
+ # Sane default - assume it's a stop/station name
+ return 'stop'
+
def search(origin_tuple, destination_tuple, dtime=None):
""" build route request
returns html result (as urllib response)
origin, origin_type = origin_tuple
origin, origin_city = split_station(origin)
-
+
destination, destination_type = destination_tuple
destination, destination_city = split_station(destination)
- if not origin_type in POSITION_TYPES or\
- not destination_type in POSITION_TYPES:
+ if origin_type is None:
+ origin_type = guess_location_type(origin)
+ print 'Guessed origin type:', origin_type
+
+ if destination_type is None:
+ destination_type = guess_location_type(destination)
+ print 'Guessed destination type:', destination_type
+
+ if (origin_type not in POSITION_TYPES or
+ destination_type not in POSITION_TYPES):
raise ParserError('Invalid position type')
post = defaults.search_post
post['place_destination'] = destination_city
params = urlencode(post)
url = '%s?%s' % (defaults.action, params)
-
- try:
- f = open(DEBUGLOG, 'a')
- f.write(url + '\n')
- f.close()
- except:
- print 'Unable to write to DEBUGLOG: %s' % DEBUGLOG
+ #print url
return urlopen(url)
return PageType.UNKNOWN
+ state = property(check_page)
+
def get_correction(self):
names_origin = self.soup.find('select', {'id': 'nameList_origin'})
names_destination = self.soup.find('select', {'id': 'nameList_destination'})
places_origin = self.soup.find('select', {'id': 'placeList_origin'})
places_destination = self.soup.find('select', {'id': 'placeList_destination'})
-
- if names_origin or names_destination or places_origin or places_destination:
+
+ if any([names_origin, names_destination, places_origin, places_destination]):
dict = {}
-
+
if names_origin:
- dict['origin'] = map(lambda x: x.text, names_origin.findAll('option'))
+ dict['origin'] = map(lambda x: x.text,
+ names_origin.findAll('option'))
if names_destination:
- dict['destination'] = map(lambda x: x.text, names_destination.findAll('option'))
-
+ dict['destination'] = map(lambda x: x.text,
+ names_destination.findAll('option'))
+
if places_origin:
- dict['place_origin'] = map(lambda x: x.text, names_origin.findAll('option'))
+ dict['place_origin'] = map(lambda x: x.text,
+ names_origin.findAll('option'))
if names_destination:
- dict['place_destination'] = map(lambda x: x.text, names_destination.findAll('option'))
-
+ dict['place_destination'] = map(lambda x: x.text,
+ names_destination.findAll('option'))
+
return dict
-
+
else:
raise ParserError('Unable to parse html')
return None
@classmethod
- def get_time(cls, x):
+ def get_datetime(cls, x):
y = rParser.get_tdtext(x, 'col_time')
if y:
if (y.find("-") > 0):
- return map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+ # overview mode
+ times = map(lambda z: time(*map(int, z.split(':'))), y.split('-'))
+ d = rParser.get_date(x)
+ from_dtime = datetime.combine(d, times[0])
+ if times[0] > times[1]:
+ # dateline crossing
+ to_dtime = datetime.combine(d + timedelta(1), times[1])
+ else:
+ to_dtime = datetime.combine(d, times[1])
+
+ return [from_dtime, to_dtime]
+
else:
- # FIXME Error if date in line (dateLineCross)
- return map(lambda z: time(*map(int, z.split(':'))), wrap(y, 5))
- else:
- return []
+ dtregex = {'date' : '\d\d\.\d\d',
+ 'time': '\d\d:\d\d'}
+
+ regex = "\s*(?P<date1>{date})?\s*(?P<time1>{time})\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*".format(**dtregex)
+ ma = re.match(regex, y)
+
+ if not ma:
+ return []
+
+ gr = ma.groupdict()
+
+ def extract_datetime(gr, n):
+ if 'date%d' % n in gr and gr['date%d' % n]:
+ if gr['time%d' % n] == '24:00':
+ gr['time%d' % n] = '0:00'
+ from_dtime = datetime.strptime(str(datetime.today().year) + gr['date%d' % n] + gr['time%d' % n], '%Y%d.%m.%H:%M')
+ else:
+ d = datetime.today().date()
+ # Strange times possible at wienerlinien
+ if gr['time%d' % n] == '24:00':
+ gr['time%d' % n] = '0:00'
+ d += timedelta(days=1)
+ t = datetime.strptime(gr['time%d' % n], '%H:%M').time()
+
+ return datetime.combine(d, t)
+
+ # detail mode
+ from_dtime = extract_datetime(gr, 1)
+ to_dtime = extract_datetime(gr, 2)
+
+ return [from_dtime, to_dtime]
- @classmethod
- def get_duration(cls, x):
- y = rParser.get_tdtext(x, 'col_duration')
- if y:
- return time(*map(int, y.split(":")))
else:
- return None
+ return []
def __iter__(self):
for detail in self.details():
tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})
trips = map(lambda x: map(lambda y: {
- 'time': rParser.get_time(y),
+ 'timespan': rParser.get_datetime(y),
'station': map(lambda z: z[2:].strip(),
filter(lambda x: type(x) == NavigableString, y.find('td', {'class': 'col_station'}).contents)), # filter non NaviStrings
'info': map(lambda x: x.strip(),
rows = table.findAll('tr')[1:] # cut off headline
overview = map(lambda x: {
- 'date': rParser.get_date(x),
- 'time': rParser.get_time(x),
- 'duration': rParser.get_duration(x), # grab duration
+ 'timespan': rParser.get_datetime(x),
'change': rParser.get_change(x),
'price': rParser.get_price(x),
},