base version
[pystan] / debian / pystan / usr / lib / python2.5 / site-packages / pystan / lib / timetable_parser.py
diff --git a/debian/pystan/usr/lib/python2.5/site-packages/pystan/lib/timetable_parser.py b/debian/pystan/usr/lib/python2.5/site-packages/pystan/lib/timetable_parser.py
new file mode 100644 (file)
index 0000000..132ba71
--- /dev/null
@@ -0,0 +1,82 @@
+
+import re
+
+from HTMLParser import HTMLParser
+
+class StanTimetableParser(HTMLParser):
+    """Collect navigation links and timetable rows from a timetable page.
+
+    After ``feed()``, ``self.result`` holds two entries:
+
+    * ``'navigation'`` -- dict whose optional ``'prev'`` / ``'next'`` keys are
+      the ``href`` of the first ``<a>`` met after a ``linehourPrev`` /
+      ``linehourNext`` div, once a ``goatResultTop`` div has been seen;
+    * ``'timetable'`` -- list of rows, one per ``<tr>`` of the table whose id
+      is ``linehour`` that produced data.  A row lists, in document order,
+      the text following each ``<strong>`` (cut at the first "(") and the
+      text following each ``<td>`` whose class contains ``hour``.
+
+    Captured strings are stored as received: they are not stripped.
+    """
+
+    # Parser states.  They live on the class (still reachable as self.TT_*)
+    # so that they exist independently of __init__.
+    TT_CAPTURING_TIMETABLE = 'TT_CAPTURING_TIMETABLE'
+    TT_CAPTURING_NAVIGATION = 'TT_CAPTURING_NAVIGATION'
+    TT_STOP_HOUR = 'TT_STOP_HOUR'
+    TT_STOP_NAME = 'TT_STOP_NAME'
+    TT_NAVIG_PREV = 'TT_NAVIG_PREV'
+    TT_NAVIG_NEXT = 'TT_NAVIG_NEXT'
+    TT_NAVIG_PREV_LINK = 'TT_NAVIG_PREV_LINK'
+    TT_NAVIG_NEXT_LINK = 'TT_NAVIG_NEXT_LINK'
+
+    # States in which the prev/next divs and their links are looked for.
+    _NAV_STATES = (TT_CAPTURING_NAVIGATION, TT_NAVIG_PREV, TT_NAVIG_NEXT)
+    # Result key filled by the next link, for each "awaiting link" state.
+    _NAV_KEYS = {TT_NAVIG_PREV: 'prev', TT_NAVIG_NEXT: 'next'}
+    # States in which the next text chunk belongs to a timetable cell.
+    _CELL_STATES = (TT_STOP_HOUR, TT_STOP_NAME)
+    # Matches everything that precedes the first "(".
+    _NAME_RE = re.compile(r'^[^\(]+')
+
+    def __init__(self):
+        """Create a parser with an empty result and no active state."""
+        HTMLParser.__init__(self)
+
+        self.result = {
+            'navigation': {},
+            'timetable': []
+        }
+
+        # Row being filled; None until the first <tr> of the timetable.
+        self.current_tt_line = None
+        # Name of the most recent start tag (recorded, not used internally).
+        self.last_tag = None
+        self.state = None
+
+    def handle_starttag(self, tag, attrs):
+        """Update the state, and the navigation links, for a start tag.
+
+        tag   -- tag name as passed by HTMLParser
+        attrs -- list of (name, value) pairs as passed by HTMLParser
+        """
+        self.last_tag = tag
+
+        attributes = dict(attrs)
+        # An attribute written without a value comes with value None:
+        # treat it as an empty class instead of failing on the "in" test.
+        css_class = attributes.get('class') or ''
+
+        if tag == 'div' and css_class == 'goatResultTop':
+            self.state = self.TT_CAPTURING_NAVIGATION
+
+        elif tag == 'table' and attributes.get('id') == 'linehour':
+            self.state = self.TT_CAPTURING_TIMETABLE
+
+        elif self.state == self.TT_CAPTURING_TIMETABLE:
+            if tag == 'tr':
+                self.current_tt_line = []
+            elif tag == 'strong':
+                self.state = self.TT_STOP_NAME
+            elif tag == 'td' and 'hour' in css_class:
+                self.state = self.TT_STOP_HOUR
+
+        elif self.state in self._NAV_STATES:
+            # The prev/next divs are honoured even while a link is still
+            # awaited, so that a "prev" div without any link cannot take
+            # the link that belongs to the "next" div.
+            if tag == 'div' and css_class == 'linehourPrev':
+                self.state = self.TT_NAVIG_PREV
+            elif tag == 'div' and css_class == 'linehourNext':
+                self.state = self.TT_NAVIG_NEXT
+            elif tag == 'a' and self.state in self._NAV_KEYS:
+                href = attributes.get('href')
+                # Anchors without a target are skipped; keep waiting.
+                if href is not None:
+                    self.result['navigation'][self._NAV_KEYS[self.state]] = href
+                    self.state = self.TT_CAPTURING_NAVIGATION
+
+    def handle_data(self, data):
+        """Store a text chunk when a stop name or an hour is awaited.
+
+        Only the first chunk received after the opening tag is stored; the
+        parser then goes back to scanning the timetable.
+        """
+        if self.state not in self._CELL_STATES:
+            return
+
+        if self.state == self.TT_STOP_NAME:
+            # remove in-parenthesis; text that starts with "(" has nothing
+            # before the parenthesis and is kept whole.
+            match = self._NAME_RE.match(data)
+            if match is not None:
+                data = match.group()
+
+        # A cell met before any <tr> has no row to go to: it is dropped.
+        if self.current_tt_line is not None:
+            self.current_tt_line.append(data)
+        self.state = self.TT_CAPTURING_TIMETABLE
+
+    def handle_endtag(self, tag):
+        """Close pending cells, store finished rows, leave the timetable."""
+        if self.state in self._CELL_STATES and tag in ('strong', 'td', 'tr', 'table'):
+            # No text arrived for the pending cell (e.g. an empty <td>).
+            # Stop waiting, otherwise the row would never be stored and
+            # the next row's first text would be appended to this one.
+            self.state = self.TT_CAPTURING_TIMETABLE
+
+        if self.state != self.TT_CAPTURING_TIMETABLE:
+            return
+
+        if tag == 'tr':
+            # Rows that captured nothing (e.g. header rows) are not stored.
+            if self.current_tt_line:
+                self.result['timetable'].append(self.current_tt_line)
+        elif tag == 'table':
+            self.state = None