1 /* This file is part of Cinaest.
3 * Copyright (C) 2009 Philipp Zabel
5 * Cinaest is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * Cinaest is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
// A cinema as scraped from the listing page. parse_theater () creates one
// instance per theater node and attaches it to every movie parsed below it.
19 public class Theater {
// Part of the listing's info text that precedes the " - " separator.
21 public string address;
// One movie entry of a theater listing. parse_movie () creates and fills it
// and passes it to the callback stored by query ().
25 public class GoogleMovie {
// Theater this entry was listed under.
28 public Theater theater;
// Text of the listing's "times" element with every U+00A0 (no-break space)
// replaced by a comma.
31 public string showtimes;
// Parser for the Google Movies showtimes page: query () downloads the page,
// parse () walks the HTML tree, and parsed movies are reported through a
// ReceiveMovie callback.
34 class GoogleParser : Object {
// NOTE(review): no visible code assigns this field - see the TODO in parse ().
36 public string location;
// HTTP helper used by query () to download the listing page.
39 CurlWrapper curlwrapper;
// Signature of the callback that receives parsed movies.
42 public delegate void ReceiveMovie (GoogleMovie movie);
// Callback of the current query; stored by query () and invoked from parse_movie ().
43 public ReceiveMovie _get_callback;
46 curlwrapper = new CurlWrapper ();
// Runtime text such as "1hr 43min": group 1 is the hours, group 2 the minutes.
48 re_runtime = new Regex ("([0-9]+)hr ([0-9]+)min");
49 } catch (RegexError e) {
// A failed regex compilation is only logged here.
50 critical ("Failed to initialize regex: %s\n", e.message);
// Feeds the page source `buf` to the libxml2 HTML parser and returns the
// resulting document pointer. The NOERROR and NOWARNING options are set, and
// "http://movies.google.de" is passed as the URL argument with a null encoding.
// NOTE(review): query () downloads from google.com while this passes
// google.de - confirm whether the mismatch matters.
54 private Html.Doc* get_html_document (string buf) {
55 return Html.Doc.read_memory ((char[]) buf, (int) buf.length,
56 "http://movies.google.de", null, Html.ParserOption.NOERROR | Html.ParserOption.NOWARNING);
// Parses the page source `buf`. Builds an HTML document, looks up the first
// <div class="theater"> below the document's children and passes it to
// parse_theater (), continuing with whatever node parse_theater () returns
// until that is null. Failures are reported on stderr.
// NOTE(review): the meaning of the int return value is not visible here.
59 public int parse (string buf) throws Error {
60 var doc = get_html_document (buf);
62 stderr.printf ("Error: parsing failed\n");
66 // TODO: set up location
// Depth-first search for the first theater block anywhere in the document.
69 var theater = search_tag_by_class (doc->children, "div", "theater");
70 if (theater == null) {
71 stderr.printf ("Error: does not contain theater\n");
// parse_theater () yields the next node to process; null ends the loop.
75 while (theater != null) {
76 theater = parse_theater (theater);
// Parses one theater node `t`. Reads the theater's name, address and phone
// from its <div class="desc"> child, then parses every <div class="movie">
// found in the "show_left" and "show_right" columns of the following
// <div class="showtimes">. parse () uses the returned node as the next
// theater to process.
81 private Xml.Node* parse_theater (Xml.Node* t) {
82 var theater = new Theater ();
83 var desc = t->children;
84 if (desc != null && desc->name == "div" && desc->get_prop ("class") == "desc") {
// The theater name is the text of the <a> inside <h2 class="name">.
85 var name = desc->children;
86 if (name != null && name->name == "h2" && name->get_prop ("class") == "name") {
87 var a = name->children;
88 if (a != null && a->name == "a")
89 theater.name = get_child_text_content (a);
// Debug output; runs whether or not a name was found.
90 print ("THEATER \"%s\"\n", theater.name);
// The info div after the heading holds "<address> - <phone>".
92 var info = name->next;
93 if (info != null && info->name == "div" && info->get_prop ("class") == "info") {
94 var text = info->children;
95 if (text != null && text->name == "text") {
96 var address_and_phone = text->content.split (" - ");
97 if (address_and_phone.length >= 2) {
98 theater.address = address_and_phone[0];
// The phone number is stored without spaces and dashes.
99 theater.phone = address_and_phone[1].replace (" ", "").replace ("-", "");
// The showtimes div is expected to be the sibling right after the description.
104 var showtimes = desc->next;
105 if (showtimes != null && showtimes->name == "div" && showtimes->get_prop ("class") == "showtimes") {
// Movies are listed in two columns; the left one is handled first.
106 var left = search_tag_by_class (showtimes->children, "div", "show_left");
107 if (left != null && left->children != null) {
// parse_movie () returns the next node to continue with, null when done.
109 var movie = search_tag_by_class (left->children, "div", "movie");
110 while (movie != null) {
111 movie = parse_movie (movie, theater);
// NOTE(review): this appears to sit outside the `left != null` check above;
// if so, `left->next` dereferences null when no "show_left" column exists,
// and a null `left->next` trips the callee's requires () - confirm.
114 var right = search_tag_by_class (left->next, "div", "show_right");
115 if (right != null && right->children != null) {
117 var movie = search_tag_by_class (right->children, "div", "movie");
118 while (movie != null) {
119 movie = parse_movie (movie, theater);
// Parses one <div class="movie"> node `m` listed under `theater`. Creates a
// GoogleMovie, fills it from the node's children, checks its title against
// the current query and passes it to _get_callback. The callers loop on the
// returned node until it is null.
127 private Xml.Node* parse_movie (Xml.Node* m, Theater theater) {
128 var movie = new GoogleMovie ();
129 movie.theater = theater;
// Dispatch on each child's tag and class: title, info line, showtimes.
131 for (n = m->children; n != null; n = n->next) {
132 if (n->name == "div" && n->get_prop ("class") == "name")
133 movie.title = parse_movie_name (n);
134 if (n->name == "span" && n->get_prop ("class") == "info")
135 parse_movie_info (n, movie);
136 if (n->name == "div" && n->get_prop ("class") == "times")
137 parse_movie_times (n, movie);
// Title filter: a plain prefix comparison when query () built no pattern,
// otherwise a match against the pattern.
// NOTE(review): movie.title is still null when no name div was found or
// parse_movie_name () returned null; both checks below use it - confirm it
// is guarded.
139 if (pattern == null) {
140 if (!movie.title.has_prefix (_title))
143 if (!pattern.match ((uint) movie.title.length, movie.title, null))
// Hand the movie to the callback registered by query ().
146 _get_callback (movie);
// Extracts the movie title from a <div class="name"> node.
//
// The first child of `n` is expected to be an <a> element whose first child
// is a text node. Returns that text with known language-tag suffixes removed
// (see strip_tags ()), or null when the node does not have that structure.
private string? parse_movie_name (Xml.Node* n) {
	var a = n->children;
	if (a == null || a->name != "a") {
		return null;
	}

	var text = a->children;
	// This guard has to cover the return as well as the debug print: with an
	// unbraced `if` only the print is conditional, and a link without a text
	// child makes the return dereference a null node.
	if (text == null || text->name != "text" || text->content == null) {
		return null;
	}

	print ("\"%s\"\n", text->content);
	return strip_tags (text->content);
}
// Removes a trailing language tag from a movie title: " (OmU)" is tried
// first, then " (OV)"; when one matches, the title without that suffix is
// returned.
162 // FIXME - this is specific for Germany
163 private string strip_tags (string title) {
164 string tag_suffix = " (OmU)"; // original audio with subtitles
165 if (title.has_suffix (tag_suffix))
166 return title.substring (0, title.length - tag_suffix.length);
167 tag_suffix = " (OV)"; // original audio
168 if (title.has_suffix (tag_suffix))
169 return title.substring (0, title.length - tag_suffix.length);
// Fills runtime, age rating (fsk) and user rating of `movie` from the
// <span class="info"> node `i`. The span's leading text node carries the
// runtime and the "Rated ..." text; the user rating is taken from a following
// <nobr> sibling.
173 private void parse_movie_info (Xml.Node* i, GoogleMovie movie) {
174 var text = i->children;
175 if (text != null && text->name == "text") {
// Debug output of the raw info text.
177 print ("\t\"%s\"\n", text->content);
// The runtime is stored in seconds: hours * 3600 + minutes * 60.
178 if (re_runtime.match (text->content, 0, out match)) {
179 movie.runtime = match.fetch (1).to_int () * 3600 +
180 match.fetch (2).to_int () * 60;
// Keeps the text from "Rated " onwards, with " - " separators removed.
// NOTE(review): str () yields null when "Rated " is absent, which would make
// the chained replace () operate on null - confirm this is guarded.
182 movie.fsk = text->content.str ("Rated ").replace (" - ", "");
// Look through the siblings after the text node for the rating element.
184 for (var n = text->next; n != null; n = n->next) {
185 if (n->name == "nobr") {
186 movie.rating = parse_rating (n);
187 if (movie.rating == 0)
// Extracts the user rating from the rating <nobr> element of a movie's info.
//
// Looks through the nested <nobr> children of `nobr` for an <img> whose alt
// text has the form "Rated X.Y out of 5.0" and returns X.Y multiplied by ten,
// truncated to an integer. Returns 0 when no such image is found; the caller
// compares the result against 0.
private int parse_rating (Xml.Node* nobr) {
	string prefix = "Rated ";

	for (var n = nobr->children; n != null; n = n->next) {
		if (n->name != "nobr") {
			continue;
		}
		for (var img = n->children; img != null; img = img->next) {
			if (img->name != "img") {
				continue;
			}
			var alt = img->get_prop ("alt");
			// Skip images whose alt text is missing or too short. The number is
			// read at a fixed offset, so anything that does not start with the
			// prefix would be read past its end or parsed as garbage.
			if (alt == null || !alt.has_prefix (prefix)) {
				continue;
			}
			double rating = alt.offset (prefix.length).to_double ();
			print ("\trating: %s - %f\n", alt, rating);
			return (int) (10 * rating);
		}
	}

	return 0;
}
// Stores the showtimes text of a <div class="times"> node in `movie`.
//
// The times in the node's text are separated by U+00A0 (no-break space,
// encoded as the bytes C2 A0 in UTF-8); each one is replaced by a comma.
// When the node has no leading text child, movie.showtimes is left untouched.
private void parse_movie_times (Xml.Node* node, GoogleMovie movie) {
	unowned string? times = get_child_text_content (node);
	// get_child_text_content () is declared to return a nullable string, so
	// check before calling a method on the result.
	if (times == null) {
		return;
	}
	movie.showtimes = times.replace ("\xc2\xa0", ",");
}
// Returns the content of the first child of `n` when that child is a text
// node. The result is unowned: it points into the node tree and is not a copy.
// NOTE(review): `n` itself is dereferenced without a null check.
214 unowned string? get_child_text_content (Xml.Node *n) {
215 if (n->children != null && n->children->name == "text")
216 return n->children->content;
// Searches `node`, its following siblings and, recursively, their children
// for an element named `tag` whose attribute `prop` equals `val`.
// `node` must not be null (enforced by the requires clause); callers have to
// check before passing a possibly empty child or sibling pointer.
221 Xml.Node* search_tag_by_property (Xml.Node* node, string tag, string prop, string val) requires (node != null) {
222 for (var n = node; n != null; n = n->next) {
// A node is tested itself before its children are descended into.
223 if (n->name == tag && n->get_prop (prop) == val)
225 if (n->children != null) {
226 var found = search_tag_by_property (n->children, tag, prop, val);
// Convenience wrapper around search_tag_by_property () that looks for an
// element named `tag` whose "class" attribute equals `@class`.
// `node` must not be null (enforced by the requires clause).
234 Xml.Node* search_tag_by_class (Xml.Node* node, string tag, string @class) requires (node != null) {
235 return search_tag_by_property (node, tag, "class", @class);
// Asynchronously downloads the movie listing, optionally restricted to
// `location`, and stores `callback` so that parse_movie () can report movies
// to it. Errors are reported on stderr.
// NOTE(review): `cancellable` is not used by any visible line - confirm.
238 public async int query (string title, string? location, ReceiveMovie callback, Cancellable? cancellable = null) {
239 _get_callback = callback;
// A title containing '*' is treated as a glob pattern.
241 if (title.chr(title.length, '*') != null) {
242 pattern = new PatternSpec (title);
247 // TODO - use google.de in Germany, also provides genres
248 string uri = "http://google.com/movies";
// NOTE(review): location is appended verbatim; spaces, '&' or non-ASCII
// characters are not URI-escaped - consider Uri.escape_string ().
249 if (location != null && location != "")
250 uri += "?near=" + location;
252 stdout.printf ("GET: %s\n", uri);
// Suspends this coroutine until the HTTP request has completed.
254 string buf = yield curlwrapper.http_get (uri);
257 stderr.printf ("Error: %s\n", e.message);