Add Google plugin
[cinaest] / src / plugins / google-parser.vala
/* This file is part of Cinaest.
 *
 * Copyright (C) 2009 Philipp Zabel
 *
 * Cinaest is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cinaest is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cinaest. If not, see <http://www.gnu.org/licenses/>.
 */
18
/**
 * Errors raised by the hand-written HTML scanner in GoogleParser.
 */
errordomain ParserError {
        /** A tag other than the expected one was found in the input. */
        WRONG_TAG,

        /** The input ended while a tag name was still being read. */
        EOF
}
23
/**
 * A Movie extended with the per-listing details that GoogleParser
 * extracts from the showtimes page.
 */
public class GoogleMovie : Movie {
        /** Name of the cinema whose listing this movie was found in. */
        public string cinema;

        /** First part of the "runtime - rating" text that follows the title. */
        public string runtime;

        /**
         * Second part of the "runtime - rating" text that follows the title.
         * NOTE(review): presumably the German FSK age rating - confirm.
         */
        public string fsk;

        /** Showtimes text of the listing, with "&nbsp;" replaced by ",". */
        public string showtimes;
}
30
/**
 * Scraper for the Google movie showtimes page.
 *
 * The page is downloaded into a NUL-terminated byte buffer and scanned
 * in place with a cursor (current). Tag names are cut out of the buffer
 * by overwriting the character that follows them with NUL, so the buffer
 * is modified while parsing. Every movie that passes the title filter is
 * reported through the callback given to the constructor.
 */
public class GoogleParser : Object {
        // Start of the anchor that opens a movie entry.
        private const string MOVIE_MARKER = "<a href=\"/movies/reviews?cid=";
        // Start of the table row that opens a cinema entry.
        private const string CINEMA_MARKER = "<tr valign=top><td colspan=4><a href=\"/movies?near=";
        // Number of leading characters skipped in the rating image's alt
        // text ("Rated " in "Rated 0.0 out of 5.0").
        private const int RATING_PREFIX_LENGTH = 6;
        // Size of the download buffer, including the terminating NUL.
        private const int BUFFER_SIZE = 256 * 1024;

        private MovieSource.ReceiveMovieFunction _get_callback;
        // Cursor into the NUL-terminated buffer handed to parse ().
        char *current;
        // Name of the most recently parsed cinema; attached to following movies.
        string cinema_name;
        MovieFilter _filter;
        // Glob pattern built from the filter title, or null for prefix matching.
        PatternSpec? pattern;

        /**
         * Returns the distance from the cursor to the next '<', or to the
         * terminating NUL if no further tag exists. Does not move the cursor.
         */
        public int next_tag_offset () {
                int i = 0;
                while (current[i] != '<' && current[i] != 0)
                        i++;
                return i;
        }

        /**
         * Moves the cursor to the next '<' (or to the terminating NUL).
         */
        public void next_tag () {
                if (current[0] == 0)
                        return;
                current += next_tag_offset ();
        }

        /**
         * Moves the cursor just past the next '>' (or to the terminating NUL).
         */
        public void finish_tag () {
                while (current[0] != '>' && current[0] != 0)
                        current++;
                if (current[0] == '>')
                        current++;
        }

        /**
         * Reads the name of the next tag and leaves the cursor behind the
         * single character that followed the name.
         *
         * The character after the name is overwritten with NUL so that the
         * returned string, which points into the buffer, is terminated.
         *
         * @param closed set to true if the overwritten character was the
         *               tag's closing '>', i.e. the tag is already consumed
         * @return the tag name, including a leading '/' for closing tags
         * @throws ParserError.EOF if the input ends before or inside the tag
         */
        private weak string scan_tag_name (out bool closed) throws Error {
                closed = false;
                next_tag ();
                // Without these checks the scan below would start beyond the
                // terminating NUL when the input ends at or right after '<'.
                if (current[0] == 0 || current[1] == 0)
                        throw new ParserError.EOF ("EOF in tag");

                // Index 1 is taken unchecked: it is the first name character
                // or the '/' of a closing tag.
                int i = 2;
                while (current[i].isalnum ())
                        i++;
                if (current[i] == 0)
                        throw new ParserError.EOF ("EOF in tag");

                closed = (current[i] == '>');
                current[i] = 0;
                weak string tag = (string) (current + 1);
                current += i + 1;
                return tag;
        }

        /**
         * Parses the next tag and returns its name.
         *
         * @param finish if true, the rest of the tag (its attributes and the
         *               closing '>') is skipped as well
         * @return the tag name; it points into the parse buffer
         * @throws ParserError.EOF if the input ends before or inside the tag
         */
        public weak string parse_tag (bool finish = true) throws Error {
                bool closed;
                weak string tag = scan_tag_name (out closed);
                if (finish && !closed)
                        finish_tag ();
                return tag;
        }

        /**
         * Parses the next tag completely and checks its name.
         *
         * @param tag the expected tag name
         * @throws ParserError.WRONG_TAG if a different tag was found
         * @throws ParserError.EOF if the input ends before or inside the tag
         */
        public void expect_tag (string tag) throws Error {
                var found = parse_tag (true);
                if (tag != found) {
                        throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
                                                         found, tag);
                }
        }

        /**
         * Returns a copy of the text between the cursor and the next tag and
         * moves the cursor to that tag.
         */
        public string parse_text () {
                string text = ((string) current).ndup (next_tag_offset ());
                next_tag ();
                return text;
        }

        /**
         * Parses one attribute at the cursor, which must be inside a tag.
         *
         * The attribute name ends at '=', whitespace, '>' or the end of the
         * input. An attribute without '=' has no value; in that case the
         * cursor stays on the character that ended the name, so the tag's
         * closing '>' is never skipped here.
         *
         * @param _attr name of the attribute the caller is interested in
         * @param value set to the attribute's value (without surrounding
         *              double quotes) if its name equals _attr, else null
         */
        public void parse_attribute (string _attr, out string value) {
                value = null;
                if (current[0] == 0)
                        return;

                int i = 0;
                while (current[i] != '=' && current[i] != '>' && current[i] != 0
                       && !current[i].isspace ())
                        i++;
                string attr = ((string) current).ndup (i);
                current += i;
                if (current[0] != '=')
                        return;
                current++;

                // Find the end of the value. Whitespace and '>' inside double
                // quotes do not terminate it.
                i = 0;
                while (!current[i].isspace () && current[i] != '>' && current[i] != 0) {
                        if (current[i] == '"') {
                                i++;
                                while (current[i] != '"' && current[i] != 0)
                                        i++;
                                // Unterminated quote: stop on the NUL instead
                                // of stepping over it.
                                if (current[i] == 0)
                                        break;
                        }
                        i++;
                }

                if (attr == _attr) {
                        if (current[0] == '"') {
                                // Drop the opening quote, and the closing one
                                // if the value has it.
                                bool quote_closed = (i >= 2 && current[i - 1] == '"');
                                value = ((string) (current + 1)).ndup (quote_closed ? i - 2 : i - 1);
                        } else {
                                value = ((string) current).ndup (i);
                        }
                }
                current += i;
        }

        /**
         * Moves the cursor past any whitespace.
         */
        public void skip_whitespace () {
                while (current[0].isspace ())
                        current++;
        }

        /**
         * Parses the next tag, checks its name and extracts one attribute.
         *
         * @param tag the expected tag name
         * @param attribute name of the attribute to return
         * @return the attribute's value, or null if the tag does not have it
         * @throws ParserError.WRONG_TAG if a different tag was found
         * @throws ParserError.EOF if the input ends before or inside the tag
         */
        public string? parse_tag_attribute (string tag, string attribute) throws Error {
                bool closed;
                weak string found = scan_tag_name (out closed);
                if (tag != found) {
                        throw new ParserError.WRONG_TAG ("Wrong tag \"%s\", expected \"%s\"",
                                                         found, tag);
                }

                // A tag without attributes has already been consumed together
                // with its '>'; scanning on would eat the following text.
                if (closed)
                        return null;

                string? value = null;
                skip_whitespace ();
                while (current[0] != '>' && current[0] != 0) {
                        // parse_attribute () resets its out parameter on every
                        // call, so collect into a temporary and keep only hits.
                        string? candidate = null;
                        parse_attribute (attribute, out candidate);
                        if (candidate != null)
                                value = candidate;
                        skip_whitespace ();
                }
                // Skip the closing '>' bracket
                if (current[0] != 0)
                        current++;

                return value;
        }

        /**
         * Parses one movie entry and reports it through the callback if its
         * title passes the filter.
         *
         * The cursor must be on the rating link that starts the entry.
         *
         * @throws ParserError if the markup differs from the expected layout
         * @throws ConvertError if the title cannot be converted to UTF-8
         */
        public void parse_movie () throws Error {
                expect_tag ("a"); // rating
                expect_tag ("nobr");
                expect_tag ("nobr");

                // "Rated 0.0 out of 5.0" -> 0.0. The owned string is kept in
                // a local because offset () only returns a pointer into it.
                string? rating_alt = parse_tag_attribute ("img", "alt");
                double rating = 0.0;
                if (rating_alt != null && rating_alt.length > RATING_PREFIX_LENGTH)
                        rating = rating_alt.offset (RATING_PREFIX_LENGTH).to_double ();

                expect_tag ("img");
                expect_tag ("img");
                expect_tag ("img");
                expect_tag ("img");
                expect_tag ("/nobr");
                expect_tag ("/nobr");
                expect_tag ("br");
                expect_tag ("nobr");
                expect_tag ("/nobr");
                expect_tag ("/a");
                expect_tag ("/font");
                expect_tag ("/td");
                expect_tag ("td");
                expect_tag ("font");
                expect_tag ("a"); // <a href="/movies?near=city&mid=hexnumber"> --> link
                expect_tag ("b");
                var title = convert (parse_text ().replace ("&#39;", "'"), -1, "utf-8", "iso-8859-1"); // FIXME
                expect_tag ("/b");
                expect_tag ("/a");
                expect_tag ("br");
                var runtime_and_fsk = parse_text ().replace ("&nbsp;", " ").replace ("&#8206;", "").split (" - ");

                var showtimes = "";
                if (parse_tag () == "br") {
                        showtimes = parse_text ().replace ("&nbsp;", ",");
                        expect_tag ("/font");
                }

                // Filter by glob pattern if the filter title contained a
                // wildcard, by plain prefix otherwise.
                if (pattern == null) {
                        if (!title.has_prefix (_filter.title))
                                return;
                } else {
                        if (!pattern.match ((uint) title.length, title, null))
                                return;
                }

                var movie = new GoogleMovie ();

                movie.title = strip_tags (title);
                movie.year = 0;
                movie.rating = (int) (rating * 10);

                movie.cinema = cinema_name;
                if (runtime_and_fsk.length >= 2) {
                        movie.runtime = runtime_and_fsk[0];
                        movie.fsk = runtime_and_fsk[1];
                }
                movie.showtimes = showtimes;

                // TODO - could be configurable by settings
                if (movie.runtime != null)
                        movie.secondary = "%s - %s - %s".printf (movie.runtime, cinema_name, showtimes);
                else
                        movie.secondary = "%s - %s".printf (cinema_name, showtimes);

                _get_callback (movie);
        }

        /**
         * Returns the title without a trailing " (OmU)" or " (OV)" marker.
         */
        // FIXME - this is specific for Germany
        private string strip_tags (string title) {
                string tag_suffix = " (OmU)"; // original audio with subtitles
                if (title.has_suffix (tag_suffix))
                        return title.substring (0, title.length - tag_suffix.length);
                tag_suffix = " (OV)"; // original audio
                if (title.has_suffix (tag_suffix))
                        return title.substring (0, title.length - tag_suffix.length);
                return title.dup ();
        }

        /**
         * Parses one cinema entry and remembers its name for the movie
         * entries that follow.
         *
         * The cursor must be on the table row that starts the entry.
         *
         * @throws ParserError if the markup differs from the expected layout
         * @throws ConvertError if the name cannot be converted to UTF-8
         */
        public void parse_cinema () throws Error {
                expect_tag ("tr");
                expect_tag ("td");
                expect_tag ("a"); // --> link
                expect_tag ("b");
                string name = convert (parse_text ().replace ("&amp;", "&"), -1, "utf-8", "iso-8859-1"); // FIXME
                expect_tag ("/b");
                expect_tag ("/a");
                expect_tag ("br");
                expect_tag ("font");
                // Consumed to keep the cursor in step; not stored yet.
                parse_text (); // address
                expect_tag ("a"); // --> map
                expect_tag ("/a");
                expect_tag ("/font");
                expect_tag ("/td");
                expect_tag ("/tr");

                cinema_name = name;
                // FIXME - store cinema address for movie detail window
        }

        /**
         * Scans a complete page for cinema and movie entries.
         *
         * @param buf the page contents; must be NUL-terminated and is
         *            modified in place while parsing
         * @throws Error the first parser or conversion error; parsing stops there
         */
        public void parse (ref char[] buf) throws Error {
                current = buf;
                next_tag ();
                while (current[0] != 0) {
                        weak string rest = (string) current;
                        if (rest.has_prefix (MOVIE_MARKER))
                                parse_movie ();
                        else if (rest.has_prefix (CINEMA_MARKER))
                                parse_cinema ();
                        else
                                // Uninteresting tag: skip past its '>' without
                                // running beyond the end of the buffer.
                                finish_tag ();
                        next_tag ();
                }
        }

        /**
         * Downloads the showtimes page and parses it immediately, invoking
         * the callback once per matching movie before the constructor returns.
         *
         * Download and parse errors are printed to stderr and not propagated.
         *
         * @param filter   filter whose title selects the movies to report; a
         *                 title containing '*' is used as a glob pattern,
         *                 otherwise as a prefix
         * @param location value for the "near" query parameter, or null/empty
         *                 to omit it
         * @param callback function that receives each matching movie
         */
        public GoogleParser (MovieFilter filter, string? location, MovieSource.ReceiveMovieFunction callback) {
                _get_callback = callback;
                _filter = filter;
                if (filter.title.chr(filter.title.length, '*') != null) {
                        pattern = new PatternSpec (filter.title);
                } else {
                        pattern = null;
                }
                try {
                        // TODO - use google.de in Germany, also provides genres
                        string uri = "http://google.com/movies";
                        // NOTE(review): location is appended unescaped; confirm
                        // whether callers already URI-escape it.
                        if (location != null && location != "")
                                uri += "?near=" + location;
                        File file = File.new_for_uri (uri);
                        InputStream stream = file.read (null);

                        // Leave room for the terminating NUL: reading the full
                        // buffer length would put buf[nread] one past the end.
                        // A page larger than the buffer is silently truncated.
                        char[] buf = new char[BUFFER_SIZE];
                        size_t nread;
                        stream.read_all (buf, buf.length - 1, out nread, null);

                        buf[nread] = 0;
                        parse (ref buf);
                } catch (Error e) {
                        stderr.printf ("Error: %s\n", e.message);
                }
        }
}