From 245fa700f373b8a71539e532bf88941a9da38abc Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 12 Nov 2009 11:41:49 +0100 Subject: [PATCH] Google plugin: fix parser for changed HTML layout --- src/plugins/google-parser.vala | 121 +++++++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 50 deletions(-) diff --git a/src/plugins/google-parser.vala b/src/plugins/google-parser.vala index b9dc1d5..5c36ceb 100644 --- a/src/plugins/google-parser.vala +++ b/src/plugins/google-parser.vala @@ -141,39 +141,34 @@ public class GoogleParser : Object { } public void parse_movie () throws Error { - expect_tag ("a"); // rating - expect_tag ("nobr"); - expect_tag ("nobr"); - weak string rating_string = parse_tag_attribute ("img", "alt").offset (6); // "Rated " ->"0.0 out of 5.0" - double rating = rating_string.to_double (); - - expect_tag ("img"); - expect_tag ("img"); - expect_tag ("img"); - expect_tag ("img"); - expect_tag ("/nobr"); - expect_tag ("/nobr"); - expect_tag ("br"); - expect_tag ("nobr"); - expect_tag ("/nobr"); - expect_tag ("/a"); - expect_tag ("/font"); - expect_tag ("/td"); - expect_tag ("td"); - expect_tag ("font"); - expect_tag ("a"); // --> link - expect_tag ("b"); - var title = convert (parse_text ().replace ("'", "'"), -1, "utf-8", "iso-8859-1"); // FIXME - expect_tag ("/b"); + expect_tag ("div"); // class=movie + expect_tag ("div"); // class=name + expect_tag ("a"); // href="/movies?near=city&mid=..." + expect_tag ("span"); // dir=ltr + var title = convert (parse_text ().replace ("'", "'").replace ("&", "&"), -1, "utf-8", "iso-8859-1"); // FIXME + expect_tag ("/span"); expect_tag ("/a"); - expect_tag ("br"); - var runtime_and_fsk = parse_text ().replace (" ", " ").replace ("‎", "").split (" - "); - - var showtimes = ""; - if (parse_tag () == "br") { - showtimes = parse_text ().replace (" ", ","); - expect_tag ("/font"); + expect_tag ("/div"); + expect_tag ("span"); // class=info + string[] runtime_and_fsk = {}; + double rating = 0.0; + if (parse_tag () == "nobr") { + expect_tag ("nobr"); + weak string rating_string = parse_tag_attribute ("img", "alt").offset (6); // "Rated " ->"0.0 out of 5.0" + rating = rating_string.to_double (); + expect_tag ("img"); + expect_tag ("img"); + expect_tag ("img"); + expect_tag ("img"); + expect_tag ("/nobr"); + expect_tag ("/nobr"); + runtime_and_fsk = parse_text ().replace ("‎", "").offset (3).split (" - "); + expect_tag ("/span"); } + expect_tag ("div"); // class=times + var showtimes = parse_text ().replace (" ", ","); + expect_tag ("/div"); + expect_tag ("/div"); if (pattern == null) { if (!title.has_prefix (_filter.title)) @@ -217,21 +212,25 @@ public class GoogleParser : Object { } public void parse_cinema () throws Error { - expect_tag ("tr"); - expect_tag ("td"); - expect_tag ("a"); // --> link - expect_tag ("b"); + expect_tag ("div"); // class=theater + expect_tag ("div"); // class=desc id=theater_... + expect_tag ("h2"); // class=name + expect_tag ("a"); // href="/movies?near=city&tid=..." + expect_tag ("span"); // dir=ltr string name = convert (parse_text ().replace ("&", "&"), -1, "utf-8", "iso-8859-1"); // FIXME - expect_tag ("/b"); + expect_tag ("/span"); expect_tag ("/a"); - expect_tag ("br"); - expect_tag ("font"); - string address = parse_text ().replace (" ", " "); - expect_tag ("a"); // --> map + expect_tag ("/h2"); + expect_tag ("div"); // class=info + var address_and_phone = parse_text ().replace (" ", " ").split (" - "); + if (address_and_phone.length >= 2) { + // string address = address_and_phone[0]; + // string phone = address_and_phone[1]; + } + expect_tag ("a"); // target=_top expect_tag ("/a"); - expect_tag ("/font"); - expect_tag ("/td"); - expect_tag ("/tr"); + expect_tag ("/div"); + expect_tag ("/div"); cinema_name = name; // FIXME - store cinema address for movie detail window @@ -240,15 +239,27 @@ public class GoogleParser : Object { public void parse (ref char[] buf) throws Error { current = buf; next_tag (); + + { + int i = 1; + while (current[i++] != '>'); + if (((string) current).has_prefix ("")) { parse_movie (); - else if (((string) current).has_prefix("")) { parse_cinema (); - else + } else { current += i; + } next_tag (); } } @@ -266,14 +277,24 @@ public class GoogleParser : Object { string uri = "http://google.com/movies"; if (location != null && location != "") uri += "?near=" + location; + + stdout.printf ("GET: %s\n", uri); + File file = File.new_for_uri (uri); - InputStream stream = file.read (null); + InputStream stream = yield file.read_async (Priority.DEFAULT_IDLE, null); char[] buf = new char[256*1024]; size_t nread; - bool ok = stream.read_all (buf, buf.length, out nread, cancellable); - - buf[nread] = 0; + size_t total = 0; + while (total < 256*1024) { + nread = yield stream.read_async ((char *)buf + total, 256*1024 - total, Priority.DEFAULT_IDLE, cancellable); + total += nread; + if (cancellable.is_cancelled ()) + return; + if (nread == 0) + break; + } + buf[total] = 0; parse (ref buf); } catch (Error e) { stderr.printf ("Error: %s\n", e.message); -- 1.7.9.5