IMDb plugin + downloader: parse IMDb plots
[cinaest] / src / imdb / imdb-plaintext-downloader.vala
index 89170d9..a7c28e7 100644 (file)
@@ -3,15 +3,17 @@ using GLib;
 class IMDbDownloadServer : Object, IMDbDownloader {
        MainLoop loop;
        Cancellable cancellable;
-       int64 sofar;
-       int64 total;
        bool running;
+       uint source_id;
        unowned IMDbSqlite sqlite;
        string[] mirrors = {
                "ftp.fu-berlin.de/pub/misc/movies/database/",
                "ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/",
                "ftp.sunet.se/pub/tv+movies/imdb/"
        };
+       string url;
+       int flags;
+       int percent_finished;
 
        delegate void ParseLineFunction (string line);
 
@@ -22,16 +24,26 @@ class IMDbDownloadServer : Object, IMDbDownloader {
 
        // IMDbDownloader implementation
 
-       public void download (string mirror, int flags) throws DBus.Error {
+       public void download (string mirror, int _flags) throws DBus.Error {  // start fetching the lists selected by _flags from mirror; ignored while a download runs
                if (running) {
-                       message ("Download in progress. Abort.\n");
+                       stdout.printf ("Download in progress. Abort.\n");
                        return;
                }
                running = true;
-               message ("Download started (%x).", flags);
+               if (source_id != 0) {  // a delayed quit scheduled by timeout_quit () is still pending
+                       Source.remove (source_id);
+                       source_id = 0;  // the id is dead once removed; never hand it to Source.remove again
+               }
+               stdout.printf ("Download started (%x).\n", _flags);  // log the requested flags; the flags field still holds the previous request here
                progress (0);
-               download_imdb_async.begin ("ftp://anonymous@" + mirror, flags, Priority.DEFAULT_IDLE);
-               message ("Download finished.");
+               url = "ftp://anonymous@" + mirror;
+               flags = _flags;  // url and flags are read by download_thread ()
+               try {
+                       Thread.create(download_thread, false);
+               } catch (ThreadError e) {
+                       critical ("Failed to create download thread\n");
+                       running = false;  // no thread was started, so do not block later requests
+               }
        }
 
        public void cancel () throws DBus.Error {
@@ -44,54 +56,10 @@ class IMDbDownloadServer : Object, IMDbDownloader {
 
        // Private methods
 
-       async void download_imdb_async (string mirror, int flags, int io_priority) {
-               Mount m;
-               File movies = File.new_for_uri (mirror + "/movies.list.gz");
-               File genres = File.new_for_uri (mirror + "/genres.list.gz");
-               File ratings = File.new_for_uri (mirror + "/ratings.list.gz");
-
+       private void* download_thread () {
                description_changed ("Connecting to FTP ...");
                progress (0);
-
-               try {
-                       m = yield movies.find_enclosing_mount_async(io_priority, cancellable);
-               } catch (Error e0) {
-                       try {
-                               bool mounted = yield movies.mount_enclosing_volume (MountMountFlags.NONE, null, cancellable);
-                               if (mounted) {
-                                       m = yield movies.find_enclosing_mount_async(io_priority, cancellable);
-                               } else {
-                                       running = false;
-                                       return;
-                               }
-                       } catch (Error e1) {
-                               critical ("Failed to mount: %s\n", e1.message);
-                               running = false;
-                               return;
-                       }
-               }
-               stdout.printf ("Mounted: %s\n", m.get_name ());
-
-               description_changed ("Querying file sizes ...");
-               try {
-                       FileInfo info;
-
-                       if (MOVIES in flags) {
-                               info = yield movies.query_info_async ("", FileQueryInfoFlags.NONE, io_priority, cancellable);
-                               total += info.get_size ();
-                       }
-                       if (GENRES in flags) {
-                               info = yield genres.query_info_async ("", FileQueryInfoFlags.NONE, io_priority, cancellable);
-                               total += info.get_size ();
-                       }
-                       if (RATINGS in flags) {
-                               info = yield ratings.query_info_async ("", FileQueryInfoFlags.NONE, io_priority, cancellable);
-                               total += info.get_size ();
-                       }
-               } catch (Error e3) {
-                       warning ("Failed to get size: %s\n", e3.message);
-                       total = 0;
-               }
+               percent_finished = 0;
 
                var cache_dir = Path.build_filename (Environment.get_user_cache_dir (), "cinaest");
                DirUtils.create_with_parents (cache_dir, 0770);
@@ -104,81 +72,85 @@ class IMDbDownloadServer : Object, IMDbDownloader {
                        var movie_parser = new MovieLineParser (sqlite);
                        var genre_parser = new GenreLineParser (sqlite);
                        var rating_parser = new RatingLineParser (sqlite);
-                       sofar = 0;
+                       var aka_parser = new AkaLineParser (sqlite);
+                       var plot_parser = new PlotLineParser (sqlite);
+
+                       var downloader = new IMDbFtpDownloader (cancellable);
+                       downloader.progress_changed.connect (on_progress_changed);
 
                        if (MOVIES in flags) {
                                description_changed ("Downloading movie list ...");
-                               yield download_async(movies, movie_parser, io_priority);
+                               downloader.download (url + "movies.list.gz", movie_parser);
                        }
+                       percent_finished = 20;
                        if (GENRES in flags) {
                                description_changed ("Downloading genre data ...");
-                               yield download_async(genres, genre_parser, io_priority);
+                               downloader.download (url + "genres.list.gz", genre_parser);
                        }
+                       percent_finished = 40;
                        if (RATINGS in flags) {
                                description_changed ("Downloading rating data ...");
-                               yield download_async(ratings, rating_parser, io_priority);
+                               downloader.download (url + "ratings.list.gz", rating_parser);
+                       }
+                       percent_finished = 60;
+                       if (AKAS in flags) {
+                               description_changed ("Downloading alternative titles ...");
+                               downloader.download (url + "aka-titles.list.gz", aka_parser);
+                       }
+                       percent_finished = 80;
+                       if (PLOTS in flags) {
+                               description_changed ("Downloading plots ...");
+                               print ("Downloading Plots");
+                               downloader.download (url + "plot.list.gz", plot_parser);
                        }
                } catch (Error e2) {
                        if (e2 is IOError.CANCELLED)
-                               message ("Download cancelled.\n");
+                               stdout.printf ("Download cancelled.\n");
                        else
                                warning ("Failed to open/read stream: %s\n", e2.message);
                }
 
+               description_changed ("Creating indices ...");
+               if (AKAS in flags)
+                       sqlite.create_aka_index ();
+               if (MOVIES in flags)
+                       sqlite.create_votes_index ();
+
                if (!cancellable.is_cancelled ()) {
                        stdout.printf ("Download complete.\n");
                        progress (100);
                }
 
-               try {
-                       bool unmounted = yield m.unmount(MountUnmountFlags.NONE, null);
-                       if (!unmounted) {
-                               warning ("Failed to unmount.\n");
-                       }
-               } catch (Error e4) {
-                       warning ("Failed to unmount: %s\n", e4.message);
-               }
-
                sqlite = null;
                running = false;
 
-               // FIXME - use a timeout?
-               loop.quit ();
-       }
+               timeout_quit ();
 
-       async void download_async (File f, LineParser parser, int io_priority) throws Error {
-               int percent = 0;
-               unowned string line = null;
-
-               var stream = new GzipInputStream (yield f.read_async (io_priority, cancellable));
-               var data = new DataInputStream(stream);
-
-               do {
-                       size_t l;
+               return null;
+       }
 
-                       line = yield data.read_line_async (io_priority, cancellable, out l);
-                       if (line != null)
-                               parser.parse_line (line);
+       private void on_progress_changed (int percent) {  // handler for the downloader's progress_changed signal
+               progress (percent / 5 + percent_finished);  // scale the per-file percentage to a fifth and add the offset of the finished steps
+       }
 
-                       if (total == 0)
-                               continue;
-                       int p = (int) (100 * (sofar + stream.total_in ()) / total);
-                       if (p > percent) {
-                               percent = p;
-                               progress (p);
-                       }
-               } while (line != null);
+       private void timeout_quit () {  // schedule quit () to stop the main loop after a grace period
+               source_id = Timeout.add_seconds (3, quit);  // coarse seconds-based timer; id is kept so download () can cancel the pending quit
+       }
 
-               sofar += stream.total_in ();
+       private bool quit () {  // timeout callback installed by timeout_quit (): stops the main loop
+               loop.quit ();
 
-               yield stream.close_async (io_priority, cancellable);
-       }
+               source_id = 0;  // One-shot only: the source goes away when we return false, so forget its id
+               return false;
+       }
 
        public void run () {
                loop.run ();
        }
 
        public static void main () {
+               Curl.global_init (Curl.GLOBAL_DEFAULT);
+
                try {
                        var conn = DBus.Bus.get (DBus.BusType.SESSION);
                        dynamic DBus.Object bus = conn.get_object ("org.freedesktop.DBus",
@@ -200,140 +172,7 @@ class IMDbDownloadServer : Object, IMDbDownloader {
                } catch (Error e) {
                        critical ("Oops: %s\n", e.message);
                }
-       }
-}
-
-abstract class LineParser {
-        internal unowned IMDbSqlite sqlite;
-
-       public LineParser (IMDbSqlite _sqlite) {
-               sqlite = _sqlite;
-       }
-
-       public abstract void parse_line (string line);
-
-       internal bool skip_title (string title) {
-               if (title.has_suffix ("(TV)")) {
-                       return true;
-               }
-               if (title.has_suffix ("(V)")) {
-                       return true;
-               }
-               if (title.has_suffix ("(VG)")) {
-                       return true;
-               }
-               return false;
-       }
-}
-
-class MovieLineParser : LineParser {
-       Regex re_movie;
-
-       public MovieLineParser (IMDbSqlite _sqlite) {
-               base (_sqlite);
-               try {
-                       re_movie = new Regex ("^([^\t]+)\t+([0-9]+)$");
-               } catch (RegexError e) {
-                       critical ("Failed to initialize regex: %s\n", e.message);
-               }
-       }
-
-       public override void parse_line (string line) {
-               MatchInfo matchinfo;
-
-               // Skip series episodes
-               if (line[0] == '"')
-                       return;
-
-               if (!re_movie.match(line, 0, out matchinfo))
-                       return;
-
-               string title;
-               string year = matchinfo.fetch (2);
-               try {
-                       title = convert(matchinfo.fetch (1), -1, "utf-8", "latin1");
-               } catch (ConvertError e) {
-                       return;
-               }
-
-               if (skip_title (title))
-                       return;
-
-               sqlite.add_movie (title, year.to_int ());
-       }
-}
-
-class GenreLineParser : LineParser {
-       Regex re_genre;
-
-       public GenreLineParser (IMDbSqlite _sqlite) {
-               base (_sqlite);
-               try {
-                       re_genre = new Regex ("^([^\t]+)\t+([A-Za-z-]+)$");
-               } catch (RegexError e) {
-                       critical ("Failed to initialize regex: %s\n", e.message);
-               }
-       }
-
-       public override void parse_line (string line) {
-               MatchInfo matchinfo;
-
-               // Skip series episodes
-               if (line[0] == '"')
-                       return;
-
-               if (!re_genre.match(line, 0, out matchinfo))
-                       return;
-
-               string title;
-               string genre = matchinfo.fetch (2);
-               try {
-                       title = convert(matchinfo.fetch (1), -1, "utf-8", "latin1");
-               } catch (ConvertError e) {
-                       return;
-               }
-
-               sqlite.movie_add_genre (title, genre);
-       }
-}
-
-class RatingLineParser : LineParser {
-       Regex re_rating;
-
-       public RatingLineParser (IMDbSqlite _sqlite) {
-               base (_sqlite);
-               try {
-                       re_rating = new Regex ("^      .+ +[0-9]+ +([0-9.]+) +(.+)$");
-               } catch (RegexError e) {
-                       critical ("Failed to initialize regex: %s\n", e.message);
-               }
-       }
-
-       public override void parse_line (string line) {
-               MatchInfo matchinfo;
-
-               // Skip series episodes
-               if (line[0] == '"')
-                       return;
-
-               if (!re_rating.match(line, 0, out matchinfo))
-                       return;
-
-               string title;
-               string rating = matchinfo.fetch (1);
-               try {
-                       title = convert(matchinfo.fetch (2), -1, "utf-8", "latin1");
-               } catch (ConvertError e) {
-                       return;
-               }
-
-               // Skip series episodes
-               if (title[0] == '"')
-                       return;
-
-               if (skip_title (title))
-                       return;
 
-               sqlite.movie_set_rating (title, (int) (rating.to_double () * 10));
+               Curl.global_cleanup ();
        }
 }