IMDb plugin + downloader: parse IMDb alternative titles and use to match
authorPhilipp Zabel <philipp.zabel@gmail.com>
Sun, 3 Jan 2010 16:05:07 +0000 (17:05 +0100)
committerPhilipp Zabel <philipp.zabel@gmail.com>
Wed, 6 Jan 2010 17:36:56 +0000 (18:36 +0100)
Using the international alternative titles to match the title filter makes
finding foreign films much easier (or in some cases possible at all).

If the movie is returned via a match of the alternative title, display it
in the user interface as secondary text.

src/imdb/imdb-line-parser.vala
src/imdb/imdb-plaintext-downloader.vala
src/imdb/imdb-sqlite.vala
src/imdb/plaintext-downloader-interface.vala
src/plugins/imdb-download-dialog.vala
src/plugins/imdb-plugin.vala

index fd3875a..25d1658 100644 (file)
@@ -133,3 +133,81 @@ class RatingLineParser : LineParser {
                sqlite.movie_set_rating (title, (int) (rating.to_double () * 10), votes.to_int ());
        }
 }
+
+class AkaLineParser : LineParser {
+       enum AkaState {
+               HEADER,
+               NONE,
+               TITLE
+       }
+       AkaState state;
+       string title;
+
+       public AkaLineParser (IMDbSqlite _sqlite) {
+               base (_sqlite);
+               state = AkaState.HEADER;
+               title = null;
+       }
+
+       public override void parse_line (string line) {
+               if (state == AkaState.HEADER) {
+                       if (line == "AKA TITLES LIST") title = line;
+                       if (line == "===============" && title != null)
+                               state = AkaState.NONE;
+                       return;
+               }
+
+               if (state == AkaState.NONE) {
+                       // Skip empty lines
+                       if (line == "")
+                               return;
+
+                       // Skip series episodes
+                       if (line[0] == '"')
+                               return;
+
+                       // Parse error
+                       if (line[0] == ' ')
+                               return;
+
+                       try {
+                               title = convert (line, -1, "utf-8", "latin1");
+                       } catch (ConvertError e) {
+                               title = null;
+                               return;
+                       }
+
+                       if (skip_title (title))
+                               return;
+
+                       state = AkaState.TITLE;
+               }
+
+               if (state == AkaState.TITLE) {
+                       // Empty lines mark end of title
+                       if (line == "") {
+                               state = AkaState.NONE;
+                               return;
+                       }
+
+                       if (line.has_prefix ("   (aka ")) {
+                               if (skip_title (title))
+                                       return;
+
+                               char* start = line.offset (8);
+                               char* end = ((string) start).str ("))");
+                               if (end != null)
+                                       end[1] = '\0';
+
+                               string aka;
+                               try {
+                                       aka = convert ((string) start, -1, "utf-8", "latin1");
+                               } catch (ConvertError e) {
+                                       return;
+                               }
+
+                               sqlite.add_aka (title, aka);
+                       }
+               }
+       }
+}
index 821c9a2..a978527 100644 (file)
@@ -72,6 +72,7 @@ class IMDbDownloadServer : Object, IMDbDownloader {
                        var movie_parser = new MovieLineParser (sqlite);
                        var genre_parser = new GenreLineParser (sqlite);
                        var rating_parser = new RatingLineParser (sqlite);
+                       var aka_parser = new AkaLineParser (sqlite);
 
                        var downloader = new IMDbFtpDownloader (cancellable);
                        downloader.progress_changed.connect (on_progress_changed);
@@ -80,16 +81,21 @@ class IMDbDownloadServer : Object, IMDbDownloader {
                                description_changed ("Downloading movie list ...");
                                downloader.download (url + "movies.list.gz", movie_parser);
                        }
-                       percent_finished = 33;
+                       percent_finished = 25;
                        if (GENRES in flags) {
                                description_changed ("Downloading genre data ...");
                                downloader.download (url + "genres.list.gz", genre_parser);
                        }
-                       percent_finished = 66;
+                       percent_finished = 50;
                        if (RATINGS in flags) {
                                description_changed ("Downloading rating data ...");
                                downloader.download (url + "ratings.list.gz", rating_parser);
                        }
+                       percent_finished = 75;
+                       if (AKAS in flags) {
+                               description_changed ("Downloading alternative titles ...");
+                               downloader.download (url + "aka-titles.list.gz", aka_parser);
+                       }
                } catch (Error e2) {
                        if (e2 is IOError.CANCELLED)
                                stdout.printf ("Download cancelled.\n");
@@ -98,6 +104,8 @@ class IMDbDownloadServer : Object, IMDbDownloader {
                }
 
                description_changed ("Creating indices ...");
+               if (AKAS in flags)
+                       sqlite.create_aka_index ();
                if (MOVIES in flags)
                        sqlite.create_votes_index ();
 
@@ -115,7 +123,7 @@ class IMDbDownloadServer : Object, IMDbDownloader {
        }
 
        private void on_progress_changed (int percent) {
-               progress (percent_finished + percent / 3);
+               progress (percent_finished + percent / 4);
        }
 
        private void timeout_quit () {
index d26564b..ee655d5 100644 (file)
@@ -22,7 +22,7 @@ class IMDbSqlite : Object {
        Database db;
        List<string> genres;
 
-       public delegate void ReceiveMovieFunction (string title, int year, int rating, int genres);
+       public delegate void ReceiveMovieFunction (string title, string? aka, int year, int rating, int genres);
 
        public IMDbSqlite (string filename) {
                int rc;
@@ -126,6 +126,48 @@ class IMDbSqlite : Object {
                return 0;
        }
 
+       public int add_aka (string title, string aka) {
+               int rowid;
+
+               if (!movie_exists (title, out rowid))
+                       return 1;
+
+               string sql = "INSERT INTO Akas(Aka, TitleID) VALUES (\"%s\", %d);".printf (aka, rowid);
+               int rc;
+               rc = db.exec (sql, callback, null);
+               if (rc != Sqlite.OK) {
+                       stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+                       return 1;
+               }
+
+               return 0;
+       }
+
+       public bool movie_exists (string title, out int rowid = null) {
+               string sql = "SELECT rowid FROM Movies WHERE Title=\"%s\"".printf (title);
+               Statement stmt;
+               int rc;
+               int count = 0;
+
+               rc = db.prepare_v2 (sql, -1, out stmt);
+               if (rc != Sqlite.OK) {
+                       stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+                       return false;
+               }
+
+               do {
+                       rc = stmt.step ();
+                       if (rc == Sqlite.ROW) {
+                               if (&rowid != null) {
+                                       rowid = stmt.column_int (0);
+                               }
+                               return true;
+                       }
+               } while (rc == Sqlite.ROW);
+
+               return false;
+       }
+
        public int clear () {
                int rc;
 
@@ -133,7 +175,9 @@ class IMDbSqlite : Object {
                        "DROP TABLE IF EXISTS Movies;" +
                        "CREATE TABLE Movies (Title TEXT PRIMARY KEY COLLATE NOCASE, Year INTEGER, Rating INTEGER, Votes INTEGER NOT NULL DEFAULT 0, Genres INTEGER NOT NULL DEFAULT 0);" +
                        "DROP TABLE IF EXISTS Genres;" +
-                       "CREATE TABLE Genres (Bit INTEGER PRIMARY KEY, Genre TEXT NOT NULL);",
+                       "CREATE TABLE Genres (Bit INTEGER PRIMARY KEY, Genre TEXT NOT NULL);" +
+                       "DROP TABLE IF EXISTS Akas;" +
+                       "CREATE TABLE Akas (Aka TEXT NOT NULL COLLATE NOCASE, TitleID INTEGER NOT NULL);",
                        callback, null);
                if (rc != Sqlite.OK) {
                        stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
@@ -143,6 +187,18 @@ class IMDbSqlite : Object {
                return 0;
        }
 
+       public int create_aka_index () {
+               int rc;
+
+               rc = db.exec ("CREATE INDEX AkasAka ON Akas(Aka);", callback, null);
+               if (rc != Sqlite.OK) {
+                       stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+                       return 1;
+               }
+
+               return 0;
+       }
+
        public int create_votes_index () {
                int rc;
 
@@ -159,6 +215,7 @@ class IMDbSqlite : Object {
        public async int query (MovieFilter filter, ReceiveMovieFunction receive_movie, int limit, Cancellable? cancellable) {
                var sql = "SELECT Title, Year, Rating, Genres FROM Movies";
                var sep = " WHERE ";
+               string match = null;
                Statement stmt;
                int rc;
 
@@ -167,10 +224,12 @@ class IMDbSqlite : Object {
                db.progress_handler (1000, progress_handler);
 
                if (filter.title != null && filter.title != "") {
-                       if ("*" in filter.title)
-                               sql += sep + "Title GLOB \"%s (*)\"".printf (filter.title);
-                       else
-                               sql += sep + "Title LIKE \"%s%%\"".printf (filter.title);
+                       if ("*" in filter.title) {
+                               match = "GLOB \"%s (*)\"".printf (filter.title);
+                       } else {
+                               match = "LIKE \"%s%%\"".printf (filter.title);
+                       }
+                       sql += sep + "(Title %s OR rowid IN (SELECT TitleID FROM Akas WHERE Aka %s))".printf (match, match);
                        sep = " AND ";
                }
                if (filter.year_min > 0) {
@@ -208,7 +267,13 @@ class IMDbSqlite : Object {
                                string title = stmt.column_text (0);
                                int rating = stmt.column_int (2);
                                int genres = stmt.column_int (3);
-                               receive_movie (strip_year (title, year), year, rating, genres);
+                               string aka = null;
+                               if (match != null && !(filter.matches_title (strip_year (title, year)))) {
+                                       aka = movie_aka (title, match);
+                                       if (aka != null)
+                                               aka = strip_year (aka, year);
+                               }
+                               receive_movie (strip_year (title, year), aka, year, rating, genres);
                        }
                } while (rc == Sqlite.ROW);
 
@@ -216,6 +281,28 @@ class IMDbSqlite : Object {
                return 0;
        }
 
+       private string movie_aka (string title, string match) {
+               string sql = "SELECT Aka FROM Akas WHERE (TitleID = (SELECT rowid FROM Movies WHERE Title = \"%s\") AND Aka %s) LIMIT 1;".printf (title, match);
+               Statement stmt;
+               int rc;
+               string aka = null;
+
+               rc = db.prepare_v2 (sql, -1, out stmt);
+               if (rc != Sqlite.OK) {
+                       stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+                       return null;
+               }
+
+               do {
+                       rc = stmt.step ();
+                       if (rc == Sqlite.ROW) {
+                               aka = stmt.column_text (0);
+                       }
+               } while (rc == Sqlite.ROW);
+
+               return aka;
+       }
+
        private int progress_handler () {
                ((MainContext) null).iteration (false);
                return (int) _cancellable.is_cancelled ();
index 0aa840a..af7eae3 100644 (file)
@@ -8,7 +8,8 @@ public interface IMDbDownloader {
        public const int MOVIES = 0x1;
        public const int GENRES = 0x2;
        public const int RATINGS = 0x4;
-       public const int PLOTS = 0x8;
+       public const int AKAS = 0x4;
+       public const int PLOTS = 0x10;
        public const int SERIES = 0x100;
        public const int TV = 0x200;
        public const int VIDEO = 0x400;
index ba52c8d..701195c 100644 (file)
@@ -35,7 +35,7 @@ class IMDbDownloadDialog : Note {
                        server.Progress += this.on_progress;
                        server.DescriptionChanged += this.on_description_changed;
 
-                       server.download (mirror, IMDbDownloader.MOVIES | IMDbDownloader.GENRES | IMDbDownloader.RATINGS);
+                       server.download (mirror, IMDbDownloader.MOVIES | IMDbDownloader.GENRES | IMDbDownloader.RATINGS | IMDbDownloader.AKAS);
                } catch (DBus.Error e) {
                        warning ("Failed to invoke IMDb downloader: %s", e.message);
                }
index d03a0eb..b8b33ea 100644 (file)
@@ -174,14 +174,18 @@ class IMDBSource : MovieSource {
                yield sqlite.query (filter, receive_movie, limit, cancellable);
        }
 
-       private void receive_movie (string title, int year, int rating, int genres) {
+       private void receive_movie (string title, string? aka, int year, int rating, int genres) {
                Movie movie = new Movie ();
                movie.title = title;
                movie.year = year;
                movie.rating = rating;
                movie.genres.field = genres;
                // TODO - depending on settings, this could be something else, like director info or runtime
-               movie.secondary = movie.genres.to_string ();
+               if (aka != null) {
+                       movie.secondary = "aka \"%s\" - %s".printf (aka, movie.genres.to_string ());
+               } else {
+                       movie.secondary = movie.genres.to_string ();
+               }
                _get_callback (movie);
        }