IMDb plugin + downloader: parse IMDb plots
author Philipp Zabel <philipp.zabel@gmail.com>
Tue, 5 Jan 2010 17:47:38 +0000 (18:47 +0100)
committer Philipp Zabel <philipp.zabel@gmail.com>
Sat, 9 Jan 2010 11:21:18 +0000 (12:21 +0100)
Plots take up a lot of space, so parsing them should eventually be made configurable.

src/imdb/imdb-line-parser.vala
src/imdb/imdb-plaintext-downloader.vala
src/imdb/imdb-sqlite.vala

index 1218cbf..7963fda 100644 (file)
@@ -226,3 +226,89 @@ class AkaLineParser : LineParser {
                }
        }
 }
+
+/**
+ * Line parser for the IMDb plot summaries list.
+ *
+ * Once the list header has been passed, the parser reacts to three kinds
+ * of lines: "MV: " names a title, "PL: " carries plot text for that title
+ * and "BY: " names the plot author and ends the plot. A finished plot is
+ * handed to sqlite.add_plot (). After a "BY: " line the parser waits for
+ * the next "MV: " line, so only the first plot following each title line
+ * is stored.
+ */
+class PlotLineParser : LineParser {
+       enum PlotState {
+               // Still inside the preamble, before the list header
+               HEADER,
+               // Waiting for the next "MV: " line
+               NONE,
+               // Collecting "PL: " lines for the current title
+               TITLE
+       }
+       string title;
+       string plot;
+       PlotState state;
+       // Set once the "PLOT SUMMARIES LIST" heading has been seen
+       bool header_seen;
+
+       public PlotLineParser (IMDbSqlite _sqlite) {
+               base (_sqlite);
+               state = PlotState.HEADER;
+               header_seen = false;
+               title = null;
+       }
+
+       /**
+        * Feeds one line of the plot list into the state machine.
+        *
+        * @param line a single line of the list, still in latin1 encoding
+        */
+       public override void parse_line (string line) {
+               if (state == PlotState.HEADER) {
+                       // The data starts after the heading and its underline
+                       if (line == "PLOT SUMMARIES LIST")
+                               header_seen = true;
+                       else if (line == "===================" && header_seen)
+                               state = PlotState.NONE;
+                       return;
+               }
+
+               // Skip empty lines
+               if (line == "")
+                       return;
+
+               // A title line starts a new entry in every state, so that text
+               // of a plot lacking its "BY: " line cannot leak into the next
+               // title's plot.
+               if (line.has_prefix ("MV: ")) {
+                       begin_title (line);
+                       return;
+               }
+
+               if (state != PlotState.TITLE)
+                       return;
+
+               if (line.has_prefix ("PL: ")) {
+                       append_plot_line (line);
+                       return;
+               }
+
+               // BY: tag marks end of plot
+               if (line.has_prefix ("BY: "))
+                       finish_plot (line);
+       }
+
+       // Handles a "MV: " line: starts collecting a plot unless the title is skipped.
+       private void begin_title (string line) {
+               // A plot still being collected never got its "BY: " line; drop it
+               state = PlotState.NONE;
+
+               // Skip series episodes
+               if (line[4] == '"')
+                       return;
+
+               try {
+                       title = convert (line.offset (4), -1, "utf-8", "latin1");
+               } catch (ConvertError e) {
+                       stderr.printf ("Error converting title to UTF-8\n");
+                       title = null;
+                       return;
+               }
+
+               // Checked once here instead of on every "PL: " line.
+               // NOTE(review): assumes skip_title () is a pure predicate - confirm.
+               if (skip_title (title))
+                       return;
+
+               plot = "";
+               state = PlotState.TITLE;
+       }
+
+       // Handles a "PL: " line: appends its text to the plot, separated by a space.
+       private void append_plot_line (string line) {
+               try {
+                       string text = convert (line.offset (4), -1, "utf-8", "latin1");
+                       if (plot != "")
+                               plot += " ";
+                       plot += text;
+               } catch (ConvertError e) {
+                       stderr.printf ("Error converting plot for \"%s\" to UTF-8\n", title);
+                       // Give up on this plot instead of storing a truncated one
+                       plot = "";
+                       state = PlotState.NONE;
+               }
+       }
+
+       // Handles a "BY: " line: stores the collected plot together with its author.
+       private void finish_plot (string line) {
+               string? author;
+               try {
+                       author = convert (line.offset (4), -1, "utf-8", "latin1");
+               } catch (ConvertError e) {
+                       stderr.printf ("Error converting plot author for \"%s\" to UTF-8\n", title);
+                       // The plot is still stored, just without an author
+                       author = null;
+               }
+
+               sqlite.add_plot (title, plot, author);
+
+               state = PlotState.NONE;
+       }
+}
index a978527..a7c28e7 100644 (file)
@@ -73,6 +73,7 @@ class IMDbDownloadServer : Object, IMDbDownloader {
                        var genre_parser = new GenreLineParser (sqlite);
                        var rating_parser = new RatingLineParser (sqlite);
                        var aka_parser = new AkaLineParser (sqlite);
+                       var plot_parser = new PlotLineParser (sqlite);
 
                        var downloader = new IMDbFtpDownloader (cancellable);
                        downloader.progress_changed.connect (on_progress_changed);
@@ -81,21 +82,27 @@ class IMDbDownloadServer : Object, IMDbDownloader {
                                description_changed ("Downloading movie list ...");
                                downloader.download (url + "movies.list.gz", movie_parser);
                        }
-                       percent_finished = 25;
+                       percent_finished = 20;
                        if (GENRES in flags) {
                                description_changed ("Downloading genre data ...");
                                downloader.download (url + "genres.list.gz", genre_parser);
                        }
-                       percent_finished = 50;
+                       percent_finished = 40;
                        if (RATINGS in flags) {
                                description_changed ("Downloading rating data ...");
                                downloader.download (url + "ratings.list.gz", rating_parser);
                        }
-                       percent_finished = 75;
+                       percent_finished = 60;
                        if (AKAS in flags) {
                                description_changed ("Downloading alternative titles ...");
                                downloader.download (url + "aka-titles.list.gz", aka_parser);
                        }
+                       percent_finished = 80;
+                       if (PLOTS in flags) {
+                               description_changed ("Downloading plots ...");
+                               print ("Downloading Plots");
+                               downloader.download (url + "plot.list.gz", plot_parser);
+                       }
                } catch (Error e2) {
                        if (e2 is IOError.CANCELLED)
                                stdout.printf ("Download cancelled.\n");
@@ -123,7 +130,7 @@ class IMDbDownloadServer : Object, IMDbDownloader {
        }
 
        private void on_progress_changed (int percent) {
-               progress (percent_finished + percent / 4);
+               progress (percent_finished + percent / 5);
        }
 
        private void timeout_quit () {
index 44b8a98..7371725 100644 (file)
@@ -143,6 +143,29 @@ class IMDbSqlite : Object {
                return 0;
        }
 
+       /**
+        * Stores the plot summary of a movie in the Plots table.
+        *
+        * The row is inserted with the rowid that movie_exists () reports
+        * for the title. Values are bound as statement parameters instead
+        * of being pasted into the SQL text.
+        *
+        * @param title movie title used to look up the rowid
+        * @param plot plot text
+        * @param author plot author, or null to leave the Author column NULL
+        * @return 0 on success, 1 if the title is not found or the insert fails
+        */
+       public int add_plot (string title, string plot, string? author) {
+               int rowid;
+
+               if (!movie_exists (title, out rowid))
+                       return 1;
+
+               string sql = "INSERT INTO Plots(rowid, Plot, Author) VALUES (?, ?, ?);";
+               Statement stmt;
+               int rc = db.prepare_v2 (sql, -1, out stmt);
+               if (rc == Sqlite.OK) {
+                       stmt.bind_int (1, rowid);
+                       // NOTE(review): the &quot; substitution is kept so that stored
+                       // rows look the same as before; with bound parameters it is no
+                       // longer needed for quoting. Drop it once readers are checked.
+                       stmt.bind_text (2, plot.replace ("\"", "&quot;"));
+                       if (author != null)
+                               stmt.bind_text (3, author.replace ("\"", "&quot;"));
+                       else
+                               stmt.bind_null (3);
+
+                       // A successful INSERT finishes with DONE
+                       rc = stmt.step ();
+                       if (rc == Sqlite.DONE)
+                               return 0;
+               }
+
+               stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());
+               stderr.printf ("offending SQL: %s\n", sql);
+               return 1;
+       }
+
        public bool movie_exists (string title, out int rowid = null) {
                string sql = "SELECT rowid FROM Movies WHERE Title=\"%s\"".printf (title);
                Statement stmt;
@@ -176,7 +199,9 @@ class IMDbSqlite : Object {
                        "DROP TABLE IF EXISTS Genres;" +
                        "CREATE TABLE Genres (Bit INTEGER PRIMARY KEY, Genre TEXT NOT NULL);" +
                        "DROP TABLE IF EXISTS Akas;" +
-                       "CREATE TABLE Akas (Aka TEXT NOT NULL COLLATE NOCASE, TitleID INTEGER NOT NULL);",
+                       "CREATE TABLE Akas (Aka TEXT NOT NULL COLLATE NOCASE, TitleID INTEGER NOT NULL);" +
+                       "DROP TABLE IF EXISTS Plots;" +
+                       "CREATE TABLE Plots (Plot TEXT NOT NULL, Author TEXT)",
                        callback, null);
                if (rc != Sqlite.OK) {
                        stderr.printf ("SQL error: %d, %s\n", rc, db.errmsg ());