From: Philipp Zabel
Date: Fri, 30 Oct 2009 20:49:24 +0000 (+0100)
Subject: Add IMDb plaintext downloader D-Bus service
X-Git-Tag: v0.0.1~18
X-Git-Url: http://git.maemo.org/git/?p=cinaest;a=commitdiff_plain;h=ea0c7a7da70c0e622ae52c391b37a5fc965a6ad4

Add IMDb plaintext downloader D-Bus service

Downloads and parses IMDb plaintext files into the
$HOME/.cache/cinaest/imdb.db sqlite3 database.
---

diff --git a/Makefile b/Makefile
index 5dce26d..3fa5938 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-all: cinaest libimdb-plugin.so
+all: cinaest imdb-plaintext-downloader libimdb-plugin.so
 
 cinaest_SOURCES = \
 	src/main.vala \
@@ -32,7 +32,22 @@ libimdb-plugin.so: ${imdb_plugin_SOURCES}
 	valac -C ${imdb_plugin_VALAFLAGS} ${imdb_plugin_SOURCES}
 	gcc -o $@ ${imdb_plugin_CFLAGS} ${imdb_plugin_LDADD} ${imdb_plugin_CSOURCES}
 
+imdb_plaintext_downloader_SOURCES = \
+	src/imdb/gzip-input-stream.vala \
+	src/imdb/imdb-plaintext-downloader.vala \
+	src/imdb/plaintext-downloader-interface.vala \
+	src/imdb/imdb-sqlite.vala
+
+imdb_plaintext_downloader_VALAFLAGS = --vapidir ./vapi \
+	--pkg dbus-glib-1 --pkg gio-2.0 --pkg sqlite3 --pkg zlib -X -lz
+
+imdb-plaintext-downloader: ${imdb_plaintext_downloader_SOURCES}
+	valac ${imdb_plaintext_downloader_VALAFLAGS} -o $@ \
+		${imdb_plaintext_downloader_SOURCES}
+
 .PHONY: all clean
 
 clean:
-	rm -f cinaest libimdb-plugin.so src/*.c src/imdb/*.c src/plugins/*.c
+	rm -f cinaest imdb-plaintext-downloader \
+		libimdb-plugin.so \
+		src/*.c src/imdb/*.c src/plugins/*.c
diff --git a/src/imdb/imdb-plaintext-downloader.vala b/src/imdb/imdb-plaintext-downloader.vala
new file mode 100644
index 0000000..89170d9
--- /dev/null
+++ b/src/imdb/imdb-plaintext-downloader.vala
@@ -0,0 +1,412 @@
+using GLib;
+
+/**
+ * D-Bus service that downloads the IMDb plaintext lists from an FTP mirror
+ * and imports them into the cinaest sqlite cache.
+ */
+class IMDbDownloadServer : Object, IMDbDownloader {
+    MainLoop loop;
+    Cancellable cancellable;
+    // Bytes read from the lists that are already finished.
+    int64 sofar;
+    // Sum of the sizes of all requested lists, 0 if unknown.
+    int64 total;
+    // True while a download request is being processed.
+    bool running;
+    // Only valid while download_imdb_async () holds the owning reference.
+    unowned IMDbSqlite sqlite;
+    string[] mirrors = {
+        "ftp.fu-berlin.de/pub/misc/movies/database/",
+        "ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/",
+        "ftp.sunet.se/pub/tv+movies/imdb/"
+    };
+
+    delegate void ParseLineFunction (string line);
+
+    construct {
+        loop = new MainLoop (null, false);
+        cancellable = new Cancellable ();
+    }
+
+    // IMDbDownloader implementation
+
+    /**
+     * Starts an asynchronous download from //mirror//.
+     *
+     * The call returns as soon as the transfer has been scheduled; a request
+     * made while another one is still running is ignored.
+     *
+     * @param mirror host and path of the mirror, without the URI scheme
+     * @param flags bit mask selecting the lists (MOVIES, GENRES, RATINGS)
+     */
+    public void download (string mirror, int flags) throws DBus.Error {
+        if (running) {
+            message ("Download in progress. Abort.\n");
+            return;
+        }
+        running = true;
+        // Fresh cancellable, so an earlier cancel () cannot abort this request.
+        cancellable = new Cancellable ();
+        message ("Download started (%x).", flags);
+        progress (0);
+        download_imdb_async.begin ("ftp://anonymous@" + mirror, flags, Priority.DEFAULT_IDLE);
+    }
+
+    /**
+     * Requests cancellation of the download that is currently running.
+     */
+    public void cancel () throws DBus.Error {
+        cancellable.cancel ();
+    }
+
+    /**
+     * Returns the list of known IMDb FTP mirrors.
+     */
+    public string[] get_mirrors () throws DBus.Error {
+        return mirrors;
+    }
+
+    // Private methods
+
+    /**
+     * Mounts the mirror, imports the requested lists into the database and
+     * quits the main loop afterwards.
+     *
+     * If the mirror cannot be mounted the method returns early and leaves the
+     * main loop running.
+     */
+    async void download_imdb_async (string mirror, int flags, int io_priority) {
+        Mount m;
+        File movies = File.new_for_uri (mirror + "/movies.list.gz");
+        File genres = File.new_for_uri (mirror + "/genres.list.gz");
+        File ratings = File.new_for_uri (mirror + "/ratings.list.gz");
+
+        description_changed ("Connecting to FTP ...");
+        progress (0);
+
+        try {
+            m = yield movies.find_enclosing_mount_async (io_priority, cancellable);
+        } catch (Error e0) {
+            // Not mounted yet: mount the FTP location and look it up again.
+            try {
+                bool mounted = yield movies.mount_enclosing_volume (MountMountFlags.NONE, null, cancellable);
+                if (mounted) {
+                    m = yield movies.find_enclosing_mount_async (io_priority, cancellable);
+                } else {
+                    running = false;
+                    return;
+                }
+            } catch (Error e1) {
+                critical ("Failed to mount: %s\n", e1.message);
+                running = false;
+                return;
+            }
+        }
+        stdout.printf ("Mounted: %s\n", m.get_name ());
+
+        description_changed ("Querying file sizes ...");
+        // Start from zero so sizes of an earlier request are not counted again.
+        total = 0;
+        try {
+            FileInfo info;
+
+            if (MOVIES in flags) {
+                info = yield movies.query_info_async ("", FileQueryInfoFlags.NONE, io_priority, cancellable);
+                total += info.get_size ();
+            }
+            if (GENRES in flags) {
+                info = yield genres.query_info_async ("", FileQueryInfoFlags.NONE, io_priority, cancellable);
+                total += info.get_size ();
+            }
+            if (RATINGS in flags) {
+                info = yield ratings.query_info_async ("", FileQueryInfoFlags.NONE, io_priority, cancellable);
+                total += info.get_size ();
+            }
+        } catch (Error e3) {
+            // Without a total the download still runs, just without progress.
+            warning ("Failed to get size: %s\n", e3.message);
+            total = 0;
+        }
+
+        var cache_dir = Path.build_filename (Environment.get_user_cache_dir (), "cinaest");
+        DirUtils.create_with_parents (cache_dir, 0770);
+
+        // _sqlite owns the database; the field and the parsers only borrow it.
+        var _sqlite = new IMDbSqlite (Path.build_filename (cache_dir, "imdb.db"));
+        sqlite = _sqlite;
+        _sqlite.clear ();
+
+        bool failed = false;
+        try {
+            var movie_parser = new MovieLineParser (sqlite);
+            var genre_parser = new GenreLineParser (sqlite);
+            var rating_parser = new RatingLineParser (sqlite);
+            sofar = 0;
+
+            if (MOVIES in flags) {
+                description_changed ("Downloading movie list ...");
+                yield download_async (movies, movie_parser, io_priority);
+            }
+            if (GENRES in flags) {
+                description_changed ("Downloading genre data ...");
+                yield download_async (genres, genre_parser, io_priority);
+            }
+            if (RATINGS in flags) {
+                description_changed ("Downloading rating data ...");
+                yield download_async (ratings, rating_parser, io_priority);
+            }
+        } catch (Error e2) {
+            failed = true;
+            if (e2 is IOError.CANCELLED)
+                message ("Download cancelled.\n");
+            else
+                warning ("Failed to open/read stream: %s\n", e2.message);
+        }
+
+        // Only report completion when every requested list was imported.
+        if (!failed && !cancellable.is_cancelled ()) {
+            stdout.printf ("Download complete.\n");
+            progress (100);
+        }
+
+        try {
+            bool unmounted = yield m.unmount (MountUnmountFlags.NONE, null);
+            if (!unmounted) {
+                warning ("Failed to unmount.\n");
+            }
+        } catch (Error e4) {
+            warning ("Failed to unmount: %s\n", e4.message);
+        }
+
+        sqlite = null;
+        running = false;
+        message ("Download finished.");
+
+        // FIXME - use a timeout?
+        loop.quit ();
+    }
+
+    /**
+     * Streams the gzip-compressed list //f// line by line through //parser//
+     * and emits progress whenever the overall percentage increases.
+     */
+    async void download_async (File f, LineParser parser, int io_priority) throws Error {
+        int percent = 0;
+        // Owned: each line returned by read_line_async () belongs to the caller.
+        string? line = null;
+
+        var stream = new GzipInputStream (yield f.read_async (io_priority, cancellable));
+        var data = new DataInputStream (stream);
+
+        do {
+            size_t l;
+
+            line = yield data.read_line_async (io_priority, cancellable, out l);
+            if (line != null)
+                parser.parse_line (line);
+
+            // Unknown total size: keep parsing, but skip the progress update.
+            if (total == 0)
+                continue;
+            int p = (int) (100 * (sofar + stream.total_in ()) / total);
+            if (p > percent) {
+                percent = p;
+                progress (p);
+            }
+        } while (line != null);
+
+        sofar += stream.total_in ();
+
+        yield stream.close_async (io_priority, cancellable);
+    }
+
+    /**
+     * Blocks in the main loop until loop.quit () is called.
+     */
+    public void run () {
+        loop.run ();
+    }
+
+    /**
+     * Claims the service name on the session bus and serves requests.
+     */
+    public static void main () {
+        try {
+            var conn = DBus.Bus.get (DBus.BusType.SESSION);
+            dynamic DBus.Object bus = conn.get_object ("org.freedesktop.DBus",
+                                                       "/org/freedesktop/DBus",
+                                                       "org.freedesktop.DBus");
+
+            // Try to register service in session bus
+            uint request_name_result = bus.request_name (DBUS_SERVICE, (uint) 0);
+
+            if (request_name_result == DBus.RequestNameReply.PRIMARY_OWNER) {
+                // Start server
+                var server = new IMDbDownloadServer ();
+                conn.register_object (DBUS_OBJECT, server);
+
+                server.run ();
+            } else {
+                critical ("Service \"org.maemo.cinaest.IMDb\" already registered. Abort.\n");
+            }
+        } catch (Error e) {
+            critical ("Oops: %s\n", e.message);
+        }
+    }
+}
+
+/**
+ * Base class of the parsers that import one IMDb list line at a time.
+ */
+abstract class LineParser {
+    internal unowned IMDbSqlite sqlite;
+
+    public LineParser (IMDbSqlite _sqlite) {
+        sqlite = _sqlite;
+    }
+
+    public abstract void parse_line (string line);
+
+    /**
+     * Tells whether //title// carries one of the (TV), (V) or (VG) suffixes
+     * of entries that are not imported.
+     */
+    internal bool skip_title (string title) {
+        if (title.has_suffix ("(TV)")) {
+            return true;
+        }
+        if (title.has_suffix ("(V)")) {
+            return true;
+        }
+        if (title.has_suffix ("(VG)")) {
+            return true;
+        }
+        return false;
+    }
+}
+
+/**
+ * Parses lines made of a title, tabs and a year, and adds the movie.
+ */
+class MovieLineParser : LineParser {
+    Regex re_movie;
+
+    public MovieLineParser (IMDbSqlite _sqlite) {
+        base (_sqlite);
+        try {
+            re_movie = new Regex ("^([^\t]+)\t+([0-9]+)$");
+        } catch (RegexError e) {
+            critical ("Failed to initialize regex: %s\n", e.message);
+        }
+    }
+
+    public override void parse_line (string line) {
+        MatchInfo matchinfo;
+
+        // Skip series episodes
+        if (line[0] == '"')
+            return;
+
+        if (!re_movie.match (line, 0, out matchinfo))
+            return;
+
+        string title;
+        string year = matchinfo.fetch (2);
+        try {
+            // Convert the title from Latin-1 to UTF-8.
+            title = convert (matchinfo.fetch (1), -1, "utf-8", "latin1");
+        } catch (ConvertError e) {
+            return;
+        }
+
+        if (skip_title (title))
+            return;
+
+        sqlite.add_movie (title, year.to_int ());
+    }
+}
+
+/**
+ * Parses lines made of a title, tabs and a genre, and tags the movie.
+ */
+class GenreLineParser : LineParser {
+    Regex re_genre;
+
+    public GenreLineParser (IMDbSqlite _sqlite) {
+        base (_sqlite);
+        try {
+            re_genre = new Regex ("^([^\t]+)\t+([A-Za-z-]+)$");
+        } catch (RegexError e) {
+            critical ("Failed to initialize regex: %s\n", e.message);
+        }
+    }
+
+    public override void parse_line (string line) {
+        MatchInfo matchinfo;
+
+        // Skip series episodes
+        if (line[0] == '"')
+            return;
+
+        if (!re_genre.match (line, 0, out matchinfo))
+            return;
+
+        string title;
+        string genre = matchinfo.fetch (2);
+        try {
+            title = convert (matchinfo.fetch (1), -1, "utf-8", "latin1");
+        } catch (ConvertError e) {
+            return;
+        }
+
+        // Same filter as the movie and rating parsers apply.
+        if (skip_title (title))
+            return;
+
+        sqlite.movie_add_genre (title, genre);
+    }
+}
+
+/**
+ * Parses rating lines and stores the rating, scaled by ten, as an integer.
+ */
+class RatingLineParser : LineParser {
+    Regex re_rating;
+
+    public RatingLineParser (IMDbSqlite _sqlite) {
+        base (_sqlite);
+        try {
+            re_rating = new Regex ("^ .+ +[0-9]+ +([0-9.]+) +(.+)$");
+        } catch (RegexError e) {
+            critical ("Failed to initialize regex: %s\n", e.message);
+        }
+    }
+
+    public override void parse_line (string line) {
+        MatchInfo matchinfo;
+
+        // Skip series episodes
+        if (line[0] == '"')
+            return;
+
+        if (!re_rating.match (line, 0, out matchinfo))
+            return;
+
+        string title;
+        string rating = matchinfo.fetch (1);
+        try {
+            title = convert (matchinfo.fetch (2), -1, "utf-8", "latin1");
+        } catch (ConvertError e) {
+            return;
+        }
+
+        // Skip series episodes
+        if (title[0] == '"')
+            return;
+
+        if (skip_title (title))
+            return;
+
+        sqlite.movie_set_rating (title, (int) (rating.to_double () * 10));
+    }
+}