IMDb: Use SQLite indices instead of (half) MD5 hashes for lookup
[cinaest] / src / imdb / imdb-plaintext-downloader.vala
1 using GLib;
2
/**
 * D-Bus object implementing IMDbDownloader.
 *
 * Fetches the gzip-compressed IMDb plain-text lists (movies, genres,
 * ratings) from an FTP mirror through GIO, feeds every line to the
 * matching LineParser and thereby fills the SQLite cache stored in the
 * user's cache directory. Three seconds after a download attempt has ended
 * the main loop is quit, unless a new download is requested first.
 */
class IMDbDownloadServer : Object, IMDbDownloader {
        MainLoop loop;
        Cancellable cancellable;
        // Compressed bytes consumed by the files that are already complete.
        int64 sofar;
        // Sum of the compressed sizes of all requested files; 0 if unknown.
        int64 total;
        // True while download_imdb_async () is in flight.
        bool running;
        // Id of the pending delayed-quit timeout, 0 when none is scheduled.
        uint source_id;
        // Borrowed reference; the owning reference is a local of
        // download_imdb_async () and this field is cleared before it ends.
        unowned IMDbSqlite sqlite;
        string[] mirrors = {
                "ftp.fu-berlin.de/pub/misc/movies/database/",
                "ftp.funet.fi/pub/mirrors/ftp.imdb.com/pub/",
                "ftp.sunet.se/pub/tv+movies/imdb/"
        };

        // Not referenced anywhere inside this class.
        delegate void ParseLineFunction (string line);

        construct {
                loop = new MainLoop (null, false);
                cancellable = new Cancellable ();
        }

        // IMDbDownloader implementation

        /**
         * Starts an asynchronous download from the given mirror.
         *
         * The call returns immediately; the transfer itself runs as a
         * coroutine on the main loop. A request made while another download
         * is still running is ignored.
         *
         * @param mirror host and path of the mirror, without URI scheme
         * @param flags bit set selecting the lists to fetch (tested against
         *              MOVIES, GENRES and RATINGS)
         */
        public void download (string mirror, int flags) throws DBus.Error {
                if (running) {
                        message ("Download in progress. Abort.\n");
                        return;
                }
                running = true;

                // A new request cancels the pending delayed shutdown.
                if (source_id != 0) {
                        Source.remove (source_id);
                        source_id = 0;
                }

                // A Cancellable stays cancelled once triggered. Without a
                // fresh one, every download following a cancel () would
                // abort right away.
                if (cancellable.is_cancelled ()) {
                        cancellable = new Cancellable ();
                }

                message ("Download started (%x).", flags);
                progress (0);
                // Only kicks the coroutine off; completion is reported from
                // download_imdb_async () itself, so nothing is logged as
                // finished here.
                download_imdb_async.begin ("ftp://anonymous@" + mirror, flags, Priority.DEFAULT_IDLE);
        }

        /**
         * Requests cancellation of the running download, if any.
         */
        public void cancel () throws DBus.Error {
                cancellable.cancel ();
        }

        /**
         * Returns the built-in list of FTP mirrors.
         */
        public string[] get_mirrors () throws DBus.Error {
                return mirrors;
        }

        // Private methods

        /**
         * Performs one complete download run: mounts the FTP location,
         * sums up the file sizes for progress reporting, clears the
         * database, downloads and parses the requested lists, unmounts and
         * finally creates the votes index.
         *
         * All errors are handled (logged) inside; nothing is thrown.
         */
        async void download_imdb_async (string mirror, int flags, int io_priority) {
                Mount m;
                File movies = File.new_for_uri (mirror + "/movies.list.gz");
                File genres = File.new_for_uri (mirror + "/genres.list.gz");
                File ratings = File.new_for_uri (mirror + "/ratings.list.gz");

                // Byte counters are per run; stale values from a previous
                // download would distort the percentage.
                sofar = 0;
                total = 0;

                description_changed ("Connecting to FTP ...");
                progress (0);

                try {
                        m = yield movies.find_enclosing_mount_async(io_priority, cancellable);
                } catch (Error e0) {
                        // Not mounted yet: mount the enclosing volume and retry.
                        try {
                                bool mounted = yield movies.mount_enclosing_volume (MountMountFlags.NONE, null, cancellable);
                                if (mounted) {
                                        m = yield movies.find_enclosing_mount_async(io_priority, cancellable);
                                } else {
                                        critical ("Failed to mount.\n");
                                        end_download ();
                                        return;
                                }
                        } catch (Error e1) {
                                critical ("Failed to mount: %s\n", e1.message);
                                end_download ();
                                return;
                        }
                }
                stdout.printf ("Mounted: %s\n", m.get_name ());

                description_changed ("Querying file sizes ...");
                try {
                        FileInfo info;

                        // get_size () reads the "standard::size" attribute,
                        // so that attribute has to be requested explicitly.
                        if (MOVIES in flags) {
                                info = yield movies.query_info_async ("standard::size", FileQueryInfoFlags.NONE, io_priority, cancellable);
                                total += info.get_size ();
                        }
                        if (GENRES in flags) {
                                info = yield genres.query_info_async ("standard::size", FileQueryInfoFlags.NONE, io_priority, cancellable);
                                total += info.get_size ();
                        }
                        if (RATINGS in flags) {
                                info = yield ratings.query_info_async ("standard::size", FileQueryInfoFlags.NONE, io_priority, cancellable);
                                total += info.get_size ();
                        }
                } catch (Error e3) {
                        warning ("Failed to get size: %s\n", e3.message);
                        // Unknown total: download_async () then skips the
                        // percentage updates.
                        total = 0;
                }

                var cache_dir = Path.build_filename (Environment.get_user_cache_dir (), "cinaest");
                DirUtils.create_with_parents (cache_dir, 0770);

                // _sqlite holds the owning reference for the duration of
                // this run; the field only borrows it.
                var _sqlite = new IMDbSqlite (Path.build_filename (cache_dir, "imdb.db"));
                sqlite = _sqlite;
                _sqlite.clear ();

                try {
                        var movie_parser = new MovieLineParser (sqlite);
                        var genre_parser = new GenreLineParser (sqlite);
                        var rating_parser = new RatingLineParser (sqlite);

                        if (MOVIES in flags) {
                                description_changed ("Downloading movie list ...");
                                yield download_async(movies, movie_parser, io_priority);
                                description_changed ("Creating title index ...");
                                sqlite.create_title_index ();
                        }
                        if (GENRES in flags) {
                                description_changed ("Downloading genre data ...");
                                yield download_async(genres, genre_parser, io_priority);
                        }
                        if (RATINGS in flags) {
                                description_changed ("Downloading rating data ...");
                                yield download_async(ratings, rating_parser, io_priority);
                        }
                } catch (Error e2) {
                        if (e2 is IOError.CANCELLED)
                                message ("Download cancelled.\n");
                        else
                                warning ("Failed to open/read stream: %s\n", e2.message);
                }

                // Deliberately not cancellable: unmount even after a cancel.
                try {
                        bool unmounted = yield m.unmount(MountUnmountFlags.NONE, null);
                        if (!unmounted) {
                                warning ("Failed to unmount.\n");
                        }
                } catch (Error e4) {
                        warning ("Failed to unmount: %s\n", e4.message);
                }

                description_changed ("Creating indices ...");
                sqlite.create_votes_index ();

                // NOTE(review): a non-cancel stream error above still ends
                // up here and is reported as complete - confirm that
                // clients rely on the final progress (100).
                if (!cancellable.is_cancelled ()) {
                        stdout.printf ("Download complete.\n");
                        progress (100);
                }

                end_download ();
        }

        // Common epilogue of a download run: drop the borrowed database
        // reference, allow new requests and schedule the delayed shutdown.
        private void end_download () {
                sqlite = null;
                running = false;

                timeout_quit ();
        }

        // Schedules quit () to run in three seconds.
        private void timeout_quit () {
                source_id = Timeout.add (3000, quit);
        }

        // Timeout callback: stops the main loop.
        private bool quit () {
                // The source is removed by returning false below; forget
                // its id so that nobody tries to remove it a second time.
                source_id = 0;
                loop.quit ();

                // One-shot only
                return false;
        }

        /**
         * Downloads one gzip-compressed list and passes every line to
         * parser, emitting progress () whenever the integer percentage
         * grows (values below 100 only).
         *
         * @param f remote file to read
         * @param parser receiver of the decompressed lines
         * @param io_priority priority for all asynchronous I/O calls
         * @throws Error if opening, reading or closing the stream fails,
         *               including cancellation
         */
        async void download_async (File f, LineParser parser, int io_priority) throws Error {
                int percent = 0;
                // Owned: read_line_async () hands over a newly allocated
                // string (or null at the end of the stream).
                string? line = null;

                var stream = new GzipInputStream (yield f.read_async (io_priority, cancellable));
                var data = new DataInputStream(stream);

                do {
                        size_t l;

                        line = yield data.read_line_async (io_priority, cancellable, out l);
                        if (line != null)
                                parser.parse_line (line);

                        // Without a known total there is nothing to report.
                        if (total == 0)
                                continue;
                        int p = (int) (100 * (sofar + stream.total_in ()) / total);
                        if (p > percent) {
                                percent = p;
                                // 100 is emitted once by download_imdb_async ().
                                if (p < 100)
                                        progress (p);
                        }
                } while (line != null);

                sofar += stream.total_in ();

                yield stream.close_async (io_priority, cancellable);
        }

        /**
         * Runs the main loop until quit () stops it.
         */
        public void run () {
                loop.run ();
        }

        /**
         * Entry point: claims DBUS_SERVICE on the session bus and, when
         * this process became the primary owner, exports a server object at
         * DBUS_OBJECT and runs its main loop.
         */
        public static void main () {
                try {
                        var conn = DBus.Bus.get (DBus.BusType.SESSION);
                        dynamic DBus.Object bus = conn.get_object ("org.freedesktop.DBus",
                                                                   "/org/freedesktop/DBus",
                                                                   "org.freedesktop.DBus");

                        // Try to register service in session bus
                        uint request_name_result = bus.request_name (DBUS_SERVICE, (uint) 0);

                        if (request_name_result == DBus.RequestNameReply.PRIMARY_OWNER) {
                                // Start server
                                var server = new IMDbDownloadServer ();
                                conn.register_object (DBUS_OBJECT, server);

                                server.run ();
                        } else {
                                critical ("Service \"org.maemo.cinaest.IMDb\" already registered. Abort.\n");
                        }
                } catch (Error e) {
                        critical ("Oops: %s\n", e.message);
                }
        }
}
228
/**
 * Common base of the per-list line parsers.
 *
 * Keeps a borrowed reference to the database the parsed records go to and
 * offers the title filter shared by the subclasses.
 */
abstract class LineParser {
        // Borrowed; whoever creates the parser keeps the database alive.
        internal unowned IMDbSqlite sqlite;

        public LineParser (IMDbSqlite _sqlite) {
                this.sqlite = _sqlite;
        }

        /**
         * Handles a single line of the list file.
         *
         * @param line the line to process
         */
        public abstract void parse_line (string line);

        /**
         * Tells whether a title is to be left out.
         *
         * @param title the title to check
         * @return true when title ends in "(TV)", "(V)" or "(VG)"
         */
        internal bool skip_title (string title) {
                return title.has_suffix ("(TV)")
                    || title.has_suffix ("(V)")
                    || title.has_suffix ("(VG)");
        }
}
251
/**
 * Parser for the movie list: every accepted line becomes one
 * add_movie () call on the database.
 */
class MovieLineParser : LineParser {
        // Title (anything up to the first tab), tab(s), all-digit year.
        Regex re_movie;

        public MovieLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                try {
                        re_movie = new Regex ("^([^\t]+)\t+([0-9]+)$");
                } catch (RegexError err) {
                        critical ("Failed to initialize regex: %s\n", err.message);
                }
        }

        /**
         * Stores the movie described by line.
         *
         * Ignored are: lines starting with a double quote (series
         * episodes), lines not matching the title/year pattern, titles
         * that cannot be converted from latin1 to UTF-8 and titles
         * rejected by skip_title ().
         *
         * @param line one line of the movie list
         */
        public override void parse_line (string line) {
                MatchInfo hit;

                // Series episodes are quoted.
                if (line[0] == '"') {
                        return;
                }

                if (!re_movie.match(line, 0, out hit)) {
                        return;
                }

                string year_text = hit.fetch (2);
                string title;
                try {
                        title = convert(hit.fetch (1), -1, "utf-8", "latin1");
                } catch (ConvertError err) {
                        return;
                }

                if (!skip_title (title)) {
                        int year = year_text.to_int ();
                        sqlite.add_movie (title, year);
                }
        }
}
288
/**
 * Parser for the genre list: every accepted line becomes one
 * movie_add_genre () call on the database.
 */
class GenreLineParser : LineParser {
        // Title (anything up to the first tab), tab(s), genre name made of
        // letters and dashes.
        Regex re_genre;

        public GenreLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                try {
                        re_genre = new Regex ("^([^\t]+)\t+([A-Za-z-]+)$");
                } catch (RegexError e) {
                        critical ("Failed to initialize regex: %s\n", e.message);
                }
        }

        /**
         * Attaches the genre found in line to its movie.
         *
         * Ignored are: lines starting with a double quote (series
         * episodes), lines not matching the title/genre pattern, titles
         * that cannot be converted from latin1 to UTF-8 and titles
         * rejected by skip_title ().
         *
         * @param line one line of the genre list
         */
        public override void parse_line (string line) {
                MatchInfo matchinfo;

                // Skip series episodes
                if (line[0] == '"')
                        return;

                if (!re_genre.match(line, 0, out matchinfo))
                        return;

                string title;
                string genre = matchinfo.fetch (2);
                try {
                        title = convert(matchinfo.fetch (1), -1, "utf-8", "latin1");
                } catch (ConvertError e) {
                        return;
                }

                // Same filter as the movie and rating parsers: titles the
                // movie parser never stores need no genre lookup either.
                if (skip_title (title))
                        return;

                sqlite.movie_add_genre (title, genre);
        }
}
322
/**
 * Parser for the rating list: every accepted line becomes one
 * movie_set_rating () call on the database.
 */
class RatingLineParser : LineParser {
        // Six leading spaces, a first column that is not captured, then
        // the captured columns: votes, rating and title.
        Regex re_rating;

        public RatingLineParser (IMDbSqlite _sqlite) {
                base (_sqlite);
                try {
                        re_rating = new Regex ("^      .+ +([0-9]+) +([0-9.]+) +(.+)$");
                } catch (RegexError err) {
                        critical ("Failed to initialize regex: %s\n", err.message);
                }
        }

        /**
         * Stores rating and vote count of the movie described by line.
         *
         * The rating is passed on multiplied by ten and truncated to an
         * integer. Ignored are: lines not matching the pattern, titles
         * that cannot be converted from latin1 to UTF-8, titles starting
         * with a double quote (series episodes) and titles rejected by
         * skip_title ().
         *
         * @param line one line of the rating list
         */
        public override void parse_line (string line) {
                MatchInfo hit;

                // Quoted lines are series episodes.
                if (line[0] == '"') {
                        return;
                }

                if (!re_rating.match(line, 0, out hit)) {
                        return;
                }

                string votes_text = hit.fetch (1);
                string rating_text = hit.fetch (2);
                string title;
                try {
                        title = convert(hit.fetch (3), -1, "utf-8", "latin1");
                } catch (ConvertError err) {
                        return;
                }

                // In this list the episode marker sits in the title column.
                if (title[0] == '"') {
                        return;
                }

                if (!skip_title (title)) {
                        int rating_times_ten = (int) (rating_text.to_double () * 10);
                        int votes = votes_text.to_int ();
                        sqlite.movie_set_rating (title, rating_times_ten, votes);
                }
        }
}