imdb_plaintext_downloader_SOURCES = \
src/imdb/imdb-plaintext-downloader.c \
- src/imdb/imdb-ftp-downloader.c \
+ src/imdb/ftp-downloader.c \
+ src/imdb/gzip-input-stream.c \
+ src/imdb/imdb-gzip-parser.c \
src/imdb/imdb-line-parser.c \
src/imdb/imdb-sqlite.c \
src/imdb/plaintext-downloader-interface.c
imdb_plaintext_downloader_VALASOURCES = \
src/imdb/imdb-plaintext-downloader.vala \
- src/imdb/imdb-ftp-downloader.vala \
+ src/imdb/ftp-downloader.vala \
+ src/imdb/gzip-input-stream.vala \
+ src/imdb/imdb-gzip-parser.vala \
src/imdb/imdb-line-parser.vala \
src/imdb/imdb-sqlite.vala \
src/imdb/plaintext-downloader-interface.vala
--- /dev/null
+/**
+ * Downloads files from an FTP server into local files using libcurl.
+ *
+ * Before a file is fetched, the listing of its remote directory is retrieved
+ * and parsed for file sizes. A file whose local copy already has the listed
+ * size is not downloaded again. The listing of the most recently queried
+ * directory is cached.
+ */
+class FtpDownloader {
+    private Curl.EasyHandle curl;
+    private Cancellable cancellable;
+    // Destination of the transfer in progress; null while idle.
+    private FileStream file;
+    // URL of the directory whose listing is currently held in file_size.
+    private string dirname;
+    // Maps a remote file name to the size found in the directory listing.
+    private HashTable<string,int> file_size;
+    // Compiled lazily on first use and then reused for every listing line.
+    private Regex re_dir_entry;
+    // Last byte count reported through the progress signal.
+    private int last_dlnow;
+    // Incomplete listing line carried over between dir_callback invocations.
+    private string last_line = null;
+
+    /**
+     * @param _cancellable optional object used to abort running transfers
+     */
+    public FtpDownloader (Cancellable? _cancellable) {
+        cancellable = _cancellable;
+        curl = new Curl.EasyHandle ();
+    }
+
+    // True once the caller has requested cancellation.
+    private bool is_cancelled () {
+        return cancellable != null && cancellable.is_cancelled ();
+    }
+
+    /**
+     * libcurl write callback for file transfers: appends the chunk to file.
+     *
+     * @return the number of bytes written; any value other than
+     *         size * nmemb makes libcurl abort the transfer
+     */
+    [CCode (instance_pos = -1)]
+    size_t write_callback (void *buffer, size_t size, size_t nmemb) {
+        if (is_cancelled ())
+            return 0;
+
+        unowned uint8[] buf = (uint8[]) buffer;
+        buf.length = (int) (size * nmemb);
+
+        // Hand the real byte count back so a short write (e.g. disk full)
+        // fails the transfer instead of being silently ignored.
+        return file.write (buf);
+    }
+
+    /**
+     * libcurl progress callback: forwards changes of the downloaded byte
+     * count through the progress signal.
+     *
+     * @return non-zero to abort the transfer after cancellation, 0 otherwise
+     */
+    int progress_callback (double dltotal, double dlnow, double ultotal, double ulnow) {
+        if (is_cancelled ())
+            return 1;
+        if (last_dlnow != (int) dlnow) {
+            last_dlnow = (int) dlnow;
+            progress ((int) dltotal, last_dlnow);
+        }
+        return 0;
+    }
+
+    /**
+     * Downloads url into filename unless the local file already has the size
+     * announced by the remote directory listing.
+     *
+     * Transfer failures other than cancellation are only reported on stderr.
+     *
+     * @param url      URL of the remote file
+     * @param filename path of the local file to (over)write
+     * @throws IOError CANCELLED when the transfer was cancelled, FAILED when
+     *                 the local file cannot be opened for writing
+     */
+    public void download (string url, string filename) throws IOError {
+        print ("download (\"%s\", \"%s\")\n", url, filename);
+        download_dir (Path.get_dirname (url) + "/");
+        string basename = Path.get_basename (url);
+        int size = file_size.lookup (basename);
+        if (size > 0) {
+            Posix.Stat st;
+            // st is only meaningful when stat () succeeded.
+            if (Posix.stat (filename, out st) == 0 && size == st.st_size)
+                return;
+        }
+
+        curl.setopt (Curl.Option.URL, url);
+        curl.setopt (Curl.Option.WRITEFUNCTION, write_callback);
+        curl.setopt (Curl.Option.WRITEDATA, this);
+        curl.setopt (Curl.Option.NOPROGRESS, 0L);
+        curl.setopt (Curl.Option.PROGRESSFUNCTION, progress_callback);
+        curl.setopt (Curl.Option.PROGRESSDATA, this);
+
+        last_dlnow = -1;
+        file = FileStream.open (filename, "w");
+        if (file == null)
+            throw new IOError.FAILED ("Could not open \"%s\" for writing.", filename);
+
+        var res = curl.perform ();
+        // Close the file on every path, including cancellation.
+        file = null;
+
+        // A cancel noticed by write_callback surfaces as a write error rather
+        // than ABORTED_BY_CALLBACK, so the cancellable is consulted as well.
+        if (Curl.Code.ABORTED_BY_CALLBACK == res || is_cancelled ()) {
+            throw new IOError.CANCELLED ("Download cancelled.");
+        } else if (res != 0) {
+            stderr.printf ("cURL performed: %d\n", res);
+        }
+    }
+
+    /**
+     * Extracts name and size from one line of the directory listing and
+     * records them in file_size. Lines that do not match are ignored.
+     */
+    void parse_dir_entry (string line) {
+        try {
+            if (re_dir_entry == null)
+                re_dir_entry = new Regex ("^.* ([0-9]*) [A-Z][a-z]* *[0-9]* [0-9]* [0-9]*:[0-9]* ([^ ]*)$");
+
+            // FTP listings are commonly CRLF-terminated; without this the
+            // carriage return would end up as part of the file name.
+            string entry = line;
+            if (entry.has_suffix ("\r"))
+                entry = entry.substring (0, entry.length - 1);
+
+            MatchInfo match_info;
+            if (re_dir_entry.match (entry, 0, out match_info)) {
+                string name = match_info.fetch (2);
+                int size = match_info.fetch (1).to_int ();
+                file_size.insert (name, size);
+            }
+        } catch (RegexError e) {
+            warning ("%s", e.message);
+        }
+    }
+
+    /**
+     * libcurl write callback for directory listings: splits the received
+     * data into lines and passes each complete line to parse_dir_entry.
+     * A trailing partial line is kept in last_line for the next call.
+     */
+    [CCode (instance_pos = -1)]
+    size_t dir_callback (void *buffer, size_t size, size_t nmemb) {
+        if (is_cancelled ())
+            return 0;
+
+        unowned char[] buf = (char[]) buffer;
+        buf.length = (int) (size * nmemb);
+
+        // p marks the start of the current line, j its length so far.
+        char *p = buf;
+        int i;
+        int j;
+        for (i = 0, j = 0; i < buf.length; i++, j++) {
+            if (buf[i] == '\n') {
+                // Terminate the line in place so it can be read as a string.
+                buf[i] = 0;
+                if (last_line != null) {
+                    parse_dir_entry (last_line + (string) p);
+                    last_line = null;
+                } else {
+                    parse_dir_entry ((string) p);
+                }
+                p += j + 1;
+                j = -1;
+            }
+        }
+        if (j > 0) {
+            // Append to an already pending fragment instead of replacing it,
+            // so a line spanning more than two chunks stays intact.
+            string tail = ((string) p).ndup (j);
+            if (last_line != null)
+                last_line = last_line + tail;
+            else
+                last_line = tail;
+        }
+
+        return buf.length;
+    }
+
+    /**
+     * Retrieves the listing of the directory at url and rebuilds file_size
+     * from it. Does nothing when that listing is already cached.
+     *
+     * Failures other than cancellation are only reported on stderr.
+     *
+     * @param url URL of the remote directory
+     * @throws IOError CANCELLED when the transfer was cancelled
+     */
+    public void download_dir (string url) throws IOError {
+        if (dirname != null && dirname == url)
+            return;
+        print ("download_dir (\"%s\")\n", url);
+
+        curl.setopt (Curl.Option.URL, url);
+        curl.setopt (Curl.Option.WRITEFUNCTION, dir_callback);
+        curl.setopt (Curl.Option.WRITEDATA, this);
+        curl.setopt (Curl.Option.NOPROGRESS, 1L);
+        curl.setopt (Curl.Option.PROGRESSFUNCTION, null);
+        curl.setopt (Curl.Option.PROGRESSDATA, null);
+
+        // Keys are strings, so they must be compared with str_equal.
+        file_size = new HashTable<string, int> (str_hash, str_equal);
+        // The table no longer belongs to the previously cached directory.
+        dirname = null;
+        last_line = null;
+
+        var res = curl.perform ();
+        // No progress callback is installed here, so a cancel is only seen
+        // by dir_callback and reported by libcurl as a write error.
+        if (Curl.Code.ABORTED_BY_CALLBACK == res || is_cancelled ()) {
+            throw new IOError.CANCELLED ("Dir listing cancelled.");
+        } else if (res != 0) {
+            stderr.printf ("cURL performed: %d\n", res);
+        }
+
+        // A final line without a terminating newline is still an entry.
+        if (last_line != null) {
+            parse_dir_entry (last_line);
+            last_line = null;
+        }
+        dirname = url;
+    }
+
+    /**
+     * Emitted while a file is downloaded.
+     *
+     * @param dltotal total size as reported by libcurl
+     * @param dlnow   number of bytes received so far
+     */
+    public signal void progress (int dltotal, int dlnow);
+}
// Private methods
private bool do_download () {
- download ("ftp.fu-berlin.de/pub/misc/movies/database/", MOVIES | GENRES | RATINGS | AKAS | PLOTS);
+ try { // download () may throw; the error is handled here instead of propagating
+ download ("ftp.fu-berlin.de/pub/misc/movies/database/", MOVIES | GENRES | RATINGS | AKAS | PLOTS); // requests all five data sets
+ } catch (Error e) { // any error is only printed; the method still returns false afterwards
+ print ("Error: %s\n", e.message);
+ }
return false;
}
var aka_parser = new AkaLineParser (sqlite);
var plot_parser = new PlotLineParser (sqlite);
- var downloader = new IMDbFtpDownloader (cancellable);
- downloader.progress_changed.connect (on_progress_changed);
+ var downloader = new FtpDownloader (cancellable);
+
+ var parser = new IMDbGzipParser (cancellable);
+
+ downloader.progress.connect (on_progress);
if (MOVIES in flags) {
description_changed ("Downloading movie list ...");
- downloader.download (url + "movies.list.gz", movie_parser);
+ downloader.download (url + "movies.list.gz", Path.build_filename (cache_dir, "movies.list.gz"));
}
percent_finished = 20;
if (GENRES in flags) {
description_changed ("Downloading genre data ...");
- downloader.download (url + "genres.list.gz", genre_parser);
+ downloader.download (url + "genres.list.gz", Path.build_filename (cache_dir, "genres.list.gz"));
}
percent_finished = 40;
if (RATINGS in flags) {
description_changed ("Downloading rating data ...");
- downloader.download (url + "ratings.list.gz", rating_parser);
+ downloader.download (url + "ratings.list.gz", Path.build_filename (cache_dir, "ratings.list.gz"));
}
percent_finished = 60;
if (AKAS in flags) {
description_changed ("Downloading alternative titles ...");
- downloader.download (url + "aka-titles.list.gz", aka_parser);
+ downloader.download (url + "aka-titles.list.gz", Path.build_filename (cache_dir, "aka-titles.list.gz"));
}
percent_finished = 80;
if (PLOTS in flags) {
description_changed ("Downloading plots ...");
- downloader.download (url + "plot.list.gz", plot_parser);
+ downloader.download (url + "plot.list.gz", Path.build_filename (cache_dir, "plot.list.gz"));
+ }
+
+ if (MOVIES in flags) {
+ description_changed ("Parsing movie list ...");
+ parser.parse (Path.build_filename (cache_dir, "movies.list.gz"), movie_parser);
+ }
+ percent_finished = 20;
+ if (GENRES in flags) {
+ description_changed ("Parsing genre data ...");
+ parser.parse (Path.build_filename (cache_dir, "genres.list.gz"), genre_parser);
+ }
+ percent_finished = 40;
+ if (RATINGS in flags) {
+ description_changed ("Parsing rating data ...");
+ parser.parse (Path.build_filename (cache_dir, "ratings.list.gz"), rating_parser);
+ }
+ percent_finished = 60;
+ if (AKAS in flags) {
+ description_changed ("Parsing alternative titles ...");
+ parser.parse (Path.build_filename (cache_dir, "aka-titles.list.gz"), aka_parser);
+ }
+ percent_finished = 80;
+ if (PLOTS in flags) {
+ description_changed ("Parsing plots ...");
+ parser.parse (Path.build_filename (cache_dir, "plot.list.gz"), plot_parser);
}
} catch (Error e2) {
if (e2 is IOError.CANCELLED)
return null;
}
- private void on_progress_changed (int percent) {
+ private void on_progress (int dltotal, int dlnow) { // connected to the downloader's progress signal; receives total and current counts
+ /* Previous percent-based reporting, disabled because this handler no longer receives a percentage:
progress (percent_finished + percent / 5);
stdout.printf ("%d %%\r", percent_finished + percent / 5);
+ */
+ stdout.printf ("%d / %d\r", dlnow, dltotal); // "\r" returns to the line start so the next update overwrites this one
stdout.flush ();
}
loop.run ();
}
+ public void show_desc (string desc) { // handler for description_changed: prints the description in quotes, prefixed with "DESC: "
+ print ("DESC: \"%s\"\n", desc);
+ }
+
public static void main () {
Curl.global_init (Curl.GLOBAL_DEFAULT);
// Start server
var downloader = new IMDbDownloaderCLI ();
+ downloader.description_changed.connect (downloader.show_desc);
Idle.add (downloader.do_download);
downloader.run ();
+++ /dev/null
-class IMDbFtpDownloader {
- Curl.EasyHandle curl;
- private ZLib.InflateStream strm;
- private int percent;
- private char[] buf_out;
- private uint have;
- private LineParser parser;
- private Cancellable cancellable;
-
- [CCode (instance_pos = -1)]
- size_t write_callback (void *buffer, size_t size, size_t nmemb) {
- if (cancellable != null && cancellable.is_cancelled ())
- return 0;
- strm.next_in = buffer;
- strm.avail_in = (uint) (size * nmemb);
- if (strm.avail_in == 0)
- return 0;
-
- do {
- strm.next_out = (char*) buf_out + have;
- strm.avail_out = buf_out.length - have;
-
- char* p = (char*) buf_out;
-
- var ret = strm.inflate (ZLib.Flush.NO_FLUSH);
- assert (ret != ZLib.Status.STREAM_ERROR);
- if (ret == ZLib.Status.NEED_DICT)
- ret = ZLib.Status.DATA_ERROR;
- switch (ret) {
- case ZLib.Status.DATA_ERROR:
- case ZLib.Status.MEM_ERROR:
- return ret;
- }
-
- have = buf_out.length - strm.avail_out;
-
- char* l = p;
- int j = 0;
- for (int i = 0; i < have; i++, j++) {
- if (p[i] == '\n') {
- p[i] = 0;
- if (parser != null)
- parser.parse_line ((string) l);
- j = -1;
- l = p + i + 1;
- }
- }
- if (j > 0) {
- Memory.copy (p, l, j);
- have = j;
- } else {
- have = 0;
- }
- } while (strm.avail_out == 0);
-
- return size * nmemb;
- }
-
- int progress_callback (double dltotal, double dlnow, double ultotal, double ulnow) {
- if (cancellable != null && cancellable.is_cancelled ())
- return 1;
- if (dltotal > 0) {
- int p = (int) (100 * dlnow / dltotal);
- if (p > percent) {
- percent = p;
- progress_changed (p);
- }
- }
- return 0;
- }
-
- public IMDbFtpDownloader (Cancellable? _cancellable) {
- cancellable = _cancellable;
- curl = new Curl.EasyHandle ();
- curl.setopt (Curl.Option.WRITEFUNCTION, write_callback);
- curl.setopt (Curl.Option.WRITEDATA, this);
- curl.setopt (Curl.Option.NOPROGRESS, 0L);
- curl.setopt (Curl.Option.PROGRESSFUNCTION, progress_callback);
- curl.setopt (Curl.Option.PROGRESSDATA, this);
- buf_out = new char[16384];
- }
-
- public void download (string url, LineParser? _parser) throws IOError {
- curl.setopt (Curl.Option.URL, url);
- percent = 0;
- parser = _parser;
- have = 0;
-
- strm = ZLib.InflateStream.full (15 | 32);
-
- var res = curl.perform ();
- if (Curl.Code.ABORTED_BY_CALLBACK == res) {
- throw new IOError.CANCELLED ("Download cancelled.");
- }
- }
-
- public signal void progress_changed (int percent);
-}
--- /dev/null
+/**
+ * Reads a gzip-compressed file line by line and feeds every line to a
+ * LineParser.
+ */
+class IMDbGzipParser {
+    // Parser receiving the lines of the file currently being read; may be null.
+    private LineParser parser;
+    private Cancellable cancellable;
+
+    /**
+     * @param _cancellable optional object used to abort a running parse
+     */
+    public IMDbGzipParser (Cancellable? _cancellable) {
+        cancellable = _cancellable;
+    }
+
+    /**
+     * Decompresses the file at path and passes each line to _parser.
+     *
+     * The progress signal is emitted once with (0, 0) before reading and then
+     * whenever the stream's total_in () value has grown; the total is always
+     * reported as 0.
+     *
+     * @param path    path of the gzip-compressed input file
+     * @param _parser receiver of the lines; when null the file is still read
+     *                to the end but its lines are discarded
+     */
+    public void parse (string path, LineParser? _parser) throws IOError {
+        parser = _parser;
+
+        var file = File.new_for_path (path);
+        var gz_stream = new GzipInputStream (file.read (cancellable));
+        var stream = new DataInputStream (gz_stream);
+
+        int total_in = 0;
+        size_t length;
+        string line;
+        progress (0, 0);
+        line = stream.read_line (out length, cancellable);
+        while (line != null) {
+            // The parameter is nullable, so it must not be dereferenced blindly.
+            if (parser != null)
+                parser.parse_line (line);
+            line = stream.read_line (out length, cancellable);
+            if (gz_stream.total_in () > total_in) {
+                total_in = (int) gz_stream.total_in ();
+                progress (0, total_in);
+            }
+        }
+    }
+
+    /**
+     * Emitted while a file is parsed.
+     *
+     * @param total always 0 in the current implementation
+     * @param now   latest value of the stream's total_in ()
+     */
+    public signal void progress (int total, int now);
+}
var aka_parser = new AkaLineParser (sqlite);
var plot_parser = new PlotLineParser (sqlite);
- var downloader = new IMDbFtpDownloader (cancellable);
- downloader.progress_changed.connect (on_progress_changed);
+ var downloader = new FtpDownloader (cancellable);
+ downloader.progress.connect (on_progress);
+
+ var parser = new IMDbGzipParser (cancellable);
if (MOVIES in flags) {
description_changed ("Downloading movie list ...");
- downloader.download (url + "movies.list.gz", movie_parser);
+ downloader.download (url + "movies.list.gz", Path.build_filename (cache_dir, "movies.list.gz"));
}
percent_finished = 20;
if (GENRES in flags) {
description_changed ("Downloading genre data ...");
- downloader.download (url + "genres.list.gz", genre_parser);
+ downloader.download (url + "genres.list.gz", Path.build_filename (cache_dir, "genres.list.gz"));
}
percent_finished = 40;
if (RATINGS in flags) {
description_changed ("Downloading rating data ...");
- downloader.download (url + "ratings.list.gz", rating_parser);
+ downloader.download (url + "ratings.list.gz", Path.build_filename (cache_dir, "ratings.list.gz"));
}
percent_finished = 60;
if (AKAS in flags) {
description_changed ("Downloading alternative titles ...");
- downloader.download (url + "aka-titles.list.gz", aka_parser);
+ downloader.download (url + "aka-titles.list.gz", Path.build_filename (cache_dir, "aka-titles.list.gz"));
}
percent_finished = 80;
if (PLOTS in flags) {
description_changed ("Downloading plots ...");
- print ("Downloading Plots");
- downloader.download (url + "plot.list.gz", plot_parser);
+ downloader.download (url + "plot.list.gz", Path.build_filename (cache_dir, "plot.list.gz"));
+ }
+
+ if (MOVIES in flags) {
+ description_changed ("Parsing movie list ...");
+ parser.parse (Path.build_filename (cache_dir, "movies.list.gz"), movie_parser);
+ }
+ percent_finished = 20;
+ if (GENRES in flags) {
+ description_changed ("Parsing genre data ...");
+ parser.parse (Path.build_filename (cache_dir, "genres.list.gz"), genre_parser);
+ }
+ percent_finished = 40;
+ if (RATINGS in flags) {
+ description_changed ("Parsing rating data ...");
+ parser.parse (Path.build_filename (cache_dir, "ratings.list.gz"), rating_parser);
+ }
+ percent_finished = 60;
+ if (AKAS in flags) {
+ description_changed ("Parsing alternative titles ...");
+ parser.parse (Path.build_filename (cache_dir, "aka-titles.list.gz"), aka_parser);
+ }
+ percent_finished = 80;
+ if (PLOTS in flags) {
+ description_changed ("Parsing plots ...");
+ parser.parse (Path.build_filename (cache_dir, "plot.list.gz"), plot_parser);
}
} catch (Error e2) {
if (e2 is IOError.CANCELLED)
return null;
}
- private void on_progress_changed (int percent) {
- progress (percent_finished + percent / 5);
+ // Handler for the downloader's progress signal: prints the raw counters and
+ // re-emits the download state through this object's own progress signal.
+ private void on_progress (int dltotal, int dlnow) {
+ stdout.printf ("%d / %d\r", dlnow, dltotal);
+ // The former expression 99*dlnow/dltotal/100 was integer arithmetic that
+ // always produced 0, and 99*dlnow overflowed a 32-bit int for larger files.
+ // NOTE(review): a 0..99 scale is assumed from the factor 99 - confirm.
+ if (dltotal > 0)
+ progress ((int) ((int64) 99 * dlnow / dltotal));
}
private void timeout_quit () {