IMDb line parser: add parser for actors and actresses lists
author Philipp Zabel <philipp.zabel@gmail.com>
Fri, 13 Aug 2010 20:28:56 +0000 (22:28 +0200)
committer Philipp Zabel <philipp.zabel@gmail.com>
Fri, 13 Aug 2010 20:31:16 +0000 (22:31 +0200)
src/imdb/imdb-line-parser.vala

index 7963fda..43b6d96 100644 (file)
@@ -312,3 +312,136 @@ class PlotLineParser : LineParser {
                }
        }
 }
+
+/**
+ * Line parser for the IMDb person lists.
+ *
+ * The headers of the actors, actresses, directors and writers lists are
+ * recognised, but only actor and actress credits are passed on to the
+ * database. Past the list header, a person entry starts with a line of the
+ * form "Name<TAB>Title  attributes" and continues with lines that begin with
+ * three tabs and carry one further title each. An empty line ends the entry.
+ *
+ * Input lines are treated as Latin-1 and converted to UTF-8 before they are
+ * stored.
+ */
+class PersonParser : LineParser {
+       /** Position of the parser within the list file. */
+       enum PersonState {
+               HEADER, // still looking for the list title and column header
+               NONE,   // between person entries
+               PERSON  // inside a person entry, expecting continuation lines
+       }
+       /** Kind of list being parsed, derived from the list title line. */
+       enum PersonType {
+               NONE,
+               ACTOR,
+               ACTRESS,
+               DIRECTOR,
+               WRITER
+       }
+       PersonState state;
+       PersonType type;
+       // UTF-8 name of the current person; null between entries or when the
+       // name of the current entry could not be converted
+       string name;
+
+       /**
+        * Creates a parser that stores its results through //_sqlite//.
+        *
+        * @param _sqlite database wrapper handed to the base line parser
+        */
+       public PersonParser (IMDbSqlite _sqlite) {
+               base (_sqlite);
+               reset ();
+       }
+
+       /**
+        * Returns the parser to its initial state, waiting for a list header.
+        */
+       public void reset () {
+               state = PersonState.HEADER;
+               type = PersonType.NONE;
+               name = null;
+       }
+
+       /**
+        * Consumes one line of a person list.
+        *
+        * The line is only read, never modified.
+        *
+        * @param line a single input line without its line terminator
+        */
+       public override void parse_line (string line) {
+               if (state == PersonState.HEADER) {
+                       if (line == "THE ACTORS LIST")
+                               type = PersonType.ACTOR;
+                       else if (line == "THE ACTRESSES LIST")
+                               type = PersonType.ACTRESS;
+                       else if (line == "THE DIRECTORS LIST")
+                               type = PersonType.DIRECTOR;
+                       else if (line == "THE WRITERS LIST")
+                               type = PersonType.WRITER;
+                       else if (line == "----\t\t\t------" && type != PersonType.NONE)
+                               // Column header seen after a known list title: entries follow
+                               state = PersonState.NONE;
+
+                       return;
+               }
+
+               // An empty line ends the current person entry
+               if (line == "") {
+                       state = PersonState.NONE;
+                       name = null;
+
+                       return;
+               }
+
+               if (state == PersonState.NONE) {
+                       parse_person_line (line);
+
+                       return;
+               }
+
+               if (state == PersonState.PERSON) {
+                       if (!line.has_prefix ("\t\t\t")) {
+                               stderr.printf ("\t???: %s\n", line);
+
+                               return;
+                       }
+
+                       // The name of this entry could not be converted: skip its titles
+                       if (name == null)
+                               return;
+
+                       // Continuation titles need the same conversion as the first title
+                       string title = latin1_to_utf8 (line.offset (3), "title");
+                       if (title != null)
+                               parse_title (title);
+               }
+       }
+
+       /**
+        * Handles the first line of a person entry: "Name<TAB>...<TAB>Title".
+        *
+        * @param line a non-empty line encountered between person entries
+        */
+       private void parse_person_line (string line) {
+               if (line.has_prefix ("\t")) {
+                       stderr.printf ("Invalid person entry: %s\n", line);
+                       // error () aborts the program, nothing after it is executed
+                       error ("EXIT\n");
+               }
+
+               // The first title follows the last tab on the line
+               unowned string raw_title = line.rstr ("\t");
+               if (raw_title == null)
+                       return;
+               raw_title = raw_title.offset (1);
+
+               // From here on the following lines belong to this entry, even if
+               // the conversions below fail; otherwise its continuation lines
+               // would be reported as invalid person entries.
+               state = PersonState.PERSON;
+
+               // The name is everything up to the first tab
+               string[] fields = line.split ("\t", 2);
+               name = latin1_to_utf8 (fields[0], "name");
+               if (name == null)
+                       return;
+
+               sqlite.add_person (name);
+
+               string title = latin1_to_utf8 (raw_title, "title");
+               if (title != null)
+                       parse_title (title);
+       }
+
+       /**
+        * Converts //raw// from Latin-1 to UTF-8.
+        *
+        * @param raw text in Latin-1 encoding
+        * @param what short description of the text, used in the error message
+        * @return the converted text, or null if the conversion failed
+        */
+       private string? latin1_to_utf8 (string raw, string what) {
+               try {
+                       return convert (raw, -1, "utf-8", "latin1");
+               } catch (ConvertError e) {
+                       stderr.printf ("Error converting %s to UTF-8\n", what);
+
+                       return null;
+               }
+       }
+
+       /**
+        * Parses one credit of the current person and stores it.
+        *
+        * The credit consists of fields separated by two spaces. The first
+        * field is the title; of the remaining fields, "[...]" is taken as the
+        * character name, "<...>" as a number and "(...)" as additional info.
+        * Only credits from the actors and actresses lists are stored.
+        *
+        * @param title the credit text in UTF-8, without leading tabs
+        */
+       private void parse_title (string title) {
+               // Skip series episodes
+               if (title[0] == '"')
+                       return;
+
+               // Work on copies of the fields instead of cutting the caller's
+               // string apart in place
+               string[] fields = title.split ("  ");
+
+               // NOTE(review): a credit without any attribute field is dropped,
+               // as it always was - confirm that this is intended.
+               if (fields.length < 2)
+                       return;
+
+               unowned string movie = fields[0];
+               if (skip_title (movie))
+                       return;
+
+               if (type != PersonType.ACTOR && type != PersonType.ACTRESS)
+                       return;
+
+               string character = null;
+               int number = 0;
+               string info = null;
+               for (int i = 1; i < fields.length; i++) {
+                       unowned string field = fields[i];
+
+                       if (field.has_prefix ("["))
+                               // Strip the enclosing brackets
+                               character = field.substring (1, field.length - 2);
+                       else if (field.has_prefix ("<"))
+                               number = field.offset (1).to_int ();
+                       else if (field.has_prefix ("("))
+                               // If several such fields are present, the last one wins
+                               info = field;
+               }
+
+               sqlite.add_actor (name, movie, info, character, number);
+       }
+}
+