Complete functional gmovies example. Supports the new Google Movies format and it...
author baturix <gervarela@picandocodigo.com>
Sat, 14 Nov 2009 23:24:33 +0000 (23:24 +0000)
committer baturix <gervarela@picandocodigo.com>
Sat, 14 Nov 2009 23:24:33 +0000 (23:24 +0000)
git-svn-id: file:///svnroot/maevies/trunk@11 a96798e0-47ce-444a-94a4-1d14e63744fc

examples/gmovies.c

index 6730049..5771391 100644 (file)
@@ -1,6 +1,5 @@
-/* Parses a Google movies web (previously downloaded on a file)
- * using libxml2. Examples of Google movies web files can be
- * found in the gmovies_data directory.
+/* Queries Google movies for the theaters of a city and parses
+ * the response using libxml2. 
  */ 
 
 #include <libxml/HTMLparser.h>
 #include <unistd.h>
 
 
+/** A theater found in the results page. */
+typedef struct struct_theater
+{
+       /* content read from the theater's <div class="name"> */
+       gchar *name;
+       /* content of the theater's <div class="info"> */
+       gchar *address;
+       /* points to the tree node with the theater info, including movies;
+        * deleteTheater() does not free it */
+       xmlNodePtr data;
+} Theater;
+
+
+/** A single show time of a movie. */
+typedef struct struct_showtime
+{
+       /* text of one show time; getShowtimes() stores at most the
+        * first 5 bytes of each token here */
+       gchar *time;
+} ShowTime;
+
+
+/** A movie shown in a theater, together with its show times. */
+typedef struct struct_movie
+{
+       /* content read from the movie's <div class="name"> */
+       gchar *title;
+       /* NOTE(review): no code visible here assigns this field */
+       int rating;
+       /* content of the movie's <div class="info"> */
+       gchar *info;
+       /* list of ShowTime* built by getShowtimes() */
+       GList *showTimes;
+} Movie;
+       
+
+/** Search for a sibling node by the name of the sibling node */
 xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen)
 {
        
@@ -23,34 +46,43 @@ xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen)
        
 } 
 
+/**
+ * Search a child node by its node name.
+ *
+ * Thin wrapper: forwards the first child of 'node' together with 'name'
+ * and 'nameLen' to getSiblingByName() and returns its result unchanged.
+ *
+ * NOTE(review): 'node' is dereferenced without a check, so callers must
+ * not pass NULL.
+ */
 xmlNodePtr getChildByName(xmlNodePtr node, xmlChar* name, int nameLen)
 {
        return getSiblingByName(node->children, name, nameLen);
 }
 
+/**
+ * Search the first sibling node that has an attribute 'attr'
+ * with the value 'attrValue'.
+ *
+ * The search starts at 'sibling' itself and follows the 'next' links.
+ * Only the first 'attrValueLen' bytes are compared (strncmp): pass
+ * strlen(attrValue) + 1 to require an exact match, or strlen(attrValue)
+ * to accept any value that starts with 'attrValue'.
+ *
+ * Returns the matching node (it still belongs to the document tree), or
+ * NULL when 'sibling' is NULL or no node matches.
+ */
 xmlNodePtr getFirstSiblingByAttributeValue(
        xmlNodePtr sibling, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
 {
        xmlNodePtr tempNode = sibling;
+       xmlNodePtr result = NULL;
        
-       while(tempNode != NULL) {
+       while ((tempNode != NULL) && (result == NULL)) {
                xmlChar* value = xmlGetProp(tempNode, attr);
-               if ((value != NULL) && (strncmp(value, attrValue, attrValueLen)) == 0)
-                       return tempNode;
+               if (value != NULL) {
+                       if (strncmp(value, attrValue, attrValueLen) == 0) {
+                               result = tempNode;
+                       }
+                       //xmlGetProp() hands back a copy owned by the caller; libxml2
+                       //memory has to be released with xmlFree(), not free()
+                       xmlFree(value);
+               }
                tempNode = tempNode->next;
        }
        
-       return NULL;
+       return result;
 }
 
-
+/**
+ * Search the first child node that has an attribute 'attr' with
+ * value 'attrValue'.
+ *
+ * Forwards the first child of 'node' and the remaining arguments to
+ * getFirstSiblingByAttributeValue() and returns its result, so the
+ * comparison rules for 'attrValueLen' are the ones of that function.
+ *
+ * NOTE(review): 'node' is dereferenced without a check, so callers must
+ * not pass NULL.
+ */
 xmlNodePtr getFirstChildByAttributeValue(
        xmlNodePtr node, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
 {
        return getFirstSiblingByAttributeValue(node->children, attr, attrValue, attrValueLen);
 }
 
-
+/** Advances N sibling nodes in the node list */
 xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings)
 {
        xmlNodePtr r = node;
@@ -63,10 +95,6 @@ xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings)
        return r;
 }
 
-int isSeparatorTR(xmlNodePtr node)
-{
-       return ((node != NULL) && (childrenCount(node) == 1));
-}
 
 int childrenCount(xmlNodePtr node)
 {
@@ -81,18 +109,250 @@ int childrenCount(xmlNodePtr node)
 }
 
 
-int startsTheatherData(xmlNodePtr node)
+/**
+ * Search the <div> with the results and returns it, or NULL
+ * if it couldn't be found.
+ *
+ * The node is looked up step by step below 'root':
+ *   <body>, <div id="results">, <div id="movie_results">,
+ *   <div class="movie_results">
+ * The lengths passed with "results" and "movie_results" include the
+ * terminating NUL, so those attribute values must match exactly.
+ *
+ * 'root' may be NULL; a missing <body> is reported as "not found" as
+ * well instead of being handed to a helper that would dereference it.
+ * The returned node is part of the document tree and must not be freed
+ * on its own.
+ */
+xmlNodePtr getMovieResultsDiv(xmlNodePtr root)
+{
+       if (root == NULL) {
+               //nothing to search in
+               return NULL;
+       }
+       
+       //<body>
+       xmlNodePtr tempNode = getSiblingByName(root->children, "body", 4);
+       
+       //<div id="results">
+       if (tempNode != NULL) {
+               tempNode = getFirstChildByAttributeValue(tempNode, "id", "results", 8);
+       }
+       
+       //<div id="movie_results">
+       if (tempNode != NULL) {
+               tempNode = getFirstChildByAttributeValue(tempNode, "id", "movie_results", 14);
+       }
+       
+       //<div class="movie_results">
+       if (tempNode != NULL) {
+               tempNode = getFirstChildByAttributeValue(tempNode, "class", "movie_results", 14);
+       }
+       
+       //NULL at this point means one of the steps found nothing: no results
+       return tempNode;
+       
+}
+
+
+/**
+ * Parses the results and returns a list with all the theaters.
+ * Theater info is parsed and returned as Theater 'objects', the movie
+ * info is not parsed (getTheaterMovies() does that per theater).
+ *
+ * 'movieResults' is the <div class="movie_results"> node; NULL yields an
+ * empty list. Every Theater is zero-initialised, so 'name' and 'address'
+ * are NULL when the corresponding <div> is missing. Theater.data points
+ * into the document tree, which therefore has to outlive the list.
+ *
+ * Returns a GList of Theater* (NULL when empty); release it with
+ * deleteTheaterList().
+ */
+GList* getTheaterList(xmlNodePtr movieResults)
 {
+       //<div class="movie_results"><div class="theater"/><div class="theater"/>...
+       xmlNodePtr nav = (movieResults != NULL) ? movieResults->children : NULL;
+       xmlNodePtr tmp1 = NULL, tmp2 = NULL, tmp3 = NULL;
        
-       if (strncmp(node->name, "tr", 2) == 0) {
-               xmlNodePtr td = node->children;
-               if ((td != NULL) && (strncmp(td->name, "td", 2) == 0)) {
-                       xmlChar* value = xmlGetProp(td, "colspan");
-                       return ((value != NULL) && (strncmp(value, "4", 1)) == 0);
+       GList* resultList = NULL;
+       
+       while(nav != NULL) {
+               tmp1 = getFirstSiblingByAttributeValue(nav, "class", "theater", 7);
+               
+               /*
+                *  <div class="theater">
+                *              <div class="desc">
+                *                      <div class="name"/><div class="info"/>
+                *              </div>
+                *              <div class="showtimes"/>
+                * </div> 
+                */
+               
+               if (tmp1 != NULL) { //it's theater data
+                       //calloc: name/address stay NULL when their <div> is missing,
+                       //so printing and freeing them later is well defined
+                       Theater* t = calloc(1, sizeof(Theater));
+                       if (t == NULL) {
+                               break; //out of memory: return what was collected so far
+                       }
+                       tmp2 = getFirstChildByAttributeValue(tmp1, "class", "desc", 4);
+                       if (tmp2 != NULL) { //has desc
+                               tmp3 = getFirstChildByAttributeValue(tmp2, "class", "name", 4);
+                               if ((tmp3 != NULL) && (tmp3->children != NULL) && (tmp3->children->children != NULL)) { //<div class="name"><a><span/>
+                                       t->name = (gchar *) xmlNodeGetContent(tmp3->children->children);
+                               }
+                               tmp3 = getFirstChildByAttributeValue(tmp2, "class", "info", 4);
+                               if (tmp3 != NULL) { //<div class="info">
+                                       t->address = (gchar *) xmlNodeGetContent(tmp3);
+                               }
+                       }
+                       t->data = tmp1;
+                       resultList = g_list_append(resultList, t);
                }
+               //the search above scans forward from 'nav', so continue right after
+               //the theater it found (advancing one node at a time would report a
+               //theater once per node in front of it); stop when none is left
+               nav = (tmp1 != NULL) ? tmp1->next : NULL;
+       }
+       
+       return resultList;
+       
+}
+
+/**
+ * Splits 'times' on single spaces and returns one ShowTime per
+ * non-empty token.
+ *
+ * Each ShowTime stores a copy of at most the first 5 bytes of its token.
+ * Empty tokens (produced by consecutive, leading or trailing spaces) are
+ * skipped. 'times' may be NULL, which yields an empty list.
+ *
+ * Returns a GList of ShowTime* (NULL when empty); release it with
+ * deleteShowTimeList().
+ */
+GList* getShowtimes(gchar* times)
+{
+       GList* resultList = NULL;
+       
+       if (times == NULL) {
+               //g_strsplit() would return NULL, which must not be indexed
+               return NULL;
+       }
+       
+       gchar** timesArray = g_strsplit(times, " ", -1);
+       
+       int i = 0;
+       for(i=0; timesArray[i] != NULL; i++) {
+               if (timesArray[i][0] == '\0') {
+                       continue; //empty token between two delimiters
+               }
+               ShowTime* st = malloc(sizeof(ShowTime));
+               if (st == NULL) {
+                       break; //out of memory: return what was collected so far
+               }
+               st->time = g_strndup(timesArray[i], 5);
+               resultList = g_list_append(resultList, st);
        }
        
-       return -1;
+       g_strfreev(timesArray);
+       
+       return resultList;
+}
+
+
+/**
+ * Parses the <div class="movie"> children of 'movieSideDiv' (one column
+ * of a theater's show times) into Movie 'objects'.
+ *
+ * For every movie:
+ *   - 'title' is the content of the node two levels below <div class="name">
+ *   - 'info' is the content of <div class="info">
+ *   - 'showTimes' is built by getShowtimes() from <div class="times">
+ * Every Movie is zero-initialised, so a field whose <div> is missing is
+ * NULL (and 'rating' is 0). 'movieSideDiv' may be NULL.
+ *
+ * Returns a GList of Movie* (NULL when empty); release it with
+ * deleteMovieList().
+ */
+GList* getMovieList(xmlNodePtr movieSideDiv)
+{
+       
+       xmlNodePtr nav = (movieSideDiv != NULL) ? movieSideDiv->children : NULL;
+       xmlNodePtr tmp1 = NULL, tmp2 = NULL;
+       
+       GList* resultList = NULL;
+       
+       while(nav != NULL) {
+               tmp1 = getFirstSiblingByAttributeValue(nav, "class", "movie", 5);
+               if (tmp1 == NULL) {
+                       break; //no movie left from 'nav' onwards
+               }
+               
+               //calloc: fields without a matching <div> stay NULL/0, so
+               //deleteMovie() never frees an indeterminate pointer
+               Movie* m = calloc(1, sizeof(Movie));
+               if (m == NULL) {
+                       break; //out of memory: return what was collected so far
+               }
+               
+               tmp2 = getFirstChildByAttributeValue(tmp1, "class", "name", 4);
+               if ((tmp2 != NULL) && (tmp2->children != NULL) && (tmp2->children->children != NULL)) { //<div class="name"><a><span/>
+                       m->title = (gchar *) xmlNodeGetContent(tmp2->children->children);
+               }
+               tmp2 = getFirstChildByAttributeValue(tmp1, "class", "info", 4);
+               if (tmp2 != NULL) {
+                       m->info = (gchar *) xmlNodeGetContent(tmp2);
+               }
+               tmp2 = getFirstChildByAttributeValue(tmp1, "class", "times", 5);
+               if (tmp2 != NULL) {
+                       xmlChar* showtimesStr = xmlNodeGetContent(tmp2);
+                       if (showtimesStr != NULL) {
+                               m->showTimes = getShowtimes((gchar *) showtimesStr);
+                               //libxml2 memory is released with xmlFree()
+                               xmlFree(showtimesStr);
+                       }
+               }
+               resultList = g_list_append(resultList, m);
+               
+               //the search scans forward from 'nav': continue right after the
+               //movie it found so that no movie is reported twice
+               nav = tmp1->next;
+       }
+                               
+       return resultList;
+}
+
+/**
+ * Parses the movies of theater 't' from the tree node stored in t->data.
+ *
+ * The movies of the "show_left" column come first in the result,
+ * followed by those of the "show_right" column.
+ *
+ * Returns a GList of Movie* that the caller releases with
+ * deleteMovieList(), or NULL when 't' is NULL, has no tree node, or has
+ * no <div class="showtimes">.
+ */
+GList* getTheaterMovies(Theater* t)
+{
+       
+       /*
+        * <div class="theater">
+        *              <div class="desc"/>
+        *              <div class="showtimes">
+        *                      <div class="show_left">
+        *                              <div class="movie"/>
+        *                              ...
+        *                      </div>
+        *                      <div class="show_right">
+        *                              <div class="movie"/>
+        *                              ...
+        *                      </div>
+        *              </div>
+        * </div>
+        */
+       
+       GList* left = NULL;
+       GList* right = NULL;
+       xmlNodePtr showtimesDiv = NULL;
+       xmlNodePtr showLeft = NULL;
+       xmlNodePtr showRight = NULL;
+       
+       if ((t == NULL) || (t->data == NULL)) {
+               return NULL;
+       }
+       
+       showtimesDiv = getFirstChildByAttributeValue(t->data, "class", "showtimes", 9);
+       if (showtimesDiv == NULL) {
+               //no show times: return an empty list explicitly; every path of
+               //a non-void function has to return a value the caller can use
+               return NULL;
+       }
+       
+       showLeft = getFirstChildByAttributeValue(showtimesDiv, "class", "show_left", 9);
+       if (showLeft != NULL) {
+               left = getMovieList(showLeft);
+       }
+       
+       showRight = getFirstChildByAttributeValue(showtimesDiv, "class", "show_right", 10);
+       if (showRight != NULL) {
+               right = getMovieList(showRight);
+       }
+       
+       return g_list_concat(left, right);
+}
+
+
+/**
+ * Releases a ShowTime created by getShowtimes() together with its
+ * 'time' string. NULL is ignored.
+ */
+void deleteShowTime(ShowTime* st)
+{
+       if (st == NULL) {
+               return;
+       }
+       g_free(st->time); //allocated with g_strndup(), so it belongs to GLib
+       free(st);         //allocated with malloc()
+}
+
+
+/** Deletes every ShowTime stored in 'showTimeList', then the list itself. */
+void deleteShowTimeList(GList * showTimeList)
+{
+       GList* item;
+       
+       for (item = showTimeList; item != NULL; item = item->next) {
+               deleteShowTime(item->data);
+       }
+       g_list_free(showTimeList);
+}
+
+
+/**
+ * Releases a Movie created by getMovieList(): its title, its info, its
+ * list of show times and the Movie itself. NULL is ignored.
+ */
+void deleteMovie(Movie* m)
+{
+       if (m == NULL) {
+               return;
+       }
+       //title and info come from xmlNodeGetContent(): libxml2 memory is
+       //released with xmlFree(), not free()
+       if (m->title != NULL) {
+               xmlFree(m->title);
+       }
+       if (m->info != NULL) {
+               xmlFree(m->info);
+       }
+       deleteShowTimeList(m->showTimes);
+       free(m);
+}
+
+
+/** Deletes every Movie stored in 'movieList', then the list itself. */
+void deleteMovieList(GList* movieList)
+{
+       GList* item;
+       
+       for (item = movieList; item != NULL; item = item->next) {
+               deleteMovie(item->data);
+       }
+       g_list_free(movieList);
+}
+
+
+/**
+ * Releases a Theater created by getTheaterList(): its name, its address
+ * and the Theater itself. The tree node in 'data' is left alone because
+ * it belongs to the document. NULL is ignored.
+ */
+void deleteTheater(Theater* t)
+{
+       if (t == NULL) {
+               return;
+       }
+       //name and address come from xmlNodeGetContent(): libxml2 memory is
+       //released with xmlFree(), not free()
+       if (t->name != NULL) {
+               xmlFree(t->name);
+       }
+       if (t->address != NULL) {
+               xmlFree(t->address);
+       }
+       free(t);
+}
+
+/** Deletes every Theater stored in 'theaterList', then the list itself. */
+void deleteTheaterList(GList* theaterList)
+{
+       GList* item;
+       
+       for (item = theaterList; item != NULL; item = item->next) {
+               deleteTheater(item->data);
+       }
+       g_list_free(theaterList);
+}
+
+
+/**
+ * List callback: prints the time of 'st' followed by one space.
+ * 'nothing' is not used.
+ */
+void showTime(ShowTime* st, gpointer nothing)
+{
+       const gchar* when = st->time;
+       
+       (void) nothing;
+       printf("%s ", when);
+}
+
+/**
+ * List callback: prints title, info and schedule of movie 'm' to
+ * stdout. A title or info that is NULL is printed as "(unknown)",
+ * because passing NULL for "%s" is undefined. 'nothing' is not used.
+ */
+void showMovie(Movie* m, gpointer nothing)
+{
+       (void) nothing;
+       printf("        Title = %s\n", (m->title != NULL) ? m->title : "(unknown)");
+       printf("        Info = %s\n", (m->info != NULL) ? m->info : "(unknown)");
+       printf("        Schedule = ");
+       g_list_foreach(m->showTimes, (GFunc) showTime, NULL);
+       printf("\n");
+}
+
+
+/**
+ * List callback: prints name and address of theater 't', then parses
+ * its movies with getTheaterMovies(), prints each one with showMovie()
+ * and deletes the temporary movie list again. A name or address that is
+ * NULL is printed as "(unknown)", because passing NULL for "%s" is
+ * undefined. 'nothing' is not used.
+ */
+void showTheater(Theater * t, gpointer nothing)
+{
+       (void) nothing;
+       printf("Name = %s\n", (t->name != NULL) ? t->name : "(unknown)");
+       printf("Info = %s\n", (t->address != NULL) ? t->address : "(unknown)");
+       GList* movieList = getTheaterMovies(t);
+       g_list_foreach(movieList, (GFunc) showMovie, NULL);
+       deleteMovieList(movieList);
+       printf("\n");
 }
 
 
@@ -100,7 +360,7 @@ int main (int argc, char ** argv)
 {
 
        if (argc != 2) {
-               printf("usage: gmovies file.html\n");
+               printf("usage: gmovies city_name\n");
                exit(-1);
        }
 
@@ -125,85 +385,31 @@ int main (int argc, char ** argv)
 
        payload = rest_proxy_call_get_payload(call);
        len = rest_proxy_call_get_payload_length(call);
-       
-       //write(1, payload, len);
-       //printf("\n\n");
 
        htmlDocPtr doc = htmlReadMemory(payload, len, "http://movies.google.com", "UTF-8", HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
 
        xmlNodePtr root = xmlDocGetRootElement(doc);//html
        
        //get the body node
-       xmlNodePtr body = getSiblingByName(root->children, "body", 4);
-       
-       xmlNodePtr tempNode = getFirstChildByAttributeValue(body, "id", "results", 8); //the data is a div with id = results
-       
-       if (tempNode == NULL) {
-               printf("results div not found.\n");
+       xmlNodePtr movieResults = getMovieResultsDiv(root);     
+
+       if (movieResults == NULL) {
+               printf("NO RESULTS.\n");
                exit(-1);
        }
        
-       tempNode = getFirstChildByAttributeValue(tempNode, "id", "movie_results", 14);
+       GList* theaterList = getTheaterList(movieResults);
        
-       if (tempNode == NULL) {
-               printf("movie_results div not found.\n");
-               exit(-1);
-       }
+       g_list_foreach(theaterList, (GFunc) showTheater, NULL);
        
-       tempNode = getFirstChildByAttributeValue(tempNode, "class", "movie_results", 14);
-       
-       if (tempNode == NULL) {
-               printf("movie_results class not found.\n");
-               exit(-1);
-       }
+       deleteTheaterList(theaterList);
        
-       //look for theaters
-       xmlNodePtr nav = tempNode->children;
-       while(nav != NULL) {
-               tempNode = getFirstSiblingByAttributeValue(nav, "class", "theater", 7);
-               tempNode = getFirstChildByAttributeValue(tempNode, "class", "desc", 4);
-               if (tempNode != NULL) {                 
-                       tempNode = getFirstChildByAttributeValue(tempNode, "class", "name", 4);
-                       printf("Info = %s\n", xmlNodeGetContent(tempNode->children->children));
-               }
-               nav = nav->next;
-       }
+       xmlFreeDoc(doc);
+
+       g_object_unref(call);
+       g_object_unref(proxy);
        
        exit(0);
-       
-       //get the form node inside body, the data is in the next node (a table)
-       xmlNodePtr dataTable = getChildByName(body, "form", 4)->next;
-
-/*
-       //tbody
-       xmlNodePtr elem = dataTable;
-       //xmlNodePtr nav = dataTable->children;
-       int i = 0;
-       while(nav != NULL) {
-               elem = nav;
-               if (startsTheatherData(elem)) {
-                       elem = elem->children; //td
-                       elem = elem->children; //a
-                       elem = elem->children; //b
-
-                       printf("Theather %d = %s\n", i++, xmlNodeGetContent(elem));
-                       printf("-------------------------------------------------\n");
-                       
-                       xmlNodePtr n1 = nav->next; //in this tr there is 4 td with 2 film data
-                       while(!startsTheatherData(n1) && !isSeparatorTR(n1)) {
-                               elem = n1->children->next; //the first td is for rating
-                               printf("%s\n", xmlNodeGetContent(elem->children->children));
-                               if (childrenCount(n1->children) > 2) {
-                                       elem = elem->next->next; //the first td is for rating
-                                       printf("%s\n", xmlNodeGetContent(elem->children->children));
-                               }
-                               n1 = n1->next;
-                       }
-                       
-                       printf("\n\n");
-               }
-               nav = nav->next;
-       }
-*/
 
 }
+