1 /* Queries Google movies for the theaters of a city and parses
2 * the response using libxml2.
5 #include <libxml/HTMLparser.h>
6 #include <libxml/tree.h>
7 #include <rest/rest-proxy.h>
// Record describing one theater. Besides 'data' below, code in this file also
// uses 'name' and 'address' members (see getTheaterList and showTheater); their
// declarations are outside this excerpt.
// NOTE(review): presumably this is the struct behind the Theater typedef - confirm.
13 typedef struct struct_theater
17 xmlNodePtr data; //points to the tree node with the theater info, including movies
// Record for one show time. getShowtimes stores a string of at most 5 bytes in
// its 'time' member; the member declarations are outside this excerpt.
// NOTE(review): presumably this is the struct behind the ShowTime typedef - confirm.
21 typedef struct struct_showtime
// Record for one movie. Code in this file uses 'title', 'info' and 'showTimes'
// members (see getMovieList and showMovie); their declarations are outside this
// excerpt.
// NOTE(review): presumably this is the struct behind the Movie typedef - confirm.
27 typedef struct struct_movie
36 /** Search for a sibling node by the name of the sibling node */
// node:    node whose following siblings are searched; it is dereferenced
//          unconditionally, so it must not be NULL.
// name:    node name to look for.
// nameLen: number of characters handed to strncmp; a sibling whose name only
//          starts with 'name' also matches when nameLen is the bare length.
// NOTE(review): the return statement is outside this excerpt; presumably the
// matching sibling is returned, or NULL once the chain is exhausted - confirm.
37 xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen)
// The scan starts at node->next, so 'node' itself is never compared.
40 xmlNodePtr sibling = node->next;
// Advance until the chain ends or the first nameLen characters match.
41 while((sibling != NULL) && (strncmp(sibling->name, name, nameLen) != 0)) {
42 sibling = sibling->next;
49 /** Search a child node by its node name */
50 xmlNodePtr getChildByName(xmlNodePtr node, xmlChar* name, int nameLen)
52 return getSiblingByName(node->children, name, nameLen);
55 /** Search the first sibling node that has an attribute 'attr'
56 * with the value 'attrValue' */
// sibling:      first node to test; NULL is tolerated (the loop is simply skipped).
// attr:         attribute name passed to xmlGetProp.
// attrValue:    value to compare against.
// attrValueLen: number of characters handed to strncmp; pass the length plus one
//               to include the terminator and require an exact match.
// NOTE(review): the assignment to 'result' and the return statement are outside
// this excerpt; presumably the matching node (or NULL) is returned - confirm.
57 xmlNodePtr getFirstSiblingByAttributeValue(
58 xmlNodePtr sibling, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
// Unlike getSiblingByName, the scan begins with 'sibling' itself.
60 xmlNodePtr tempNode = sibling;
61 xmlNodePtr result = NULL;
// Stop at the end of the chain or as soon as a match has been recorded.
63 while ((tempNode != NULL) && (result == NULL)) {
// NOTE(review): libxml2 documents xmlGetProp as returning NULL when the attribute
// is absent and otherwise a string the caller releases with xmlFree(); neither a
// NULL check nor the release is visible in this excerpt - confirm.
64 xmlChar* value = xmlGetProp(tempNode, attr);
66 if (strncmp(value, attrValue, attrValueLen) == 0) {
71 tempNode = tempNode->next;
77 /** Search the first child node that has an attribute 'attr' with
78 * value 'attrValue' */
79 xmlNodePtr getFirstChildByAttributeValue(
80 xmlNodePtr node, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
82 return getFirstSiblingByAttributeValue(node->children, attr, attrValue, attrValueLen);
85 /** Advances N sibling nodes in the node list */
// node:     starting node.
// siblings: upper bound of the loop counter below.
// NOTE(review): the counter's declaration, the loop body and the return
// statement are outside this excerpt; what happens when the chain is shorter
// than 'siblings' cannot be told from here.
86 xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings)
// Runs while the counter 'i' stays below 'siblings'.
91 for(; i<siblings; i++) {
// NOTE(review): presumably returns the number of direct children of 'node'
// (suggested by the name and the int return type); the counting code is outside
// this excerpt - confirm.
// node is dereferenced unconditionally, so it must not be NULL.
99 int childrenCount(xmlNodePtr node)
// Cursor positioned on the first child.
102 xmlNodePtr nav = node->children;
112 /** Search the <div> with the results and returns it, or NULL
113 * if it couldn't be found */
// root: root element of the parsed page (main passes the result of
//       xmlDocGetRootElement).
// The lookup descends body -> id="results" -> id="movie_results" ->
// class="movie_results", reusing tempNode for each level.
// The length arguments 8 and 14 are one more than the literal lengths, so the
// string terminator takes part in the strncmp comparison and only exact
// attribute values match.
// NOTE(review): the statements inside the NULL branches and the final return are
// outside this excerpt.
114 xmlNodePtr getMovieResultsDiv(xmlNodePtr root)
// getSiblingByName starts at the node after the one it is given, so the first
// child of root is not a candidate here.
117 xmlNodePtr body = getSiblingByName(root->children, "body", 4);
120 xmlNodePtr tempNode = getFirstChildByAttributeValue(body, "id", "results", 8);
122 if (tempNode == NULL) {
127 //<div id="movie_results">
128 tempNode = getFirstChildByAttributeValue(tempNode, "id", "movie_results", 14);
130 if (tempNode == NULL) {
135 //<div class="movie_results">
136 tempNode = getFirstChildByAttributeValue(tempNode, "class", "movie_results", 14);
138 if (tempNode == NULL) {
148 /** Parses the results and returns a list with all the theaters.
149 * Theater info is parsed and returned as Theater 'objects', the movie
150 * info is not parsed */
// movieResults: node whose children are scanned for class="theater" entries; it
//               is dereferenced unconditionally, so it must not be NULL.
// The list holds malloc'ed Theater records; main releases it with
// deleteTheaterList.
// NOTE(review): the loop around the lookup, the code that advances 'nav' and the
// return statement are outside this excerpt.
151 GList* getTheaterList(xmlNodePtr movieResults)
153 //<div class="movie_results"><div class="theater"/><div class="theater"/>...
154 xmlNodePtr nav = movieResults->children;
// Only tmp3 receives the NULL initializer; tmp1 and tmp2 are assigned before
// their visible uses.
155 xmlNodePtr tmp1, tmp2, tmp3 = NULL;
157 GList* resultList = NULL;
// Length 7 is the bare length of "theater", so any class value that starts with
// it matches.
160 tmp1 = getFirstSiblingByAttributeValue(nav, "class", "theater", 7);
163 * <div class="theater">
165 * <div class="name"/><div class="info"/>
167 * <div class="showtimes"/>
171 if (tmp1 != NULL) { //its theater data
// NOTE(review): the allocation is not checked in the visible lines, and name and
// address are only assigned inside the conditionals below - confirm they are
// initialised when a lookup fails.
172 Theater* t = malloc(sizeof(Theater));
173 tmp2 = getFirstChildByAttributeValue(tmp1, "class", "desc", 4);
174 if (tmp2 != NULL) { //has desc
175 tmp3 = getFirstChildByAttributeValue(tmp2, "class", "name", 4);
176 if ((tmp3 != NULL) && (tmp3->children != NULL) && (tmp3->children->children != NULL)) { //<div class="name"><a><span/>
// libxml2 documents xmlNodeGetContent as returning a copy that the caller
// releases with xmlFree(); presumably deleteTheater does so - not visible here.
177 t->name = xmlNodeGetContent(tmp3->children->children);
179 tmp3 = getFirstChildByAttributeValue(tmp2, "class", "info", 4);
180 if (tmp3 != NULL) { //<div class="info">
181 t->address = xmlNodeGetContent(tmp3);
185 resultList = g_list_append(resultList, t);
194 GList* getShowtimes(gchar* times)
196 GList* resultList = NULL;
198 gchar** timesArray = g_strsplit(times, " ", -1);
201 for(i=0; timesArray[i] != NULL; i++) {
202 ShowTime* st = malloc(sizeof(ShowTime));
203 st->time = g_strndup(timesArray[i], 5);
204 resultList = g_list_append(resultList, st);
207 g_strfreev(timesArray);
// Builds a list of Movie records from the class="movie" entries found among the
// children of movieSideDiv (getTheaterMovies passes the show_left and
// show_right nodes). movieSideDiv is dereferenced unconditionally, so it must
// not be NULL.
// NOTE(review): the loop around the lookup, the code that advances 'nav', the
// guards before the info/times reads and the return statement are outside this
// excerpt.
213 GList* getMovieList(xmlNodePtr movieSideDiv)
216 xmlNodePtr nav = movieSideDiv->children;
// Only tmp3 receives the NULL initializer.
217 xmlNodePtr tmp1, tmp2, tmp3 = NULL;
219 GList* resultList = NULL;
// Length 5 is the bare length of "movie", so any class value that starts with
// it matches.
222 tmp1 = getFirstSiblingByAttributeValue(nav, "class", "movie", 5);
223 if (tmp1 != NULL) { //is a movie
// NOTE(review): the allocation is not checked in the visible lines, and title is
// only assigned inside the conditional below - confirm it is initialised
// otherwise.
224 Movie* m = malloc(sizeof(Movie));
225 tmp2 = getFirstChildByAttributeValue(tmp1, "class", "name", 4);
226 if ((tmp2 != NULL) && (tmp2->children != NULL) && (tmp2->children->children != NULL)) { //<div class="name"><a><span/>
227 m->title = xmlNodeGetContent(tmp2->children->children);
229 tmp2 = getFirstChildByAttributeValue(tmp1, "class", "info", 4);
231 m->info = xmlNodeGetContent(tmp2);
233 tmp2 = getFirstChildByAttributeValue(tmp1, "class", "times", 5);
// libxml2 documents xmlNodeGetContent as returning a copy that the caller
// releases with xmlFree(); NOTE(review): no release of showtimesStr is visible
// in this excerpt - confirm.
235 gchar* showtimesStr = xmlNodeGetContent(tmp2);
// The text of the "times" node is split on blanks into ShowTime records.
236 GList* showtimes = getShowtimes(showtimesStr);
238 m->showTimes = showtimes;
240 resultList = g_list_append(resultList, m);
// Returns the movies of one theater: the lists parsed from the show_left and
// show_right columns of the theater's class="showtimes" node, joined with
// g_list_concat. showTheater releases the result with deleteMovieList.
// t: theater whose 'data' tree node is searched.
// NOTE(review): the declarations of 'left' and 'right' (and their initial
// values) are outside this excerpt; presumably both start as NULL - confirm.
249 GList* getTheaterMovies(Theater* t)
253 * <div class="theater">
254 * <div class="desc"/>
255 * <div class="showtimes">
256 * <div class="show_left">
257 * <div class="movie/>
260 * <div class="show_right">
261 * <div class="movie"/>
// The lengths 9, 9 and 10 are the bare literal lengths, so these are prefix
// comparisons on the class attribute.
270 xmlNodePtr showtimesDiv = getFirstChildByAttributeValue(t->data, "class", "showtimes", 9);
271 if (showtimesDiv != NULL) {
273 xmlNodePtr showLeft = getFirstChildByAttributeValue(showtimesDiv, "class", "show_left", 9);
274 if (showLeft != NULL)
275 left = getMovieList(showLeft);
277 xmlNodePtr showRight = getFirstChildByAttributeValue(showtimesDiv, "class", "show_right", 10);
278 if (showRight != NULL)
279 right = getMovieList(showRight);
// Left-column movies come first, right-column movies after them.
281 return g_list_concat(left, right);
// Per-element callback used by deleteShowTimeList.
// NOTE(review): the body is outside this excerpt; presumably it releases the
// 'time' string and the record itself - confirm.
289 void deleteShowTime(ShowTime* st)
296 void deleteShowTimeList(GList * showTimeList)
298 g_list_foreach(showTimeList, (GFunc) deleteShowTime, NULL);
299 g_list_free(showTimeList);
// Releases one Movie; also used as the per-element callback in deleteMovieList.
// NOTE(review): only the show-time cleanup is visible here; how title, info and
// the record itself are released is outside this excerpt.
303 void deleteMovie(Movie* m)
// Runs deleteShowTime on every entry of the movie's list and frees the list cells.
307 deleteShowTimeList(m->showTimes);
312 void deleteMovieList(GList* movieList)
314 g_list_foreach(movieList, (GFunc) deleteMovie, NULL);
315 g_list_free(movieList);
// Per-element callback used by deleteTheaterList.
// NOTE(review): the body is outside this excerpt; presumably it releases the
// name and address strings and the record itself - confirm.
319 void deleteTheater(Theater* t)
326 void deleteTheaterList(GList* theaterList)
328 g_list_foreach(theaterList, (GFunc) deleteTheater, NULL);
329 g_list_free(theaterList);
333 void showTime(ShowTime* st, gpointer nothing)
335 printf("%s ", st->time);
// List callback (see showTheater) that prints one movie to stdout: title, info
// and its show times. 'nothing' is the user-data argument of the callback
// signature and is not used in the visible lines.
// NOTE(review): any output after the schedule is outside this excerpt.
338 void showMovie(Movie* m, gpointer nothing)
340 printf(" Title = %s\n", m->title);
341 printf(" Info = %s\n", m->info);
// No newline here: showTime prints every entry on the same line as the label.
342 printf(" Schedule = ");
343 g_list_foreach(m->showTimes, (GFunc) showTime, NULL);
// List callback (see main) that prints one theater to stdout, followed by its
// movies. 'nothing' is the user-data argument of the callback signature and is
// not used in the visible lines.
348 void showTheater(Theater * t, gpointer nothing)
350 printf("Name = %s\n", t->name);
351 printf("Info = %s\n", t->address);
// The movie list is built on demand from the theater's tree node and released
// again right after it has been printed.
352 GList* movieList = getTheaterMovies(t);
353 g_list_foreach(movieList, (GFunc) showMovie, NULL);
354 deleteMovieList(movieList);
// Entry point: fetches the Google movies page for the city given as argv[1],
// parses it as HTML and prints every theater with its movies.
// NOTE(review): the argument check, several declarations, the remaining
// arguments of the rest_proxy_* calls and the end of the function are outside
// this excerpt.
359 int main (int argc, char ** argv)
363 printf("usage: gmovies city_name\n");
369 const gchar *payload;
370 const char *city = argv[1];
376 proxy = rest_proxy_new(
377 "http://www.google.com/movies",
379 call = rest_proxy_new_call(proxy);
381 rest_proxy_call_add_params(call,
// The GError argument is NULL and the return value is not used, so a failed
// request is not detected on this line.
384 rest_proxy_call_run(call, NULL, NULL);
386 payload = rest_proxy_call_get_payload(call);
387 len = rest_proxy_call_get_payload_length(call);
// Parser errors and warnings are suppressed via the option flags.
// NOTE(review): no check of 'doc' and no release of the document are visible in
// this excerpt - confirm both exist.
389 htmlDocPtr doc = htmlReadMemory(payload, len, "http://movies.google.com", "UTF-8", HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
391 xmlNodePtr root = xmlDocGetRootElement(doc);//html
394 xmlNodePtr movieResults = getMovieResultsDiv(root);
396 if (movieResults == NULL) {
397 printf("NO RESULTS.\n");
// Theater records only reference nodes of the parsed tree for their movies;
// the movies themselves are parsed while printing (see showTheater).
401 GList* theaterList = getTheaterList(movieResults);
403 g_list_foreach(theaterList, (GFunc) showTheater, NULL);
405 deleteTheaterList(theaterList);
// Drop the references taken when the call and the proxy were created.
409 g_object_unref(call);
410 g_object_unref(proxy);