#include <libxml/HTMLparser.h>
#include <libxml/tree.h>
+#include <rest/rest/rest-proxy.h>
+#include <glib.h>
#include <stdio.h>
+#include <unistd.h>
xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen)
return getSiblingByName(node->children, name, nameLen);
}
+/*
+ * Walk `sibling` and every node reachable from it through ->next, and
+ * return the first node whose attribute `attr` equals `attrValue` over the
+ * first `attrValueLen` bytes.
+ *
+ * The comparison stops at `attrValueLen` bytes or at a terminating NUL,
+ * whichever comes first: a length that covers the NUL of `attrValue`
+ * demands an exact match, a shorter length gives a prefix match.
+ *
+ * Returns NULL when `sibling` is NULL or when no node matches. The returned
+ * node still belongs to its document; the caller must not free it.
+ */
+xmlNodePtr getFirstSiblingByAttributeValue(
+    xmlNodePtr sibling, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
+{
+    xmlNodePtr tempNode = sibling;
+
+    while (tempNode != NULL) {
+        /* xmlGetProp hands back an allocated copy that we must release. */
+        xmlChar* value = xmlGetProp(tempNode, attr);
+        if (value != NULL) {
+            /* xmlStrncmp works on xmlChar* directly, so no <string.h> or casts. */
+            int matched = (xmlStrncmp(value, attrValue, attrValueLen) == 0);
+            xmlFree(value);
+            if (matched)
+                return tempNode;
+        }
+        tempNode = tempNode->next;
+    }
+
+    return NULL;
+}
+
+
+/*
+ * Search the direct children of `node` for the first one whose attribute
+ * `attr` matches `attrValue` over `attrValueLen` bytes; the matching itself
+ * is delegated to getFirstSiblingByAttributeValue on node->children.
+ *
+ * Returns NULL when `node` is NULL, has no children, or no child matches.
+ */
+xmlNodePtr getFirstChildByAttributeValue(
+    xmlNodePtr node, xmlChar* attr, xmlChar * attrValue, int attrValueLen)
+{
+    /*
+     * Lookups get chained, so the NULL result of a failed search can arrive
+     * here as `node`; report "not found" instead of dereferencing it.
+     */
+    if (node == NULL)
+        return NULL;
+
+    return getFirstSiblingByAttributeValue(node->children, attr, attrValue, attrValueLen);
+}
+
+
xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings)
{
xmlNodePtr r = node;
exit(-1);
}
- htmlDocPtr doc = htmlReadFile(argv[1], "UTF-8", 0);
+ RestProxy *proxy;
+ RestProxyCall *call;
+ const gchar *payload;
+ const char *city = argv[1];
+ gssize len;
+
+ g_thread_init(NULL);
+ g_type_init();
+
+ proxy = rest_proxy_new(
+ "http://www.google.com/movies",
+ FALSE);
+ call = rest_proxy_new_call(proxy);
+
+ rest_proxy_call_add_params(call,
+ "near", city,
+ NULL);
+ rest_proxy_call_run(call, NULL, NULL);
+
+ payload = rest_proxy_call_get_payload(call);
+ len = rest_proxy_call_get_payload_length(call);
+
+ //write(1, payload, len);
+ //printf("\n\n");
+
+ htmlDocPtr doc = htmlReadMemory(payload, len, "http://movies.google.com", "UTF-8", HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
xmlNodePtr root = xmlDocGetRootElement(doc);//html
//get the body node
- xmlNodePtr rootChild = getSiblingByName(root->children, "body", 4);
-
+ xmlNodePtr body = getSiblingByName(root->children, "body", 4);
+
+ xmlNodePtr tempNode = getFirstChildByAttributeValue(body, "id", "results", 8); //the data is a div with id = results
+
+ if (tempNode == NULL) {
+ printf("results div not found.\n");
+ exit(-1);
+ }
+
+ tempNode = getFirstChildByAttributeValue(tempNode, "id", "movie_results", 14);
+
+ if (tempNode == NULL) {
+ printf("movie_results div not found.\n");
+ exit(-1);
+ }
+
+ tempNode = getFirstChildByAttributeValue(tempNode, "class", "movie_results", 14);
+
+ if (tempNode == NULL) {
+ printf("movie_results class not found.\n");
+ exit(-1);
+ }
+
+ //look for theaters
+ xmlNodePtr nav = tempNode->children;
+ while(nav != NULL) {
+ tempNode = getFirstSiblingByAttributeValue(nav, "class", "theater", 7);
+ tempNode = getFirstChildByAttributeValue(tempNode, "class", "desc", 4);
+ if (tempNode != NULL) {
+ tempNode = getFirstChildByAttributeValue(tempNode, "class", "name", 4);
+ printf("Info = %s\n", xmlNodeGetContent(tempNode->children->children));
+ }
+ nav = nav->next;
+ }
+
+ exit(0);
+
//get the form node inside body, the data is in the next node (a table)
- xmlNodePtr dataTable = getChildByName(rootChild, "form", 4)->next;
+ xmlNodePtr dataTable = getChildByName(body, "form", 4)->next;
+/*
//tbody
xmlNodePtr elem = dataTable;
- xmlNodePtr nav = dataTable->children;
+ //xmlNodePtr nav = dataTable->children;
int i = 0;
while(nav != NULL) {
elem = nav;
}
nav = nav->next;
}
+*/
}