Now the example parses the file almost completely and extracts all theaters and all...
authorbaturix <gervarela@picandocodigo.com>
Mon, 9 Nov 2009 23:49:18 +0000 (23:49 +0000)
committerbaturix <gervarela@picandocodigo.com>
Mon, 9 Nov 2009 23:49:18 +0000 (23:49 +0000)
git-svn-id: file:///svnroot/maevies/trunk@9 a96798e0-47ce-444a-94a4-1d14e63744fc

examples/gmovies.c

index bfa491b..79b1e7c 100644 (file)
@@ -8,6 +8,68 @@
 #include <stdio.h>
 
 
+xmlNodePtr getSiblingByName(xmlNodePtr node, xmlChar* name, int nameLen)
+{
+       // Returns the first node AFTER node on the ->next chain whose name matches name; node itself is never tested.
+       xmlNodePtr sibling = node->next; // NOTE(review): node is dereferenced unchecked, so a NULL node crashes here
+       while((sibling != NULL) && (strncmp(sibling->name, name, nameLen) != 0)) { // only the first nameLen bytes are compared (prefix match)
+               sibling = sibling->next;
+       }
+       // sibling is NULL here when the chain ended without a match
+       return sibling;
+       
+} 
+
+xmlNodePtr getChildByName(xmlNodePtr node, xmlChar* name, int nameLen)
+{ // Looks for a child of node by name, delegating to getSiblingByName on node's first child.
+       return getSiblingByName(node->children, name, nameLen); // NOTE(review): getSiblingByName starts at the node after its argument, so the first child is never tested, and a childless node passes NULL on - confirm both are intended
+}
+
+xmlNodePtr jumpXSiblings(xmlNodePtr node, int siblings)
+{
+       xmlNodePtr r = node;
+       // Follow ->next `siblings` times and return where we land; siblings <= 0 returns node unchanged.
+       int i = 0;
+       for(; i<siblings; i++) {
+               r = r->next; // NOTE(review): no NULL check, crashes if the chain is shorter than `siblings`
+       }
+       
+       return r;
+}
+
+int isSeparatorTR(xmlNodePtr node)
+{ // Returns 1 when node is non-NULL and has exactly one child node, 0 otherwise.
+       return ((node != NULL) && (childrenCount(node) == 1)); // NOTE(review): childrenCount is defined further down and no prototype is visible before this call
+}
+
+int childrenCount(xmlNodePtr node)
+{
+       int i=0; // number of direct children seen so far
+       xmlNodePtr nav = node->children; // node is dereferenced unchecked; callers must pass non-NULL
+       while(nav != NULL) {
+               i++;
+               nav = nav->next;
+       }
+       // Every entry on the children list is counted; node type is not inspected.
+       return i;
+}
+
+
+int startsTheatherData(xmlNodePtr node)
+{
+       // Tests whether node is a "tr" whose first child is a "td" with a colspan attribute beginning with "4".
+       if (strncmp(node->name, "tr", 2) == 0) { // compares 2 bytes only (prefix match); node is dereferenced unchecked
+               xmlNodePtr td = node->children;
+               if ((td != NULL) && (strncmp(td->name, "td", 2) == 0)) {
+                       xmlChar* value = xmlGetProp(td, "colspan"); // NOTE(review): libxml2 documents this result as caller-freed (xmlFree); it is not released here - confirm
+                       return ((value != NULL) && (strncmp(value, "4", 1)) == 0); // 1 when colspan starts with '4', otherwise 0
+               }
+       }
+       // NOTE(review): -1 is non-zero, so callers testing the result as a boolean see "not a tr / no leading td" as true
+       return -1;
+}
+
+
 int main (int argc, char ** argv)
 {
 
@@ -19,21 +81,41 @@ int main (int argc, char ** argv)
        htmlDocPtr doc = htmlReadFile(argv[1], "UTF-8", 0);
 
        xmlNodePtr root = xmlDocGetRootElement(doc);//html
+       
+       //get the body node
+       xmlNodePtr rootChild = getSiblingByName(root->children, "body", 4); // search starts at the node after root->children, so a body that is the first child would be missed
 
-       xmlNodePtr body = xmlLastElementChild(root);//body
-
-       xmlNodePtr elem = xmlFirstElementChild(body);
-       int i =0;
-       for(i=0; i<6; i++) {
-               elem = xmlNextElementSibling(elem);
-       }
+       //get the form node inside body, the data is in the next node (a table)
+       xmlNodePtr dataTable = getChildByName(rootChild, "form", 4)->next; // NOTE(review): dereferences the lookup result unchecked; crashes when no form is found
 
        //tbody
-       elem = xmlFirstElementChild(elem); //tr
-       elem = xmlFirstElementChild(elem); //td
-       elem = xmlFirstElementChild(elem); //a
-       elem = xmlFirstElementChild(elem); //b
+       xmlNodePtr elem = dataTable;
+       xmlNodePtr nav = dataTable->children; // nav walks the table's rows; elem is a scratch cursor inside the current row
+       int i = 0;
+       while(nav != NULL) {
+               elem = nav;
+               if (startsTheatherData(elem)) { // NOTE(review): also true for the -1 result (node is not a tr, or has no leading td)
+                       elem = elem->children; //td
+                       elem = elem->children; //a
+                       elem = elem->children; //b
 
-       printf("First theater = %s\n", xmlNodeGetContent(elem));
+                       printf("Theather %d = %s\n", i++, xmlNodeGetContent(elem)); // NOTE(review): libxml2 documents xmlNodeGetContent's result as caller-freed; it is not released here or below - confirm
+                       printf("-------------------------------------------------\n");
+                       
+                       xmlNodePtr n1 = nav->next; //in this tr there is 4 td with 2 film data
+                       while(!startsTheatherData(n1) && !isSeparatorTR(n1)) { // NOTE(review): n1 becomes NULL after the last row and startsTheatherData dereferences it
+                               elem = n1->children->next; //the first td is for rating
+                               printf("%s\n", xmlNodeGetContent(elem->children->children));
+                               if (childrenCount(n1->children) > 2) { // NOTE(review): this counts the children of the first td (n1->children), not the tds of the row - confirm
+                                       elem = elem->next->next; //skip one td (presumably the second film's rating) to reach the second title td
+                                       printf("%s\n", xmlNodeGetContent(elem->children->children));
+                               }
+                               n1 = n1->next;
+                       }
+                       
+                       printf("\n\n");
+               }
+               nav = nav->next; // rows already printed by the inner loop are visited again here and skipped unless they start a theater
+       }
 
 }