3 * Copyright (c) 2007 Mikko Sysikaski <mikko.sysikaski@gmail.com>
4 * Toni Spets <toni.spets@gmail.com>
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
20 #include <libxml/parser.h>
21 #include <libxml/tree.h>
/* Option flags passed to the libxml2 reader; 0 selects no options. */
24 #define PARSE_OPTIONS 0
/* Forward declaration: the definition appears further down in this file,
 * after the entry points that call it. */
27 PRSS *prss_parse_doc(xmlDocPtr doc);
/* Parse an RSS document held in memory.
 *
 * xml_data is treated as a NUL-terminated string: its length is obtained
 * with strlen(), so it must not be NULL. The libxml2 document built from
 * it is handed to prss_parse_doc(), whose result is returned. */
29 PRSS *prss_parse_data(const char *xml_data)
31 xmlDocPtr doc = xmlReadMemory(xml_data, strlen(xml_data), "", NULL,
38 return prss_parse_doc(doc);
/* Parse an RSS document stored in the file named by xml_file.
 *
 * The file is read with xmlReadFile() using no explicit encoding (NULL)
 * and PARSE_OPTIONS; the resulting document is handed to prss_parse_doc(),
 * whose result is returned. */
41 PRSS *prss_parse_file(const char *xml_file)
43 xmlDocPtr doc = xmlReadFile(xml_file, NULL, PARSE_OPTIONS);
49 return prss_parse_doc(doc);
/* Release a parsed feed.
 *
 * Frees the libxml2 document stored in data->_data. The string fields
 * filled in by read_item()/read_element() are pointers to node content
 * rather than copies, so they presumably become invalid once the document
 * is freed -- TODO confirm against callers. */
52 void prss_free(PRSS *data)
57 xmlFreeDoc(data->_data);
/* Zero every byte of the PRSS structure pointed to by p. */
63 static inline void prss_null(PRSS *p)
65 memset(p, 0, sizeof(PRSS));
/* Zero every byte of the PRSS_Item structure pointed to by i. */
67 static inline void prss_null_item(PRSS_Item *i)
69 memset(i, 0, sizeof(PRSS_Item));
/* Fill one PRSS_Item from a list of sibling XML nodes.
 *
 * res  - item to fill in.
 * data - first node of the list; the walk follows ->next until NULL.
 *
 * Element names are matched case-insensitively (strcasecmp). Matching
 * fields are set to point at the content of the element's first child;
 * the text is not copied. */
72 static inline void read_item(PRSS_Item *res, xmlNodePtr data)
/* Only title, link and description are reset here; category, pubdate and
 * guid keep whatever value they had on entry. */
76 res->title = res->link = res->description = NULL;
77 for (; data; data = data->next) {
/* Test for nodes that are not elements (e.g. text or comment nodes). */
81 if (data->type != XML_ELEMENT_NODE) {
/* First child of the element; its content supplies the field text. */
84 child = data->children;
90 name = (const char *)data->name;
91 if (!strcasecmp(name, "title")) {
92 res->title = (char *) child->content;
93 } else if (!strcasecmp(name, "link")) {
94 res->link = (char *) child->content;
95 } else if (!strcasecmp(name, "description")) {
96 res->description = (char *) child->content;
97 } else if (!strcasecmp(name, "category")) {
98 res->category = (char *) child->content;
99 } else if (!strcasecmp(name, "pubDate")) {
100 res->pubdate = (char *) child->content;
101 } else if (!strcasecmp(name, "guid")) {
102 res->guid = (char *) child->content;
/* Handle a single node found at channel level.
 *
 * res - feed structure being filled in.
 * n   - node to inspect.
 *
 * The element name is matched case-insensitively (strcasecmp) against the
 * known channel fields; a match stores a pointer to child->content in the
 * corresponding member of res (no copy is made). An "item" element is
 * instead parsed into the next free slot of res->items. */
106 static inline void read_element(PRSS *res, xmlNodePtr n)
/* Test for nodes that are not elements (e.g. text or comment nodes). */
111 if (n->type != XML_ELEMENT_NODE) {
120 name = (const char *)n->name;
121 if (!strcasecmp(name, "title")) {
122 res->title = (char *) child->content;
123 } else if (!strcasecmp(name, "link")) {
124 res->link = (char *) child->content;
125 } else if (!strcasecmp(name, "description")) {
126 res->description = (char *) child->content;
127 } else if (!strcasecmp(name, "language")) {
128 res->language = (char *) child->content;
129 } else if (!strcasecmp(name, "pubDate")) {
130 res->pubdate = (char *) child->content;
131 } else if (!strcasecmp(name, "lastBuildDate")) {
132 res->lastbuilddate = (char *) child->content;
133 } else if (!strcasecmp(name, "generator")) {
134 res->generator = (char *) child->content;
135 } else if (!strcasecmp(name, "docs")) {
136 res->docs = (char *) child->content;
137 } else if (!strcasecmp(name, "managingEditor")) {
138 res->managingeditor = (char *) child->content;
139 } else if (!strcasecmp(name, "webMaster")) {
140 res->webmaster = (char *) child->content;
141 } else if (!strcasecmp(name, "copyright")) {
142 res->copyright = (char *) child->content;
143 } else if (!strcasecmp(name, "ttl")) {
144 res->ttl = (char *) child->content;
145 } else if (!strcasecmp(name, "item")) {
/* Parse the item's children into res->items[item_count], then advance
 * item_count. res->items must already be large enough for this slot. */
146 read_item(&res->items[res->item_count++], n->children);
/* Parse an RSS 2.0 style tree, where the items live inside <channel>.
 *
 * res  - feed structure to fill in.
 * root - root element of the document. */
150 static inline int parse_rss_2_0(PRSS *res, xmlNodePtr root)
152 xmlNodePtr channel = root->children;
/* Skip ahead to the first child element named exactly "channel"
 * (case-sensitive strcmp); channel ends up NULL if there is none. */
156 while (channel && (channel->type != XML_ELEMENT_NODE
157 || strcmp((const char *) channel->name, "channel"))) {
158 channel = channel->next;
/* First pass over the channel's children: pick out the "item" elements.
 * NOTE(review): presumably this counts them into `items`, which sizes the
 * allocation below -- confirm. */
164 for (n = channel->children; n; n = n->next) {
165 if (n->type == XML_ELEMENT_NODE &&
166 !strcmp((const char *) n->name, "item")) {
/* The version string is a heap copy made by strndup(), limited to
 * text_buffer_size bytes. */
171 res->version = strndup("2.0", text_buffer_size);
/* Room for `items` entries; read_element() fills them in the loop below. */
172 res->items = malloc(items * sizeof(PRSS_Item));
/* Second pass: hand every child of the channel to read_element(). */
175 for (n = channel->children; n; n = n->next) {
176 read_element(res, n);
/* Parse an RSS 1.0 style tree, where <item> elements are children of the
 * root and therefore siblings of <channel>.
 *
 * res  - feed structure to fill in.
 * root - root element of the document. */
181 static inline int parse_rss_1_0(PRSS *res, xmlNodePtr root)
/* First pass over the root's child elements. */
186 for (n = root->children; n; n = n->next) {
187 if (n->type == XML_ELEMENT_NODE) {
/* NOTE(review): the "item" branch presumably counts items into `items`,
 * which sizes the allocation below -- confirm. */
188 if (!strcmp((const char *) n->name, "item")) {
190 } else if (!strcmp((const char *) n->name, "channel")) {
/* Read the channel-level fields from the channel's children.
 * NOTE(review): read_element() reacts to an "item" child by writing into
 * res->items, which is only allocated after this loop -- confirm that a
 * channel can never contain an <item> element here. */
193 for (i = n->children; i; i = i->next) {
194 read_element(res, i);
/* The version string is a heap copy made by strndup(), limited to
 * text_buffer_size bytes. */
200 res->version = strndup("1.0", text_buffer_size);
/* Room for `items` entries, filled by the second pass below. */
201 res->items = malloc(items * sizeof(PRSS_Item));
/* Second pass: parse each root-level "item" element into the next slot of
 * res->items, advancing res->item_count. */
204 for (n = root->children; n; n = n->next) {
205 if (n->type == XML_ELEMENT_NODE &&
206 !strcmp((const char *) n->name, "item")) {
207 read_item(&res->items[res->item_count++], n->children);
/* Parse an RSS 0.9x tree by delegating to parse_rss_2_0() and returning
 * its result unchanged. */
213 static inline int parse_rss_0_9x(PRSS *res, xmlNodePtr root)
216 return parse_rss_2_0(res, root);
219 PRSS *prss_parse_doc(xmlDocPtr doc)
221 /* FIXME: doc shouldn't be freed after failure when called explicitly from
224 xmlNodePtr root = xmlDocGetRootElement(doc);
225 PRSS *result = malloc(sizeof(PRSS));
230 if (root->type == XML_ELEMENT_NODE) {
231 if (!strcmp((const char *) root->name, "RDF")) {
233 if (!parse_rss_1_0(result, root)) {
239 } else if (!strcmp((const char *) root->name, "rss")) {
240 // RSS 2.0 or <1.0 document
241 if (!parse_rss_2_0(result, root)) {