2008-02-20 20:30:45 +00:00
|
|
|
/* $Id$
|
2007-08-10 19:53:44 +00:00
|
|
|
*
|
2007-06-01 15:49:49 +00:00
|
|
|
* Copyright (c) 2007 Mikko Sysikaski <mikko.sysikaski@gmail.com>
|
2008-02-20 20:30:45 +00:00
|
|
|
* Toni Spets <toni.spets@gmail.com>
|
2007-06-01 15:49:49 +00:00
|
|
|
*
|
|
|
|
* Permission to use, copy, modify, and distribute this software for any
|
|
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
|
|
* copyright notice and this permission notice appear in all copies.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
2008-02-20 20:30:45 +00:00
|
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
2007-06-01 15:49:49 +00:00
|
|
|
|
2008-03-29 12:44:29 +00:00
|
|
|
#include "config.h"
#include "prss.h"

#include <libxml/parser.h>
#include <libxml/tree.h>

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
|
|
|
|
|
|
|
|
/* Option flags passed to xmlReadMemory()/xmlReadFile() in this file.  A
 * definition supplied before this point wins; otherwise no options are set. */
#if !defined(PARSE_OPTIONS)
# define PARSE_OPTIONS 0
#endif
|
|
|
|
|
2008-02-20 20:30:45 +00:00
|
|
|
/* Defined further down in this file; declared here so that prss_parse_data()
 * and prss_parse_file() can pass it the document they have just read. */
PRSS *prss_parse_doc(xmlDocPtr doc);
|
2007-06-03 08:58:05 +00:00
|
|
|
|
2008-02-20 20:30:45 +00:00
|
|
|
PRSS *prss_parse_data(const char *xml_data)
|
2007-06-01 10:42:57 +00:00
|
|
|
{
|
2008-02-20 20:30:45 +00:00
|
|
|
xmlDocPtr doc = xmlReadMemory(xml_data, strlen(xml_data), "", NULL,
|
|
|
|
PARSE_OPTIONS);
|
|
|
|
|
|
|
|
if (!doc) {
|
2007-06-01 10:42:57 +00:00
|
|
|
return NULL;
|
2008-02-20 20:30:45 +00:00
|
|
|
}
|
|
|
|
|
2007-06-02 08:17:33 +00:00
|
|
|
return prss_parse_doc(doc);
|
2007-06-01 10:42:57 +00:00
|
|
|
}
|
2008-02-20 20:30:45 +00:00
|
|
|
|
|
|
|
PRSS *prss_parse_file(const char *xml_file)
|
2007-06-01 10:42:57 +00:00
|
|
|
{
|
|
|
|
xmlDocPtr doc = xmlReadFile(xml_file, NULL, PARSE_OPTIONS);
|
2008-02-20 20:30:45 +00:00
|
|
|
|
|
|
|
if (!doc) {
|
2007-06-01 10:42:57 +00:00
|
|
|
return NULL;
|
2008-02-20 20:30:45 +00:00
|
|
|
}
|
|
|
|
|
2007-06-02 08:17:33 +00:00
|
|
|
return prss_parse_doc(doc);
|
2007-06-01 10:42:57 +00:00
|
|
|
}
|
2008-02-20 20:30:45 +00:00
|
|
|
|
|
|
|
/**
 * Release a PRSS and everything it owns: the libxml2 document kept in
 * data->_data, the version string, the item array and the PRSS itself.
 *
 * @param data  object to destroy; NULL is accepted and ignored.
 */
void prss_free(PRSS *data)
{
	if (data != NULL) {
		xmlFreeDoc(data->_data);
		free(data->version);
		free(data->items);
		free(data);
	}
}
|
|
|
|
|
2008-02-20 20:30:45 +00:00
|
|
|
/* Zero every byte of *p. */
static inline void prss_null(PRSS *p)
{
	memset(p, 0, sizeof *p);
}
|
2008-02-20 20:30:45 +00:00
|
|
|
/* Zero every byte of *i. */
static inline void prss_null_item(PRSS_Item *i)
{
	memset(i, 0, sizeof *i);
}
|
|
|
|
|
2008-02-20 20:30:45 +00:00
|
|
|
/**
 * Fill *res from the child nodes of one <item> element.
 *
 * @param res   item to initialise; it is zeroed first.
 * @param data  first child node of the <item>; its siblings are walked too.
 *
 * Each recognised element (matched case-insensitively) stores the `content`
 * pointer of its first child node.  Nothing is copied: the stored pointers
 * refer to memory belonging to the libxml2 node tree.
 */
static inline void read_item(PRSS_Item *res, xmlNodePtr data)
{
	xmlNodePtr node;

	prss_null_item(res);
	res->description = NULL;
	res->link = NULL;
	res->title = NULL;

	for (node = data; node != NULL; node = node->next) {
		const char *tag;
		char *text;

		/* Only elements that have at least one child are of interest. */
		if (node->type != XML_ELEMENT_NODE || node->children == NULL) {
			continue;
		}

		tag = (const char *) node->name;
		text = (char *) node->children->content;

		if (strcasecmp(tag, "title") == 0) {
			res->title = text;
		} else if (strcasecmp(tag, "link") == 0) {
			res->link = text;
		} else if (strcasecmp(tag, "description") == 0) {
			res->description = text;
		} else if (strcasecmp(tag, "category") == 0) {
			res->category = text;
		} else if (strcasecmp(tag, "pubDate") == 0) {
			res->pubdate = text;
		} else if (strcasecmp(tag, "guid") == 0) {
			res->guid = text;
		}
	}
}
|
2008-02-20 20:30:45 +00:00
|
|
|
/**
 * Apply one channel-level node to *res.
 *
 * @param res  feed being filled in.
 * @param n    node to inspect; ignored unless it is an element with at least
 *             one child.
 *
 * Recognised metadata elements (matched case-insensitively) store the
 * `content` pointer of their first child in the matching field of *res.
 * An "item" element is parsed into res->items[res->item_count] and the count
 * is incremented; this function performs no bounds check, so the caller must
 * already have sized res->items for every such element it passes in.
 */
static inline void read_element(PRSS *res, xmlNodePtr n)
{
	xmlNodePtr first;
	const char *tag;

	if (n->type != XML_ELEMENT_NODE) {
		return;
	}
	first = n->children;
	if (first == NULL) {
		return;
	}

	tag = (const char *) n->name;

	if (strcasecmp(tag, "title") == 0) {
		res->title = (char *) first->content;
	} else if (strcasecmp(tag, "link") == 0) {
		res->link = (char *) first->content;
	} else if (strcasecmp(tag, "description") == 0) {
		res->description = (char *) first->content;
	} else if (strcasecmp(tag, "language") == 0) {
		res->language = (char *) first->content;
	} else if (strcasecmp(tag, "pubDate") == 0) {
		res->pubdate = (char *) first->content;
	} else if (strcasecmp(tag, "lastBuildDate") == 0) {
		res->lastbuilddate = (char *) first->content;
	} else if (strcasecmp(tag, "generator") == 0) {
		res->generator = (char *) first->content;
	} else if (strcasecmp(tag, "docs") == 0) {
		res->docs = (char *) first->content;
	} else if (strcasecmp(tag, "managingEditor") == 0) {
		res->managingeditor = (char *) first->content;
	} else if (strcasecmp(tag, "webMaster") == 0) {
		res->webmaster = (char *) first->content;
	} else if (strcasecmp(tag, "copyright") == 0) {
		res->copyright = (char *) first->content;
	} else if (strcasecmp(tag, "ttl") == 0) {
		res->ttl = (char *) first->content;
	} else if (strcasecmp(tag, "item") == 0) {
		PRSS_Item *slot = &res->items[res->item_count];

		res->item_count++;
		read_item(slot, first);
	}
}
|
|
|
|
|
2008-02-20 20:30:45 +00:00
|
|
|
/**
 * Fill *res from an RSS 2.0 style tree: <rss> containing a <channel> whose
 * children are the feed metadata and the <item> elements.
 *
 * @param res   feed to fill; version, items and item_count are overwritten.
 * @param root  the <rss> element.
 * @return 1 on success; 0 when there is no <channel> child or an allocation
 *         fails.  On failure res->version and res->items are left NULL, so
 *         the caller has nothing extra to release.
 */
static inline int parse_rss_2_0(PRSS *res, xmlNodePtr root)
{
	xmlNodePtr channel = root->children;
	xmlNodePtr n;
	size_t items = 0;

	while (channel && (channel->type != XML_ELEMENT_NODE
			|| strcmp((const char *) channel->name, "channel"))) {
		channel = channel->next;
	}
	if (!channel) {
		return 0;
	}

	/* read_element() recognises "item" case-insensitively and writes every
	 * match into res->items, so the count must use the same comparison.
	 * Counting with strcmp() let <Item>/<ITEM> overflow the array. */
	for (n = channel->children; n; n = n->next) {
		if (n->type == XML_ELEMENT_NODE &&
				!strcasecmp((const char *) n->name, "item")) {
			++items;
		}
	}

	res->items = NULL;
	res->item_count = 0;

	res->version = strdup("2.0");
	if (!res->version) {
		return 0;
	}

	/* Skip the allocation for an empty feed: malloc(0) may legitimately
	 * return NULL, which must not be mistaken for a failure. */
	if (items > 0) {
		res->items = calloc(items, sizeof(PRSS_Item));
		if (!res->items) {
			free(res->version);
			res->version = NULL;
			return 0;
		}
	}

	for (n = channel->children; n; n = n->next) {
		read_element(res, n);
	}

	return 1;
}
|
2008-02-20 20:30:45 +00:00
|
|
|
/**
 * Fill *res from an RSS 1.0 (RDF) style tree, where <channel> and the <item>
 * elements are siblings directly under the root.
 *
 * @param res   feed to fill; version, items and item_count are overwritten.
 * @param root  the <RDF> element.
 * @return 1 on success, 0 when an allocation fails.  On failure res->version
 *         and res->items are left NULL, so the caller has nothing extra to
 *         release.
 */
static inline int parse_rss_1_0(PRSS *res, xmlNodePtr root)
{
	size_t items = 0;
	xmlNodePtr n;

	/* Size the item array before anything can be stored into it. */
	for (n = root->children; n; n = n->next) {
		if (n->type == XML_ELEMENT_NODE &&
				!strcmp((const char *) n->name, "item")) {
			++items;
		}
	}

	res->items = NULL;
	res->item_count = 0;

	res->version = strdup("1.0");
	if (!res->version) {
		return 0;
	}

	/* Skip the allocation for an empty feed: malloc(0) may legitimately
	 * return NULL, which must not be mistaken for a failure. */
	if (items > 0) {
		res->items = calloc(items, sizeof(PRSS_Item));
		if (!res->items) {
			free(res->version);
			res->version = NULL;
			return 0;
		}
	}

	for (n = root->children; n; n = n->next) {
		if (n->type != XML_ELEMENT_NODE) {
			continue;
		}

		if (!strcmp((const char *) n->name, "item")) {
			read_item(&res->items[res->item_count++], n->children);
		} else if (!strcmp((const char *) n->name, "channel")) {
			xmlNodePtr i;

			for (i = n->children; i; i = i->next) {
				/* read_element() would store an "item" found here into
				 * res->items, which is sized only for the items directly
				 * under the root.  Items nested in <channel> are
				 * therefore skipped instead of written out of bounds. */
				if (i->type == XML_ELEMENT_NODE &&
						!strcasecmp((const char *) i->name, "item")) {
					continue;
				}
				read_element(res, i);
			}
		}
	}

	return 1;
}
|
2008-02-20 20:30:45 +00:00
|
|
|
/* RSS 0.9x is handled exactly like RSS 2.0: this simply forwards to
 * parse_rss_2_0() and returns its result. */
static inline int parse_rss_0_9x(PRSS *res, xmlNodePtr root)
{
	int status = parse_rss_2_0(res, root);

	return status;
}
|
|
|
|
|
2008-02-20 20:30:45 +00:00
|
|
|
/**
 * Build a PRSS from an already parsed libxml2 document.
 *
 * The root element and its following siblings are scanned for the first
 * element named "RDF" (parsed as RSS 1.0) or "rss" (parsed as RSS 2.0/0.9x).
 *
 * Ownership: this function always takes over doc.  On success it is stored
 * in the returned object and released by prss_free(); on every failure path
 * it is freed here.
 *
 * NOTE: previously the "no RDF/rss element found" path returned NULL without
 * freeing doc, unlike the other failure paths, so prss_parse_data() and
 * prss_parse_file() leaked the document.  Callers invoking this function
 * directly must not free doc after a NULL return.
 *
 * @param doc  document to interpret; NULL or a document with no root element
 *             yields NULL.
 * @return a newly allocated PRSS, or NULL when allocation fails, no RDF/rss
 *         element is found, or the format-specific parser reports failure.
 */
PRSS *prss_parse_doc(xmlDocPtr doc)
{
	xmlNodePtr root;
	PRSS *result = malloc(sizeof(PRSS));

	if (!result) {
		xmlFreeDoc(doc);
		return NULL;
	}

	prss_null(result);
	result->_data = doc;

	/* xmlDocGetRootElement() returns NULL for an empty document, so the
	 * loop condition must be tested before the first dereference. */
	for (root = xmlDocGetRootElement(doc); root; root = root->next) {
		const char *name;
		int ok;

		if (root->type != XML_ELEMENT_NODE) {
			continue;
		}

		name = (const char *) root->name;
		if (!strcmp(name, "RDF")) {
			/* RSS 1.0 document */
			ok = parse_rss_1_0(result, root);
		} else if (!strcmp(name, "rss")) {
			/* RSS 2.0 or <1.0 document */
			ok = parse_rss_2_0(result, root);
		} else {
			continue;
		}

		if (ok) {
			return result;
		}
		break;
	}

	/* Frees doc (through result->_data), any partially filled fields and
	 * result itself. */
	prss_free(result);
	return NULL;
}
|