From 93c6baebb6c6dcbc5e6e93fb6982a7c5826369ee Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 13 Oct 2010 22:00:46 -0500 Subject: [PATCH] Enable use of HTTP cache headers in curl plugin This will allow us to get 304 responses back from remote URLs that we are grabbing using the curl, weather, and rss plugins. The first time we fetch a resource, we will always get the full content, but from there on out we will store any provided 'ETag' or 'Last-Modified' header, and submit these on the next request. If we get a 304 response back, we won't have to do any work at all. This benefits both us (bandwidth and parsing savings) and remote URLs (we actually make an attempt to not retrieve the same resource over and over again). --- src/ccurl_thread.cc | 73 +++++++++++++++++++++++++++++++++++++++++++-- src/ccurl_thread.h | 4 ++- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/src/ccurl_thread.cc b/src/ccurl_thread.cc index e4ff2f21..48528559 100644 --- a/src/ccurl_thread.cc +++ b/src/ccurl_thread.cc @@ -48,6 +48,11 @@ typedef struct _ccurl_memory_t { size_t size; } ccurl_memory_t; +typedef struct _ccurl_headers_t { + char *last_modified; + char *etag; +} ccurl_headers_t; + /* finds a location based on uri in the list provided */ ccurl_location_ptr ccurl_find_location(ccurl_location_list &locations, char *uri) { @@ -71,12 +76,37 @@ void ccurl_free_locations(ccurl_location_list &locations) for (ccurl_location_list::iterator i = locations.begin(); i != locations.end(); i++) { free_and_zero((*i)->uri); + free_and_zero((*i)->last_modified); + free_and_zero((*i)->etag); free_and_zero((*i)->result); (*i)->p_timed_thread.reset(); } locations.clear(); } +/* callback used by curl for parsing the header data */ +size_t ccurl_parse_header_callback(void *ptr, size_t size, size_t nmemb, void *data) +{ + size_t realsize = size * nmemb; + const char *value = (const char*)ptr; + char *end; + ccurl_headers_t *headers = (ccurl_headers_t*)data; + + if (strncmp(value, 
"Last-Modified: ", 15) == EQUAL) { + headers->last_modified = strndup(value + 15, realsize - 15); + if ((end = strchr(headers->last_modified, '\r')) != NULL) { + *end = '\0'; + } + } else if (strncmp(value,"ETag: ", 6) == EQUAL) { + headers->etag = strndup(value + 6, realsize - 6); + if ((end = strchr(headers->etag, '\r')) != NULL) { + *end = '\0'; + } + } + + return realsize; +} + /* callback used by curl for writing the received data */ size_t ccurl_write_memory_callback(void *ptr, size_t size, size_t nmemb, void *data) { @@ -98,27 +128,52 @@ void ccurl_fetch_data(thread_handle &handle, const ccurl_location_ptr &curloc) { CURL *curl = NULL; CURLcode res; + struct curl_slist *headers = NULL; // curl temps ccurl_memory_t chunk; + ccurl_headers_t response_headers; chunk.memory = NULL; chunk.size = 0; + memset(&response_headers, 0, sizeof(ccurl_headers_t)); curl = curl_easy_init(); if (curl) { DBGP("reading curl data from '%s'", curloc->uri); curl_easy_setopt(curl, CURLOPT_URL, curloc->uri); curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1); + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, ccurl_parse_header_callback); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, (void *) &response_headers); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ccurl_write_memory_callback); curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &chunk); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "conky-curl/1.0"); + curl_easy_setopt(curl, CURLOPT_USERAGENT, "conky-curl/1.1"); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); curl_easy_setopt(curl, CURLOPT_LOW_SPEED_LIMIT, 1000); curl_easy_setopt(curl, CURLOPT_LOW_SPEED_TIME, 60); + if (curloc->last_modified) { + const char *header = "If-Modified-Since: "; + int len = strlen(header) + strlen(curloc->last_modified) + 1; + char *str = (char*) malloc(len); + snprintf(str, len, "%s%s", header, curloc->last_modified); + headers = curl_slist_append(headers, str); + free(str); + } + if (curloc->etag) { + const char *header = "If-None-Match: "; + int len = 
strlen(header) + strlen(curloc->etag) + 1; + char *str = (char*) malloc(len); + snprintf(str, len, "%s%s", header, curloc->etag); + headers = curl_slist_append(headers, str); + free(str); + } + if (headers) { + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + } + res = curl_easy_perform(curl); - if (res == CURLE_OK && chunk.size) { + if (res == CURLE_OK) { long http_status_code; if (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, @@ -127,6 +182,15 @@ void ccurl_fetch_data(thread_handle &handle, const ccurl_location_ptr &curloc) case 200: { std::lock_guard lock(handle.mutex()); + free_and_zero(curloc->last_modified); + free_and_zero(curloc->etag); + if (response_headers.last_modified) { + curloc->last_modified = + strdup(response_headers.last_modified); + } + if (response_headers.etag) { + curloc->etag = strdup(response_headers.etag); + } curloc->process_function(curloc->result, chunk.memory); } break; @@ -142,9 +206,12 @@ void ccurl_fetch_data(thread_handle &handle, const ccurl_location_ptr &curloc) } free(chunk.memory); } else { - NORM_ERR("curl: no data from server"); + NORM_ERR("curl: could not retrieve data from server"); } + free_and_zero(response_headers.last_modified); + free_and_zero(response_headers.etag); + curl_slist_free_all(headers); curl_easy_cleanup(curl); } } diff --git a/src/ccurl_thread.h b/src/ccurl_thread.h index 3d468917..06bb0372 100644 --- a/src/ccurl_thread.h +++ b/src/ccurl_thread.h @@ -32,9 +32,11 @@ /* curl thread lib exports begin */ struct ccurl_location_t { - ccurl_location_t() : uri(0), result(0) {} + ccurl_location_t() : uri(0), last_modified(0), etag(0), result(0) {} /* uri of location */ char *uri; + char *last_modified; + char *etag; /* a pointer to some arbitrary data, will be freed by ccurl_free_info() if * non-null */ char *result;