Massive speed improvements for readdir operations

complete s3fs_readdir() refactor
    - multi interface now batches HTTP requests
      - proper HTTP KeepAlive sessions are back! (CURLOPT_FORBID_REUSE is no longer required)
    - use xpath to quickly grab xml nodes
    - lots of cleanup
    - fixes some strange stat cache behavior
    - huge readdir performance benefits (8-14x in my case) on large directories



git-svn-id: http://s3fs.googlecode.com/svn/trunk@348 df820570-a93a-0410-bd06-b72b767a4274
This commit is contained in:
ben.lemasurier@gmail.com 2011-07-02 02:11:54 +00:00
parent 1a6885359c
commit 2eafa487d7
5 changed files with 539 additions and 558 deletions

View File

@ -1,7 +1,7 @@
dnl Process this file with autoconf to produce a configure script. dnl Process this file with autoconf to produce a configure script.
AC_PREREQ(2.59) AC_PREREQ(2.59)
AC_INIT(s3fs, 1.54) AC_INIT(s3fs, 1.55)
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

View File

@ -55,7 +55,13 @@ CURL *create_curl_handle(void) {
curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0); curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0);
curl_easy_setopt(curl_handle, CURLOPT_PROGRESSFUNCTION, my_curl_progress); curl_easy_setopt(curl_handle, CURLOPT_PROGRESSFUNCTION, my_curl_progress);
curl_easy_setopt(curl_handle, CURLOPT_PROGRESSDATA, curl_handle); curl_easy_setopt(curl_handle, CURLOPT_PROGRESSDATA, curl_handle);
curl_easy_setopt(curl_handle, CURLOPT_FORBID_REUSE, 1); // curl_easy_setopt(curl_handle, CURLOPT_FORBID_REUSE, 1);
if(ssl_verify_hostname.substr(0,1) == "0")
curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0);
if(curl_ca_bundle.size() != 0)
curl_easy_setopt(curl_handle, CURLOPT_CAINFO, curl_ca_bundle.c_str());
now = time(0); now = time(0);
curl_times[curl_handle] = now; curl_times[curl_handle] = now;
curl_progress[curl_handle] = progress_t(-1, -1); curl_progress[curl_handle] = progress_t(-1, -1);
@ -396,172 +402,3 @@ void locate_bundle(void) {
return; return;
} }
// Multi CURL stuff
// Allocate a list node that wraps a single curl easy handle.
//
// handle: the easy handle to store in the node.
// Returns the new node with its next pointer set to NULL (unlinked).
// Allocation failure is fatal: a message is printed and the process exits.
CURLHLL *create_h_element(CURL *handle) {
  CURLHLL *node = (CURLHLL *) malloc(sizeof(CURLHLL));

  if(node == NULL) {
    printf("create_h_element: could not allocation memory\n");
    exit(EXIT_FAILURE);
  }

  node->next = NULL;
  node->handle = handle;

  return node;
}
// Allocate a list node that wraps a single curl multi handle.
//
// handle: the multi handle to store in the node.
// Returns the new node; its easy-handle list is empty (curlhll_head == NULL)
// and it is not linked to any other node (next == NULL).
// Allocation failure is fatal: a message is printed and the process exits.
CURLMHLL *create_mh_element(CURLM *handle) {
  CURLMHLL *node = (CURLMHLL *) malloc(sizeof(CURLMHLL));

  if(node == NULL) {
    printf("create_mh_element: could not allocation memory\n");
    exit(EXIT_FAILURE);
  }

  node->next = NULL;
  node->curlhll_head = NULL;
  node->handle = handle;

  return node;
}
// Append a new node for a multi handle to the end of a multi-handle list.
//
// head:   first node of the list. May be NULL, in which case the new node is
//         simply returned and acts as a one-element list (previously a NULL
//         head was dereferenced and crashed).
// handle: the multi handle to store in the new node.
// Returns the newly created node (the new tail of the list).
CURLMHLL *add_mh_element(CURLMHLL *head, CURLM *handle) {
  CURLMHLL *p_new = create_mh_element(handle);

  // Nothing to link to: the new node is the whole list.
  if(head == NULL)
    return p_new;

  // Walk to the current tail and hang the new node off it.
  CURLMHLL *tail = head;
  while(tail->next != NULL)
    tail = tail->next;
  tail->next = p_new;

  return p_new;
}
// Append a new node for an easy handle to the end of an easy-handle list.
//
// head:   first node of the list; it is dereferenced, so it must not be NULL.
// handle: the easy handle to store in the new node.
void add_h_element(CURLHLL *head, CURL *handle) {
  CURLHLL *fresh = create_h_element(handle);
  CURLHLL *tail = head;

  // Advance until we are standing on the last node.
  while(tail->next != NULL)
    tail = tail->next;

  tail->next = fresh;
}
// Record an easy handle in the easy-handle list kept by a multi-handle node.
//
// h:  the easy handle to record.
// mh: the multi-handle node whose list receives the handle.
// If the node's list is empty, the new element becomes its head; otherwise
// the element is appended to the end of the existing list.
void add_h_to_mh(CURL *h, CURLMHLL *mh) {
  if(mh->curlhll_head != NULL) {
    add_h_element(mh->curlhll_head, h);
    return;
  }

  mh->curlhll_head = create_h_element(h);
}
// Tear down a whole list of multi handles and the easy handles recorded
// under each of them, then release the list nodes themselves.
//
// mhhead: first node of the multi-handle list; NULL is accepted and ignored.
//
// The work is done in four separate passes over the list, in this order:
//   1. remove every easy handle from its multi handle,
//   2. destroy every easy handle,
//   3. clean up every multi handle,
//   4. free the list nodes.
// curl errors in passes 1 and 3 are logged to syslog and otherwise ignored.
void cleanup_multi_stuff(CURLMHLL *mhhead) {
  CURLMHLL *multi;
  CURLMHLL *multi_next;
  CURLHLL *easy;
  CURLHLL *easy_next;
  CURLMcode rc;

  if(mhhead == NULL)
    return;

  // Pass 1: detach each easy handle from the multi handle that owns it.
  for(multi = mhhead; multi != NULL; multi = multi->next) {
    for(easy = multi->curlhll_head; easy != NULL; easy = easy->next) {
      rc = curl_multi_remove_handle(multi->handle, easy->handle);
      if(rc != CURLM_OK) {
        syslog(LOG_ERR, "curl_multi_remove_handle code: %d msg: %s",
            rc, curl_multi_strerror(rc));
      }
    }
  }

  // Pass 2: destroy the (now detached) easy handles.
  for(multi = mhhead; multi != NULL; multi = multi->next) {
    for(easy = multi->curlhll_head; easy != NULL; easy = easy->next)
      destroy_curl_handle(easy->handle);
  }

  // Pass 3: clean up the multi handles.
  for(multi = mhhead; multi != NULL; multi = multi->next) {
    rc = curl_multi_cleanup(multi->handle);
    if(rc != CURLM_OK) {
      syslog(LOG_ERR, "curl_multi_cleanup code: %d msg: %s",
          rc, curl_multi_strerror(rc));
    }
  }

  // Pass 4: free the bookkeeping nodes. The successor is saved before each
  // free() because the node must not be read afterwards.
  multi = mhhead;
  while(multi != NULL) {
    easy = multi->curlhll_head;
    while(easy != NULL) {
      easy_next = easy->next;
      free(easy);
      easy = easy_next;
    }

    multi_next = multi->next;
    free(multi);
    multi = multi_next;
  }
}

View File

@ -13,17 +13,6 @@ struct WriteThis {
int sizeleft; int sizeleft;
}; };
// Node of a singly linked list of curl easy handles.
typedef struct curlhll {
CURL *handle; // the easy handle stored in this node
struct curlhll *next; // following node, or NULL for the last node
} CURLHLL;
// Node of a singly linked list of curl multi handles. Each node also carries
// the list of easy handles that were recorded for its multi handle.
typedef struct curlmhll {
CURLM *handle; // the multi handle stored in this node
struct curlhll *curlhll_head; // first node of the easy-handle list, NULL when empty
struct curlmhll * next; // following node, or NULL for the last node
} CURLMHLL;
typedef std::pair<double, double> progress_t; typedef std::pair<double, double> progress_t;
extern int retries; extern int retries;
@ -42,11 +31,4 @@ int my_curl_progress(
void *clientp, double dltotal, double dlnow, double ultotal, double ulnow); void *clientp, double dltotal, double dlnow, double ultotal, double ulnow);
void locate_bundle(void); void locate_bundle(void);
// Linked-list helpers for tracking curl multi handles and their easy handles.

// Allocates an unlinked list node for an easy handle; exits on allocation failure.
CURLHLL *create_h_element(CURL *handle);
// Allocates an unlinked list node for a multi handle; exits on allocation failure.
CURLMHLL *create_mh_element(CURLM *handle);
// Appends a new multi-handle node after the last node of the list and returns it.
CURLMHLL *add_mh_element(CURLMHLL *head, CURLM *handle);
// Appends a new easy-handle node after the last node of the list.
void add_h_element(CURLHLL *head, CURL *handle);
// Records easy handle h in the easy-handle list of multi-handle node mh.
void add_h_to_mh(CURL *h, CURLMHLL *mh);
// Removes, destroys and frees everything reachable from mhhead; NULL is ignored.
void cleanup_multi_stuff(CURLMHLL *mhhead);
#endif // S3FS_CURL_H_ #endif // S3FS_CURL_H_

File diff suppressed because it is too large Load Diff

View File

@ -2,8 +2,8 @@
#define S3FS_S3_H_ #define S3FS_S3_H_
#define FUSE_USE_VERSION 26 #define FUSE_USE_VERSION 26
#define MULTIPART_SIZE 10485760 // 10MB #define MULTIPART_SIZE 10485760 // 10MB
#define MAX_REQUESTS 100 // max number of concurrent HTTP requests
#include <map> #include <map>
#include <string> #include <string>
@ -91,6 +91,18 @@ static int complete_multipart_upload(const char *path, std::string upload_id, st
std::string md5sum(int fd); std::string md5sum(int fd);
char *get_realpath(const char *path); char *get_realpath(const char *path);
// Forward declarations of file-local helpers.
// NOTE(review): the definitions are not visible from this header. Judging by
// the names and signatures alone, the first group manages a list of
// struct s3_object and the second group deals with bucket listings and their
// XML responses -- confirm against the implementation file.
static int insert_object(char *name, struct s3_object **head);
static unsigned int count_object_list(struct s3_object *list);
static int free_object(struct s3_object *object);
static int free_object_list(struct s3_object *head);
static CURL *create_head_handle(struct head_data *request);
static int list_bucket(const char *path, struct s3_object **head);
static bool is_truncated(const char *xml);
static int append_objects_from_xml(const char *xml, struct s3_object **head);
// NOTE(review): ownership of the returned strings below is not visible here --
// check whether callers are expected to free them.
static const char *get_next_marker(const char *xml);
static char *get_object_name(xmlDocPtr doc, xmlNodePtr node);
static int s3fs_getattr(const char *path, struct stat *stbuf); static int s3fs_getattr(const char *path, struct stat *stbuf);
static int s3fs_readlink(const char *path, char *buf, size_t size); static int s3fs_readlink(const char *path, char *buf, size_t size);
static int s3fs_mknod(const char* path, mode_t mode, dev_t rdev); static int s3fs_mknod(const char* path, mode_t mode, dev_t rdev);