From 45950044f728236803475604cb63a95257d954b6 Mon Sep 17 00:00:00 2001 From: "ggtakec@gmail.com" Date: Wed, 19 Jun 2013 14:53:58 +0000 Subject: [PATCH] Changes codes 1) Changes code for performance and request cost. s3fs gets an object's attributes by using a HEAD request. A directory object is one of the following 4 types: a) name type is "dir", with meta information b) name type is "dir", without meta information (but has files in itself) c) name type is "dir/", with(out) meta information d) name type is "dir_$folder$", with(out) meta information The code is changed to check directory objects in a new order, so that s3fs sends fewer requests when checking objects. The previous version has a bug: s3fs cannot reliably recognize type b) when it checks the object directly (but it can when it checks the object by listing). This change fixes this bug. 2) Adds "multireq_max" option The added "multireq_max" option is the maximum number of parallel requests for listing objects. This change may solve CURLE_COULDNT_CONNECT. If this option cannot solve it, the option will still be useful for tuning performance in each environment. git-svn-id: http://s3fs.googlecode.com/svn/trunk@449 df820570-a93a-0410-bd06-b72b767a4274 --- doc/man/s3fs.1 | 3 + src/s3fs.cpp | 143 ++++++++++++++++++++++++++++------------------ src/s3fs_util.cpp | 38 ++++++++++++ src/s3fs_util.h | 1 + 4 files changed, 128 insertions(+), 57 deletions(-) diff --git a/doc/man/s3fs.1 b/doc/man/s3fs.1 index ef47d1f..201ea4f 100644 --- a/doc/man/s3fs.1 +++ b/doc/man/s3fs.1 @@ -92,6 +92,9 @@ You can specify this option for performance, s3fs memorizes in stat cache that t \fB\-o\fR nodnscache - disable dns cache. s3fs is always using dns cache, this option make dns cache disable. .TP +\fB\-o\fR multireq_max (default="500") +maximum number of parallel request for listing objects. +.TP \fB\-o\fR url (default="http://s3.amazonaws.com") sets the url to use to access Amazon S3. 
If you want to use HTTPS, then you can set url=https://s3.amazonaws.com .TP diff --git a/src/s3fs.cpp b/src/s3fs.cpp index 741d861..6ea6db7 100644 --- a/src/s3fs.cpp +++ b/src/s3fs.cpp @@ -54,7 +54,7 @@ using namespace std; //------------------------------------------------------------------- // Define //------------------------------------------------------------------- -#define MAX_MULTI_HEADREQ 500 // max request count in readdir curl_multi. +#define MAX_MULTI_HEADREQ 500 // default: max request count in readdir curl_multi. #define DIRTYPE_UNKNOWN -1 #define DIRTYPE_NEW 0 #define DIRTYPE_OLD 1 @@ -112,6 +112,7 @@ static gid_t s3fs_gid = 0; // default = root. static bool is_s3fs_umask = false;// default does not set. static mode_t s3fs_umask = 0; static bool dns_cache = true; // default = true +static int multireq_maxcnt = MAX_MULTI_HEADREQ; // if .size()==0 then local file cache is disabled static std::string use_cache; @@ -307,6 +308,12 @@ static int chk_dir_object_type(const char *path, string& newpath, string& nowpat // Get object attributes with stat cache. // This function is base for s3fs_getattr(). // +// [NOTICE] +// Checking order is changed following list because of reducing the number of the requests. +// 1) "dir" +// 2) "dir/" +// 3) "dir_$folder$" +// static int get_object_attribute(const char *path, struct stat *pstbuf, headers_t* pmeta, bool overcheck, bool* pisforce) { int result = -1; @@ -321,6 +328,10 @@ static int get_object_attribute(const char *path, struct stat *pstbuf, headers_t //FGPRINT(" get_object_attribute[path=%s]\n", path); + if(!path || '\0' == path[0]){ + return -ENOENT; + } + memset(pstat, 0, sizeof(struct stat)); if(0 == strcmp(path, "/") || 0 == strcmp(path, ".")){ pstat->st_nlink = 1; // see fuse faq @@ -330,12 +341,9 @@ static int get_object_attribute(const char *path, struct stat *pstbuf, headers_t // Check cache. 
strpath = path; - if(overcheck){ - Pos = strpath.find("_$folder$", 0); - if(string::npos != Pos){ - strpath = strpath.substr(0, Pos); - strpath += "/"; - } + if(overcheck && string::npos != (Pos = strpath.find("_$folder$", 0))){ + strpath = strpath.substr(0, Pos); + strpath += "/"; } if(pisforce){ (*pisforce) = false; @@ -348,63 +356,80 @@ static int get_object_attribute(const char *path, struct stat *pstbuf, headers_t return -ENOENT; } - // At first, check "object/". - strpath = path; - if(overcheck && 0 < strpath.length() && '/' != strpath[strpath.length() - 1]){ - strpath += "/"; - s3_realpath = get_realpath(strpath.c_str()); - if(0 != (result = curl_get_headers(s3_realpath.c_str(), (*pheader)))){ - string strSp= path; - strSp += "_$folder$"; - s3_realpath = get_realpath(strSp.c_str()); - result = curl_get_headers(s3_realpath.c_str(), (*pheader)); + // At first, check path + strpath = path; + s3_realpath = get_realpath(strpath.c_str()); + result = curl_get_headers(s3_realpath.c_str(), (*pheader)); + + // overcheck + if(overcheck && 0 != result){ + if('/' != strpath[strpath.length() - 1] && string::npos == strpath.find("_$folder$", 0)){ + // path is "object", check "object/" for overcheck + strpath += "/"; + s3_realpath = get_realpath(strpath.c_str()); + result = curl_get_headers(s3_realpath.c_str(), (*pheader)); } - } - if(0 != result){ - strpath = path; - s3_realpath = get_realpath(strpath.c_str()); - if(0 != (result = curl_get_headers(s3_realpath.c_str(), (*pheader)))){ - // Not found --> check( if overcheck ) - if(overcheck){ - if(string::npos == strpath.find("_$folder$", 0)){ - // path doesn't have "_$folder$" --> check for s3fox etc - strpath += "_$folder$"; - s3_realpath = get_realpath(strpath.c_str()); - result = curl_get_headers(s3_realpath.c_str(), (*pheader)); + if(0 != result){ + // not found "object/", check "_$folder$" + strpath = path; + if(string::npos == strpath.find("_$folder$", 0)){ + if('/' == strpath[strpath.length() - 1]){ + strpath = 
strpath.substr(0, strpath.length() - 1); } - if(0 == result){ - // found "_$folder$" object. - strpath = path; // reset original + strpath += "_$folder$"; + s3_realpath = get_realpath(strpath.c_str()); + result = curl_get_headers(s3_realpath.c_str(), (*pheader)); + } + } + if(0 != result){ + // not found "object/" and "object_$folder$", check no dir object. + strpath = path; + if(string::npos == strpath.find("_$folder$", 0)){ + if('/' == strpath[strpath.length() - 1]){ + strpath = strpath.substr(0, strpath.length() - 1); + } + if(-ENOTEMPTY == directory_empty(strpath.c_str())){ + // found "no dir obejct". strpath += "/"; - }else{ - // path does not have "_$folder$" --> check "no dir obejct". - if(-ENOTEMPTY == directory_empty(path)){ - // found "no dir obejct". - forcedir = true; - strpath = path; // reset original - strpath += "/"; - if(pisforce){ - (*pisforce) = true; - } - }else{ - // Add no object cache. - strpath = path; // reset original - StatCache::getStatCacheData()->AddNoObjectCache(strpath); - return result; + forcedir = true; + if(pisforce){ + (*pisforce) = true; } + result = 0; } - }else{ - return result; } - }else{ - // if path has "_$folder$", need to cut it. - Pos = strpath.find("_$folder$", 0); - if(string::npos != Pos){ - strpath = strpath.substr(0, Pos); - strpath += "/"; + } + }else{ + // found "path" object. + if('/' != strpath[strpath.length() - 1]){ + // check a case of that "object" does not have attribute and "object" is possible to be directory. + if(is_need_check_obj_detail(*pheader)){ + if(-ENOTEMPTY == directory_empty(strpath.c_str())){ + strpath += "/"; + forcedir = true; + if(pisforce){ + (*pisforce) = true; + } + result = 0; + } } } } + + if(0 != result){ + // finally, "path" object did not find. Add no object cache. + strpath = path; // reset original + StatCache::getStatCacheData()->AddNoObjectCache(strpath); + return result; + } + + // if path has "_$folder$", need to cut it. 
+ if(string::npos != (Pos = strpath.find("_$folder$", 0))){ + strpath = strpath.substr(0, Pos); + strpath += "/"; + } + + // Set into cache if(0 != StatCache::getStatCacheData()->GetCacheSize()){ // add into stat cache if(!StatCache::getStatCacheData()->AddStat(strpath, (*pheader), forcedir)){ @@ -2963,7 +2988,7 @@ static int readdir_multi_head(const char *path, S3ObjList& head) mh = curl_multi_init(); // Make single head request. - for(liter = headlist.begin(), cnt = 0; headlist.end() != liter && cnt < MAX_MULTI_HEADREQ; ){ + for(liter = headlist.begin(), cnt = 0; headlist.end() != liter && cnt < multireq_maxcnt; ){ string fullpath = path + (*liter); string fullorg = path + head.GetOrgName((*liter).c_str()); string etag = head.GetETag((*liter).c_str()); @@ -2991,7 +3016,7 @@ static int readdir_multi_head(const char *path, S3ObjList& head) return -EIO; } liter++; - cnt++; // max request count in multi-request is MAX_MULTI_HEADREQ. + cnt++; // max request count in multi-request is multireq_maxcnt. } // Send multi request. @@ -4208,6 +4233,10 @@ static int my_fuse_opt_proc(void *data, const char *arg, int key, struct fuse_ar use_cache = strchr(arg, '=') + 1; return 0; } + if (strstr(arg, "multireq_max=") != 0) { + multireq_maxcnt = atoi(strchr(arg, '=') + 1); + return 0; + } if(strstr(arg, "nonempty") != 0) { nonempty = true; // need to continue for fuse. diff --git a/src/s3fs_util.cpp b/src/s3fs_util.cpp index 7eee3c1..81abd35 100644 --- a/src/s3fs_util.cpp +++ b/src/s3fs_util.cpp @@ -637,6 +637,41 @@ time_t get_lastmodified(headers_t& meta) return get_lastmodified((*iter).second.c_str()); } +// +// Returns it whether it is an object with need checking in detail. +// If this function returns true, the object is possible to be directory +// and is needed checking detail(searching sub object). +// +bool is_need_check_obj_detail(headers_t& meta) +{ + headers_t::const_iterator iter; + + // directory object is Content-Length as 0. 
+ if(0 != get_size(meta)){ + return false; + } + // if the object has x-amz-meta information, checking is no more. + if(meta.end() != meta.find("x-amz-meta-mode") || + meta.end() != meta.find("x-amz-meta-mtime") || + meta.end() != meta.find("x-amz-meta-uid") || + meta.end() != meta.find("x-amz-meta-gid") || + meta.end() != meta.find("x-amz-meta-owner") || + meta.end() != meta.find("x-amz-meta-group") || + meta.end() != meta.find("x-amz-meta-permissions") ) + { + return false; + } + // if there is not Content-Type, or Content-Type is "x-directory", + // checking is no more. + if(meta.end() == (iter = meta.find("Content-Type"))){ + return false; + } + if("application/x-directory" == (*iter).second){ + return false; + } + return true; +} + //------------------------------------------------------------------- // Help //------------------------------------------------------------------- @@ -711,6 +746,9 @@ void show_help (void) " nodnscache - disable dns cache\n" " - s3fs is always using dns cache, this option make dns cache disable.\n" "\n" + " multireq_max (default=\"500\")\n" + " - maximum number of parallel request for listing objects.\n" + "\n" " url (default=\"http://s3.amazonaws.com\")\n" " - sets the url to use to access amazon s3\n" "\n" diff --git a/src/s3fs_util.h b/src/s3fs_util.h index d7d9a85..90fde86 100644 --- a/src/s3fs_util.h +++ b/src/s3fs_util.h @@ -93,6 +93,7 @@ gid_t get_gid(headers_t& meta); blkcnt_t get_blocks(off_t size); time_t get_lastmodified(const char* s); time_t get_lastmodified(headers_t& meta); +bool is_need_check_obj_detail(headers_t& meta); void show_usage(void); void show_help(void);