From 6472eedddcd56ee26db580eb83c24200be6a9b25 Mon Sep 17 00:00:00 2001 From: Takeshi Nakatani Date: Sun, 7 Feb 2016 05:08:52 +0000 Subject: [PATCH] Supported regex type for additional header format. --- doc/man/s3fs.1 | 11 +-- src/curl.cpp | 152 ++++++++++++++++++++++++++++-------------- src/curl.h | 23 ++++--- src/s3fs_util.cpp | 12 ++-- test/sample_ahbe.conf | 45 ++++++++----- 5 files changed, 158 insertions(+), 85 deletions(-) diff --git a/doc/man/s3fs.1 b/doc/man/s3fs.1 index 2bb06d9..e0c9a6c 100644 --- a/doc/man/s3fs.1 +++ b/doc/man/s3fs.1 @@ -105,16 +105,17 @@ specify the path to the password file, which which takes precedence over the pas This option specifies the configuration file path which file is the additional HTTP header by file(object) extension. The configuration file format is below: ----------- - line = [file suffix] HTTP-header [HTTP-values] - file suffix = file(object) suffix, if this field is empty, it means "*"(all object). + line = [file suffix or regex] HTTP-header [HTTP-values] + file suffix = file(object) suffix, if this field is empty, it means "reg:(.*)" (= all objects). + regex = regular expression to match the file(object) path. This type starts with the "reg:" prefix. HTTP-header = additional HTTP header name HTTP-values = additional HTTP header value ----------- Sample: ----------- - .gz Content-Encoding gzip - .Z Content-Encoding compress - X-S3FS-MYHTTPHEAD myvalue + .gz Content-Encoding gzip + .Z Content-Encoding compress + reg:^/MYDIR/(.*)[.]t2$ Content-Encoding text2 ----------- A sample configuration file is uploaded in "test" directory. If you specify this option for set "Content-Encoding" HTTP header, please take care for RFC 2616. 
diff --git a/src/curl.cpp b/src/curl.cpp index 55d752c..91902af 100644 --- a/src/curl.cpp +++ b/src/curl.cpp @@ -3780,6 +3780,11 @@ int S3fsMultiCurl::Request(void) return 0; } +//------------------------------------------------------------------- +// Symbols +//------------------------------------------------------------------- +#define ADD_HEAD_REGEX "reg:" + //------------------------------------------------------------------- // Class AdditionalHeader //------------------------------------------------------------------- @@ -3821,7 +3826,8 @@ bool AdditionalHeader::Load(const char* file) } // read file - string line; + string line; + PADDHEAD paddhead; while(getline(AH, line)){ if('#' == line[0]){ continue; @@ -3854,26 +3860,44 @@ bool AdditionalHeader::Load(const char* file) return false; } - // set charcntlist - int keylen = key.size(); - charcnt_list_t::iterator iter; - for(iter = charcntlist.begin(); iter != charcntlist.end(); ++iter){ - if(keylen == (*iter)){ - break; + paddhead = new ADDHEAD; + if(0 == strncasecmp(key.c_str(), ADD_HEAD_REGEX, strlen(ADD_HEAD_REGEX))){ + // regex + if(key.size() <= strlen(ADD_HEAD_REGEX)){ + S3FS_PRN_ERR("file format error: %s key(suffix) does not have key string.", key.c_str()); + continue; } - } - if(iter == charcntlist.end()){ - charcntlist.push_back(keylen); - } - // set addheader - addheader_t::iterator aiter; - if(addheader.end() == (aiter = addheader.find(key))){ - headerpair_t hpair; - hpair[head] = value; - addheader[key] = hpair; + key = key.substr(strlen(ADD_HEAD_REGEX)); + + // compile + regex_t* preg = new regex_t; + int result; + char errbuf[256]; + if(0 != (result = regcomp(preg, key.c_str(), REG_EXTENDED | REG_NOSUB))){ // we do not need matching info + regerror(result, preg, errbuf, sizeof(errbuf)); + S3FS_PRN_ERR("failed to compile regex from %s key by %s.", key.c_str(), errbuf); + delete preg; + delete paddhead; + continue; + } + + // set + paddhead->pregex = preg; + paddhead->basestring = key; + 
paddhead->headkey = head; + paddhead->headvalue = value; + }else{ - aiter->second[head] = value; + // not regex, directly comparing + paddhead->pregex = NULL; + paddhead->basestring = key; + paddhead->headkey = head; + paddhead->headvalue = value; } + + // add list + addheadlist.push_back(paddhead); + // set flag if(!is_enable){ is_enable = true; @@ -3885,8 +3909,17 @@ bool AdditionalHeader::Load(const char* file) void AdditionalHeader::Unload(void) { is_enable = false; - charcntlist.clear(); - addheader.clear(); + + for(addheadlist_t::iterator iter = addheadlist.begin(); iter != addheadlist.end(); iter = addheadlist.erase(iter)){ + PADDHEAD paddhead = *iter; + if(paddhead){ + if(paddhead->pregex){ + regfree(paddhead->pregex); + delete paddhead->pregex; + } + delete paddhead; + } + } } bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const @@ -3898,21 +3931,35 @@ bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const S3FS_PRN_WARN("path is NULL."); return false; } - int nPathLen = strlen(path); - for(charcnt_list_t::const_iterator iter = charcntlist.begin(); iter != charcntlist.end(); ++iter){ - // get target character count - if(nPathLen < (*iter)){ + + size_t pathlength = strlen(path); + + // loop + for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter){ + const PADDHEAD paddhead = *iter; + if(!paddhead){ continue; } - // make target suffix(same character count) & find - string suffix(&path[nPathLen - (*iter)]); - addheader_t::const_iterator aiter; - if(addheader.end() == (aiter = addheader.find(suffix))){ - continue; - } - for(headerpair_t::const_iterator piter = aiter->second.begin(); piter != aiter->second.end(); ++piter){ - // Adding header - meta[(*piter).first] = (*piter).second; + + if(paddhead->pregex){ + // regex + int result; + regmatch_t match; // not use + + if(0 == (result = regexec(paddhead->pregex, path, 1, &match, 0))){ + // match -> adding header + 
meta[paddhead->headkey] = paddhead->headvalue; + break; + } + }else{ + // directly comparing + if(paddhead->basestring.length() < pathlength){ + if(0 == paddhead->basestring.length() || 0 == strcmp(&path[pathlength - paddhead->basestring.length()], paddhead->basestring.c_str())){ + // match -> adding header + meta[paddhead->headkey] = paddhead->headvalue; + break; + } + } } } return true; @@ -3939,26 +3986,31 @@ bool AdditionalHeader::Dump(void) const if(!IS_S3FS_LOG_DBG()){ return true; } - // character count list - stringstream ssdbg; - ssdbg << "Character count list[" << charcntlist.size() << "] = {"; - for(charcnt_list_t::const_iterator citer = charcntlist.begin(); citer != charcntlist.end(); ++citer){ - ssdbg << " " << (*citer); - } - ssdbg << " }\n"; - // additional header - ssdbg << "Additional Header list[" << addheader.size() << "] = {\n"; - for(addheader_t::const_iterator aiter = addheader.begin(); aiter != addheader.end(); ++aiter){ - string key = (*aiter).first; - if(0 == key.size()){ - key = "*"; - } - for(headerpair_t::const_iterator piter = (*aiter).second.begin(); piter != (*aiter).second.end(); ++piter){ - ssdbg << " " << key << "\t--->\t" << (*piter).first << ": " << (*piter).second << "\n"; + stringstream ssdbg; + int cnt = 1; + + ssdbg << "Additional Header list[" << addheadlist.size() << "] = {" << endl; + + for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter, ++cnt){ + const PADDHEAD paddhead = *iter; + + ssdbg << " [" << cnt << "] = {" << endl; + + if(paddhead){ + if(paddhead->pregex){ + ssdbg << " type\t\t--->\tregex" << endl; + }else{ + ssdbg << " type\t\t--->\tsuffix matching" << endl; + } + ssdbg << " base string\t--->\t" << paddhead->basestring << endl; + ssdbg << " add header\t--->\t" << paddhead->headkey << ": " << paddhead->headvalue << endl; } + ssdbg << " }" << endl; } - ssdbg << "}"; + + + ssdbg << "}" << endl; // print all S3FS_PRN_DBG("%s", ssdbg.str().c_str()); diff --git a/src/curl.h 
b/src/curl.h index 5955a2c..5d0a989 100644 --- a/src/curl.h +++ b/src/curl.h @@ -428,25 +428,32 @@ class S3fsMultiCurl //---------------------------------------------- // class AdditionalHeader //---------------------------------------------- -typedef std::list charcnt_list_t; -typedef std::map headerpair_t; -typedef std::map addheader_t; +#include + +typedef struct add_header{ + regex_t* pregex; // not NULL means using regex, NULL means comparing suffix directly. + std::string basestring; + std::string headkey; + std::string headvalue; +}ADDHEAD, *PADDHEAD; + +typedef std::vector addheadlist_t; class AdditionalHeader { private: static AdditionalHeader singleton; bool is_enable; - charcnt_list_t charcntlist; - addheader_t addheader; + addheadlist_t addheadlist; + + protected: + AdditionalHeader(); + ~AdditionalHeader(); public: // Reference singleton static AdditionalHeader* get(void) { return &singleton; } - AdditionalHeader(); - ~AdditionalHeader(); - bool Load(const char* file); void Unload(void); diff --git a/src/s3fs_util.cpp b/src/s3fs_util.cpp index a95f312..0f5b50d 100644 --- a/src/s3fs_util.cpp +++ b/src/s3fs_util.cpp @@ -954,17 +954,19 @@ void show_help (void) " file is the additional HTTP header by file(object) extension.\n" " The configuration file format is below:\n" " -----------\n" - " line = [file suffix] HTTP-header [HTTP-values]\n" + " line = [file suffix or regex] HTTP-header [HTTP-values]\n" " file suffix = file(object) suffix, if this field is empty,\n" - " it means \"*\"(all object).\n" + " it means \"reg:(.*)\".(=all object).\n" + " regex = regular expression to match the file(object) path.\n" + " this type starts with \"reg:\" prefix.\n" " HTTP-header = additional HTTP header name\n" " HTTP-values = additional HTTP header value\n" " -----------\n" " Sample:\n" " -----------\n" - " .gz Content-Encoding gzip\n" - " .Z Content-Encoding compress\n" - " X-S3FS-MYHTTPHEAD myvalue\n" + " .gz Content-Encoding gzip\n" + " .Z Content-Encoding 
compress\n" + " reg:^/MYDIR/(.*)[.]t2$ Content-Encoding text2\n" " -----------\n" " A sample configuration file is uploaded in \"test\" directory.\n" " If you specify this option for set \"Content-Encoding\" HTTP \n" diff --git a/test/sample_ahbe.conf b/test/sample_ahbe.conf index a93e0a5..4e41b79 100644 --- a/test/sample_ahbe.conf +++ b/test/sample_ahbe.conf @@ -4,21 +4,27 @@ # s3fs loads this file at starting. # # Format: -# line = [file suffix] HTTP-header [HTTP-header-values] +# line = [file suffix or regex] HTTP-header [HTTP-header-values] # file suffix = file(object) suffix, if this field is empty, -# it means "*"(all object). +# it means "reg:(.*)" (= all objects). +# regex = regular expression to match the file(object) path. +# This type starts with the "reg:" prefix. # HTTP-header = additional HTTP header name # HTTP-header-values = additional HTTP header value # # # +# Entries are checked in the order in which they are written in this file, +# and only the first matching entry is applied, so the order is very important. +# # Example: -# " Content-Encoding gzip" --> all object -# ".gz Content-Encoding gzip" --> only ".gz" extension file +# " Content-Encoding gzip" --> all objects +# ".gz Content-Encoding gzip" --> only ".gz" extension file +# "reg:^/DIR/(.*)[.]t2$ Content-Encoding text2" --> "/DIR/*.t2" extension file # # Notice: -# If you need to set all object, you can specify without "suffix". -# Then all of object(file) is added additional header. +# If you need to match all objects, you can omit the "suffix" or use the regex +# type "reg:(.*)". Then the additional header is added to every object(file). # If you have this configuration file for Content-Encoding, you should # know about RFC 2616. # @@ -27,15 +33,20 @@ # Encoding header, and SHOULD NOT be used in the Content-Encoding # header." 
# -.gz Content-Encoding gzip -.Z Content-Encoding compress -.bz2 Content-Encoding bzip2 -.svgz Content-Encoding gzip -.svg.gz Content-Encoding gzip -.tgz Content-Encoding gzip -.tar.gz Content-Encoding gzip -.taz Content-Encoding gzip -.tz Content-Encoding gzip -.tbz2 Content-Encoding gzip -gz.js Content-Encoding gzip + +# file suffix type +.gz Content-Encoding gzip +.Z Content-Encoding compress +.bz2 Content-Encoding bzip2 +.svgz Content-Encoding gzip +.svg.gz Content-Encoding gzip +.tgz Content-Encoding gzip +.tar.gz Content-Encoding gzip +.taz Content-Encoding gzip +.tz Content-Encoding gzip +.tbz2 Content-Encoding gzip +gz.js Content-Encoding gzip + +# regex type(test) +reg:^/MYDIR/(.*)[.]t2$ Content-Encoding text2