Supported regex type for additional header format.

This commit is contained in:
Takeshi Nakatani 2016-02-07 05:08:52 +00:00
parent 938554e569
commit 6472eedddc
5 changed files with 158 additions and 85 deletions

View File

@ -105,16 +105,17 @@ specify the path to the password file, which which takes precedence over the pas
This option specifies the path to a configuration file that defines additional HTTP headers by file(object) extension.
The configuration file format is below:
-----------
line = [file suffix] HTTP-header [HTTP-values]
file suffix = file(object) suffix, if this field is empty, it means "*"(all object).
line = [file suffix or regex] HTTP-header [HTTP-values]
file suffix = file(object) suffix, if this field is empty, it means "reg:(.*)" (= all objects).
regex = regular expression to match the file(object) path. this type starts with "reg:" prefix.
HTTP-header = additional HTTP header name
HTTP-values = additional HTTP header value
-----------
Sample:
-----------
.gz Content-Encoding gzip
.Z Content-Encoding compress
X-S3FS-MYHTTPHEAD myvalue
.gz Content-Encoding gzip
.Z Content-Encoding compress
reg:^/MYDIR/(.*)[.]t2$ Content-Encoding text2
-----------
A sample configuration file is uploaded in "test" directory.
If you use this option to set the "Content-Encoding" HTTP header, please take care to follow RFC 2616.

View File

@ -3780,6 +3780,11 @@ int S3fsMultiCurl::Request(void)
return 0;
}
//-------------------------------------------------------------------
// Symbols
//-------------------------------------------------------------------
// Prefix of a configuration-file key that marks the key as a regular
// expression instead of a plain file suffix (compared case-insensitively).
#define ADD_HEAD_REGEX "reg:"
//-------------------------------------------------------------------
// Class AdditionalHeader
//-------------------------------------------------------------------
@ -3821,7 +3826,8 @@ bool AdditionalHeader::Load(const char* file)
}
// read file
string line;
string line;
PADDHEAD paddhead;
while(getline(AH, line)){
if('#' == line[0]){
continue;
@ -3854,26 +3860,44 @@ bool AdditionalHeader::Load(const char* file)
return false;
}
// set charcntlist
int keylen = key.size();
charcnt_list_t::iterator iter;
for(iter = charcntlist.begin(); iter != charcntlist.end(); ++iter){
if(keylen == (*iter)){
break;
paddhead = new ADDHEAD;
if(0 == strncasecmp(key.c_str(), ADD_HEAD_REGEX, strlen(ADD_HEAD_REGEX))){
// regex
if(key.size() <= strlen(ADD_HEAD_REGEX)){
S3FS_PRN_ERR("file format error: %s key(suffix) does not have key string.", key.c_str());
continue;
}
}
if(iter == charcntlist.end()){
charcntlist.push_back(keylen);
}
// set addheader
addheader_t::iterator aiter;
if(addheader.end() == (aiter = addheader.find(key))){
headerpair_t hpair;
hpair[head] = value;
addheader[key] = hpair;
key = key.substr(strlen(ADD_HEAD_REGEX));
// compile
regex_t* preg = new regex_t;
int result;
char errbuf[256];
if(0 != (result = regcomp(preg, key.c_str(), REG_EXTENDED | REG_NOSUB))){ // we do not need matching info
regerror(result, preg, errbuf, sizeof(errbuf));
S3FS_PRN_ERR("failed to compile regex from %s key by %s.", key.c_str(), errbuf);
delete preg;
delete paddhead;
continue;
}
// set
paddhead->pregex = preg;
paddhead->basestring = key;
paddhead->headkey = head;
paddhead->headvalue = value;
}else{
aiter->second[head] = value;
// not regex, directly comparing
paddhead->pregex = NULL;
paddhead->basestring = key;
paddhead->headkey = head;
paddhead->headvalue = value;
}
// add list
addheadlist.push_back(paddhead);
// set flag
if(!is_enable){
is_enable = true;
@ -3885,8 +3909,17 @@ bool AdditionalHeader::Load(const char* file)
// Disables the additional-header feature and releases every loaded rule.
// Each element of addheadlist is deleted; when an element holds a compiled
// regex, the pattern is released with regfree() before the regex_t is deleted.
void AdditionalHeader::Unload(void)
{
is_enable = false;
// NOTE(review): charcntlist/addheader look like the containers of the earlier
// suffix-only implementation - confirm they are still members before keeping these calls.
charcntlist.clear();
addheader.clear();
// erase() yields the iterator of the following element, so the loop advances
// by erasing and leaves addheadlist empty when it finishes.
for(addheadlist_t::iterator iter = addheadlist.begin(); iter != addheadlist.end(); iter = addheadlist.erase(iter)){
PADDHEAD paddhead = *iter;
if(paddhead){
if(paddhead->pregex){
// free the compiled pattern first, then the regex_t object itself
regfree(paddhead->pregex);
delete paddhead->pregex;
}
delete paddhead;
}
}
}
bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const
@ -3898,21 +3931,35 @@ bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const
S3FS_PRN_WARN("path is NULL.");
return false;
}
int nPathLen = strlen(path);
for(charcnt_list_t::const_iterator iter = charcntlist.begin(); iter != charcntlist.end(); ++iter){
// get target character count
if(nPathLen < (*iter)){
size_t pathlength = strlen(path);
// loop
for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter){
const PADDHEAD paddhead = *iter;
if(!paddhead){
continue;
}
// make target suffix(same character count) & find
string suffix(&path[nPathLen - (*iter)]);
addheader_t::const_iterator aiter;
if(addheader.end() == (aiter = addheader.find(suffix))){
continue;
}
for(headerpair_t::const_iterator piter = aiter->second.begin(); piter != aiter->second.end(); ++piter){
// Adding header
meta[(*piter).first] = (*piter).second;
if(paddhead->pregex){
// regex
int result;
regmatch_t match; // not use
if(0 == (result = regexec(paddhead->pregex, path, 1, &match, 0))){
// match -> adding header
meta[paddhead->headkey] = paddhead->headvalue;
break;
}
}else{
// directly comparing
if(paddhead->basestring.length() < pathlength){
if(0 == paddhead->basestring.length() || 0 == strcmp(&path[pathlength - paddhead->basestring.length()], paddhead->basestring.c_str())){
// match -> adding header
meta[paddhead->headkey] = paddhead->headvalue;
break;
}
}
}
}
return true;
@ -3939,26 +3986,31 @@ bool AdditionalHeader::Dump(void) const
if(!IS_S3FS_LOG_DBG()){
return true;
}
// character count list
stringstream ssdbg;
ssdbg << "Character count list[" << charcntlist.size() << "] = {";
for(charcnt_list_t::const_iterator citer = charcntlist.begin(); citer != charcntlist.end(); ++citer){
ssdbg << " " << (*citer);
}
ssdbg << " }\n";
// additional header
ssdbg << "Additional Header list[" << addheader.size() << "] = {\n";
for(addheader_t::const_iterator aiter = addheader.begin(); aiter != addheader.end(); ++aiter){
string key = (*aiter).first;
if(0 == key.size()){
key = "*";
}
for(headerpair_t::const_iterator piter = (*aiter).second.begin(); piter != (*aiter).second.end(); ++piter){
ssdbg << " " << key << "\t--->\t" << (*piter).first << ": " << (*piter).second << "\n";
stringstream ssdbg;
int cnt = 1;
ssdbg << "Additional Header list[" << addheadlist.size() << "] = {" << endl;
for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter, ++cnt){
const PADDHEAD paddhead = *iter;
ssdbg << " [" << cnt << "] = {" << endl;
if(paddhead){
if(paddhead->pregex){
ssdbg << " type\t\t--->\tregex" << endl;
}else{
ssdbg << " type\t\t--->\tsuffix matching" << endl;
}
ssdbg << " base string\t--->\t" << paddhead->basestring << endl;
ssdbg << " add header\t--->\t" << paddhead->headkey << ": " << paddhead->headvalue << endl;
}
ssdbg << " }" << endl;
}
ssdbg << "}";
ssdbg << "}" << endl;
// print all
S3FS_PRN_DBG("%s", ssdbg.str().c_str());

View File

@ -428,25 +428,32 @@ class S3fsMultiCurl
//----------------------------------------------
// class AdditionalHeader
//----------------------------------------------
typedef std::list<int> charcnt_list_t;
typedef std::map<std::string, std::string> headerpair_t;
typedef std::map<std::string, headerpair_t> addheader_t;
#include <regex.h>
// One rule read from the additional-header configuration file.
// A rule is matched either by a compiled regular expression or by a plain
// suffix string, and carries one HTTP header (name and value) to add on a match.
typedef struct add_header{
regex_t* pregex; // not NULL means using regex, NULL means comparing suffix directly.
std::string basestring; // regex text with the "reg:" prefix removed, or the literal suffix
std::string headkey; // name of the HTTP header to add
std::string headvalue; // value of the HTTP header to add
}ADDHEAD, *PADDHEAD;
// List of rule pointers in the order they were appended while loading the file.
typedef std::vector<PADDHEAD> addheadlist_t;
class AdditionalHeader
{
private:
static AdditionalHeader singleton;
bool is_enable;
charcnt_list_t charcntlist;
addheader_t addheader;
addheadlist_t addheadlist;
protected:
AdditionalHeader();
~AdditionalHeader();
public:
// Reference singleton
static AdditionalHeader* get(void) { return &singleton; }
AdditionalHeader();
~AdditionalHeader();
bool Load(const char* file);
void Unload(void);

View File

@ -954,17 +954,19 @@ void show_help (void)
" file is the additional HTTP header by file(object) extension.\n"
" The configuration file format is below:\n"
" -----------\n"
" line = [file suffix] HTTP-header [HTTP-values]\n"
" line = [file suffix or regex] HTTP-header [HTTP-values]\n"
" file suffix = file(object) suffix, if this field is empty,\n"
" it means \"*\"(all object).\n"
" it means \"reg:(.*)\".(=all object).\n"
" regex = regular expression to match the file(object) path.\n"
" this type starts with \"reg:\" prefix.\n"
" HTTP-header = additional HTTP header name\n"
" HTTP-values = additional HTTP header value\n"
" -----------\n"
" Sample:\n"
" -----------\n"
" .gz Content-Encoding gzip\n"
" .Z Content-Encoding compress\n"
" X-S3FS-MYHTTPHEAD myvalue\n"
" .gz Content-Encoding gzip\n"
" .Z Content-Encoding compress\n"
" reg:^/MYDIR/(.*)[.]t2$ Content-Encoding text2\n"
" -----------\n"
" A sample configuration file is uploaded in \"test\" directory.\n"
" If you specify this option for set \"Content-Encoding\" HTTP \n"

View File

@ -4,21 +4,27 @@
# s3fs loads this file at starting.
#
# Format:
# line = [file suffix] HTTP-header [HTTP-header-values]
# line = [file suffix or regex] HTTP-header [HTTP-header-values]
# file suffix = file(object) suffix, if this field is empty,
# it means "*"(all object).
# it means "reg:(.*)" (= all objects).
# regex = regular expression to match the file(object) path.
# this type starts with "reg:" prefix.
# HTTP-header = additional HTTP header name
# HTTP-header-values = additional HTTP header value
#
# <suffix(extension)> <HTTP header> <HTTP header values>
#
# Rules are checked in the order in which they are written in this file.
# That order is very important.
#
# Example:
# " Content-Encoding gzip" --> all object
# ".gz Content-Encoding gzip" --> only ".gz" extension file
# " Content-Encoding gzip" --> all object
# ".gz Content-Encoding gzip" --> only ".gz" extension file
# "reg:^/DIR/(.*)[.]t2$ Content-Encoding text2" --> "/DIR/*.t2" extension file
#
# Notice:
# If you need to set all object, you can specify without "suffix".
# Then all of object(file) is added additional header.
# If you need to match all objects, you can omit the "suffix" or use the regex
# type "reg:(.*)". The additional header is then added to every object(file).
# If you have this configuration file for Content-Encoding, you should
# know about RFC 2616.
#
@ -27,15 +33,20 @@
# Encoding header, and SHOULD NOT be used in the Content-Encoding
# header."
#
.gz Content-Encoding gzip
.Z Content-Encoding compress
.bz2 Content-Encoding bzip2
.svgz Content-Encoding gzip
.svg.gz Content-Encoding gzip
.tgz Content-Encoding gzip
.tar.gz Content-Encoding gzip
.taz Content-Encoding gzip
.tz Content-Encoding gzip
.tbz2 Content-Encoding gzip
gz.js Content-Encoding gzip
# file suffix type
.gz Content-Encoding gzip
.Z Content-Encoding compress
.bz2 Content-Encoding bzip2
.svgz Content-Encoding gzip
.svg.gz Content-Encoding gzip
.tgz Content-Encoding gzip
.tar.gz Content-Encoding gzip
.taz Content-Encoding gzip
.tz Content-Encoding gzip
.tbz2 Content-Encoding gzip
gz.js Content-Encoding gzip
# regex type(test)
reg:^/MYDIR/(.*)[.]t2$ Content-Encoding text2