Supported regex type for additional header format.

2024-12-22 16:58:55 +00:00 · 2016-02-07 05:08:52 +00:00 · 2016-02-07 05:08:52 +00:00 · 6472eedddc
commit 6472eedddc
parent 938554e569
5 changed files with 158 additions and 85 deletions
--- a/doc/man/s3fs.1
+++ b/doc/man/s3fs.1
@ -105,16 +105,17 @@ specify the path to the password file, which which takes precedence over the pas
 This option specifies the configuration file path which file is the additional HTTP header by file(object) extension.
 The configuration file format is below:
 -----------
- line         = [file suffix] HTTP-header [HTTP-values]
- file suffix  = file(object) suffix, if this field is empty, it means "*"(all object).
+ line         = [file suffix or regex] HTTP-header [HTTP-values]
+ file suffix  = file(object) suffix. If this field is empty, it means "reg:(.*)" (= all objects).
+ regex        = regular expression to match the file(object) path. This type starts with the "reg:" prefix.
 HTTP-header  = additional HTTP header name
 HTTP-values  = additional HTTP header value
 -----------
 Sample:
 -----------
- .gz      Content-Encoding     gzip
- .Z       Content-Encoding     compress
-          X-S3FS-MYHTTPHEAD    myvalue
+ .gz                    Content-Encoding  gzip
+ .Z                     Content-Encoding  compress
+ reg:^/MYDIR/(.*)[.]t2$ Content-Encoding  text2
 -----------
 A sample configuration file is uploaded in "test" directory.
 If you specify this option for set "Content-Encoding" HTTP header, please take care for RFC 2616.
--- a/src/curl.cpp
+++ b/src/curl.cpp
@ -3780,6 +3780,11 @@ int S3fsMultiCurl::Request(void)
  return 0;
 }

+//-------------------------------------------------------------------
+// Symbols
+//-------------------------------------------------------------------
+#define ADD_HEAD_REGEX              "reg:"
+
 //-------------------------------------------------------------------
 // Class AdditionalHeader
 //-------------------------------------------------------------------
@ -3821,7 +3826,8 @@ bool AdditionalHeader::Load(const char* file)
  }

  // read file
-  string line;
+  string   line;
+  PADDHEAD paddhead;
  while(getline(AH, line)){
    if('#' == line[0]){
      continue;
@ -3854,26 +3860,44 @@ bool AdditionalHeader::Load(const char* file)
      return false;
    }

-    // set charcntlist
-    int keylen = key.size();
-    charcnt_list_t::iterator iter;
-    for(iter = charcntlist.begin(); iter != charcntlist.end(); ++iter){
-      if(keylen == (*iter)){
-        break;
+    paddhead = new ADDHEAD;
+    if(0 == strncasecmp(key.c_str(), ADD_HEAD_REGEX, strlen(ADD_HEAD_REGEX))){
+      // regex
+      if(key.size() <= strlen(ADD_HEAD_REGEX)){
+        S3FS_PRN_ERR("file format error: %s key(suffix) does not have key string.", key.c_str());
+        continue;
      }
-    }
-    if(iter == charcntlist.end()){
-      charcntlist.push_back(keylen);
-    }
-    // set addheader
-    addheader_t::iterator aiter;
-    if(addheader.end() == (aiter = addheader.find(key))){
-      headerpair_t hpair;
-      hpair[head]    = value;
-      addheader[key] = hpair;
+      key = key.substr(strlen(ADD_HEAD_REGEX));
+
+      // compile
+      regex_t*  preg = new regex_t;
+      int       result;
+      char      errbuf[256];
+      if(0 != (result = regcomp(preg, key.c_str(), REG_EXTENDED | REG_NOSUB))){ // we do not need matching info
+        regerror(result, preg, errbuf, sizeof(errbuf));
+        S3FS_PRN_ERR("failed to compile regex from %s key by %s.", key.c_str(), errbuf);
+        delete preg;
+        delete paddhead;
+        continue;
+      }
+
+      // set
+      paddhead->pregex     = preg;
+      paddhead->basestring = key;
+      paddhead->headkey    = head;
+      paddhead->headvalue  = value;
+
    }else{
-      aiter->second[head] = value;
+      // not regex, directly comparing
+      paddhead->pregex     = NULL;
+      paddhead->basestring = key;
+      paddhead->headkey    = head;
+      paddhead->headvalue  = value;
    }
+
+    // add list
+    addheadlist.push_back(paddhead);
+
    // set flag
    if(!is_enable){
      is_enable = true;
@ -3885,8 +3909,17 @@ bool AdditionalHeader::Load(const char* file)
 void AdditionalHeader::Unload(void)
 {
  is_enable = false;
-  charcntlist.clear();
-  addheader.clear();
+
+  for(addheadlist_t::iterator iter = addheadlist.begin(); iter != addheadlist.end(); iter = addheadlist.erase(iter)){
+    PADDHEAD paddhead = *iter;
+    if(paddhead){
+      if(paddhead->pregex){
+        regfree(paddhead->pregex);
+        delete paddhead->pregex;
+      }
+      delete paddhead;
+    }
+  }
 }

 bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const
@ -3898,21 +3931,35 @@ bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const
    S3FS_PRN_WARN("path is NULL.");
    return false;
  }
-  int nPathLen = strlen(path);
-  for(charcnt_list_t::const_iterator iter = charcntlist.begin(); iter != charcntlist.end(); ++iter){
-    // get target character count
-    if(nPathLen < (*iter)){
+
+  size_t pathlength = strlen(path);
+
+  // loop
+  for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter){
+    const PADDHEAD paddhead = *iter;
+    if(!paddhead){
      continue;
    }
-    // make target suffix(same character count) & find
-    string suffix(&path[nPathLen - (*iter)]);
-    addheader_t::const_iterator aiter;
-    if(addheader.end() == (aiter = addheader.find(suffix))){
-      continue;
-    }
-    for(headerpair_t::const_iterator piter = aiter->second.begin(); piter != aiter->second.end(); ++piter){
-      // Adding header
-      meta[(*piter).first] = (*piter).second;
+
+    if(paddhead->pregex){
+      // regex
+      int        result;
+      regmatch_t match;         // not use
+
+      if(0 == (result = regexec(paddhead->pregex, path, 1, &match, 0))){
+        // match -> adding header
+        meta[paddhead->headkey] = paddhead->headvalue;
+        break;
+      }
+    }else{
+      // directly comparing
+      if(paddhead->basestring.length() < pathlength){
+        if(0 == paddhead->basestring.length() || 0 == strcmp(&path[pathlength - paddhead->basestring.length()], paddhead->basestring.c_str())){
+          // match -> adding header
+          meta[paddhead->headkey] = paddhead->headvalue;
+          break;
+        }
+      }
    }
  }
  return true;
@ -3939,26 +3986,31 @@ bool AdditionalHeader::Dump(void) const
  if(!IS_S3FS_LOG_DBG()){
    return true;
  }
-  // character count list
-  stringstream ssdbg;
-  ssdbg << "Character count list[" << charcntlist.size() << "] = {";
-  for(charcnt_list_t::const_iterator citer = charcntlist.begin(); citer != charcntlist.end(); ++citer){
-    ssdbg << " " << (*citer);
-  }
-  ssdbg << " }\n";

-  // additional header
-  ssdbg << "Additional Header list[" << addheader.size() << "] = {\n";
-  for(addheader_t::const_iterator aiter = addheader.begin(); aiter != addheader.end(); ++aiter){
-    string key = (*aiter).first;
-    if(0 == key.size()){
-      key = "*";
-    }
-    for(headerpair_t::const_iterator piter = (*aiter).second.begin(); piter != (*aiter).second.end(); ++piter){
-      ssdbg << "    " << key << "\t--->\t" << (*piter).first << ": " << (*piter).second << "\n";
+  stringstream ssdbg;
+  int          cnt = 1;
+
+  ssdbg << "Additional Header list[" << addheadlist.size() << "] = {" << endl;
+
+  for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter, ++cnt){
+    const PADDHEAD paddhead = *iter;
+
+    ssdbg << "    [" << cnt << "] = {" << endl;
+
+    if(paddhead){
+      if(paddhead->pregex){
+        ssdbg << "        type\t\t--->\tregex" << endl;
+      }else{
+        ssdbg << "        type\t\t--->\tsuffix matching" << endl;
+      }
+      ssdbg << "        base string\t--->\t" << paddhead->basestring << endl;
+      ssdbg << "        add header\t--->\t"  << paddhead->headkey << ": " << paddhead->headvalue << endl;
    }
+    ssdbg << "    }" << endl;
  }
-  ssdbg << "}";
+
+
+  ssdbg << "}" << endl;

  // print all
  S3FS_PRN_DBG("%s", ssdbg.str().c_str());
--- a/src/curl.h
+++ b/src/curl.h
@ -428,25 +428,32 @@ class S3fsMultiCurl
 //----------------------------------------------
 // class AdditionalHeader
 //----------------------------------------------
-typedef std::list<int> charcnt_list_t;
-typedef std::map<std::string, std::string> headerpair_t;
-typedef std::map<std::string, headerpair_t> addheader_t;
+#include <regex.h>
+
+typedef struct add_header{
+  regex_t*      pregex;         // not NULL means using regex, NULL means comparing suffix directly.
+  std::string   basestring;
+  std::string   headkey;
+  std::string   headvalue;
+}ADDHEAD, *PADDHEAD;
+
+typedef std::vector<PADDHEAD>  addheadlist_t;

 class AdditionalHeader
 {
  private:
    static AdditionalHeader singleton;
    bool                    is_enable;
-    charcnt_list_t          charcntlist;
-    addheader_t             addheader;
+    addheadlist_t           addheadlist;
+
+  protected:
+    AdditionalHeader();
+    ~AdditionalHeader();

  public:
    // Reference singleton
    static AdditionalHeader* get(void) { return &singleton; }

-    AdditionalHeader();
-    ~AdditionalHeader();
-
    bool Load(const char* file);
    void Unload(void);

--- a/src/s3fs_util.cpp
+++ b/src/s3fs_util.cpp
@ -954,17 +954,19 @@ void show_help (void)
    "      file is the additional HTTP header by file(object) extension.\n"
    "      The configuration file format is below:\n"
    "      -----------\n"
-    "      line         = [file suffix] HTTP-header [HTTP-values]\n"
+    "      line         = [file suffix or regex] HTTP-header [HTTP-values]\n"
    "      file suffix  = file(object) suffix, if this field is empty,\n"
-    "                     it means \"*\"(all object).\n"
+    "                     it means \"reg:(.*)\".(=all object).\n"
+    "      regex        = regular expression to match the file(object) path.\n"
+    "                     this type starts with \"reg:\" prefix.\n"
    "      HTTP-header  = additional HTTP header name\n"
    "      HTTP-values  = additional HTTP header value\n"
    "      -----------\n"
    "      Sample:\n"
    "      -----------\n"
-    "      .gz      Content-Encoding     gzip\n"
-    "      .Z       Content-Encoding     compress\n"
-    "               X-S3FS-MYHTTPHEAD    myvalue\n"
+    "      .gz                    Content-Encoding  gzip\n"
+    "      .Z                     Content-Encoding  compress\n"
+    "      reg:^/MYDIR/(.*)[.]t2$ Content-Encoding  text2\n"
    "      -----------\n"
    "      A sample configuration file is uploaded in \"test\" directory.\n"
    "      If you specify this option for set \"Content-Encoding\" HTTP \n"
--- a/test/sample_ahbe.conf
+++ b/test/sample_ahbe.conf
@ -4,21 +4,27 @@
 # s3fs loads this file at starting.
 #
 # Format:
-#  line                = [file suffix] HTTP-header [HTTP-header-values]
+#  line                = [file suffix or regex] HTTP-header [HTTP-header-values]
 #  file suffix         = file(object) suffix, if this field is empty, 
-#                        it means "*"(all object).
+#                        it means "reg:(.*)" (= all objects).
+#  regex               = regular expression to match the file(object) path.
+#                        This type starts with the "reg:" prefix.
 #  HTTP-header         = additional HTTP header name
 #  HTTP-header-values  = additional HTTP header value
 #
 #   <suffix(extension)>  <HTTP header>  <HTTP header values>
 #
+# Rules are checked in the order in which they are written in this file,
+# and only the first matching rule is applied, so that order is very important.
+#
 # Example:
-#   "    Content-Encoding gzip"  --> all object
-#   ".gz Content-Encoding gzip"  --> only ".gz" extension file
+#   "                     Content-Encoding gzip"  --> all objects
+#   ".gz                  Content-Encoding gzip"  --> only ".gz" extension file
+#   "reg:^/DIR/(.*)[.]t2$ Content-Encoding text2" --> "/DIR/*.t2" extension file
 #
 # Notice:
-#   If you need to set all object, you can specify without "suffix".
-#   Then all of object(file) is added additional header.
+#   To apply a header to all objects, omit the "suffix" or use the regex
+#   type "reg:(.*)". The additional header is then added to every object(file).
 #   If you have this configuration file for Content-Encoding, you should
 #   know about RFC 2616.
 #
@ -27,15 +33,20 @@
 #      Encoding header, and SHOULD NOT be used in the Content-Encoding
 #      header."
 #
-.gz	Content-Encoding	gzip
-.Z	Content-Encoding	compress
-.bz2	Content-Encoding	bzip2
-.svgz	Content-Encoding	gzip
-.svg.gz	Content-Encoding	gzip
-.tgz	Content-Encoding	gzip
-.tar.gz	Content-Encoding	gzip
-.taz	Content-Encoding	gzip
-.tz	Content-Encoding	gzip
-.tbz2	Content-Encoding	gzip
-gz.js	Content-Encoding	gzip
+
+# file suffix type
+.gz			Content-Encoding	gzip
+.Z			Content-Encoding	compress
+.bz2			Content-Encoding	bzip2
+.svgz			Content-Encoding	gzip
+.svg.gz			Content-Encoding	gzip
+.tgz			Content-Encoding	gzip
+.tar.gz			Content-Encoding	gzip
+.taz			Content-Encoding	gzip
+.tz			Content-Encoding	gzip
+.tbz2			Content-Encoding	gzip
+gz.js			Content-Encoding	gzip
+
+# regex type(test)
+reg:^/MYDIR/(.*)[.]t2$	Content-Encoding	text2