From 6472eedddcd56ee26db580eb83c24200be6a9b25 Mon Sep 17 00:00:00 2001
From: Takeshi Nakatani <ggtakec@gmail.com>
Date: Sun, 7 Feb 2016 05:08:52 +0000
Subject: [PATCH] Supported regex type for additional header format.

---
 doc/man/s3fs.1        |  11 +--
 src/curl.cpp          | 152 ++++++++++++++++++++++++++++--------------
 src/curl.h            |  23 ++++---
 src/s3fs_util.cpp     |  12 ++--
 test/sample_ahbe.conf |  45 ++++++++-----
 5 files changed, 158 insertions(+), 85 deletions(-)

diff --git a/doc/man/s3fs.1 b/doc/man/s3fs.1
index 2bb06d9..e0c9a6c 100644
--- a/doc/man/s3fs.1
+++ b/doc/man/s3fs.1
@@ -105,16 +105,17 @@ specify the path to the password file, which which takes precedence over the pas
 This option specifies the configuration file path which file is the additional HTTP header by file(object) extension.
  The configuration file format is below:
  -----------
- line         = [file suffix] HTTP-header [HTTP-values]
- file suffix  = file(object) suffix, if this field is empty, it means "*"(all object).
+ line         = [file suffix or regex] HTTP-header [HTTP-values]
+ file suffix  = file(object) suffix. If this field is empty, it means "reg:(.*)" (= all objects).
+ regex        = regular expression to match the file(object) path. This type starts with the "reg:" prefix.
  HTTP-header  = additional HTTP header name
  HTTP-values  = additional HTTP header value
  -----------
  Sample:
  -----------
- .gz      Content-Encoding     gzip
- .Z       Content-Encoding     compress
-          X-S3FS-MYHTTPHEAD    myvalue
+ .gz                    Content-Encoding  gzip
+ .Z                     Content-Encoding  compress
+ reg:^/MYDIR/(.*)[.]t2$ Content-Encoding  text2
  -----------
  A sample configuration file is uploaded in "test" directory.
 If you specify this option for set "Content-Encoding" HTTP header, please take care for RFC 2616.
diff --git a/src/curl.cpp b/src/curl.cpp
index 55d752c..91902af 100644
--- a/src/curl.cpp
+++ b/src/curl.cpp
@@ -3780,6 +3780,11 @@ int S3fsMultiCurl::Request(void)
   return 0;
 }
 
+//-------------------------------------------------------------------
+// Symbols
+//-------------------------------------------------------------------
+#define ADD_HEAD_REGEX              "reg:"
+
 //-------------------------------------------------------------------
 // Class AdditionalHeader
 //-------------------------------------------------------------------
@@ -3821,7 +3826,8 @@ bool AdditionalHeader::Load(const char* file)
   }
 
   // read file
-  string line;
+  string   line;
+  PADDHEAD paddhead;
   while(getline(AH, line)){
     if('#' == line[0]){
       continue;
@@ -3854,26 +3860,44 @@ bool AdditionalHeader::Load(const char* file)
       return false;
     }
 
-    // set charcntlist
-    int keylen = key.size();
-    charcnt_list_t::iterator iter;
-    for(iter = charcntlist.begin(); iter != charcntlist.end(); ++iter){
-      if(keylen == (*iter)){
-        break;
+    if(0 == strncasecmp(key.c_str(), ADD_HEAD_REGEX, strlen(ADD_HEAD_REGEX))){
+      // regex type: the key must carry a pattern after the "reg:" prefix
+      if(key.size() <= strlen(ADD_HEAD_REGEX)){
+        S3FS_PRN_ERR("file format error: %s key(suffix) does not have key string.", key.c_str());
+        continue;
       }
-    }
-    if(iter == charcntlist.end()){
-      charcntlist.push_back(keylen);
-    }
-    // set addheader
-    addheader_t::iterator aiter;
-    if(addheader.end() == (aiter = addheader.find(key))){
-      headerpair_t hpair;
-      hpair[head]    = value;
-      addheader[key] = hpair;
+      key = key.substr(strlen(ADD_HEAD_REGEX));
+
+      // compile
+      regex_t*  preg = new regex_t;
+      int       result;
+      char      errbuf[256];
+      if(0 != (result = regcomp(preg, key.c_str(), REG_EXTENDED | REG_NOSUB))){ // we do not need matching info
+        regerror(result, preg, errbuf, sizeof(errbuf));
+        S3FS_PRN_ERR("failed to compile regex from %s key by %s.", key.c_str(), errbuf);
+        delete preg;
+        continue;
+      }
+
+      // set
+      paddhead             = new ADDHEAD;   // allocated only after validation, so the error paths above cannot leak it
+      paddhead->pregex     = preg;
+      paddhead->basestring = key;
+      paddhead->headkey    = head;
+      paddhead->headvalue  = value;
+
     }else{
-      aiter->second[head] = value;
+      // not regex, directly comparing
+      paddhead             = new ADDHEAD;
+      paddhead->pregex     = NULL;
+      paddhead->basestring = key;
+      paddhead->headkey    = head;
+      paddhead->headvalue  = value;
     }
+
+    // add list
+    addheadlist.push_back(paddhead);
+
     // set flag
     if(!is_enable){
       is_enable = true;
@@ -3885,8 +3909,17 @@ bool AdditionalHeader::Load(const char* file)
 void AdditionalHeader::Unload(void)
 {
   is_enable = false;
-  charcntlist.clear();
-  addheader.clear();
+
+  // Free every entry and its compiled regex (if any), then empty the list in one step.
+  for(addheadlist_t::iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter){
+    PADDHEAD paddhead = *iter;
+    if(paddhead && paddhead->pregex){
+      regfree(paddhead->pregex);    // release memory held inside the compiled pattern
+      delete paddhead->pregex;      // release the regex_t object itself
+    }
+    delete paddhead;                // deleting a NULL pointer is a no-op
+  }
+  addheadlist.clear();
 }
 
 bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const
@@ -3898,21 +3931,35 @@ bool AdditionalHeader::AddHeader(headers_t& meta, const char* path) const
     S3FS_PRN_WARN("path is NULL.");
     return false;
   }
-  int nPathLen = strlen(path);
-  for(charcnt_list_t::const_iterator iter = charcntlist.begin(); iter != charcntlist.end(); ++iter){
-    // get target character count
-    if(nPathLen < (*iter)){
+
+  size_t pathlength = strlen(path);
+
+  // loop
+  for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter){
+    const PADDHEAD paddhead = *iter;
+    if(!paddhead){
       continue;
     }
-    // make target suffix(same character count) & find
-    string suffix(&path[nPathLen - (*iter)]);
-    addheader_t::const_iterator aiter;
-    if(addheader.end() == (aiter = addheader.find(suffix))){
-      continue;
-    }
-    for(headerpair_t::const_iterator piter = aiter->second.begin(); piter != aiter->second.end(); ++piter){
-      // Adding header
-      meta[(*piter).first] = (*piter).second;
+
+    if(paddhead->pregex){
+      // regex
+      int        result;
+      regmatch_t match;         // not use
+
+      if(0 == (result = regexec(paddhead->pregex, path, 1, &match, 0))){
+        // match -> adding header
+        meta[paddhead->headkey] = paddhead->headvalue;
+        break;
+      }
+    }else{
+      // directly comparing
+      if(paddhead->basestring.length() < pathlength){
+        if(0 == paddhead->basestring.length() || 0 == strcmp(&path[pathlength - paddhead->basestring.length()], paddhead->basestring.c_str())){
+          // match -> adding header
+          meta[paddhead->headkey] = paddhead->headvalue;
+          break;
+        }
+      }
     }
   }
   return true;
@@ -3939,26 +3986,31 @@ bool AdditionalHeader::Dump(void) const
   if(!IS_S3FS_LOG_DBG()){
     return true;
   }
-  // character count list
-  stringstream ssdbg;
-  ssdbg << "Character count list[" << charcntlist.size() << "] = {";
-  for(charcnt_list_t::const_iterator citer = charcntlist.begin(); citer != charcntlist.end(); ++citer){
-    ssdbg << " " << (*citer);
-  }
-  ssdbg << " }\n";
 
-  // additional header
-  ssdbg << "Additional Header list[" << addheader.size() << "] = {\n";
-  for(addheader_t::const_iterator aiter = addheader.begin(); aiter != addheader.end(); ++aiter){
-    string key = (*aiter).first;
-    if(0 == key.size()){
-      key = "*";
-    }
-    for(headerpair_t::const_iterator piter = (*aiter).second.begin(); piter != (*aiter).second.end(); ++piter){
-      ssdbg << "    " << key << "\t--->\t" << (*piter).first << ": " << (*piter).second << "\n";
+  stringstream ssdbg;
+  int          cnt = 1;
+
+  ssdbg << "Additional Header list[" << addheadlist.size() << "] = {" << endl;
+
+  for(addheadlist_t::const_iterator iter = addheadlist.begin(); iter != addheadlist.end(); ++iter, ++cnt){
+    const PADDHEAD paddhead = *iter;
+
+    ssdbg << "    [" << cnt << "] = {" << endl;
+
+    if(paddhead){
+      if(paddhead->pregex){
+        ssdbg << "        type\t\t--->\tregex" << endl;
+      }else{
+        ssdbg << "        type\t\t--->\tsuffix matching" << endl;
+      }
+      ssdbg << "        base string\t--->\t" << paddhead->basestring << endl;
+      ssdbg << "        add header\t--->\t"  << paddhead->headkey << ": " << paddhead->headvalue << endl;
     }
+    ssdbg << "    }" << endl;
   }
-  ssdbg << "}";
+
+
+  ssdbg << "}" << endl;
 
   // print all
   S3FS_PRN_DBG("%s", ssdbg.str().c_str());
diff --git a/src/curl.h b/src/curl.h
index 5955a2c..5d0a989 100644
--- a/src/curl.h
+++ b/src/curl.h
@@ -428,25 +428,32 @@ class S3fsMultiCurl
 //----------------------------------------------
 // class AdditionalHeader
 //----------------------------------------------
-typedef std::list<int> charcnt_list_t;
-typedef std::map<std::string, std::string> headerpair_t;
-typedef std::map<std::string, headerpair_t> addheader_t;
+#include <regex.h>
+
+typedef struct add_header{
+  regex_t*      pregex;         // not NULL means using regex, NULL means comparing suffix directly.
+  std::string   basestring;     // regex source text (without the "reg:" prefix) or the literal suffix
+  std::string   headkey;        // name of the HTTP header added when the path matches
+  std::string   headvalue;      // value of the HTTP header added when the path matches
+}ADDHEAD, *PADDHEAD;
+
+typedef std::vector<PADDHEAD>  addheadlist_t;
 
 class AdditionalHeader
 {
   private:
     static AdditionalHeader singleton;
     bool                    is_enable;
-    charcnt_list_t          charcntlist;
-    addheader_t             addheader;
+    addheadlist_t           addheadlist;
+
+  protected:
+    AdditionalHeader();
+    ~AdditionalHeader();
 
   public:
     // Reference singleton
     static AdditionalHeader* get(void) { return &singleton; }
 
-    AdditionalHeader();
-    ~AdditionalHeader();
-
     bool Load(const char* file);
     void Unload(void);
 
diff --git a/src/s3fs_util.cpp b/src/s3fs_util.cpp
index a95f312..0f5b50d 100644
--- a/src/s3fs_util.cpp
+++ b/src/s3fs_util.cpp
@@ -954,17 +954,19 @@ void show_help (void)
     "      file is the additional HTTP header by file(object) extension.\n"
     "      The configuration file format is below:\n"
     "      -----------\n"
-    "      line         = [file suffix] HTTP-header [HTTP-values]\n"
+    "      line         = [file suffix or regex] HTTP-header [HTTP-values]\n"
     "      file suffix  = file(object) suffix, if this field is empty,\n"
-    "                     it means \"*\"(all object).\n"
+    "                     it means \"reg:(.*)\".(=all object).\n"
+    "      regex        = regular expression to match the file(object) path.\n"
+    "                     this type starts with \"reg:\" prefix.\n"
     "      HTTP-header  = additional HTTP header name\n"
     "      HTTP-values  = additional HTTP header value\n"
     "      -----------\n"
     "      Sample:\n"
     "      -----------\n"
-    "      .gz      Content-Encoding     gzip\n"
-    "      .Z       Content-Encoding     compress\n"
-    "               X-S3FS-MYHTTPHEAD    myvalue\n"
+    "      .gz                    Content-Encoding  gzip\n"
+    "      .Z                     Content-Encoding  compress\n"
+    "      reg:^/MYDIR/(.*)[.]t2$ Content-Encoding  text2\n"
     "      -----------\n"
     "      A sample configuration file is uploaded in \"test\" directory.\n"
     "      If you specify this option for set \"Content-Encoding\" HTTP \n"
diff --git a/test/sample_ahbe.conf b/test/sample_ahbe.conf
index a93e0a5..4e41b79 100644
--- a/test/sample_ahbe.conf
+++ b/test/sample_ahbe.conf
@@ -4,21 +4,27 @@
 # s3fs loads this file at starting.
 #
 # Format:
-#  line                = [file suffix] HTTP-header [HTTP-header-values]
+#  line                = [file suffix or regex] HTTP-header [HTTP-header-values]
 #  file suffix         = file(object) suffix, if this field is empty, 
-#                        it means "*"(all object).
+#                        it means "reg:(.*)" (= all objects).
+#  regex               = regular expression to match the file(object) path.
+#                        This type starts with the "reg:" prefix.
 #  HTTP-header         = additional HTTP header name
 #  HTTP-header-values  = additional HTTP header value
 #
 #   <suffix(extension)>  <HTTP header>  <HTTP header values>
 #
+# Entries are evaluated in the order in which they appear in this file, and
+# only the first matching entry is applied, so the order is very important.
+#
 # Example:
-#   "    Content-Encoding gzip"  --> all object
-#   ".gz Content-Encoding gzip"  --> only ".gz" extension file
+#   "                     Content-Encoding gzip"  --> all objects
+#   ".gz                  Content-Encoding gzip"  --> only ".gz" extension file
+#   "reg:^/DIR/(.*)[.]t2$ Content-Encoding text2" --> "/DIR/*.t2" extension file
 #
 # Notice:
-#   If you need to set all object, you can specify without "suffix".
-#   Then all of object(file) is added additional header.
+#   To apply a header to all objects, omit the "suffix" field or use the regex
+#   type "reg:(.*)". The additional header is then added to every object(file).
 #   If you have this configuration file for Content-Encoding, you should
 #   know about RFC 2616.
 #
@@ -27,15 +33,20 @@
 #      Encoding header, and SHOULD NOT be used in the Content-Encoding
 #      header."
 #
-.gz	Content-Encoding	gzip
-.Z	Content-Encoding	compress
-.bz2	Content-Encoding	bzip2
-.svgz	Content-Encoding	gzip
-.svg.gz	Content-Encoding	gzip
-.tgz	Content-Encoding	gzip
-.tar.gz	Content-Encoding	gzip
-.taz	Content-Encoding	gzip
-.tz	Content-Encoding	gzip
-.tbz2	Content-Encoding	gzip
-gz.js	Content-Encoding	gzip
+
+# file suffix type
+.gz			Content-Encoding	gzip
+.Z			Content-Encoding	compress
+.bz2			Content-Encoding	bzip2
+.svgz			Content-Encoding	gzip
+.svg.gz			Content-Encoding	gzip
+.tgz			Content-Encoding	gzip
+.tar.gz			Content-Encoding	gzip
+.taz			Content-Encoding	gzip
+.tz			Content-Encoding	gzip
+.tbz2			Content-Encoding	gzip
+gz.js			Content-Encoding	gzip
+
+# regex type(test)
+reg:^/MYDIR/(.*)[.]t2$	Content-Encoding	text2