diff --git a/.gitignore b/.gitignore index d656a77..2baaf73 100644 --- a/.gitignore +++ b/.gitignore @@ -88,6 +88,7 @@ test/s3proxy-* test/write_multiblock test/mknod_test test/truncate_read_file +test/cr_filename # # Windows ports diff --git a/src/s3fs.cpp b/src/s3fs.cpp index bc7cb4c..6f0c80a 100644 --- a/src/s3fs.cpp +++ b/src/s3fs.cpp @@ -3322,8 +3322,15 @@ static int list_bucket(const char* path, S3ObjList& head, const char* delimiter, } const BodyData* body = s3fscurl.GetBodyData(); + // [NOTE] + // CR code(\r) is replaced with LF(\n) by xmlReadMemory() function. + // To prevent that, only CR code is encoded by following function. + // The encoded CR code is decoded with append_objects_from_xml(_ex). + // + std::string encbody = get_encoded_cr_code(body->str()); + // xmlDocPtr - if(NULL == (doc = xmlReadMemory(body->str(), static_cast(body->size()), "", NULL, 0))){ + if(NULL == (doc = xmlReadMemory(encbody.c_str(), static_cast(encbody.size()), "", NULL, 0))){ S3FS_PRN_ERR("xmlReadMemory returns with error."); return -EIO; } diff --git a/src/s3fs_xml.cpp b/src/s3fs_xml.cpp index 7f0dd89..5ee9a9d 100644 --- a/src/s3fs_xml.cpp +++ b/src/s3fs_xml.cpp @@ -29,6 +29,7 @@ #include "s3fs_util.h" #include "s3objlist.h" #include "autolock.h" +#include "string_util.h" //------------------------------------------------------------------- // Variables @@ -400,15 +401,21 @@ int append_objects_from_xml_ex(const char* path, xmlDocPtr doc, xmlXPathContextP xmlXPathFreeObject(ETag); } } - if(!head.insert(name, (!stretag.empty() ? stretag.c_str() : NULL), is_dir)){ + + // [NOTE] + // The XML data passed to this function is CR code(\r) encoded. + // The function below decodes that encoded CR code. + // + std::string decname = get_decoded_cr_code(name); + free(name); + + if(!head.insert(decname.c_str(), (!stretag.empty() ? 
stretag.c_str() : NULL), is_dir)){ S3FS_PRN_ERR("insert_object returns with error."); xmlXPathFreeObject(key); xmlXPathFreeObject(contents_xp); - free(name); S3FS_MALLOCTRIM(0); return -1; } - free(name); }else{ S3FS_PRN_DBG("name is file or subdir in dir. but continue."); } diff --git a/src/string_util.cpp b/src/string_util.cpp index 7ee0af1..c996c00 100644 --- a/src/string_util.cpp +++ b/src/string_util.cpp @@ -601,6 +601,89 @@ std::string s3fs_wtf8_decode(const std::string &s) return result; } +// +// Encode only CR('\r'=0x0D) and it also encodes the '%' character accordingly. +// +// The xmlReadMemory() function in libxml2 replaces CR code with LF code('\n'=0x0A) +// due to the XML specification. +// s3fs uses libxml2 to parse the S3 response, and this automatic substitution +// of libxml2 may change the object name(file/dir name). Therefore, before passing +// the response to the xmlReadMemory() function, we need the string encoded by +// this function. +// +// [NOTE] +// Normally the quotes included in the XML content data are HTML encoded("&quot;"). +// Encoding for CR can also be HTML encoded as binary code (ex, "&#13;"), but +// if the same string content(as file name) as this encoded string exists, it +// cannot be determined whether the original string was encoded or not. +// Therefore, CR is encoded in the same manner as URL encoding("%0D"). 
// And it is assumed that there is no CR code in the S3 response tag etc.
// (actually it shouldn't exist)
//
// [Behavior]
// Returns a copy of pbase in which every CR('\r') is replaced with "%0D" and
// every '%' is replaced with "%45". All other bytes are copied unchanged.
// If pbase is NULL, an empty string is returned.
//
// [NOTE]
// "%45" is not the URL encoding of '%' (that would be "%25"). It is a private
// token which only get_decoded_cr_code() has to understand, so the encoder and
// the decoder must always be changed together.
//
std::string get_encoded_cr_code(const char* pbase)
{
    std::string result;
    if(!pbase){
        return result;
    }
    const std::string strbase(pbase);
    result.reserve(strbase.length());       // the output is never shorter than the input

    size_t startpos = 0;
    size_t foundpos;
    // find_first_of() returns npos when startpos has reached the end of the string.
    while(std::string::npos != (foundpos = strbase.find_first_of("%\r", startpos))){
        // copy the untouched segment in front of the special character(no temporary string)
        result.append(strbase, startpos, foundpos - startpos);
        result   += ('%' == strbase[foundpos]) ? "%45" : "%0D";
        startpos  = foundpos + 1;
    }
    // copy the rest(possibly empty) after the last special character
    result.append(strbase, startpos, std::string::npos);
    return result;
}

//
// Decode a string encoded with get_encoded_cr_code().
//
// [Behavior]
// "%45" is decoded to '%' and "%0D" is decoded to CR('\r').
// "%%" is also accepted and decoded to a single '%'.
// A '%' which does not start any of these sequences is copied as it is.
// If pencode is NULL, an empty string is returned.
//
std::string get_decoded_cr_code(const char* pencode)
{
    std::string result;
    if(!pencode){
        return result;
    }
    const std::string strencode(pencode);
    result.reserve(strencode.length());     // the output is never longer than the input

    size_t startpos = 0;
    size_t foundpos;
    while(std::string::npos != (foundpos = strencode.find('%', startpos))){
        // copy the untouched segment in front of '%'
        result.append(strencode, startpos, foundpos - startpos);

        // [NOTE]
        // compare(pos, len, str) clamps len to the remaining length, thus a
        // sequence cut off at the end of the string never matches and no
        // explicit length check is needed.
        if(0 == strencode.compare(foundpos, 3, "%45")){
            result   += '%';
            startpos  = foundpos + 3;
        }else if(0 == strencode.compare(foundpos, 3, "%0D")){
            result   += '\r';
            startpos  = foundpos + 3;
        }else if(0 == strencode.compare(foundpos, 2, "%%")){
            result   += '%';
            startpos  = foundpos + 2;
        }else{
            // unknown sequence: keep '%' literally
            result   += '%';
            startpos  = foundpos + 1;
        }
    }
    // copy the rest(possibly empty) after the last '%'
    result.append(strencode, startpos, std::string::npos);
    return result;
}
-118,6 +118,12 @@ std::string s3fs_wtf8_encode(const std::string &s); bool s3fs_wtf8_decode(const char *s, std::string *result); std::string s3fs_wtf8_decode(const std::string &s); +// +// For CR in XML +// +std::string get_encoded_cr_code(const char* pbase); +std::string get_decoded_cr_code(const char* pencode); + #endif // S3FS_STRING_UTIL_H_ /* diff --git a/src/test_string_util.cpp b/src/test_string_util.cpp index 6e6e5b0..a081c1d 100644 --- a/src/test_string_util.cpp +++ b/src/test_string_util.cpp @@ -147,6 +147,55 @@ void test_wtf8_encoding() ASSERT_EQUALS(s3fs_wtf8_decode(s3fs_wtf8_encode(mixed)), mixed); } +void test_cr_encoding() +{ + // base strings: plain text, then CR, '%', CRLF and '%'+CR, each once and twice, at the end and in the middle + std::string base_no("STR"); + + std::string base_end_cr1("STR\r"); + std::string base_mid_cr1("STR\rSTR"); + std::string base_end_cr2("STR\r\r"); + std::string base_mid_cr2("STR\r\rSTR"); + + std::string base_end_per1("STR%"); + std::string base_mid_per1("STR%STR"); + std::string base_end_per2("STR%%"); + std::string base_mid_per2("STR%%STR"); + + std::string base_end_crlf1("STR\r\n"); + std::string base_mid_crlf1("STR\r\nSTR"); + std::string base_end_crlf2("STR\r\n\r\n"); + std::string base_mid_crlf2("STR\r\n\r\nSTR"); + + std::string base_end_crper1("STR%\r"); + std::string base_mid_crper1("STR%\rSTR"); + std::string base_end_crper2("STR%\r%\r"); + std::string base_mid_crper2("STR%\r%\rSTR"); + + // encode->decode->compare: every round trip must give back the base string unchanged + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_no.c_str()).c_str()), base_no); + + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_end_cr1.c_str()).c_str()), base_end_cr1); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_mid_cr1.c_str()).c_str()), base_mid_cr1); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_end_cr2.c_str()).c_str()), base_end_cr2); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_mid_cr2.c_str()).c_str()), base_mid_cr2); + 
ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_end_per1.c_str()).c_str()), base_end_per1); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_mid_per1.c_str()).c_str()), base_mid_per1); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_end_per2.c_str()).c_str()), base_end_per2); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_mid_per2.c_str()).c_str()), base_mid_per2); + + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_end_crlf1.c_str()).c_str()), base_end_crlf1); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_mid_crlf1.c_str()).c_str()), base_mid_crlf1); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_end_crlf2.c_str()).c_str()), base_end_crlf2); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_mid_crlf2.c_str()).c_str()), base_mid_crlf2); + + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_end_crper1.c_str()).c_str()), base_end_crper1); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_mid_crper1.c_str()).c_str()), base_mid_crper1); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_end_crper2.c_str()).c_str()), base_end_crper2); + ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base_mid_crper2.c_str()).c_str()), base_mid_crper2); +} + int main(int argc, char *argv[]) { S3fsLog singletonLog; @@ -155,6 +204,7 @@ int main(int argc, char *argv[]) test_base64(); test_strtoofft(); test_wtf8_encoding(); + test_cr_encoding(); return 0; } diff --git a/test/Makefile.am b/test/Makefile.am index 808c059..11f7667 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -33,12 +33,14 @@ noinst_PROGRAMS = \ junk_data \ write_multiblock \ mknod_test \ - truncate_read_file + truncate_read_file \ + cr_filename junk_data_SOURCES = junk_data.c write_multiblock_SOURCES = write_multiblock.cc mknod_test_SOURCES = mknod_test.c truncate_read_file_SOURCES = truncate_read_file.c +cr_filename_SOURCES = cr_filename.c # # Local variables: diff --git 
a/test/cr_filename.c b/test/cr_filename.c new file mode 100644 index 0000000..11367fc --- /dev/null +++ b/test/cr_filename.c @@ -0,0 +1,76 @@ +/* + * s3fs - FUSE-based file system backed by Amazon S3 + * + * Copyright(C) 2021 Andrew Gaul + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include + +// [NOTE] +// This is a program used for testing a file name that ends with CR code(\r). +// It appends CR to the base file path given as the argument, and then +// creates an empty file with that name, gets its stat, and removes it. +// The program exits with failure if any of these steps fails. 
//
// Entry point: cr_filename <base file path>
//
// Appends a CR code(\r) to the given base file path, creates an empty file
// with that name, gets its stat and removes it again.
// Exits with EXIT_SUCCESS when every step succeeded. Exits with EXIT_FAILURE
// (after printing a message to stderr) when the arguments are wrong, the
// resulting path does not fit into the path buffer, or any step failed.
//
int main(int argc, char *argv[])
{
    if(argc != 2){
        fprintf(stderr, "[ERROR] Wrong parameters\n");
        fprintf(stdout, "[Usage] cr_filename <base file path>\n");
        exit(EXIT_FAILURE);
    }

    // Build "<base file path>\r".
    // snprintf() never writes past the buffer, and its return value is the
    // length the full string needs, so truncation can be detected.
    char filepath[4096];
    int  pathlen = snprintf(filepath, sizeof(filepath), "%s\r", argv[1]);
    if(pathlen < 0 || sizeof(filepath) <= (size_t)pathlen){
        fprintf(stderr, "[ERROR] File path is too long(%s)\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    // create empty file
    int fd;
    if(-1 == (fd = open(filepath, O_CREAT|O_RDWR, 0644))){
        fprintf(stderr, "[ERROR] Could not open file(%s)\n", filepath);
        exit(EXIT_FAILURE);
    }
    close(fd);

    // stat
    struct stat buf;
    if(0 != stat(filepath, &buf)){
        fprintf(stderr, "[ERROR] Could not get stat for file(%s)\n", filepath);
        exit(EXIT_FAILURE);
    }

    // remove file
    if(0 != unlink(filepath)){
        fprintf(stderr, "[ERROR] Could not remove file(%s)\n", filepath);
        exit(EXIT_FAILURE);
    }

    exit(EXIT_SUCCESS);
}

/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/