Fixed a bug in handling file names containing CR(0x0D) (#2136)

This commit is contained in:
Takeshi Nakatani 2023-03-26 13:19:16 +09:00 committed by GitHub
parent a25cb9e07a
commit 9c74014443
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 247 additions and 5 deletions

1
.gitignore vendored
View File

@ -88,6 +88,7 @@ test/s3proxy-*
test/write_multiblock
test/mknod_test
test/truncate_read_file
test/cr_filename
#
# Windows ports

View File

@ -3322,8 +3322,15 @@ static int list_bucket(const char* path, S3ObjList& head, const char* delimiter,
}
const BodyData* body = s3fscurl.GetBodyData();
// [NOTE]
// The xmlReadMemory() function replaces CR code(\r) with LF(\n).
// To prevent that, CR code(and the '%' used as its escape character) is encoded by the following function.
// The encoded CR code is decoded in append_objects_from_xml(_ex).
//
std::string encbody = get_encoded_cr_code(body->str());
// xmlDocPtr
if(NULL == (doc = xmlReadMemory(body->str(), static_cast<int>(body->size()), "", NULL, 0))){
if(NULL == (doc = xmlReadMemory(encbody.c_str(), static_cast<int>(encbody.size()), "", NULL, 0))){
S3FS_PRN_ERR("xmlReadMemory returns with error.");
return -EIO;
}

View File

@ -29,6 +29,7 @@
#include "s3fs_util.h"
#include "s3objlist.h"
#include "autolock.h"
#include "string_util.h"
//-------------------------------------------------------------------
// Variables
@ -400,15 +401,21 @@ int append_objects_from_xml_ex(const char* path, xmlDocPtr doc, xmlXPathContextP
xmlXPathFreeObject(ETag);
}
}
if(!head.insert(name, (!stretag.empty() ? stretag.c_str() : NULL), is_dir)){
// [NOTE]
// The XML data passed to this function is CR code(\r) encoded.
// The function below decodes that encoded CR code.
//
std::string decname = get_decoded_cr_code(name);
free(name);
if(!head.insert(decname.c_str(), (!stretag.empty() ? stretag.c_str() : NULL), is_dir)){
S3FS_PRN_ERR("insert_object returns with error.");
xmlXPathFreeObject(key);
xmlXPathFreeObject(contents_xp);
free(name);
S3FS_MALLOCTRIM(0);
return -1;
}
free(name);
}else{
S3FS_PRN_DBG("name is file or subdir in dir. but continue.");
}

View File

@ -601,6 +601,89 @@ std::string s3fs_wtf8_decode(const std::string &s)
return result;
}
//
// Encode only CR('\r'=0x0D) and it also encodes the '%' character accordingly.
//
// The xmlReadMemory() function in libxml2 replaces CR code with LF code('\n'=0x0A)
// due to the XML specification.
// s3fs uses libxml2 to parse the S3 response, and this automatic substitution
// of libxml2 may change the object name(file/dir name). Therefore, before passing
// the response to the xmlReadMemory() function, we need the string encoded by
// this function.
//
// [NOTE]
// Normally the quotes included in the XML content data are HTML encoded("&quot;").
// CR could also be HTML encoded as a character reference(ex, "&#13;"), but
// if a file name containing the same text as that encoded string exists, it
// cannot be determined whether the text was encoded or was there originally.
// Therefore, CR is encoded in the same manner as URL encoding("%0D").
// It is also assumed that there is no CR code in the S3 response tags etc.
// (actually it shouldn't exist).
//
// Escapes a string so that it contains no raw CR code.
//
// pbase   NUL-terminated input string (may be NULL).
// return  A copy of pbase in which every CR('\r') is replaced with "%0D" and
//         every '%' is replaced with "%45". An empty string is returned
//         when pbase is NULL.
//
// [NOTE]
// "%45" is the escape token that get_decoded_cr_code() expects for '%'.
// It is not the URL-encoding hex value of '%'(that would be "%25"), so the
// two functions must be changed together if the token is ever changed.
//
std::string get_encoded_cr_code(const char* pbase)
{
    std::string encoded;
    if(!pbase){
        return encoded;
    }

    // Walk the input one character at a time and swap the two special
    // characters for their escape tokens.
    for(const char* cur = pbase; '\0' != *cur; ++cur){
        switch(*cur){
            case '%':
                encoded += "%45";
                break;
            case '\r':
                encoded += "%0D";
                break;
            default:
                encoded += *cur;
                break;
        }
    }
    return encoded;
}
//
// Decode a string encoded with get_encoded_cr_code().
//
// Reverses the escaping done by get_encoded_cr_code().
//
// pencode  NUL-terminated encoded string (may be NULL).
// return   The decoded string, or an empty string when pencode is NULL.
//
// Tokens recognized at each '%':
//   "%45" -> '%'
//   "%0D" -> CR('\r')
//   "%%"  -> '%'
// Any other '%' is copied through unchanged.
//
std::string get_decoded_cr_code(const char* pencode)
{
    if(!pencode){
        return std::string();
    }
    const std::string src(pencode);
    const size_t      total = src.length();

    std::string decoded;
    size_t      idx = 0;
    while(idx < total){
        // Ordinary characters are copied as they are.
        if('%' != src[idx]){
            decoded += src[idx];
            ++idx;
            continue;
        }

        // A '%' was found: check the tokens in the order "%45", "%0D", "%%".
        const size_t remain = total - idx;
        if(3 <= remain && '4' == src[idx + 1] && '5' == src[idx + 2]){
            decoded += '%';
            idx     += 3;
        }else if(3 <= remain && '0' == src[idx + 1] && 'D' == src[idx + 2]){
            decoded += '\r';
            idx     += 3;
        }else if(2 <= remain && '%' == src[idx + 1]){
            decoded += '%';
            idx     += 2;
        }else{
            // Not an escape token, keep the '%' itself.
            decoded += '%';
            idx     += 1;
        }
    }
    return decoded;
}
/*
* Local variables:
* tab-width: 4

View File

@ -118,6 +118,12 @@ std::string s3fs_wtf8_encode(const std::string &s);
bool s3fs_wtf8_decode(const char *s, std::string *result);
std::string s3fs_wtf8_decode(const std::string &s);
//
// For CR in XML
//
// get_encoded_cr_code() returns a copy of pbase with CR('\r') and '%' replaced
// by escape tokens, and get_decoded_cr_code() converts such a string back.
// Both return an empty string when the argument is NULL.
//
std::string get_encoded_cr_code(const char* pbase);
std::string get_decoded_cr_code(const char* pencode);
#endif // S3FS_STRING_UTIL_H_
/*

View File

@ -147,6 +147,55 @@ void test_wtf8_encoding()
ASSERT_EQUALS(s3fs_wtf8_decode(s3fs_wtf8_encode(mixed)), mixed);
}
// Checks that get_decoded_cr_code(get_encoded_cr_code(x)) returns x for
// strings holding CR, '%' and CRLF at the end or in the middle.
void test_cr_encoding()
{
    // base strings
    static const char* const base_strings[] = {
        "STR",                  // no special character
        "STR\r",                // CR
        "STR\rSTR",
        "STR\r\r",
        "STR\r\rSTR",
        "STR%",                 // percent
        "STR%STR",
        "STR%%",
        "STR%%STR",
        "STR\r\n",              // CRLF
        "STR\r\nSTR",
        "STR\r\n\r\n",
        "STR\r\n\r\nSTR",
        "STR%\r",               // percent + CR
        "STR%\rSTR",
        "STR%\r%\r",
        "STR%\r%\rSTR"
    };
    const size_t base_count = sizeof(base_strings) / sizeof(base_strings[0]);

    // encode->decode->compare
    for(size_t pos = 0; pos < base_count; ++pos){
        std::string base(base_strings[pos]);
        ASSERT_EQUALS(get_decoded_cr_code(get_encoded_cr_code(base.c_str()).c_str()), base);
    }
}
int main(int argc, char *argv[])
{
S3fsLog singletonLog;
@ -155,6 +204,7 @@ int main(int argc, char *argv[])
test_base64();
test_strtoofft();
test_wtf8_encoding();
test_cr_encoding();
return 0;
}

View File

@ -33,12 +33,14 @@ noinst_PROGRAMS = \
junk_data \
write_multiblock \
mknod_test \
truncate_read_file
truncate_read_file \
cr_filename
junk_data_SOURCES = junk_data.c
write_multiblock_SOURCES = write_multiblock.cc
mknod_test_SOURCES = mknod_test.c
truncate_read_file_SOURCES = truncate_read_file.c
cr_filename_SOURCES = cr_filename.c
#
# Local variables:

76
test/cr_filename.c Normal file
View File

@ -0,0 +1,76 @@
/*
* s3fs - FUSE-based file system backed by Amazon S3
*
* Copyright(C) 2021 Andrew Gaul <andrew@gaul.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
// [NOTE]
// This program checks that a file whose name contains a CR code can be handled.
// It appends a CR code('\r') to the given base file path, creates an empty
// file with that name, gets its stat, and then removes it.
// The program exits with EXIT_FAILURE if any of these steps fails.
//
//
// Creates an empty file named "<base file path>\r", gets its stat and removes it.
//
// argv[1]  base file path; a CR code is appended to it to build the file name.
// exit     EXIT_SUCCESS when every step succeeds, otherwise EXIT_FAILURE after
//          printing an error message to stderr.
//
int main(int argc, char *argv[])
{
    if(argc != 2){
        fprintf(stderr, "[ERROR] Wrong paraemters\n");
        fprintf(stdout, "[Usage] cr_filename <base file path>\n");
        exit(EXIT_FAILURE);
    }

    // Build the target path by appending a CR code.
    // snprintf() is used so that an over-long argument is rejected instead of
    // overflowing the fixed-size buffer.
    char filepath[4096];
    int  pathlen = snprintf(filepath, sizeof(filepath), "%s\r", argv[1]);
    if(pathlen < 0 || sizeof(filepath) <= (size_t)pathlen){
        fprintf(stderr, "[ERROR] Base file path is too long(%s)\n", argv[1]);
        exit(EXIT_FAILURE);
    }

    // create empty file
    int fd;
    if(-1 == (fd = open(filepath, O_CREAT|O_RDWR, 0644))){
        fprintf(stderr, "[ERROR] Could not open file(%s)\n", filepath);
        exit(EXIT_FAILURE);
    }
    // close() can itself report an error, so its result is checked as well.
    if(0 != close(fd)){
        fprintf(stderr, "[ERROR] Could not close file(%s)\n", filepath);
        exit(EXIT_FAILURE);
    }

    // stat
    struct stat buf;
    if(0 != stat(filepath, &buf)){
        fprintf(stderr, "[ERROR] Could not get stat for file(%s)\n", filepath);
        exit(EXIT_FAILURE);
    }

    // remove file
    if(0 != unlink(filepath)){
        fprintf(stderr, "[ERROR] Could not remove file(%s)\n", filepath);
        exit(EXIT_FAILURE);
    }
    exit(EXIT_SUCCESS);
}
/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: expandtab sw=4 ts=4 fdm=marker
* vim<600: expandtab sw=4 ts=4
*/

View File

@ -2398,6 +2398,15 @@ function test_ut_ossfs {
../../ut_test.py
}
function test_cr_filename {
    describe "Testing filename with CR code ..."

    # The cr_filename program creates the file, checks it and deletes it
    # by itself, so this test only has to run that program.
    #
    local cr_filename_prog="../../cr_filename"
    "${cr_filename_prog}" "${TEST_TEXT_FILE}"
}
#
# This test opens a file and writes multiple sets of data.
# The file is opened only once and multiple blocks of data are written
@ -2709,6 +2718,7 @@ function add_all_tests {
add_tests test_mix_upload_entities
add_tests test_not_existed_dir_obj
add_tests test_ut_ossfs
add_tests test_cr_filename
# shellcheck disable=SC2009
if ! ps u -p "${S3FS_PID}" | grep -q ensure_diskfree && ! uname | grep -q Darwin; then
add_tests test_ensurespace_move_file