From ff8a0c2eeacba34085bb44bbf442d6de129a5f74 Mon Sep 17 00:00:00 2001
From: Andrew Gaul
Date: Mon, 27 Jul 2015 15:47:08 -0700
Subject: [PATCH] Parse ETag from copy multipart correctly

Previously s3fs misparsed this, preventing renames of files larger
than 5 GB.  Integration test disabled until S3Proxy 1.5.0 is released.
---
 src/curl.cpp                   | 33 ++++++++++++++++++++++++++++-----
 src/s3fs.cpp                   |  7 ++++++-
 src/s3fs_util.cpp              |  4 ++++
 test/integration-test-main.sh  | 26 +++++++++++++++++++++++++-
 test/small-integration-test.sh |  1 +
 5 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/src/curl.cpp b/src/curl.cpp
index ab187e2..cf685f0 100644
--- a/src/curl.cpp
+++ b/src/curl.cpp
@@ -3127,12 +3127,35 @@ int S3fsCurl::CopyMultipartPostRequest(const char* from, const char* to, int par
 
   int result = RequestPerform();
   if(0 == result){
-    const char* start_etag= strstr(bodydata->str(), "ETag");
-    const char* end_etag  = strstr(bodydata->str(), "/ETag>");
-
-    partdata.etag.assign((start_etag + 11), (size_t)(end_etag - (start_etag + 11) - 7));
-    partdata.uploaded = true;
+    // parse ETag from response
+    xmlDocPtr doc;
+    if(NULL == (doc = xmlReadMemory(bodydata->str(), bodydata->size(), "", NULL, 0))){
+      return result;
+    }
+    if(NULL == doc->children){
+      S3FS_XMLFREEDOC(doc);
+      return result;
+    }
+    for(xmlNodePtr cur_node = doc->children->children; NULL != cur_node; cur_node = cur_node->next){
+      if(XML_ELEMENT_NODE == cur_node->type){
+        string elementName = reinterpret_cast<const char*>(cur_node->name);
+        if(cur_node->children){
+          if(XML_TEXT_NODE == cur_node->children->type){
+            if(elementName == "ETag") {
+              string etag = reinterpret_cast<const char*>(cur_node->children->content);
+              if(etag.size() >= 2 && *etag.begin() == '"' && *etag.rbegin() == '"'){
+                etag.assign(etag.substr(1, etag.size() - 2));
+              }
+              partdata.etag.assign(etag);
+              partdata.uploaded = true;
+            }
+          }
+        }
+      }
+    }
+    S3FS_XMLFREEDOC(doc);
   }
+
   delete bodydata;
   bodydata = NULL;
   delete headdata;
diff --git a/src/s3fs.cpp b/src/s3fs.cpp
index 87a6e9b..89097f0 100644
--- a/src/s3fs.cpp
+++ b/src/s3fs.cpp
@@ -120,6 +120,7 @@ static bool is_s3fs_gid = false;// default does not set.
 static bool is_s3fs_umask = false;// default does not set.
 static bool is_remove_cache = false;
 static bool create_bucket = false;
+static int64_t singlepart_copy_limit = FIVE_GB;
 
 //-------------------------------------------------------------------
 // Static functions : prototype
@@ -1388,7 +1389,7 @@ static int s3fs_rename(const char* from, const char* to)
   // files larger than 5GB must be modified via the multipart interface
   if(S_ISDIR(buf.st_mode)){
     result = rename_directory(from, to);
-  }else if(!nomultipart && buf.st_size >= FIVE_GB){
+  }else if(!nomultipart && buf.st_size >= singlepart_copy_limit){
     result = rename_large_object(from, to);
   }else{
     if(!nocopyapi && !norenameapi){
@@ -4392,6 +4393,10 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar
       }
       return 0;
     }
+    if(0 == STR2NCMP(arg, "singlepart_copy_limit=")){
+      singlepart_copy_limit = static_cast<int64_t>(s3fs_strtoofft(strchr(arg, '=') + sizeof(char))) * 1024;
+      return 0;
+    }
     if(0 == STR2NCMP(arg, "ahbe_conf=")){
       string ahbe_conf = strchr(arg, '=') + sizeof(char);
       if(!AdditionalHeader::get()->Load(ahbe_conf.c_str())){
diff --git a/src/s3fs_util.cpp b/src/s3fs_util.cpp
index b81256a..7973b13 100644
--- a/src/s3fs_util.cpp
+++ b/src/s3fs_util.cpp
@@ -971,6 +971,10 @@ void show_help (void)
     "   multipart_size (default=\"10\")\n"
     "      - part size, in MB, for each multipart request.\n"
     "\n"
+    "   singlepart_copy_limit (default=\"5120\")\n"
+    "      - maximum size, in MB, of a single-part copy before trying \n"
+    "        multipart copy.\n"
+    "\n"
     "   fd_page_size (default=\"52428800\"(50MB))\n"
     "      - number of internal management page size for each file descriptor.\n"
     "        For delayed reading and writing by s3fs, s3fs manages pages which \n"
diff --git a/test/integration-test-main.sh b/test/integration-test-main.sh
index d63c846..58e592c 100755
--- a/test/integration-test-main.sh
+++ b/test/integration-test-main.sh
@@ -301,7 +301,31 @@ then
 fi
 
 rm -f "/tmp/${BIG_FILE}"
-rm -f "${BIG_FILE}"
+rm_test_file "${BIG_FILE}"
+
+##########################################################
+# Testing multi-part copy
+##########################################################
+# TODO: test disabled until S3Proxy 1.5.0 is released
+if false
+then
+
+echo "Testing multi-part copy ..."
+dd if=/dev/urandom of="/tmp/${BIG_FILE}" bs=$BIG_FILE_LENGTH count=1
+dd if="/tmp/${BIG_FILE}" of="${BIG_FILE}" bs=$BIG_FILE_LENGTH count=1
+mv "${BIG_FILE}" "${BIG_FILE}-copy"
+
+# Verify contents of file
+echo "Comparing test file"
+if ! cmp "/tmp/${BIG_FILE}" "${BIG_FILE}-copy"
+then
+   exit 1
+fi
+
+rm -f "/tmp/${BIG_FILE}"
+rm_test_file "${BIG_FILE}-copy"
+
+fi
 
 ##########################################################
 # Testing special characters
diff --git a/test/small-integration-test.sh b/test/small-integration-test.sh
index 188c566..6bdd7a4 100755
--- a/test/small-integration-test.sh
+++ b/test/small-integration-test.sh
@@ -59,6 +59,7 @@ stdbuf -oL -eL $S3FS $TEST_BUCKET_1 $TEST_BUCKET_MOUNT_POINT_1 \
     -o createbucket \
     -o passwd_file=$S3FS_CREDENTIALS_FILE \
     -o sigv2 \
+    -o singlepart_copy_limit=$((10 * 1024)) \
     -o url=http://127.0.0.1:8080 \
     -o use_path_request_style -f -o f2 -d -d |& stdbuf -oL -eL sed -u "s/^/s3fs: /" &
 
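
Note on the change (not part of the patch): the old code located the ETag with strstr() and fixed offsets, which breaks when the UploadPartCopy response varies in layout. The sketch below is a minimal, standalone illustration of the same libxml2-based extraction that the new CopyMultipartPostRequest() code performs; the sample response body and ETag value are made up for illustration, and only the parsing and quote-stripping logic mirrors the patch.

// Standalone sketch: parse an UploadPartCopy-style response with libxml2
// and extract the ETag, stripping the surrounding double quotes the way
// the new curl.cpp code does. The XML body and ETag value are illustrative.
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <cstring>
#include <iostream>
#include <string>

int main()
{
  const char* body =
    "<CopyPartResult>"
      "<LastModified>2015-07-27T22:47:08.000Z</LastModified>"
      "<ETag>\"9b2cf535f27731c974343645a3985328\"</ETag>"
    "</CopyPartResult>";

  xmlDocPtr doc = xmlReadMemory(body, static_cast<int>(strlen(body)), "", NULL, 0);
  if(NULL == doc || NULL == doc->children){
    if(doc){
      xmlFreeDoc(doc);
    }
    return 1;
  }

  std::string etag;
  // Walk the children of the root element looking for an ETag text node,
  // mirroring the loop the patch adds to CopyMultipartPostRequest().
  for(xmlNodePtr cur = doc->children->children; NULL != cur; cur = cur->next){
    if(XML_ELEMENT_NODE != cur->type || NULL == cur->children || XML_TEXT_NODE != cur->children->type){
      continue;
    }
    std::string name = reinterpret_cast<const char*>(cur->name);
    if(name == "ETag"){
      etag = reinterpret_cast<const char*>(cur->children->content);
      // S3 wraps the ETag value in double quotes; strip them before storing.
      if(etag.size() >= 2 && *etag.begin() == '"' && *etag.rbegin() == '"'){
        etag = etag.substr(1, etag.size() - 2);
      }
    }
  }
  xmlFreeDoc(doc);

  std::cout << "ETag: " << etag << std::endl;  // prints the unquoted ETag
  return 0;
}

Built with something like `g++ sketch.cpp $(xml2-config --cflags --libs)`, this prints the ETag without the surrounding quotes, which is the value the patch stores in partdata.etag for the subsequent CompleteMultipartUpload request.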