From 4a813aec421b8e04e4d1f57e69358add400af79b Mon Sep 17 00:00:00 2001 From: Takeshi Nakatani Date: Sat, 22 Oct 2022 15:12:00 +0900 Subject: [PATCH] Support the object under no directory object path by compat_dir (#2023) --- src/curl_multi.cpp | 26 +++++- src/curl_multi.h | 11 ++- src/s3fs.cpp | 116 ++++++++++++++++++++++---- test/integration-test-main.sh | 152 ++++++++++++++++++++++++++++++++++ 4 files changed, 282 insertions(+), 23 deletions(-) diff --git a/src/curl_multi.cpp b/src/curl_multi.cpp index bedaa47..ccf8b69 100644 --- a/src/curl_multi.cpp +++ b/src/curl_multi.cpp @@ -33,7 +33,7 @@ //------------------------------------------------------------------- // Class S3fsMultiCurl //------------------------------------------------------------------- -S3fsMultiCurl::S3fsMultiCurl(int maxParallelism) : maxParallelism(maxParallelism), SuccessCallback(NULL), RetryCallback(NULL), pSuccessCallbackParam(NULL) +S3fsMultiCurl::S3fsMultiCurl(int maxParallelism) : maxParallelism(maxParallelism), SuccessCallback(NULL), NotFoundCallback(NULL), RetryCallback(NULL), pSuccessCallbackParam(NULL), pNotFoundCallbackParam(NULL) { int result; pthread_mutexattr_t attr; @@ -88,7 +88,14 @@ S3fsMultiSuccessCallback S3fsMultiCurl::SetSuccessCallback(S3fsMultiSuccessCallb SuccessCallback = function; return old; } - + +S3fsMultiNotFoundCallback S3fsMultiCurl::SetNotFoundCallback(S3fsMultiNotFoundCallback function) +{ + S3fsMultiNotFoundCallback old = NotFoundCallback; + NotFoundCallback = function; + return old; +} + S3fsMultiRetryCallback S3fsMultiCurl::SetRetryCallback(S3fsMultiRetryCallback function) { S3fsMultiRetryCallback old = RetryCallback; @@ -102,7 +109,14 @@ void* S3fsMultiCurl::SetSuccessCallbackParam(void* param) pSuccessCallbackParam = param; return old; } - + +void* S3fsMultiCurl::SetNotFoundCallbackParam(void* param) +{ + void* old = pNotFoundCallbackParam; + pNotFoundCallbackParam = param; + return old; +} + bool S3fsMultiCurl::SetS3fsCurlObject(S3fsCurl* s3fscurl) { 
if(!s3fscurl){ @@ -207,7 +221,7 @@ int S3fsMultiCurl::MultiRead() }else if(400 > responseCode){ // add into stat cache if(SuccessCallback && !SuccessCallback(s3fscurl, pSuccessCallbackParam)){ - S3FS_PRN_WARN("error from callback function(%s).", s3fscurl->url.c_str()); + S3FS_PRN_WARN("error from success callback function(%s).", s3fscurl->url.c_str()); } }else if(400 == responseCode){ // as possibly in multipart @@ -218,6 +232,10 @@ int S3fsMultiCurl::MultiRead() // HEAD requests on readdir_multi_head can return 404 if(s3fscurl->GetOp() != "HEAD"){ S3FS_PRN_WARN("failed a request(%ld: %s)", responseCode, s3fscurl->url.c_str()); + } + // Call callback function + if(NotFoundCallback && !NotFoundCallback(s3fscurl, pNotFoundCallbackParam)){ + S3FS_PRN_WARN("error from not found callback function(%s).", s3fscurl->url.c_str()); } }else if(500 == responseCode){ // case of all other result, do retry.(11/13/2013) diff --git a/src/curl_multi.h b/src/curl_multi.h index 77a6f52..003f22c 100644 --- a/src/curl_multi.h +++ b/src/curl_multi.h @@ -28,6 +28,7 @@ class S3fsCurl; typedef std::vector<S3fsCurl*> s3fscurllist_t; typedef bool (*S3fsMultiSuccessCallback)(S3fsCurl* s3fscurl, void* param); // callback for succeed multi request +typedef bool (*S3fsMultiNotFoundCallback)(S3fsCurl* s3fscurl, void* param); // callback for not found(404) multi request typedef S3fsCurl* (*S3fsMultiRetryCallback)(S3fsCurl* s3fscurl); // callback for failure and retrying //---------------------------------------------- @@ -41,9 +42,11 @@ class S3fsMultiCurl s3fscurllist_t clist_all; // all of curl requests s3fscurllist_t clist_req; // curl requests are sent - S3fsMultiSuccessCallback SuccessCallback; - S3fsMultiRetryCallback RetryCallback; - void* pSuccessCallbackParam; + S3fsMultiSuccessCallback SuccessCallback; + S3fsMultiNotFoundCallback NotFoundCallback; + S3fsMultiRetryCallback RetryCallback; + void* pSuccessCallbackParam; + void* pNotFoundCallbackParam; pthread_mutex_t completed_tids_lock; std::vector<pthread_t> 
completed_tids; @@ -62,8 +65,10 @@ class S3fsMultiCurl int GetMaxParallelism() { return maxParallelism; } S3fsMultiSuccessCallback SetSuccessCallback(S3fsMultiSuccessCallback function); + S3fsMultiNotFoundCallback SetNotFoundCallback(S3fsMultiNotFoundCallback function); S3fsMultiRetryCallback SetRetryCallback(S3fsMultiRetryCallback function); void* SetSuccessCallbackParam(void* param); + void* SetNotFoundCallbackParam(void* param); bool Clear() { return ClearEx(true); } bool SetS3fsCurlObject(S3fsCurl* s3fscurl); int Request(); diff --git a/src/s3fs.cpp b/src/s3fs.cpp index 7af65f0..b43fcfd 100644 --- a/src/s3fs.cpp +++ b/src/s3fs.cpp @@ -48,6 +48,7 @@ #include "s3fs_util.h" #include "mpu_util.h" #include "threadpoolman.h" +#include "autolock.h" //------------------------------------------------------------------- // Symbols @@ -2719,12 +2720,39 @@ static bool multi_head_callback(S3fsCurl* s3fscurl, void* param) pcbparam->filler(pcbparam->buf, bpath.c_str(), 0, 0); } }else{ - S3FS_PRN_WARN("param(fuse_fill_dir_t filler) is NULL, then can not call filler."); + S3FS_PRN_WARN("param(multi_head_callback_param*) is NULL, then can not call filler."); } return true; } +struct multi_head_notfound_callback_param +{ + pthread_mutex_t list_lock; + s3obj_list_t notfound_list; +}; + +static bool multi_head_notfound_callback(S3fsCurl* s3fscurl, void* param) +{ + if(!s3fscurl){ + return false; + } + S3FS_PRN_INFO("HEAD returned NotFound(404) for %s object, it maybe only the path exists and the object does not exist.", s3fscurl->GetPath().c_str()); + + if(!param){ + S3FS_PRN_WARN("param(multi_head_notfound_callback_param*) is NULL, then can not call filler."); + return false; + } + + // set path to not found list + struct multi_head_notfound_callback_param* pcbparam = reinterpret_cast<struct multi_head_notfound_callback_param*>(param); + + AutoLock auto_lock(&(pcbparam->list_lock)); + pcbparam->notfound_list.push_back(s3fscurl->GetBasePath()); + + return true; +} + static S3fsCurl* multi_head_retry_callback(S3fsCurl* 
s3fscurl) { if(!s3fscurl){ @@ -2764,7 +2792,6 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf { S3fsMultiCurl curlmulti(S3fsCurl::GetMaxMultiRequest()); s3obj_list_t headlist; - s3obj_list_t fillerlist; int result = 0; S3FS_PRN_INFO1("[path=%s][list=%zu]", path, headlist.size()); @@ -2776,17 +2803,31 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf curlmulti.SetSuccessCallback(multi_head_callback); curlmulti.SetRetryCallback(multi_head_retry_callback); - // Callback function parameter - struct multi_head_callback_param param; - param.buf = buf; - param.filler = filler; - curlmulti.SetSuccessCallbackParam(reinterpret_cast<void*>(&param)); + // Success Callback function parameter + struct multi_head_callback_param success_param; + success_param.buf = buf; + success_param.filler = filler; + curlmulti.SetSuccessCallbackParam(reinterpret_cast<void*>(&success_param)); - s3obj_list_t::iterator iter; + // Not found Callback function parameter + struct multi_head_notfound_callback_param notfound_param; + if(support_compat_dir){ + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + #if S3FS_PTHREAD_ERRORCHECK + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); + #endif + + if(0 != (result = pthread_mutex_init(&(notfound_param.list_lock), &attr))){ + S3FS_PRN_CRIT("failed to init notfound_param.list_lock: %d", result); + abort(); + } + curlmulti.SetNotFoundCallback(multi_head_notfound_callback); + curlmulti.SetNotFoundCallbackParam(reinterpret_cast<void*>(&notfound_param)); + } - fillerlist.clear(); // Make single head request(with max). 
- for(iter = headlist.begin(); headlist.end() != iter; iter = headlist.erase(iter)){ + for(s3obj_list_t::iterator iter = headlist.begin(); headlist.end() != iter; iter = headlist.erase(iter)){ std::string disppath = path + (*iter); std::string etag = head.GetETag((*iter).c_str()); struct stat st; @@ -2803,12 +2844,6 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf continue; } - std::string fillpath = disppath; - if('/' == *disppath.rbegin()){ - fillpath.erase(fillpath.length() -1); - } - fillerlist.push_back(fillpath); - // First check for directory, start checking "not SSE-C". // If checking failed, retry to check with "SSE-C" by retry callback func when SSE-C mode. S3fsCurl* s3fscurl = new S3fsCurl(); @@ -2839,6 +2874,55 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf } } + // [NOTE] + // Objects that could not be found by HEAD request may exist only + // as a path, so search for objects under that path.(a case of no dir object) + // + if(support_compat_dir && !notfound_param.notfound_list.empty()){ // [NOTE] not need to lock to access this here. + // dummy header + mode_t dirmask = umask(0); // macos does not have getumask() + umask(dirmask); + + headers_t dummy_header; + dummy_header["Content-Type"] = std::string("application/x-directory"); // directory + dummy_header["x-amz-meta-uid"] = str(is_s3fs_uid ? s3fs_uid : geteuid()); + dummy_header["x-amz-meta-gid"] = str(is_s3fs_gid ? 
s3fs_gid : getegid()); + dummy_header["x-amz-meta-mode"] = str(S_IFDIR | (~dirmask & (S_IRWXU | S_IRWXG | S_IRWXO))); + dummy_header["x-amz-meta-atime"] = str(0); + dummy_header["x-amz-meta-ctime"] = str(0); + dummy_header["x-amz-meta-mtime"] = str(0); + + for(s3obj_list_t::iterator reiter = notfound_param.notfound_list.begin(); reiter != notfound_param.notfound_list.end(); ++reiter){ + int dir_result; + if(0 == (dir_result = directory_empty(reiter->c_str()))){ + // Found objects under the path, so the path is directory. + // + std::string dirpath = path + (*reiter); + + // Add stat cache + if(StatCache::getStatCacheData()->AddStat(dirpath, dummy_header, true)){ // set forcedir=true + // Get stats from stats cache(for converting from meta), and fill + std::string base_path = mybasename(dirpath); + if(use_wtf8){ + base_path = s3fs_wtf8_decode(base_path); + } + + struct stat st; + if(StatCache::getStatCacheData()->GetStat(dirpath, &st)){ + filler(buf, base_path.c_str(), &st, 0); + }else{ + S3FS_PRN_INFO2("Could not find %s directory(no dir object) in stat cache.", dirpath.c_str()); + filler(buf, base_path.c_str(), 0, 0); + } + }else{ + S3FS_PRN_ERR("failed adding stat cache [path=%s], but dontinue...", dirpath.c_str()); + } + }else{ + S3FS_PRN_WARN("%s object does not have any object under it(errno=%d),", reiter->c_str(), dir_result); + } + } + } + return result; } diff --git a/test/integration-test-main.sh b/test/integration-test-main.sh index cdc4a07..5bc3da5 100755 --- a/test/integration-test-main.sh +++ b/test/integration-test-main.sh @@ -1745,6 +1745,157 @@ function test_ensurespace_move_file() { rm -rf "${CACHE_DIR}/.s3fs_test_tmpdir" } +function test_not_existed_dir_obj() { + describe "Test not existed directory object..." 
+ + local DIR_NAME; DIR_NAME=$(basename "${PWD}") + + # + # Create files under not existed directory by aws command + # + local OBJECT_NAME_1; OBJECT_NAME_1="${DIR_NAME}/not_existed_dir_single/${TEST_TEXT_FILE}" + local OBJECT_NAME_2; OBJECT_NAME_2="${DIR_NAME}/not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}" + echo data1 | aws_cli s3 cp - "s3://${TEST_BUCKET_1}/${OBJECT_NAME_1}" + echo data2 | aws_cli s3 cp - "s3://${TEST_BUCKET_1}/${OBJECT_NAME_2}" + + # shellcheck disable=SC2009 + if ps u -p "${S3FS_PID}" | grep -q compat_dir; then + # + # with "compat_dir", found directories and files + # + + # Top directory + # shellcheck disable=SC2010 + if ! ls -1 | grep -q '^not_existed_dir_single$'; then + echo "Expect to find \"not_existed_dir_single\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -1 | grep -q '^not_existed_dir_parent$'; then + echo "Expect to find \"not_existed_dir_parent\" directory, but it is not found" + return 1; + fi + + # Single nest directory + # shellcheck disable=SC2010 + if ! ls -d not_existed_dir_single | grep -q '^not_existed_dir_single$'; then + echo "Expect to find \"not_existed_dir_single\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -1 not_existed_dir_single | grep -q "^${TEST_TEXT_FILE}\$"; then + echo "Expect to find \"not_existed_dir_single/${TEST_TEXT_FILE}\" file, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -1 "not_existed_dir_single/${TEST_TEXT_FILE}" | grep -q "^not_existed_dir_single/${TEST_TEXT_FILE}\$"; then + echo "Expect to find \"not_existed_dir_single/${TEST_TEXT_FILE}\" file, but it is not found" + return 1; + fi + + # Double nest directory + # shellcheck disable=SC2010 + if ! ls -d not_existed_dir_parent | grep -q '^not_existed_dir_parent'; then + echo "Expect to find \"not_existed_dir_parent\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! 
ls -1 not_existed_dir_parent | grep -q '^not_existed_dir_child'; then + echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -d not_existed_dir_parent/not_existed_dir_child | grep -q '^not_existed_dir_parent/not_existed_dir_child'; then + echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -1 not_existed_dir_parent/not_existed_dir_child | grep -q "^${TEST_TEXT_FILE}\$"; then + echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -1 "not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}" | grep -q "^not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\$"; then + echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\" directory, but it is not found" + return 1; + fi + + rm -rf not_existed_dir_single + rm -rf not_existed_dir_parent + + else + # + # without "compat_dir", found directories and files + # + # [NOTE] + # If specify a directory path, the file under that directory will be found. + # And if specify a file full path, it will be found. + # + + # Top directory + # shellcheck disable=SC2010 + if ls -1 | grep -q '^not_existed_dir_single$'; then + echo "Expect to not find \"not_existed_dir_single\" directory, but it is found" + return 1; + fi + # shellcheck disable=SC2010 + if ls -1 | grep -q '^not_existed_dir_parent$'; then + echo "Expect to not find \"not_existed_dir_parent\" directory, but it is found" + return 1; + fi + + # Single nest directory + # shellcheck disable=SC2010 + if ! 
ls -d not_existed_dir_single | grep -q '^not_existed_dir_single$'; then + echo "Expect to find \"not_existed_dir_single\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -1 not_existed_dir_single | grep -q "^${TEST_TEXT_FILE}\$"; then + echo "Expect to find \"not_existed_dir_single/${TEST_TEXT_FILE}\" file, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -1 "not_existed_dir_single/${TEST_TEXT_FILE}" | grep -q "^not_existed_dir_single/${TEST_TEXT_FILE}\$"; then + echo "Expect to find \"not_existed_dir_single/${TEST_TEXT_FILE}\" file, but it is not found" + return 1; + fi + + # Double nest directory + # shellcheck disable=SC2010 + if ! ls -d not_existed_dir_parent | grep -q '^not_existed_dir_parent'; then + echo "Expect to find \"not_existed_dir_parent\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ls -1 not_existed_dir_parent | grep -q '^not_existed_dir_child'; then + echo "Expect to not find \"not_existed_dir_parent/not_existed_dir_child\" directory, but it is found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -d not_existed_dir_parent/not_existed_dir_child | grep -q '^not_existed_dir_parent/not_existed_dir_child'; then + echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! ls -1 not_existed_dir_parent/not_existed_dir_child | grep -q "^${TEST_TEXT_FILE}\$"; then + echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\" directory, but it is not found" + return 1; + fi + # shellcheck disable=SC2010 + if ! 
ls -1 "not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}" | grep -q "^not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\$"; then + echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\" directory, but it is not found" + return 1; + fi + + rm -rf not_existed_dir_single + + # [NOTE] + # This case could not remove sub directory, then below command will be failed. + #rm -rf not_existed_dir_parent + fi +} + function test_ut_ossfs { describe "Testing ossfs python ut..." @@ -2053,6 +2204,7 @@ function add_all_tests { add_tests test_truncate_cache add_tests test_upload_sparsefile add_tests test_mix_upload_entities + add_tests test_not_existed_dir_obj add_tests test_ut_ossfs # shellcheck disable=SC2009 if ! ps u -p "${S3FS_PID}" | grep -q ensure_diskfree && ! uname | grep -q Darwin; then