Support the object under no directory object path by compat_dir (#2023)

This commit is contained in:
Takeshi Nakatani 2022-10-22 15:12:00 +09:00 committed by GitHub
parent 4304ec63bb
commit 4a813aec42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 282 additions and 23 deletions

View File

@ -33,7 +33,7 @@
//-------------------------------------------------------------------
// Class S3fsMultiCurl
//-------------------------------------------------------------------
S3fsMultiCurl::S3fsMultiCurl(int maxParallelism) : maxParallelism(maxParallelism), SuccessCallback(NULL), RetryCallback(NULL), pSuccessCallbackParam(NULL)
S3fsMultiCurl::S3fsMultiCurl(int maxParallelism) : maxParallelism(maxParallelism), SuccessCallback(NULL), NotFoundCallback(NULL), RetryCallback(NULL), pSuccessCallbackParam(NULL), pNotFoundCallbackParam(NULL)
{
int result;
pthread_mutexattr_t attr;
@ -88,7 +88,14 @@ S3fsMultiSuccessCallback S3fsMultiCurl::SetSuccessCallback(S3fsMultiSuccessCallb
SuccessCallback = function;
return old;
}
//
// Replace the registered "not found" callback.
//
// function : callback to register from now on (a NULL value means that
//            no callback is invoked, because the caller checks the pointer
//            before calling it)
// return   : the callback that was registered before this call
//
S3fsMultiNotFoundCallback S3fsMultiCurl::SetNotFoundCallback(S3fsMultiNotFoundCallback function)
{
    S3fsMultiNotFoundCallback previous = NotFoundCallback;

    NotFoundCallback = function;

    return previous;
}
S3fsMultiRetryCallback S3fsMultiCurl::SetRetryCallback(S3fsMultiRetryCallback function)
{
S3fsMultiRetryCallback old = RetryCallback;
@ -102,7 +109,14 @@ void* S3fsMultiCurl::SetSuccessCallbackParam(void* param)
pSuccessCallbackParam = param;
return old;
}
//
// Replace the opaque parameter that is handed to the "not found" callback.
//
// param  : pointer passed as the second argument of the callback; this
//          object only stores it and does not take ownership here
// return : the pointer that was stored before this call
//
void* S3fsMultiCurl::SetNotFoundCallbackParam(void* param)
{
    void* previous = pNotFoundCallbackParam;

    pNotFoundCallbackParam = param;

    return previous;
}
bool S3fsMultiCurl::SetS3fsCurlObject(S3fsCurl* s3fscurl)
{
if(!s3fscurl){
@ -207,7 +221,7 @@ int S3fsMultiCurl::MultiRead()
}else if(400 > responseCode){
// add into stat cache
if(SuccessCallback && !SuccessCallback(s3fscurl, pSuccessCallbackParam)){
S3FS_PRN_WARN("error from callback function(%s).", s3fscurl->url.c_str());
S3FS_PRN_WARN("error from success callback function(%s).", s3fscurl->url.c_str());
}
}else if(400 == responseCode){
// as possibly in multipart
@ -218,6 +232,10 @@ int S3fsMultiCurl::MultiRead()
// HEAD requests on readdir_multi_head can return 404
if(s3fscurl->GetOp() != "HEAD"){
S3FS_PRN_WARN("failed a request(%ld: %s)", responseCode, s3fscurl->url.c_str());
}
// Call callback function
if(NotFoundCallback && !NotFoundCallback(s3fscurl, pNotFoundCallbackParam)){
S3FS_PRN_WARN("error from not found callback function(%s).", s3fscurl->url.c_str());
}
}else if(500 == responseCode){
// case of all other result, do retry.(11/13/2013)

View File

@ -28,6 +28,7 @@ class S3fsCurl;
typedef std::vector<S3fsCurl*> s3fscurllist_t;
typedef bool (*S3fsMultiSuccessCallback)(S3fsCurl* s3fscurl, void* param); // callback for succeed multi request
typedef bool (*S3fsMultiNotFoundCallback)(S3fsCurl* s3fscurl, void* param); // callback for a multi request whose object was not found
typedef S3fsCurl* (*S3fsMultiRetryCallback)(S3fsCurl* s3fscurl); // callback for failure and retrying
//----------------------------------------------
@ -41,9 +42,11 @@ class S3fsMultiCurl
s3fscurllist_t clist_all; // all of curl requests
s3fscurllist_t clist_req; // curl requests are sent
S3fsMultiSuccessCallback SuccessCallback;
S3fsMultiRetryCallback RetryCallback;
void* pSuccessCallbackParam;
S3fsMultiSuccessCallback SuccessCallback;
S3fsMultiNotFoundCallback NotFoundCallback;
S3fsMultiRetryCallback RetryCallback;
void* pSuccessCallbackParam;
void* pNotFoundCallbackParam;
pthread_mutex_t completed_tids_lock;
std::vector<pthread_t> completed_tids;
@ -62,8 +65,10 @@ class S3fsMultiCurl
int GetMaxParallelism() { return maxParallelism; }
S3fsMultiSuccessCallback SetSuccessCallback(S3fsMultiSuccessCallback function);
S3fsMultiNotFoundCallback SetNotFoundCallback(S3fsMultiNotFoundCallback function);
S3fsMultiRetryCallback SetRetryCallback(S3fsMultiRetryCallback function);
void* SetSuccessCallbackParam(void* param);
void* SetNotFoundCallbackParam(void* param);
bool Clear() { return ClearEx(true); }
bool SetS3fsCurlObject(S3fsCurl* s3fscurl);
int Request();

View File

@ -48,6 +48,7 @@
#include "s3fs_util.h"
#include "mpu_util.h"
#include "threadpoolman.h"
#include "autolock.h"
//-------------------------------------------------------------------
// Symbols
@ -2719,12 +2720,39 @@ static bool multi_head_callback(S3fsCurl* s3fscurl, void* param)
pcbparam->filler(pcbparam->buf, bpath.c_str(), 0, 0);
}
}else{
S3FS_PRN_WARN("param(fuse_fill_dir_t filler) is NULL, then can not call filler.");
S3FS_PRN_WARN("param(multi_head_callback_param*) is NULL, then can not call filler.");
}
return true;
}
//
// Parameter object for multi_head_notfound_callback.
//
// [NOTE]
// This struct has no constructor, so list_lock must be initialized
// (pthread_mutex_init) by the code that creates the object before the
// callback can use it.
//
struct multi_head_notfound_callback_param
{
// lock taken by the callback (through AutoLock) before it appends to notfound_list
pthread_mutex_t list_lock;
// list to which the callback appends the base path of each not found request
s3obj_list_t notfound_list;
};
//
// "Not found" callback for multi HEAD requests.
//
// Appends the base path of the request to the notfound_list of the
// multi_head_notfound_callback_param object passed through param.
//
// s3fscurl : the request object for which the callback is invoked
// param    : pointer to a struct multi_head_notfound_callback_param
// return   : true if the path was appended to the list,
//            false if s3fscurl or param is NULL (nothing is recorded)
//
static bool multi_head_notfound_callback(S3fsCurl* s3fscurl, void* param)
{
    if(!s3fscurl){
        return false;
    }
    S3FS_PRN_INFO("HEAD returned NotFound(404) for %s object, it maybe only the path exists and the object does not exist.", s3fscurl->GetPath().c_str());

    if(!param){
        // This callback never calls a filler; it only records the path.
        S3FS_PRN_WARN("param(multi_head_notfound_callback_param*) is NULL, then can not add the path to the not found list.");
        return false;
    }

    // set path to not found list
    //
    // param is a void* that carries a multi_head_notfound_callback_param*,
    // so static_cast is the appropriate conversion back to the object type.
    struct multi_head_notfound_callback_param* pcbparam = static_cast<struct multi_head_notfound_callback_param*>(param);

    // NOTE(review): AutoLock is presumably a scoped lock that releases
    // list_lock when auto_lock goes out of scope - confirm in autolock.h.
    AutoLock auto_lock(&(pcbparam->list_lock));
    pcbparam->notfound_list.push_back(s3fscurl->GetBasePath());

    return true;
}
static S3fsCurl* multi_head_retry_callback(S3fsCurl* s3fscurl)
{
if(!s3fscurl){
@ -2764,7 +2792,6 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf
{
S3fsMultiCurl curlmulti(S3fsCurl::GetMaxMultiRequest());
s3obj_list_t headlist;
s3obj_list_t fillerlist;
int result = 0;
S3FS_PRN_INFO1("[path=%s][list=%zu]", path, headlist.size());
@ -2776,17 +2803,31 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf
curlmulti.SetSuccessCallback(multi_head_callback);
curlmulti.SetRetryCallback(multi_head_retry_callback);
// Callback function parameter
struct multi_head_callback_param param;
param.buf = buf;
param.filler = filler;
curlmulti.SetSuccessCallbackParam(reinterpret_cast<void*>(&param));
// Success Callback function parameter
struct multi_head_callback_param success_param;
success_param.buf = buf;
success_param.filler = filler;
curlmulti.SetSuccessCallbackParam(reinterpret_cast<void*>(&success_param));
s3obj_list_t::iterator iter;
// Not found Callback function parameter
struct multi_head_notfound_callback_param notfound_param;
if(support_compat_dir){
pthread_mutexattr_t attr;
pthread_mutexattr_init(&attr);
#if S3FS_PTHREAD_ERRORCHECK
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
#endif
if(0 != (result = pthread_mutex_init(&(notfound_param.list_lock), &attr))){
S3FS_PRN_CRIT("failed to init notfound_param.list_lock: %d", result);
abort();
}
curlmulti.SetNotFoundCallback(multi_head_notfound_callback);
curlmulti.SetNotFoundCallbackParam(reinterpret_cast<void*>(&notfound_param));
}
fillerlist.clear();
// Make single head request(with max).
for(iter = headlist.begin(); headlist.end() != iter; iter = headlist.erase(iter)){
for(s3obj_list_t::iterator iter = headlist.begin(); headlist.end() != iter; iter = headlist.erase(iter)){
std::string disppath = path + (*iter);
std::string etag = head.GetETag((*iter).c_str());
struct stat st;
@ -2803,12 +2844,6 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf
continue;
}
std::string fillpath = disppath;
if('/' == *disppath.rbegin()){
fillpath.erase(fillpath.length() -1);
}
fillerlist.push_back(fillpath);
// First check for directory, start checking "not SSE-C".
// If checking failed, retry to check with "SSE-C" by retry callback func when SSE-C mode.
S3fsCurl* s3fscurl = new S3fsCurl();
@ -2839,6 +2874,55 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf
}
}
// [NOTE]
// Objects that could not be found by HEAD request may exist only
// as a path, so search for objects under that path.(a case of no dir object)
//
if(support_compat_dir && !notfound_param.notfound_list.empty()){ // [NOTE] not need to lock to access this here.
// dummy header
mode_t dirmask = umask(0); // macos does not have getumask()
umask(dirmask);
headers_t dummy_header;
dummy_header["Content-Type"] = std::string("application/x-directory"); // directory
dummy_header["x-amz-meta-uid"] = str(is_s3fs_uid ? s3fs_uid : geteuid());
dummy_header["x-amz-meta-gid"] = str(is_s3fs_gid ? s3fs_gid : getegid());
dummy_header["x-amz-meta-mode"] = str(S_IFDIR | (~dirmask & (S_IRWXU | S_IRWXG | S_IRWXO)));
dummy_header["x-amz-meta-atime"] = str(0);
dummy_header["x-amz-meta-ctime"] = str(0);
dummy_header["x-amz-meta-mtime"] = str(0);
for(s3obj_list_t::iterator reiter = notfound_param.notfound_list.begin(); reiter != notfound_param.notfound_list.end(); ++reiter){
int dir_result;
if(0 == (dir_result = directory_empty(reiter->c_str()))){
// Found objects under the path, so the path is directory.
//
std::string dirpath = path + (*reiter);
// Add stat cache
if(StatCache::getStatCacheData()->AddStat(dirpath, dummy_header, true)){ // set forcedir=true
// Get stats from stats cache(for converting from meta), and fill
std::string base_path = mybasename(dirpath);
if(use_wtf8){
base_path = s3fs_wtf8_decode(base_path);
}
struct stat st;
if(StatCache::getStatCacheData()->GetStat(dirpath, &st)){
filler(buf, base_path.c_str(), &st, 0);
}else{
S3FS_PRN_INFO2("Could not find %s directory(no dir object) in stat cache.", dirpath.c_str());
filler(buf, base_path.c_str(), 0, 0);
}
}else{
S3FS_PRN_ERR("failed adding stat cache [path=%s], but dontinue...", dirpath.c_str());
}
}else{
S3FS_PRN_WARN("%s object does not have any object under it(errno=%d),", reiter->c_str(), dir_result);
}
}
}
return result;
}

View File

@ -1745,6 +1745,157 @@ function test_ensurespace_move_file() {
rm -rf "${CACHE_DIR}/.s3fs_test_tmpdir"
}
#
# Test paths that exist only as a prefix of other objects (no directory object).
#
# Two objects are uploaded directly with the aws command under the current
# test directory:
#   not_existed_dir_single/${TEST_TEXT_FILE}
#   not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}
# Then the visibility of the intermediate paths is checked with ls.
# The expectation depends on whether the s3fs process(S3FS_PID) was started
# with the "compat_dir" option(detected from the process command line).
#
# Returns 1 at the first expectation that is not met.
#
function test_not_existed_dir_obj() {
    describe "Test not existed directory object..."

    local DIR_NAME; DIR_NAME=$(basename "${PWD}")

    #
    # Create files under not existed directory by aws command
    #
    local OBJECT_NAME_1; OBJECT_NAME_1="${DIR_NAME}/not_existed_dir_single/${TEST_TEXT_FILE}"
    local OBJECT_NAME_2; OBJECT_NAME_2="${DIR_NAME}/not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}"
    echo data1 | aws_cli s3 cp - "s3://${TEST_BUCKET_1}/${OBJECT_NAME_1}"
    echo data2 | aws_cli s3 cp - "s3://${TEST_BUCKET_1}/${OBJECT_NAME_2}"

    # shellcheck disable=SC2009
    if ps u -p "${S3FS_PID}" | grep -q compat_dir; then
        #
        # with "compat_dir", found directories and files
        #

        # Top directory
        # shellcheck disable=SC2010
        if ! ls -1 | grep -q '^not_existed_dir_single$'; then
            echo "Expect to find \"not_existed_dir_single\" directory, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 | grep -q '^not_existed_dir_parent$'; then
            echo "Expect to find \"not_existed_dir_parent\" directory, but it is not found"
            return 1
        fi

        # Single nest directory
        # shellcheck disable=SC2010
        if ! ls -d not_existed_dir_single | grep -q '^not_existed_dir_single$'; then
            echo "Expect to find \"not_existed_dir_single\" directory, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 not_existed_dir_single | grep -q "^${TEST_TEXT_FILE}\$"; then
            echo "Expect to find \"not_existed_dir_single/${TEST_TEXT_FILE}\" file, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 "not_existed_dir_single/${TEST_TEXT_FILE}" | grep -q "^not_existed_dir_single/${TEST_TEXT_FILE}\$"; then
            echo "Expect to find \"not_existed_dir_single/${TEST_TEXT_FILE}\" file, but it is not found"
            return 1
        fi

        # Double nest directory
        # [NOTE]
        # All patterns are anchored at both ends so that a longer name with
        # the same prefix can not satisfy the check.
        # shellcheck disable=SC2010
        if ! ls -d not_existed_dir_parent | grep -q '^not_existed_dir_parent$'; then
            echo "Expect to find \"not_existed_dir_parent\" directory, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 not_existed_dir_parent | grep -q '^not_existed_dir_child$'; then
            echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child\" directory, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -d not_existed_dir_parent/not_existed_dir_child | grep -q '^not_existed_dir_parent/not_existed_dir_child$'; then
            echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child\" directory, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 not_existed_dir_parent/not_existed_dir_child | grep -q "^${TEST_TEXT_FILE}\$"; then
            echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\" file, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 "not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}" | grep -q "^not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\$"; then
            echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\" file, but it is not found"
            return 1
        fi

        rm -rf not_existed_dir_single
        rm -rf not_existed_dir_parent

    else
        #
        # without "compat_dir", found directories and files
        #
        # [NOTE]
        # If specify a directory path, the file under that directory will be found.
        # And if specify a file full path, it will be found.
        #

        # Top directory
        # shellcheck disable=SC2010
        if ls -1 | grep -q '^not_existed_dir_single$'; then
            echo "Expect to not find \"not_existed_dir_single\" directory, but it is found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ls -1 | grep -q '^not_existed_dir_parent$'; then
            echo "Expect to not find \"not_existed_dir_parent\" directory, but it is found"
            return 1
        fi

        # Single nest directory
        # shellcheck disable=SC2010
        if ! ls -d not_existed_dir_single | grep -q '^not_existed_dir_single$'; then
            echo "Expect to find \"not_existed_dir_single\" directory, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 not_existed_dir_single | grep -q "^${TEST_TEXT_FILE}\$"; then
            echo "Expect to find \"not_existed_dir_single/${TEST_TEXT_FILE}\" file, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 "not_existed_dir_single/${TEST_TEXT_FILE}" | grep -q "^not_existed_dir_single/${TEST_TEXT_FILE}\$"; then
            echo "Expect to find \"not_existed_dir_single/${TEST_TEXT_FILE}\" file, but it is not found"
            return 1
        fi

        # Double nest directory
        # shellcheck disable=SC2010
        if ! ls -d not_existed_dir_parent | grep -q '^not_existed_dir_parent$'; then
            echo "Expect to find \"not_existed_dir_parent\" directory, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ls -1 not_existed_dir_parent | grep -q '^not_existed_dir_child$'; then
            echo "Expect to not find \"not_existed_dir_parent/not_existed_dir_child\" directory, but it is found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -d not_existed_dir_parent/not_existed_dir_child | grep -q '^not_existed_dir_parent/not_existed_dir_child$'; then
            echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child\" directory, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 not_existed_dir_parent/not_existed_dir_child | grep -q "^${TEST_TEXT_FILE}\$"; then
            echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\" file, but it is not found"
            return 1
        fi
        # shellcheck disable=SC2010
        if ! ls -1 "not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}" | grep -q "^not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\$"; then
            echo "Expect to find \"not_existed_dir_parent/not_existed_dir_child/${TEST_TEXT_FILE}\" file, but it is not found"
            return 1
        fi

        rm -rf not_existed_dir_single

        # [NOTE]
        # This case could not remove sub directory, then below command will be failed.
        #rm -rf not_existed_dir_parent
    fi
}
function test_ut_ossfs {
describe "Testing ossfs python ut..."
@ -2053,6 +2204,7 @@ function add_all_tests {
add_tests test_truncate_cache
add_tests test_upload_sparsefile
add_tests test_mix_upload_entities
add_tests test_not_existed_dir_obj
add_tests test_ut_ossfs
# shellcheck disable=SC2009
if ! ps u -p "${S3FS_PID}" | grep -q ensure_diskfree && ! uname | grep -q Darwin; then