s3fs: add option free_space_ratio to control cache size (#2351)

* Try to clean up the cache directory when initializing without enough disk space

Also optimize log messages to print detailed errors to the user.

Signed-off-by: Qinqi Qu <quqinqi@linux.alibaba.com>

* s3fs: add option free_space_ratio to control cache size

Since the ensure_diskfree option is not convenient enough, we have added
a new option "-o free_space_ratio" to control the space used by the s3fs
cache based on the current disk size.

The value of this option can be between 0 and 100. It will control the
size of the cache according to this ratio to ensure that the idle ratio
of the disk is greater than this value.

For example, when the value is 10 and the disk space is 50GB, it will
ensure that the disk will reserve at least 50GB * 10% = 5GB of remaining
space.

Signed-off-by: Qinqi Qu <quqinqi@linux.alibaba.com>

---------

Signed-off-by: Qinqi Qu <quqinqi@linux.alibaba.com>
This commit is contained in:
AdamQQQ 2023-10-20 17:11:47 +08:00 committed by GitHub
parent 2871975d1e
commit 3856637cd2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 117 additions and 11 deletions

View File

@ -227,6 +227,12 @@ sets MB to ensure disk free space. This option means the threshold of free space
s3fs makes file for downloading, uploading and caching files.
If the free disk space is smaller than this value, s3fs avoids using disk space as much as possible, at the cost of performance.
.TP
\fB\-o\fR free_space_ratio (default="10")
sets the minimum ratio, in percent, of the disk that must remain free. The value of this option can be between 0 and 100.
s3fs limits the size of its cache according to this ratio so that the free ratio of the disk does not drop below this value.
For example, when the disk space is 50GB, the default value will
ensure that the disk will reserve at least 50GB * 10% = 5GB of remaining space.
.TP
\fB\-o\fR multipart_threshold (default="25")
threshold, in MB, to use multipart upload instead of
single-part. Must be at least 5 MB.

View File

@ -266,9 +266,38 @@ bool FdManager::InitFakeUsedDiskSize(off_t fake_freesize)
return true;
}
//
// Returns "ratio" percent of the total disk size, in bytes.
//
// The total is whatever FdManager::GetTotalDiskSpace(nullptr) reports
// (0 when that lookup fails, so the result is 0 as well). The division
// is an integer division, so the result is truncated.
//
off_t FdManager::GetTotalDiskSpaceByRatio(int ratio)
{
    const off_t total_bytes = FdManager::GetTotalDiskSpace(nullptr);
    return total_bytes * ratio / 100;
}
//
// Returns the total size in bytes (f_blocks * f_frsize) taken from the
// statvfs data that FdManager::GetVfsStat() fills in for "path".
//
// Returns 0 when GetVfsStat() reports failure (-1).
//
off_t FdManager::GetTotalDiskSpace(const char* path)
{
    struct statvfs stat_info;
    if(-1 == FdManager::GetVfsStat(path, &stat_info)){
        return 0;
    }
    return stat_info.f_blocks * stat_info.f_frsize;
}
//
// Returns the free size in bytes (f_bavail * f_frsize) taken from the
// statvfs data that FdManager::GetVfsStat() fills in for "path", reduced
// by FdManager::fake_used_disk_space.
//
// Returns 0 when GetVfsStat() reports failure (-1), or when the fake used
// size is not smaller than the actual free size.
//
off_t FdManager::GetFreeDiskSpace(const char* path)
{
    struct statvfs stat_info;
    if(-1 == FdManager::GetVfsStat(path, &stat_info)){
        return 0;
    }

    off_t available_bytes = stat_info.f_bavail * stat_info.f_frsize;
    if(FdManager::fake_used_disk_space < available_bytes){
        return available_bytes - FdManager::fake_used_disk_space;
    }
    // the fake used size covers everything that is actually free
    return 0;
}
int FdManager::GetVfsStat(const char* path, struct statvfs* vfsbuf){
std::string ctoppath;
if(!FdManager::cache_dir.empty()){
ctoppath = FdManager::cache_dir + "/";
@ -284,14 +313,12 @@ off_t FdManager::GetFreeDiskSpace(const char* path)
}else{
ctoppath += ".";
}
if(-1 == statvfs(ctoppath.c_str(), &vfsbuf)){
if(-1 == statvfs(ctoppath.c_str(), vfsbuf)){
S3FS_PRN_ERR("could not get vfs stat by errno(%d)", errno);
return 0;
return -1;
}
off_t actual_freesize = vfsbuf.f_bavail * vfsbuf.f_frsize;
return (FdManager::fake_used_disk_space < actual_freesize ? (actual_freesize - FdManager::fake_used_disk_space) : 0);
return 0;
}
bool FdManager::IsSafeDiskSpace(const char* path, off_t size)
@ -300,6 +327,18 @@ bool FdManager::IsSafeDiskSpace(const char* path, off_t size)
return size + FdManager::GetEnsureFreeDiskSpace() <= fsize;
}
//
// Checks whether "size" bytes plus the configured reserve
// (FdManager::GetEnsureFreeDiskSpace()) fit into the free disk space
// reported by FdManager::GetFreeDiskSpace(path).
//
// Returns true when they fit. Otherwise reports the required and the
// available amount (in MB) through S3FS_PRN_EXIT and returns false.
//
bool FdManager::IsSafeDiskSpaceWithLog(const char* path, off_t size)
{
    const off_t available_bytes = FdManager::GetFreeDiskSpace(path);
    const off_t required_bytes  = size + FdManager::GetEnsureFreeDiskSpace();

    if(available_bytes < required_bytes){
        S3FS_PRN_EXIT("There is no enough disk space for used as cache(or temporary) directory by s3fs. Requires %.3f MB, already has %.3f MB.", static_cast<double>(required_bytes) / 1024 / 1024, static_cast<double>(available_bytes) / 1024 / 1024);
        return false;
    }
    return true;
}
bool FdManager::HaveLseekHole()
{
if(FdManager::checked_lseek){

View File

@ -47,7 +47,9 @@ class FdManager
private:
static off_t GetFreeDiskSpace(const char* path);
static off_t GetTotalDiskSpace(const char* path);
static bool IsDir(const std::string* dir);
static int GetVfsStat(const char* path, struct statvfs* vfsbuf);
int GetPseudoFdCount(const char* path);
void CleanupCacheDirInternal(const std::string &path = "");
@ -78,12 +80,14 @@ class FdManager
static off_t SetEnsureFreeDiskSpace(off_t size);
static bool InitFakeUsedDiskSize(off_t fake_freesize);
static bool IsSafeDiskSpace(const char* path, off_t size);
static bool IsSafeDiskSpaceWithLog(const char* path, off_t size);
static void FreeReservedDiskSpace(off_t size);
static bool ReserveDiskSpace(off_t size);
static bool HaveLseekHole();
static bool SetTmpDir(const char* dir);
static bool CheckTmpDirExist();
static FILE* MakeTempFile();
static off_t GetTotalDiskSpaceByRatio(int ratio);
// Return FdEntity associated with path, returning nullptr on error. This operation increments the reference count; callers must decrement via Close after use.
FdEntity* GetFdEntity(const char* path, int& existfd, bool newfd = true, AutoLock::Type locktype = AutoLock::NONE);

View File

@ -5142,8 +5142,38 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar
max_dirty_data = size;
return 0;
}
// Handles "-o free_space_ratio=<0..100>": converts the percentage into a
// byte count of the total disk size and stores it as the free disk space
// reserve. Returns -1 (after printing the reason) when the reserve has
// already been set to a non-zero value or when the ratio is out of range,
// and 0 on success.
if(is_prefix(arg, "free_space_ratio=")){
    // Keep the parsed number in its wide type until it has been range
    // checked. Narrowing to int first would let a value outside the range
    // of int wrap around into 0..100 and be accepted silently.
    // NOTE(review): assumes cvt_strtoofft() returns off_t, as its use for
    // ensure_diskfree in this function suggests - confirm.
    const off_t parsed_ratio = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10);

    // A non-zero reserve means it was already configured (e.g. by ensure_diskfree).
    if(FdManager::GetEnsureFreeDiskSpace()!=0){
        S3FS_PRN_EXIT("option free_space_ratio conflicts with ensure_diskfree, please set only one of them.");
        return -1;
    }

    if(parsed_ratio < 0 || parsed_ratio > 100){
        S3FS_PRN_EXIT("option free_space_ratio must between 0 to 100, which is: %lld", static_cast<long long>(parsed_ratio));
        return -1;
    }
    // Safe to narrow now: the value is known to be within 0..100.
    const int ratio = static_cast<int>(parsed_ratio);

    off_t dfsize = FdManager::GetTotalDiskSpaceByRatio(ratio);
    S3FS_PRN_INFO("Free space ratio set to %d %%, ensure the available disk space is greater than %.3f MB", ratio, static_cast<double>(dfsize) / 1024 / 1024);

    // The reserve is never allowed to be smaller than one multipart chunk.
    if(dfsize < S3fsCurl::GetMultipartSize()){
        S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it.");
        dfsize = S3fsCurl::GetMultipartSize();
    }
    FdManager::SetEnsureFreeDiskSpace(dfsize);
    return 0;
}
else if(is_prefix(arg, "ensure_diskfree=")){
off_t dfsize = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10) * 1024 * 1024;
if(FdManager::GetEnsureFreeDiskSpace()!=0){
S3FS_PRN_EXIT("option free_space_ratio conflicts with ensure_diskfree, please set only one of them.");
return -1;
}
S3FS_PRN_INFO("Set and ensure the available disk space is greater than %.3f MB.", static_cast<double>(dfsize) / 1024 / 1024);
if(dfsize < S3fsCurl::GetMultipartSize()){
S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it.");
dfsize = S3fsCurl::GetMultipartSize();
@ -5700,6 +5730,19 @@ int main(int argc, char* argv[])
FdManager::InitFakeUsedDiskSize(fake_diskfree_size);
}
// No free disk space reserve is configured at this point (value is 0),
// so fall back to the default free_space_ratio of 10%.
if(0 == FdManager::GetEnsureFreeDiskSpace()){
    const int default_ratio = 10;
    off_t reserve_bytes = FdManager::GetTotalDiskSpaceByRatio(default_ratio);
    S3FS_PRN_INFO("Free space ratio default to %d %%, ensure the available disk space is greater than %.3f MB", default_ratio, static_cast<double>(reserve_bytes) / 1024 / 1024);

    // Raise the reserve to the multipart size when the computed value is smaller.
    if(reserve_bytes < S3fsCurl::GetMultipartSize()){
        S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it.");
        reserve_bytes = S3fsCurl::GetMultipartSize();
    }
    FdManager::SetEnsureFreeDiskSpace(reserve_bytes);
}
// set user agent
S3fsCurl::InitUserAgent();
@ -5750,12 +5793,17 @@ int main(int argc, char* argv[])
// check free disk space
if(!FdManager::IsSafeDiskSpace(nullptr, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){
S3FS_PRN_EXIT("There is no enough disk space for used as cache(or temporary) directory by s3fs.");
S3fsCurl::DestroyS3fsCurl();
s3fs_destroy_global_ssl();
destroy_parser_xml_lock();
destroy_basename_lock();
exit(EXIT_FAILURE);
// clean cache dir and retry
S3FS_PRN_WARN("No enough disk space for s3fs, try to clean cache dir");
FdManager::get()->CleanupCacheDir();
if(!FdManager::IsSafeDiskSpaceWithLog(nullptr, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){
S3fsCurl::DestroyS3fsCurl();
s3fs_destroy_global_ssl();
destroy_parser_xml_lock();
destroy_basename_lock();
exit(EXIT_FAILURE);
}
}
// set mp stat flag object

View File

@ -270,6 +270,15 @@ static const char help_string[] =
" space is smaller than this value, s3fs do not use disk space\n"
" as possible in exchange for the performance.\n"
"\n"
" free_space_ratio (default=\"10\")\n"
" - sets min free space ratio of the disk.\n"
" The value of this option can be between 0 and 100. It will control\n"
" the size of the cache according to this ratio to ensure that the\n"
" idle ratio of the disk is greater than this value.\n"
" For example, when the disk space is 50GB, the default value will\n"
" ensure that the disk will reserve at least 50GB * 10%% = 5GB of\n"
" remaining space.\n"
"\n"
" multipart_threshold (default=\"25\")\n"
" - threshold, in MB, to use multipart upload instead of\n"
" single-part. Must be at least 5 MB.\n"