From 3856637cd24868ce060519363fff22cdc33b0def Mon Sep 17 00:00:00 2001 From: AdamQQQ Date: Fri, 20 Oct 2023 17:11:47 +0800 Subject: [PATCH] s3fs: add option free_space_ratio to control cache size (#2351) * Try to cleanup cache directory when initing without enough disk space Also optimize log messages to print detailed errors to the user. Signed-off-by: Qinqi Qu * s3fs: add option free_space_ratio to control cache size Since the ensure_diskfree option is not convenient enough, we have added a new option "-o free_space_ratio" to control the space used by the s3fs cache based on the current disk size. The value of this option can be between 0 and 100. It will control the size of the cache according to this ratio to ensure that the idle ratio of the disk is greater than this value. For example, when the value is 10 and the disk space is 50GB, it will ensure that the disk will reserve at least 50GB * 10% = 5GB of remaining space. Signed-off-by: Qinqi Qu --------- Signed-off-by: Qinqi Qu --- doc/man/s3fs.1.in | 6 +++++ src/fdcache.cpp | 49 ++++++++++++++++++++++++++++++++++---- src/fdcache.h | 4 ++++ src/s3fs.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++----- src/s3fs_help.cpp | 9 +++++++ 5 files changed, 117 insertions(+), 11 deletions(-) diff --git a/doc/man/s3fs.1.in b/doc/man/s3fs.1.in index e846685..08f6caf 100644 --- a/doc/man/s3fs.1.in +++ b/doc/man/s3fs.1.in @@ -227,6 +227,12 @@ sets MB to ensure disk free space. This option means the threshold of free space s3fs makes file for downloading, uploading and caching files. If the disk free space is smaller than this value, s3fs do not use disk space as possible in exchange for the performance. .TP +\fB\-o\fR free_space_ratio (default="10") +sets min free space ratio of the disk. The value of this option can be between 0 and 100. It will control +the size of the cache according to this ratio to ensure that the idle ratio of the disk is greater than this value. 
+For example, when the disk space is 50GB, the default value will +ensure that the disk will reserve at least 50GB * 10% = 5GB of remaining space. +.TP \fB\-o\fR multipart_threshold (default="25") threshold, in MB, to use multipart upload instead of single-part. Must be at least 5 MB. diff --git a/src/fdcache.cpp b/src/fdcache.cpp index 405ba17..5dd376d 100644 --- a/src/fdcache.cpp +++ b/src/fdcache.cpp @@ -266,9 +266,38 @@ bool FdManager::InitFakeUsedDiskSize(off_t fake_freesize) return true; } +off_t FdManager::GetTotalDiskSpaceByRatio(int ratio) +{ + return FdManager::GetTotalDiskSpace(nullptr) * ratio / 100; +} + +off_t FdManager::GetTotalDiskSpace(const char* path) +{ + struct statvfs vfsbuf; + int result = FdManager::GetVfsStat(path, &vfsbuf); + if(result == -1){ + return 0; + } + + off_t actual_totalsize = vfsbuf.f_blocks * vfsbuf.f_frsize; + + return actual_totalsize; +} + off_t FdManager::GetFreeDiskSpace(const char* path) { struct statvfs vfsbuf; + int result = FdManager::GetVfsStat(path, &vfsbuf); + if(result == -1){ + return 0; + } + + off_t actual_freesize = vfsbuf.f_bavail * vfsbuf.f_frsize; + + return (FdManager::fake_used_disk_space < actual_freesize ? (actual_freesize - FdManager::fake_used_disk_space) : 0); +} + +int FdManager::GetVfsStat(const char* path, struct statvfs* vfsbuf){ std::string ctoppath; if(!FdManager::cache_dir.empty()){ ctoppath = FdManager::cache_dir + "/"; @@ -284,14 +313,12 @@ off_t FdManager::GetFreeDiskSpace(const char* path) }else{ ctoppath += "."; } - if(-1 == statvfs(ctoppath.c_str(), &vfsbuf)){ + if(-1 == statvfs(ctoppath.c_str(), vfsbuf)){ S3FS_PRN_ERR("could not get vfs stat by errno(%d)", errno); - return 0; + return -1; } - off_t actual_freesize = vfsbuf.f_bavail * vfsbuf.f_frsize; - - return (FdManager::fake_used_disk_space < actual_freesize ?
(actual_freesize - FdManager::fake_used_disk_space) : 0); + return 0; } bool FdManager::IsSafeDiskSpace(const char* path, off_t size) @@ -300,6 +327,18 @@ bool FdManager::IsSafeDiskSpace(const char* path, off_t size) return size + FdManager::GetEnsureFreeDiskSpace() <= fsize; } +bool FdManager::IsSafeDiskSpaceWithLog(const char* path, off_t size) +{ + off_t fsize = FdManager::GetFreeDiskSpace(path); + off_t needsize = size + FdManager::GetEnsureFreeDiskSpace(); + if(needsize <= fsize){ + return true; + } else { + S3FS_PRN_EXIT("There is no enough disk space for used as cache(or temporary) directory by s3fs. Requires %.3f MB, already has %.3f MB.", static_cast<double>(needsize) / 1024 / 1024, static_cast<double>(fsize) / 1024 / 1024); + return false; + } +} + bool FdManager::HaveLseekHole() { if(FdManager::checked_lseek){ diff --git a/src/fdcache.h b/src/fdcache.h index a0dfb21..b09e508 100644 --- a/src/fdcache.h +++ b/src/fdcache.h @@ -47,7 +47,9 @@ class FdManager private: static off_t GetFreeDiskSpace(const char* path); + static off_t GetTotalDiskSpace(const char* path); static bool IsDir(const std::string* dir); + static int GetVfsStat(const char* path, struct statvfs* vfsbuf); int GetPseudoFdCount(const char* path); void CleanupCacheDirInternal(const std::string &path = ""); @@ -78,12 +80,14 @@ class FdManager static off_t SetEnsureFreeDiskSpace(off_t size); static bool InitFakeUsedDiskSize(off_t fake_freesize); static bool IsSafeDiskSpace(const char* path, off_t size); + static bool IsSafeDiskSpaceWithLog(const char* path, off_t size); static void FreeReservedDiskSpace(off_t size); static bool ReserveDiskSpace(off_t size); static bool HaveLseekHole(); static bool SetTmpDir(const char* dir); static bool CheckTmpDirExist(); static FILE* MakeTempFile(); + static off_t GetTotalDiskSpaceByRatio(int ratio); // Return FdEntity associated with path, returning nullptr on error. This operation increments the reference count; callers must decrement via Close after use.
FdEntity* GetFdEntity(const char* path, int& existfd, bool newfd = true, AutoLock::Type locktype = AutoLock::NONE); diff --git a/src/s3fs.cpp b/src/s3fs.cpp index 312cbaa..4fe6fdc 100644 --- a/src/s3fs.cpp +++ b/src/s3fs.cpp @@ -5142,8 +5142,38 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar max_dirty_data = size; return 0; } + if(is_prefix(arg, "free_space_ratio=")){ + int ratio = static_cast<int>(cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10)); + + if(FdManager::GetEnsureFreeDiskSpace()!=0){ + S3FS_PRN_EXIT("option free_space_ratio conflicts with ensure_diskfree, please set only one of them."); + return -1; + } + + if(ratio < 0 || ratio > 100){ + S3FS_PRN_EXIT("option free_space_ratio must between 0 to 100, which is: %d", ratio); + return -1; + } + + off_t dfsize = FdManager::GetTotalDiskSpaceByRatio(ratio); + S3FS_PRN_INFO("Free space ratio set to %d %%, ensure the available disk space is greater than %.3f MB", ratio, static_cast<double>(dfsize) / 1024 / 1024); + + if(dfsize < S3fsCurl::GetMultipartSize()){ + S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it."); + dfsize = S3fsCurl::GetMultipartSize(); + } + FdManager::SetEnsureFreeDiskSpace(dfsize); + return 0; + } else if(is_prefix(arg, "ensure_diskfree=")){ off_t dfsize = cvt_strtoofft(strchr(arg, '=') + sizeof(char), /*base=*/ 10) * 1024 * 1024; + + if(FdManager::GetEnsureFreeDiskSpace()!=0){ + S3FS_PRN_EXIT("option free_space_ratio conflicts with ensure_diskfree, please set only one of them."); + return -1; + } + + S3FS_PRN_INFO("Set and ensure the available disk space is greater than %.3f MB.", static_cast<double>(dfsize) / 1024 / 1024); if(dfsize < S3fsCurl::GetMultipartSize()){ S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it."); dfsize = S3fsCurl::GetMultipartSize(); @@ -5700,6 +5730,19 @@ int main(int argc, char* argv[])
FdManager::InitFakeUsedDiskSize(fake_diskfree_size); } + // Set default value of free_space_ratio to 10% + if(FdManager::GetEnsureFreeDiskSpace()==0){ + int ratio = 10; + off_t dfsize = FdManager::GetTotalDiskSpaceByRatio(ratio); + S3FS_PRN_INFO("Free space ratio default to %d %%, ensure the available disk space is greater than %.3f MB", ratio, static_cast<double>(dfsize) / 1024 / 1024); + + if(dfsize < S3fsCurl::GetMultipartSize()){ + S3FS_PRN_WARN("specified size to ensure disk free space is smaller than multipart size, so set multipart size to it."); + dfsize = S3fsCurl::GetMultipartSize(); + } + FdManager::SetEnsureFreeDiskSpace(dfsize); + } + // set user agent S3fsCurl::InitUserAgent(); @@ -5750,12 +5793,17 @@ int main(int argc, char* argv[]) // check free disk space if(!FdManager::IsSafeDiskSpace(nullptr, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ - S3FS_PRN_EXIT("There is no enough disk space for used as cache(or temporary) directory by s3fs."); - S3fsCurl::DestroyS3fsCurl(); - s3fs_destroy_global_ssl(); - destroy_parser_xml_lock(); - destroy_basename_lock(); - exit(EXIT_FAILURE); + // clean cache dir and retry + S3FS_PRN_WARN("No enough disk space for s3fs, try to clean cache dir"); + FdManager::get()->CleanupCacheDir(); + + if(!FdManager::IsSafeDiskSpaceWithLog(nullptr, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ + S3fsCurl::DestroyS3fsCurl(); + s3fs_destroy_global_ssl(); + destroy_parser_xml_lock(); + destroy_basename_lock(); + exit(EXIT_FAILURE); + } } // set mp stat flag object diff --git a/src/s3fs_help.cpp b/src/s3fs_help.cpp index 9b776ca..678d8fb 100644 --- a/src/s3fs_help.cpp +++ b/src/s3fs_help.cpp @@ -270,6 +270,15 @@ static const char help_string[] = " space is smaller than this value, s3fs do not use disk space\n" " as possible in exchange for the performance.\n" "\n" + " free_space_ratio (default=\"10\")\n" + " - sets min free space ratio of the disk.\n" + " The value of this option can be between 0 and
100. It will control\n" + " the size of the cache according to this ratio to ensure that the\n" + " idle ratio of the disk is greater than this value.\n" + " For example, when the disk space is 50GB, the default value will\n" + " ensure that the disk will reserve at least 50GB * 10%% = 5GB of\n" + " remaining space.\n" + "\n" " multipart_threshold (default=\"25\")\n" " - threshold, in MB, to use multipart upload instead of\n" " single-part. Must be at least 5 MB.\n"