Flush dirty data after writing a number of bytes (#1448)
This allows s3fs to write large files without consuming a large amount of temporary local storage, but it can slow uploads due to server-side copies. References #617. Fixes #1056. Fixes #1257.
commit 6aa786b886 (parent 58750cc441)
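The behavior is controlled by the new max_dirty_data option introduced in the diff below. As a hypothetical example (bucket name and mountpoint are placeholders), a mount that lowers the threshold to its 50 MB minimum might look like:

    s3fs mybucket /mnt/s3 -o max_dirty_data=50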
@@ -195,6 +195,11 @@ It is necessary to set this value depending on a CPU and a network band.
 part size, in MB, for each multipart request.
 The minimum value is 5 MB and the maximum value is 5 GB.
+.TP
+\fB\-o\fR max_dirty_data (default="5120")
+Flush dirty data to S3 after a certain number of MB written.
+The minimum value is 50 MB. -1 value means disable.
+Cannot be used with nomixupload.
 .TP
 \fB\-o\fR ensure_diskfree (default 0)
 sets MB to ensure disk free space. This option means the threshold of free space size on disk which is used for the cache file by s3fs.
 s3fs makes file for downloading, uploading and caching files.
@@ -1175,6 +1175,10 @@ int FdEntity::NoCacheCompleteMultipartPost()
     return 0;
 }
 
+off_t FdEntity::BytesModified() const {
+    return pagelist.BytesModified();
+}
+
 int FdEntity::RowFlush(const char* tpath, bool force_sync)
 {
     int result = 0;
@@ -66,6 +66,7 @@ class FdEntity
         int UploadPendingMeta();
 
     public:
+        static bool GetNoMixMultipart() { return mixmultipart; }
         static bool SetNoMixMultipart();
 
         explicit FdEntity(const char* tpath = NULL, const char* cpath = NULL);
@@ -108,6 +109,7 @@ class FdEntity
         int NoCacheMultipartPost(int tgfd, off_t start, off_t size);
         int NoCacheCompleteMultipartPost();
 
+        off_t BytesModified() const;
         int RowFlush(const char* tpath, bool force_sync = false);
         int Flush(bool force_sync = false) { return RowFlush(NULL, force_sync); }
 
@@ -689,6 +689,17 @@ bool PageList::GetPageListsForMultipartUpload(fdpage_list_t& dlpages, fdpage_list_t& mixuppages, off_t max_partsize)
     return true;
 }
 
+off_t PageList::BytesModified() const
+{
+    off_t total = 0;
+    for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
+        if(iter->modified){
+            total += iter->bytes;
+        }
+    }
+    return total;
+}
+
 bool PageList::IsModified() const
 {
     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
@@ -110,6 +110,7 @@ class PageList
         int GetUnloadedPages(fdpage_list_t& unloaded_list, off_t start = 0, off_t size = 0) const; // size=0 is checking to end of list
         bool GetPageListsForMultipartUpload(fdpage_list_t& dlpages, fdpage_list_t& mixuppages, off_t max_partsize);
 
+        off_t BytesModified() const;
         bool IsModified() const;
         bool ClearAllModified();
 
src/s3fs.cpp (24 lines changed)
@@ -95,6 +95,7 @@ static bool is_specified_endpoint = false;
 static int s3fs_init_deferred_exit_status = 0;
 static bool support_compat_dir = true;  // default supports compatibility directory type
 static int max_keys_list_object = 1000; // default is 1000
+static off_t max_dirty_data = 5LL * 1024LL * 1024LL * 1024LL;
 static bool use_wtf8 = false;
 
 static const std::string allbucket_fields_type; // special key for mapping(This name is absolutely not used as a bucket name)
@@ -2283,6 +2284,14 @@ static int s3fs_write(const char* _path, const char* buf, size_t size, off_t off
         S3FS_PRN_WARN("failed to write file(%s). result=%zd", path, res);
     }
 
+    if(max_dirty_data != -1 && ent->BytesModified() >= max_dirty_data){
+        if(0 != (res = ent->RowFlush(path, true))){
+            S3FS_PRN_ERR("could not upload file(%s): result=%zd", path, res);
+            StatCache::getStatCacheData()->DelStat(path);
+            return res;
+        }
+    }
+
     return static_cast<int>(res);
 }
 
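The new block above is the heart of the change: after each write, the dirty total is compared against max_dirty_data and a forced flush is triggered once the threshold is reached, so dirty data never grows without bound. A self-contained sketch of that pattern, with a hypothetical DirtyBuffer class standing in for FdEntity:

    #include <cstdint>
    #include <cstdio>

    class DirtyBuffer {
        int64_t dirty_bytes_;
        int64_t max_dirty_data_; // bytes; -1 disables the early flush
    public:
        explicit DirtyBuffer(int64_t max_dirty_data)
            : dirty_bytes_(0), max_dirty_data_(max_dirty_data) {}

        void write(int64_t nbytes)
        {
            dirty_bytes_ += nbytes;
            // Same guard as s3fs_write: only flush early when the feature is
            // enabled (!= -1) and the dirty total has reached the threshold.
            if(max_dirty_data_ != -1 && dirty_bytes_ >= max_dirty_data_){
                flush();
            }
        }

        void flush()
        {
            printf("flushing %lld dirty bytes\n", (long long)dirty_bytes_);
            dirty_bytes_ = 0; // data is now persisted upstream
        }
    };

    int main()
    {
        DirtyBuffer buf(100);  // tiny threshold for demonstration
        for(int i = 0; i < 5; ++i){
            buf.write(40);     // flushes once, after the 3rd write (120 >= 100)
        }
        return 0;
    }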
@@ -4469,6 +4478,16 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar
         }
         return 0;
     }
+    if(is_prefix(arg, "max_dirty_data=")){
+        off_t size = static_cast<off_t>(cvt_strtoofft(strchr(arg, '=') + sizeof(char)));
+        if(size < 50){
+            S3FS_PRN_EXIT("max_dirty_data option must be at least 50 MB.");
+            return -1;
+        }
+        size *= 1024 * 1024;
+        max_dirty_data = size;
+        return 0;
+    }
     if(is_prefix(arg, "ensure_diskfree=")){
         off_t dfsize = cvt_strtoofft(strchr(arg, '=') + sizeof(char)) * 1024 * 1024;
         if(dfsize < S3fsCurl::GetMultipartSize()){
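One detail worth noting: the option value is parsed in MB and only converted to bytes (size *= 1024 * 1024) after the minimum check, so the comparison size < 50 is in MB. The documented default of "5120" MB is exactly the 5 GiB initializer for max_dirty_data shown earlier; a quick illustrative check:

    #include <cassert>

    int main()
    {
        long long mb      = 5120;                           // documented default, in MB
        long long bytes   = mb * 1024 * 1024;               // MB -> bytes, as in my_fuse_opt_proc
        long long initial = 5LL * 1024LL * 1024LL * 1024LL; // initializer of max_dirty_data
        assert(bytes == initial);                           // both are 5368709120 (5 GiB)
        return 0;
    }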
@@ -4860,6 +4879,11 @@ int main(int argc, char* argv[])
         exit(EXIT_FAILURE);
     }
 
+    if(!FdEntity::GetNoMixMultipart() && max_dirty_data != -1){
+        S3FS_PRN_WARN("Setting max_dirty_data to -1 when nomixupload is enabled");
+        max_dirty_data = -1;
+    }
+
     // The first plain argument is the bucket
     if(bucket.empty()){
         S3FS_PRN_EXIT("missing BUCKET argument.");
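A plausible reading of this guard: flushing mid-write relies on the mixed multipart upload path, which server-side-copies parts already stored in S3 and uploads only the changed ranges (the same server-side copies the commit message warns can slow uploads). With nomixupload that path is unavailable, so max_dirty_data is forced back to -1 (disabled) with a warning rather than failing at runtime.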
@@ -240,6 +240,11 @@ static const char help_string[] =
     "      - part size, in MB, for each multipart request.\n"
     "        The minimum value is 5 MB and the maximum value is 5 GB.\n"
     "\n"
+    "   max_dirty_data (default=\"5120\")\n"
+    "      - flush dirty data to S3 after a certain number of MB written.\n"
+    "        The minimum value is 50 MB. -1 value means disable.\n"
+    "        Cannot be used with nomixupload.\n"
+    "\n"
     "   ensure_diskfree (default 0)\n"
     "      - sets MB to ensure disk free space. This option means the\n"
     "        threshold of free space size on disk which is used for the\n"