Allow configuring multipart copy size (#1555)

Also align configuration with mixupload.
Andrew Gaul 2021-02-08 20:32:12 +09:00 committed by GitHub
parent 32f096fa3f
commit a4d916af13
5 changed files with 40 additions and 8 deletions

@@ -195,6 +195,13 @@ It is necessary to set this value depending on a CPU and a network band.
part size, in MB, for each multipart request.
The minimum value is 5 MB and the maximum value is 5 GB.
.TP
\fB\-o\fR multipart_copy_size (default="512")
part size, in MB, for each multipart copy request, used for
renames and mixupload.
The minimum value is 5 MB and the maximum value is 5 GB.
Must be at least 512 MB to copy the maximum 5 TB object size
but lower values may improve performance.
.TP
\fB\-o\fR max_dirty_data (default="5120")
Flush dirty data to S3 after a certain number of MB written.
The minimum value is 50 MB. -1 value means disable.
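The 512 MB floor in the multipart_copy_size description above follows from S3's own limits: a multipart operation may use at most 10,000 parts and an object may be at most 5 TB, so each copy part has to cover roughly 500 MB of the source. A minimal sketch of that arithmetic, with illustrative constant names that are not part of the patch:

// Illustrative arithmetic only; these constant names are not from the patch.
#include <cstdint>

static const int64_t MAX_PARTS        = 10000;                            // S3 multipart part limit
static const int64_t MAX_OBJECT_BYTES = 5LL * 1000 * 1000 * 1000 * 1000;  // 5 TB object size limit
static const int64_t COPY_PART_BYTES  = 512LL * 1024 * 1024;              // 512 MiB default part size

// 512 MiB * 10,000 parts is about 5.37 TB, so the default covers the largest
// possible object, while a much smaller part size (e.g. 256 MiB) would not.
static_assert(COPY_PART_BYTES * MAX_PARTS >= MAX_OBJECT_BYTES,
              "512 MiB parts cover a 5 TB object within 10,000 parts");
static_assert(256LL * 1024 * 1024 * MAX_PARTS < MAX_OBJECT_BYTES,
              "256 MiB parts cannot cover a 5 TB object");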

@@ -47,10 +47,6 @@ static const std::string empty_payload_hash = "e3b0c44298fc1c149afbf4c89
//-------------------------------------------------------------------
static const int MULTIPART_SIZE = 10 * 1024 * 1024;
// constant must be at least 512 MB to copy the maximum 5 TB object size
// TODO: scale part size with object size
static const int MAX_MULTI_COPY_SOURCE_SIZE = 512 * 1024 * 1024;
static const int IAM_EXPIRE_MERGIN = 20 * 60; // update timing
static const std::string ECS_IAM_ENV_VAR = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI";
static const std::string IAMCRED_ACCESSKEYID = "AccessKeyId";
@@ -137,6 +133,7 @@ std::string S3fsCurl::userAgent;
int S3fsCurl::max_parallel_cnt = 5; // default
int S3fsCurl::max_multireq = 20; // default
off_t S3fsCurl::multipart_size = MULTIPART_SIZE; // default
off_t S3fsCurl::multipart_copy_size = 512 * 1024 * 1024; // default
signature_type_t S3fsCurl::signature_type = V2_OR_V4; // default
bool S3fsCurl::is_ua = true; // default
bool S3fsCurl::is_use_session_token= false; // default
@@ -1113,6 +1110,16 @@ bool S3fsCurl::SetMultipartSize(off_t size)
return true;
}
bool S3fsCurl::SetMultipartCopySize(off_t size)
{
size = size * 1024 * 1024;
if(size < MIN_MULTIPART_SIZE){
return false;
}
S3fsCurl::multipart_copy_size = size;
return true;
}
int S3fsCurl::SetMaxParallelCount(int value)
{
int old = S3fsCurl::max_parallel_cnt;
@@ -1415,10 +1422,10 @@ int S3fsCurl::ParallelMixMultipartUploadRequest(const char* tpath, headers_t& me
}
}else{
// Multipart copy
for(off_t i = 0; i < iter->bytes; i += FIVE_GB){
for(off_t i = 0; i < iter->bytes; i += GetMultipartCopySize()){
S3fsCurl* s3fscurl_para = new S3fsCurl(true);
off_t bytes = std::min(FIVE_GB, iter->bytes - i);
off_t bytes = std::min(static_cast<off_t>(GetMultipartCopySize()), iter->bytes - i);
std::ostringstream strrange;
strrange << "bytes=" << (iter->offset + i) << "-" << (iter->offset + i + bytes - 1);
meta["x-amz-copy-source-range"] = strrange.str();
@@ -3903,7 +3910,7 @@ int S3fsCurl::MultipartHeadRequest(const char* tpath, off_t size, headers_t& met
curlmulti.SetRetryCallback(S3fsCurl::CopyMultipartPostRetryCallback);
for(bytes_remaining = size, chunk = 0; 0 < bytes_remaining; bytes_remaining -= chunk){
chunk = bytes_remaining > MAX_MULTI_COPY_SOURCE_SIZE ? MAX_MULTI_COPY_SOURCE_SIZE : bytes_remaining;
chunk = bytes_remaining > GetMultipartCopySize() ? GetMultipartCopySize() : bytes_remaining;
std::ostringstream strrange;
strrange << "bytes=" << (size - bytes_remaining) << "-" << (size - bytes_remaining + chunk - 1);
@@ -4073,7 +4080,7 @@ int S3fsCurl::MultipartRenameRequest(const char* from, const char* to, headers_t
curlmulti.SetRetryCallback(S3fsCurl::CopyMultipartPostRetryCallback);
for(bytes_remaining = size, chunk = 0; 0 < bytes_remaining; bytes_remaining -= chunk){
chunk = bytes_remaining > MAX_MULTI_COPY_SOURCE_SIZE ? MAX_MULTI_COPY_SOURCE_SIZE : bytes_remaining;
chunk = bytes_remaining > GetMultipartCopySize() ? GetMultipartCopySize() : bytes_remaining;
std::ostringstream strrange;
strrange << "bytes=" << (size - bytes_remaining) << "-" << (size - bytes_remaining + chunk - 1);
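The two loops above split the copy source into multipart_copy_size chunks and send each chunk as an x-amz-copy-source-range header value. A self-contained sketch of that splitting, with the s3fs-specific plumbing stripped out (split_copy_ranges is a hypothetical helper, not part of the patch):

// Hypothetical helper mirroring the range-splitting loops above; not part of the patch.
#include <iostream>
#include <sstream>
#include <string>
#include <sys/types.h>
#include <vector>

static std::vector<std::string> split_copy_ranges(off_t size, off_t copy_size)
{
    std::vector<std::string> ranges;
    for(off_t bytes_remaining = size, chunk = 0; 0 < bytes_remaining; bytes_remaining -= chunk){
        chunk = bytes_remaining > copy_size ? copy_size : bytes_remaining;   // last part may be short
        std::ostringstream strrange;
        strrange << "bytes=" << (size - bytes_remaining) << "-" << (size - bytes_remaining + chunk - 1);
        ranges.push_back(strrange.str());
    }
    return ranges;
}

int main()
{
    // A 1200 MiB source with 512 MiB copy parts yields three ranges:
    // bytes=0-536870911, bytes=536870912-1073741823, bytes=1073741824-1258291199
    for(const std::string& range : split_copy_ranges(1200LL * 1024 * 1024, 512LL * 1024 * 1024)){
        std::cout << range << std::endl;
    }
    return 0;
}

A smaller multipart_copy_size simply produces more, smaller ranges, which is the trade-off the man page hints at: more requests, but potentially better performance.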

@@ -161,6 +161,7 @@ class S3fsCurl
static int max_parallel_cnt;
static int max_multireq;
static off_t multipart_size;
static off_t multipart_copy_size;
static signature_type_t signature_type;
static bool is_ua; // User-Agent
static bool requester_pays;
@@ -351,6 +352,8 @@ class S3fsCurl
static const char* GetIAMRole() { return S3fsCurl::IAM_role.c_str(); }
static bool SetMultipartSize(off_t size);
static off_t GetMultipartSize() { return S3fsCurl::multipart_size; }
static bool SetMultipartCopySize(off_t size);
static off_t GetMultipartCopySize() { return S3fsCurl::multipart_copy_size; }
static signature_type_t SetSignatureType(signature_type_t signature_type) { signature_type_t bresult = S3fsCurl::signature_type; S3fsCurl::signature_type = signature_type; return bresult; }
static signature_type_t GetSignatureType() { return S3fsCurl::signature_type; }
static bool SetUserAgentFlag(bool isset) { bool bresult = S3fsCurl::is_ua; S3fsCurl::is_ua = isset; return bresult; }

@@ -4490,6 +4490,14 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar
}
return 0;
}
if(is_prefix(arg, "multipart_copy_size=")){
off_t size = static_cast<off_t>(cvt_strtoofft(strchr(arg, '=') + sizeof(char)));
if(!S3fsCurl::SetMultipartCopySize(size)){
S3FS_PRN_EXIT("multipart_copy_size option must be at least 5 MB.");
return -1;
}
return 0;
}
if(is_prefix(arg, "max_dirty_data=")){
off_t size = static_cast<off_t>(cvt_strtoofft(strchr(arg, '=') + sizeof(char)));
if(size >= 50){
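With the hunk above, the mount option (for example -o multipart_copy_size=128) is read as a number of MB, converted to bytes by S3fsCurl::SetMultipartCopySize(), and rejected below the 5 MB floor. A rough standalone sketch of that conversion; parse_multipart_copy_size is a hypothetical stand-in for the cvt_strtoofft()/SetMultipartCopySize() pair, not part of the patch:

// Hypothetical stand-in for cvt_strtoofft()/SetMultipartCopySize(); not part of the patch.
#include <cstdio>
#include <cstdlib>
#include <cstring>

static const long long MIN_MULTIPART_MB = 5;  // mirrors the 5 MB MIN_MULTIPART_SIZE floor

// Returns the copy part size in bytes, or -1 if the option value is below 5 MB.
static long long parse_multipart_copy_size(const char* arg)
{
    const char* eq = std::strchr(arg, '=');
    if(!eq){
        return -1;
    }
    long long mb = std::strtoll(eq + 1, NULL, 10);   // value after "multipart_copy_size="
    if(mb < MIN_MULTIPART_MB){
        return -1;
    }
    return mb * 1024 * 1024;                         // option is given in MB, stored in bytes
}

int main()
{
    std::printf("%lld\n", parse_multipart_copy_size("multipart_copy_size=128"));  // 134217728
    std::printf("%lld\n", parse_multipart_copy_size("multipart_copy_size=4"));    // -1 (below 5 MB)
    return 0;
}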

@@ -240,6 +240,13 @@ static const char help_string[] =
" - part size, in MB, for each multipart request.\n"
" The minimum value is 5 MB and the maximum value is 5 GB.\n"
"\n"
" multipart_copy_size (default=\"512\")\n"
" - part size, in MB, for each multipart copy request, used for\n"
" renames and mixupload.\n"
" The minimum value is 5 MB and the maximum value is 5 GB.\n"
" Must be at least 512 MB to copy the maximum 5 TB object size\n"
" but lower values may improve performance.\n"
"\n"
" max_dirty_data (default=\"5120\")\n"
" - flush dirty data to S3 after a certain number of MB written.\n"
" The minimum value is 50 MB. -1 value means disable.\n"