Added multipart_size option for #16
Takeshi Nakatani 2014-04-05 00:53:04 +09:00
commit 6148415b4b
5 changed files with 50 additions and 13 deletions

doc/man/s3fs.1 (View File)

@@ -125,14 +125,22 @@ maximum number of parallel requests for listing objects.
.TP
\fB\-o\fR parallel_count (default="5")
number of parallel requests for uploading big objects.
s3fs uploads large object(over 20MB) by multipart post request, and sends parallel requests.
s3fs uploads large objects (over 20MB by default) by multipart post request, and sends parallel requests.
This option limits the number of parallel requests s3fs sends at once.
Set this value according to your CPU and network bandwidth.
This option is related to the fd_page_size option and affects it.
.TP
\fB\-o\fR fd_page_size(default="52428800"(50MB))
size of the internal management pages for each file descriptor.
For delayed reading and writing, s3fs manages pages which are separated from the object. Each page has a status indicating whether its data is already loaded (or not loaded yet).
This option should not be changed unless you have performance problems.
This value is adjusted automatically from the parallel_count and multipart_size values (fd_page_size = parallel_count * multipart_size).
.TP
\fB\-o\fR multipart_size(default="10"(10MB))
size of each part in a multipart upload request.
The default size is 10MB (10485760 bytes), which is also the minimum value.
Specify a number of MB, 10 (MB) or more.
This option is related to the fd_page_size option and affects it.
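For example, with the defaults (parallel_count=5, multipart_size=10), fd_page_size becomes 5 * 10485760 = 52428800 bytes (50MB), which matches its documented default; specifying multipart_size=20 raises it to 5 * 20971520 = 104857600 bytes (100MB).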
.TP
\fB\-o\fR url (default="http://s3.amazonaws.com")
sets the url to use to access Amazon S3. If you want to use HTTPS, then you can set url=https://s3.amazonaws.com

src/curl.cpp (View File)

@@ -173,7 +173,8 @@ curltime_t S3fsCurl::curl_times;
curlprogress_t S3fsCurl::curl_progress;
string S3fsCurl::curl_ca_bundle;
mimes_t S3fsCurl::mimeTypes;
int S3fsCurl::max_parallel_cnt = 5; // default
off_t S3fsCurl::multipart_size = MULTIPART_SIZE; // default
//-------------------------------------------------------------------
// Class methods for S3fsCurl
@@ -822,6 +823,16 @@ string S3fsCurl::SetIAMRole(const char* role)
return old;
}
bool S3fsCurl::SetMultipartSize(off_t size)
{
size = size * 1024 * 1024;
if(size < MULTIPART_SIZE){
return false;
}
S3fsCurl::multipart_size = size;
return true;
}
int S3fsCurl::SetMaxParallelCount(int value)
{
int old = S3fsCurl::max_parallel_cnt;
@@ -931,7 +942,7 @@ int S3fsCurl::ParallelMultipartUploadRequest(const char* tpath, headers_t& meta,
// Loop for setup parallel upload(multipart) request.
for(para_cnt = 0; para_cnt < S3fsCurl::max_parallel_cnt && 0 < remaining_bytes; para_cnt++, remaining_bytes -= chunk){
// chunk size
chunk = remaining_bytes > MULTIPART_SIZE ? MULTIPART_SIZE : remaining_bytes;
chunk = remaining_bytes > S3fsCurl::multipart_size ? S3fsCurl::multipart_size : remaining_bytes;
// s3fscurl sub object
S3fsCurl* s3fscurl_para = new S3fsCurl(true);
@@ -1021,7 +1032,7 @@ int S3fsCurl::ParallelGetObjectRequest(const char* tpath, int fd, off_t start, s
// Loop for setup parallel upload(multipart) request.
for(para_cnt = 0; para_cnt < S3fsCurl::max_parallel_cnt && 0 < remaining_bytes; para_cnt++, remaining_bytes -= chunk){
// chunk size
chunk = remaining_bytes > MULTIPART_SIZE ? MULTIPART_SIZE : remaining_bytes;
chunk = remaining_bytes > S3fsCurl::multipart_size ? S3fsCurl::multipart_size : remaining_bytes;
// s3fscurl sub object
S3fsCurl* s3fscurl_para = new S3fsCurl();
@@ -2837,7 +2848,7 @@ int S3fsCurl::MultipartUploadRequest(const char* tpath, headers_t& meta, int fd,
// cycle through open fd, pulling off multipart_size chunks at a time
for(remaining_bytes = st.st_size; 0 < remaining_bytes; remaining_bytes -= chunk){
// chunk size
chunk = remaining_bytes > MULTIPART_SIZE ? MULTIPART_SIZE : remaining_bytes;
chunk = remaining_bytes > S3fsCurl::multipart_size ? S3fsCurl::multipart_size : remaining_bytes;
// set
partdata.fd = fd2;
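The three hunks above all apply the same chunking rule. A minimal standalone sketch of that rule in isolation (illustrative code, not part of s3fs; set_multipart_size and the 25MB object are assumptions for the example):

#include <cstdio>
#include <sys/types.h>

static const off_t MIN_MULTIPART_SIZE = 10 * 1024 * 1024; // 10MB floor, as in S3fsCurl::SetMultipartSize
static off_t multipart_size = MIN_MULTIPART_SIZE;

// Mirrors S3fsCurl::SetMultipartSize: the argument is in MB, values under 10 are rejected.
static bool set_multipart_size(off_t size_mb)
{
  off_t bytes = size_mb * 1024 * 1024;
  if(bytes < MIN_MULTIPART_SIZE){
    return false;
  }
  multipart_size = bytes;
  return true;
}

int main(void)
{
  set_multipart_size(10);
  off_t remaining = 25 * 1024 * 1024; // hypothetical 25MB upload
  for(int part = 1; 0 < remaining; part++){
    // same expression as in the loops above: each part is capped at multipart_size
    off_t chunk = remaining > multipart_size ? multipart_size : remaining;
    printf("part %d: %lld bytes\n", part, static_cast<long long>(chunk));
    remaining -= chunk;
  }
  return 0; // prints two 10485760-byte parts and one 5242880-byte part
}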

src/curl.h (View File)

@@ -166,6 +166,7 @@ class S3fsCurl
static std::string curl_ca_bundle;
static mimes_t mimeTypes;
static int max_parallel_cnt;
static off_t multipart_size;
// variables
CURL* hCurl;
@@ -274,8 +275,11 @@ class S3fsCurl
static long SetSslVerifyHostname(long value);
static long GetSslVerifyHostname(void) { return S3fsCurl::ssl_verify_hostname; }
static int SetMaxParallelCount(int value);
static int GetMaxParallelCount(void) { return S3fsCurl::max_parallel_cnt; }
static std::string SetIAMRole(const char* role);
static const char* GetIAMRole(void) { return S3fsCurl::IAM_role.c_str(); }
static bool SetMultipartSize(off_t size);
static off_t GetMultipartSize(void) { return S3fsCurl::multipart_size; }
// methods
bool CreateCurlHandle(bool force = false);

src/fdcache.cpp (View File)

@@ -51,8 +51,7 @@ using namespace std;
//------------------------------------------------
// Symbols
//------------------------------------------------
#define MAX_OBJECT_SIZE 68719476735LL // 64GB - 1L
#define MULTIPART_LOWLIMIT (20 * 1024 * 1024) // 20MB
#define MAX_MULTIPART_CNT 10000 // S3 multipart max count
#define FDPAGE_SIZE (50 * 1024 * 1024) // 50MB(parallel uploading is 5 parallel(default) * 10 MB)
//------------------------------------------------
@@ -798,7 +797,7 @@ int FdEntity::Load(off_t start, off_t size)
break;
}
// download
if((*iter)->bytes >= MULTIPART_LOWLIMIT && !nomultipart){ // 20MB
if((*iter)->bytes >= (2 * S3fsCurl::GetMultipartSize()) && !nomultipart){ // default 20MB
// parallel request
// Additional time is needed for large files
time_t backup = 0;
@@ -880,14 +879,14 @@ int FdEntity::RowFlush(const char* tpath, headers_t& meta, bool ow_sse_flg, bool
* - 1 to 10,000 parts are allowed
* - minimum size of parts is 5MB (except for the last part)
*
* For our application, we will define part size to be 10MB (10 * 2^20 Bytes)
* maximum file size will be ~64 GB - 2 ** 36
* For our application, we will define the minimum part size to be 10MB (10 * 2^20 Bytes)
* maximum file size will be MAX_MULTIPART_CNT * multipart_size (10,000 parts * 10MB, about 97GB, at the default)
*
* Initially uploads will be done serially
*
* If file is > 20MB, then multipart will kick in
*/
if(pagelist.Size() > MAX_OBJECT_SIZE){ // 64GB - 1
if(pagelist.Size() > (MAX_MULTIPART_CNT * S3fsCurl::GetMultipartSize())){
// close f ?
return -ENOTSUP;
}
@@ -898,7 +897,7 @@ int FdEntity::RowFlush(const char* tpath, headers_t& meta, bool ow_sse_flg, bool
return -errno;
}
if(pagelist.Size() >= MULTIPART_LOWLIMIT && !nomultipart){ // 20MB
if(pagelist.Size() >= (2 * S3fsCurl::GetMultipartSize()) && !nomultipart){ // default 20MB
// Additional time is needed for large files
time_t backup = 0;
if(120 > S3fsCurl::GetReadwriteTimeout()){
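With the defaults, the thresholds introduced in this file work out as follows (a worked example using only the constants shown above):

  multipart threshold = 2 * multipart_size = 2 * 10485760 = 20971520 bytes (20MB)
  maximum object size = MAX_MULTIPART_CNT * multipart_size = 10000 * 10485760 = 104857600000 bytes (about 97GB)

Raising multipart_size therefore raises both the point at which multipart transfers kick in and the largest object that RowFlush will accept.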

src/s3fs.cpp (View File)

@@ -3574,11 +3574,15 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar
return -1;
}
S3fsCurl::SetMaxParallelCount(maxpara);
if(FdManager::GetPageSize() < static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){
FdManager::SetPageSize(static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount()));
}
return 0;
}
if(0 == STR2NCMP(arg, "fd_page_size=")){
size_t pagesize = static_cast<size_t>(s3fs_strtoofft(strchr(arg, '=') + sizeof(char)));
if((1024 * 1024) >= pagesize){
if(pagesize < static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){
fprintf(stderr, "%s: argument should be over 1MB: fd_page_size\n",
program_name.c_str());
return -1;
@@ -3586,6 +3590,17 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar
FdManager::SetPageSize(pagesize);
return 0;
}
if(0 == STR2NCMP(arg, "multipart_size=")){
off_t size = static_cast<off_t>(s3fs_strtoofft(strchr(arg, '=') + sizeof(char)));
if(!S3fsCurl::SetMultipartSize(size)){
fprintf(stderr, "%s: multipart_size option could not be specified over 10(MB)\n", program_name.c_str());
return -1;
}
if(FdManager::GetPageSize() < static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){
FdManager::SetPageSize(static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount()));
}
return 0;
}
if(0 == STR2NCMP(arg, "ahbe_conf=")){
string ahbe_conf = strchr(arg, '=') + sizeof(char);
if(!AdditionalHeader::get()->Load(ahbe_conf.c_str())){
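Both option handlers above enforce the same invariant: fd_page_size must be at least multipart_size * parallel_count. A minimal standalone sketch of that coupling (illustrative only; apply_page_size_floor is a hypothetical name, not an s3fs function):

#include <cstdio>
#include <sys/types.h>

static size_t page_size = 50 * 1024 * 1024; // FDPAGE_SIZE default (50MB)
static off_t  part_size = 10 * 1024 * 1024; // multipart_size default (10MB)
static int    parallel  = 5;                // parallel_count default

// Same adjustment as in my_fuse_opt_proc: never let the page size drop below
// one full round of parallel multipart chunks.
static void apply_page_size_floor(void)
{
  size_t floor_bytes = static_cast<size_t>(part_size) * parallel;
  if(page_size < floor_bytes){
    page_size = floor_bytes;
  }
}

int main(void)
{
  part_size = 20 * 1024 * 1024; // as if the user passed -o multipart_size=20
  apply_page_size_floor();
  printf("fd_page_size is now %zu bytes\n", page_size); // 104857600 (100MB)
  return 0;
}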