From 4762e53b5d8e13d3eb634960e824aab1358c31ab Mon Sep 17 00:00:00 2001 From: Takeshi Nakatani <ggtakec@gmail.com> Date: Sun, 30 Mar 2014 07:53:41 +0000 Subject: [PATCH] Added multipart_size option for #16 --- doc/man/s3fs.1 | 10 +++++++++- src/curl.cpp | 19 +++++++++++++++---- src/curl.h | 4 ++++ src/fdcache.cpp | 13 ++++++------- src/s3fs.cpp | 17 ++++++++++++++++- 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/doc/man/s3fs.1 b/doc/man/s3fs.1 index 540f22f..3650f90 100644 --- a/doc/man/s3fs.1 +++ b/doc/man/s3fs.1 @@ -125,14 +125,22 @@ maximum number of parallel request for listing objects. .TP \fB\-o\fR parallel_count (default="5") number of parallel request for uploading big objects. -s3fs uploads large object(over 20MB) by multipart post request, and sends parallel requests. +s3fs uploads large object(default:over 20MB) by multipart post request, and sends parallel requests. This option limits parallel request count which s3fs requests at once. It is necessary to set this value depending on a CPU and a network band. +This option is related to fd_page_size option and affects it. .TP \fB\-o\fR fd_page_size(default="52428800"(50MB)) number of internal management page size for each file discriptor. For delayed reading and writing by s3fs, s3fs manages pages which is separated from object. Each pages has a status that data is already loaded(or not loaded yet). This option should not be changed when you don't have a trouble with performance. +This value is changed automatically by parallel_count and multipart_size values(fd_page_size value = parallel_count * multipart_size). +.TP +\fB\-o\fR multipart_size(default="10"(10MB)) +number of one part size in multipart uploading request. +The default size is 10MB(10485760 bytes), and this value is the minimum size. +Specify the part size in MB, and it must be 10 (MB) or more. +This option is related to fd_page_size option and affects it. .TP \fB\-o\fR url (default="http://s3.amazonaws.com") sets the url to use to access Amazon S3. 
If you want to use HTTPS, then you can set url=https://s3.amazonaws.com diff --git a/src/curl.cpp b/src/curl.cpp index 76d842f..fd10642 100644 --- a/src/curl.cpp +++ b/src/curl.cpp @@ -173,7 +173,8 @@ curltime_t S3fsCurl::curl_times; curlprogress_t S3fsCurl::curl_progress; string S3fsCurl::curl_ca_bundle; mimes_t S3fsCurl::mimeTypes; -int S3fsCurl::max_parallel_cnt = 5; // default +int S3fsCurl::max_parallel_cnt = 5; // default +off_t S3fsCurl::multipart_size = MULTIPART_SIZE; // default //------------------------------------------------------------------- // Class methods for S3fsCurl @@ -822,6 +823,16 @@ string S3fsCurl::SetIAMRole(const char* role) return old; } +bool S3fsCurl::SetMultipartSize(off_t size) +{ + size = size * 1024 * 1024; + if(size < MULTIPART_SIZE){ + return false; + } + S3fsCurl::multipart_size = size; + return true; +} + int S3fsCurl::SetMaxParallelCount(int value) { int old = S3fsCurl::max_parallel_cnt; @@ -931,7 +942,7 @@ int S3fsCurl::ParallelMultipartUploadRequest(const char* tpath, headers_t& meta, // Loop for setup parallel upload(multipart) request. for(para_cnt = 0; para_cnt < S3fsCurl::max_parallel_cnt && 0 < remaining_bytes; para_cnt++, remaining_bytes -= chunk){ // chunk size - chunk = remaining_bytes > MULTIPART_SIZE ? MULTIPART_SIZE : remaining_bytes; + chunk = remaining_bytes > S3fsCurl::multipart_size ? S3fsCurl::multipart_size : remaining_bytes; // s3fscurl sub object S3fsCurl* s3fscurl_para = new S3fsCurl(true); @@ -1021,7 +1032,7 @@ int S3fsCurl::ParallelGetObjectRequest(const char* tpath, int fd, off_t start, s // Loop for setup parallel upload(multipart) request. for(para_cnt = 0; para_cnt < S3fsCurl::max_parallel_cnt && 0 < remaining_bytes; para_cnt++, remaining_bytes -= chunk){ // chunk size - chunk = remaining_bytes > MULTIPART_SIZE ? MULTIPART_SIZE : remaining_bytes; + chunk = remaining_bytes > S3fsCurl::multipart_size ? 
S3fsCurl::multipart_size : remaining_bytes; // s3fscurl sub object S3fsCurl* s3fscurl_para = new S3fsCurl(); @@ -2837,7 +2848,7 @@ int S3fsCurl::MultipartUploadRequest(const char* tpath, headers_t& meta, int fd, // cycle through open fd, pulling off 10MB chunks at a time for(remaining_bytes = st.st_size; 0 < remaining_bytes; remaining_bytes -= chunk){ // chunk size - chunk = remaining_bytes > MULTIPART_SIZE ? MULTIPART_SIZE : remaining_bytes; + chunk = remaining_bytes > S3fsCurl::multipart_size ? S3fsCurl::multipart_size : remaining_bytes; // set partdata.fd = fd2; diff --git a/src/curl.h b/src/curl.h index 0b164a2..169d9ce 100644 --- a/src/curl.h +++ b/src/curl.h @@ -166,6 +166,7 @@ class S3fsCurl static std::string curl_ca_bundle; static mimes_t mimeTypes; static int max_parallel_cnt; + static off_t multipart_size; // variables CURL* hCurl; @@ -274,8 +275,11 @@ class S3fsCurl static long SetSslVerifyHostname(long value); static long GetSslVerifyHostname(void) { return S3fsCurl::ssl_verify_hostname; } static int SetMaxParallelCount(int value); + static int GetMaxParallelCount(void) { return S3fsCurl::max_parallel_cnt; } static std::string SetIAMRole(const char* role); static const char* GetIAMRole(void) { return S3fsCurl::IAM_role.c_str(); } + static bool SetMultipartSize(off_t size); + static off_t GetMultipartSize(void) { return S3fsCurl::multipart_size; } // methods bool CreateCurlHandle(bool force = false); diff --git a/src/fdcache.cpp b/src/fdcache.cpp index 9dc52b1..fe61e21 100644 --- a/src/fdcache.cpp +++ b/src/fdcache.cpp @@ -51,8 +51,7 @@ using namespace std; //------------------------------------------------ // Symbols //------------------------------------------------ -#define MAX_OBJECT_SIZE 68719476735LL // 64GB - 1L -#define MULTIPART_LOWLIMIT (20 * 1024 * 1024) // 20MB +#define MAX_MULTIPART_CNT 10000 // S3 multipart max count #define FDPAGE_SIZE (50 * 1024 * 1024) // 50MB(parallel uploading is 5 parallel(default) * 10 MB) 
//------------------------------------------------ @@ -798,7 +797,7 @@ int FdEntity::Load(off_t start, off_t size) break; } // download - if((*iter)->bytes >= MULTIPART_LOWLIMIT && !nomultipart){ // 20MB + if((*iter)->bytes >= (2 * S3fsCurl::GetMultipartSize()) && !nomultipart){ // default 20MB // parallel request // Additional time is needed for large files time_t backup = 0; @@ -880,14 +879,14 @@ int FdEntity::RowFlush(const char* tpath, headers_t& meta, bool ow_sse_flg, bool * - 1 to 10,000 parts are allowed * - minimum size of parts is 5MB (expect for the last part) * - * For our application, we will define part size to be 10MB (10 * 2^20 Bytes) - * maximum file size will be ~64 GB - 2 ** 36 + * For our application, we will define minimum part size to be 10MB (10 * 2^20 Bytes) + * maximum file size will be 10,000 parts * multipart_size (about 97 GB with the minimum 10MB part size) * * Initially uploads will be done serially * * If file is > 20MB, then multipart will kick in */ - if(pagelist.Size() > MAX_OBJECT_SIZE){ // 64GB - 1 + if(pagelist.Size() > (MAX_MULTIPART_CNT * S3fsCurl::GetMultipartSize())){ // close f ? 
return -ENOTSUP; } @@ -898,7 +897,7 @@ int FdEntity::RowFlush(const char* tpath, headers_t& meta, bool ow_sse_flg, bool return -errno; } - if(pagelist.Size() >= MULTIPART_LOWLIMIT && !nomultipart){ // 20MB + if(pagelist.Size() >= (2 * S3fsCurl::GetMultipartSize()) && !nomultipart){ // default 20MB // Additional time is needed for large files time_t backup = 0; if(120 > S3fsCurl::GetReadwriteTimeout()){ diff --git a/src/s3fs.cpp b/src/s3fs.cpp index 2f52776..52be4f9 100644 --- a/src/s3fs.cpp +++ b/src/s3fs.cpp @@ -3574,11 +3574,15 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar return -1; } S3fsCurl::SetMaxParallelCount(maxpara); + + if(FdManager::GetPageSize() < static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ + FdManager::SetPageSize(static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())); + } return 0; } if(0 == STR2NCMP(arg, "fd_page_size=")){ size_t pagesize = static_cast<size_t>(s3fs_strtoofft(strchr(arg, '=') + sizeof(char))); - if((1024 * 1024) >= pagesize){ + if(pagesize < static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ fprintf(stderr, "%s: argument should be over 1MB: fd_page_size\n", program_name.c_str()); return -1; @@ -3586,6 +3590,17 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar FdManager::SetPageSize(pagesize); return 0; } + if(0 == STR2NCMP(arg, "multipart_size=")){ + off_t size = static_cast<off_t>(s3fs_strtoofft(strchr(arg, '=') + sizeof(char))); + if(!S3fsCurl::SetMultipartSize(size)){ + fprintf(stderr, "%s: multipart_size option could not be specified under 10(MB)\n", program_name.c_str()); + return -1; + } + if(FdManager::GetPageSize() < static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){ + FdManager::SetPageSize(static_cast<size_t>(S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())); + } + return 0; + } if(0 == STR2NCMP(arg, "ahbe_conf=")){ string ahbe_conf = strchr(arg, '=') + 
sizeof(char); if(!AdditionalHeader::get()->Load(ahbe_conf.c_str())){