Fixed to use copy api in multipart upload

Takeshi Nakatani 2019-09-26 11:30:58 +09:00 committed by Andrew Gaul
parent b6349e9428
commit 1db94a0b30
8 changed files with 619 additions and 20 deletions

View File

@@ -259,6 +259,11 @@ Notice: if s3fs handles the extended attribute, s3fs can not work to copy comman
disable registering xml name space for response of ListBucketResult and ListVersionsResult etc. Default name space is looked up from "http://s3.amazonaws.com/doc/2006-03-01".
This option should not be specified now, because s3fs looks up xmlns automatically after v1.66.
.TP
\fB\-o\fR nomixupload - disable copy in multipart uploads.
Disables the use of PUT (copy api) when multipart uploading large objects.
By default, when doing a multipart upload, ranges of unchanged data are sent with PUT (copy api) whenever possible.
When nocopyapi or norenameapi is specified, the use of PUT (copy api) is disabled even if this option is not specified.
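For example, a hypothetical invocation (bucket name and mountpoint are placeholders): s3fs mybucket /path/to/mountpoint \fB\-o\fR nomixupload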
.TP
\fB\-o\fR nocopyapi - for other incomplete compatibility object storage.
For a distributed object storage which is compatible with the S3 API but does not support PUT (copy api).
If you set this option, s3fs does not use PUT with "x-amz-copy-source" (copy api). Because traffic increases 2-3 times with this option, we do not recommend it.

View File

@@ -47,6 +47,7 @@
#include "s3fs_util.h"
#include "s3fs_auth.h"
#include "addhead.h"
#include "fdcache.h"
#include "psemaphore.h"
using namespace std;
@@ -424,7 +425,7 @@ bool S3fsCurl::InitS3fsCurl(const char* MimeFile)
return false;
}
// [NOTE]
// sCurlPoolSize must be over parrallel(or multireq) count.
// sCurlPoolSize must be over parallel(or multireq) count.
//
if(sCurlPoolSize < std::max(GetMaxParallelCount(), GetMaxMultiRequest())){
sCurlPoolSize = std::max(GetMaxParallelCount(), GetMaxMultiRequest());
@@ -1312,6 +1313,15 @@ bool S3fsCurl::UploadMultipartPostCallback(S3fsCurl* s3fscurl)
return s3fscurl->UploadMultipartPostComplete();
}
bool S3fsCurl::MixMultipartPostCallback(S3fsCurl* s3fscurl)
{
if(!s3fscurl){
return false;
}
return s3fscurl->MixMultipartPostComplete();
}
S3fsCurl* S3fsCurl::UploadMultipartPostRetryCallback(S3fsCurl* s3fscurl)
{
if(!s3fscurl){
@@ -1397,6 +1407,21 @@ S3fsCurl* S3fsCurl::CopyMultipartPostRetryCallback(S3fsCurl* s3fscurl)
return newcurl;
}
S3fsCurl* S3fsCurl::MixMultipartPostRetryCallback(S3fsCurl* s3fscurl)
{
if(!s3fscurl){
return NULL;
}
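// [NOTE]
// partdata.fd is -1 for a server-side copy part and a valid file descriptor
// for an uploaded part, so the retry is dispatched to the matching callback.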
S3fsCurl* pcurl;
if(-1 == s3fscurl->partdata.fd){
pcurl = S3fsCurl::CopyMultipartPostRetryCallback(s3fscurl);
}else{
pcurl = S3fsCurl::UploadMultipartPostRetryCallback(s3fscurl);
}
return pcurl;
}
int S3fsCurl::ParallelMultipartUploadRequest(const char* tpath, headers_t& meta, int fd)
{
int result;
@@ -1488,6 +1513,136 @@ int S3fsCurl::ParallelMultipartUploadRequest(const char* tpath, headers_t& meta,
return 0;
}
int S3fsCurl::ParallelMixMultipartUploadRequest(const char* tpath, headers_t& meta, int fd, const PageList& pagelist)
{
int result;
string upload_id;
struct stat st;
int fd2;
etaglist_t list;
S3fsCurl s3fscurl(true);
S3FS_PRN_INFO3("[tpath=%s][fd=%d]", SAFESTRPTR(tpath), fd);
// get upload mixed page list
fdpage_list_t fdplist;
if(!pagelist.GetMultipartSizeList(fdplist, S3fsCurl::multipart_size)){
return -1;
}
// duplicate fd
if(-1 == (fd2 = dup(fd)) || 0 != lseek(fd2, 0, SEEK_SET)){
S3FS_PRN_ERR("Could not duplicate file descriptor(errno=%d)", errno);
PageList::FreeList(fdplist);
if(-1 != fd2){
close(fd2);
}
return -errno;
}
if(-1 == fstat(fd2, &st)){
S3FS_PRN_ERR("Invalid file descriptor(errno=%d)", errno);
PageList::FreeList(fdplist);
close(fd2);
return -errno;
}
if(0 != (result = s3fscurl.PreMultipartPostRequest(tpath, meta, upload_id, true))){
PageList::FreeList(fdplist);
close(fd2);
return result;
}
s3fscurl.DestroyCurlHandle();
// for copy multipart
string srcresource;
string srcurl;
MakeUrlResource(get_realpath(tpath).c_str(), srcresource, srcurl);
meta["Content-Type"] = S3fsCurl::LookupMimeType(string(tpath));
meta["x-amz-copy-source"] = srcresource;
// Initialize S3fsMultiCurl
S3fsMultiCurl curlmulti(GetMaxParallelCount());
curlmulti.SetSuccessCallback(S3fsCurl::MixMultipartPostCallback);
curlmulti.SetRetryCallback(S3fsCurl::MixMultipartPostRetryCallback);
for(fdpage_list_t::const_iterator iter = fdplist.begin(); iter != fdplist.end(); ++iter){
// s3fscurl sub object
S3fsCurl* s3fscurl_para = new S3fsCurl(true);
if(iter->modified){
// Multipart upload
s3fscurl_para->partdata.fd = fd2;
s3fscurl_para->partdata.startpos = iter->offset;
s3fscurl_para->partdata.size = iter->bytes;
s3fscurl_para->b_partdata_startpos = s3fscurl_para->partdata.startpos;
s3fscurl_para->b_partdata_size = s3fscurl_para->partdata.size;
s3fscurl_para->partdata.add_etag_list(&list);
S3FS_PRN_INFO3("Upload Part [tpath=%s][start=%jd][size=%jd][part=%jd]", SAFESTRPTR(tpath), (intmax_t)(iter->offset), (intmax_t)(iter->bytes), (intmax_t)(list.size()));
// initiate upload part for parallel
if(0 != (result = s3fscurl_para->UploadMultipartPostSetup(tpath, list.size(), upload_id))){
S3FS_PRN_ERR("failed uploading part setup(%d)", result);
PageList::FreeList(fdplist);
close(fd2);
delete s3fscurl_para;
return result;
}
}else{
// Multipart copy
ostringstream strrange;
strrange << "bytes=" << iter->offset << "-" << (iter->offset + iter->bytes - 1);
meta["x-amz-copy-source-range"] = strrange.str();
strrange.str("");
strrange.clear(stringstream::goodbit);
s3fscurl_para->b_from = SAFESTRPTR(tpath);
s3fscurl_para->b_meta = meta;
s3fscurl_para->partdata.add_etag_list(&list);
S3FS_PRN_INFO3("Copy Part [tpath=%s][start=%jd][size=%jd][part=%jd]", SAFESTRPTR(tpath), (intmax_t)(iter->offset), (intmax_t)(iter->bytes), (intmax_t)(list.size()));
// initiate upload part for parallel
if(0 != (result = s3fscurl_para->CopyMultipartPostSetup(tpath, tpath, list.size(), upload_id, meta))){
S3FS_PRN_ERR("failed uploading part setup(%d)", result);
close(fd2);
delete s3fscurl_para;
return result;
}
}
// set into parallel object
if(!curlmulti.SetS3fsCurlObject(s3fscurl_para)){
S3FS_PRN_ERR("Could not make curl object into multi curl(%s).", tpath);
PageList::FreeList(fdplist);
close(fd2);
delete s3fscurl_para;
return -1;
}
}
PageList::FreeList(fdplist);
// Multi request
if(0 != (result = curlmulti.Request())){
S3FS_PRN_ERR("error occurred in multi request(errno=%d).", result);
S3fsCurl s3fscurl_abort(true);
int result2 = s3fscurl_abort.AbortMultipartUpload(tpath, upload_id);
s3fscurl_abort.DestroyCurlHandle();
if(result2 != 0){
S3FS_PRN_ERR("error aborting multipart upload(errno=%d).", result2);
}
close(fd2);
return result;
}
close(fd2);
if(0 != (result = s3fscurl.CompleteMultipartPostRequest(tpath, upload_id, list))){
return result;
}
return 0;
}
S3fsCurl* S3fsCurl::ParallelGetObjectRetryCallback(S3fsCurl* s3fscurl)
{
int result;
@@ -1940,7 +2095,7 @@ bool S3fsCurl::CreateCurlHandle(bool only_pool, bool remake)
return false;
}else{
// [NOTE]
// urther initialization processing is left to lazy processing to be executed later.
// Further initialization processing is left to lazy processing to be executed later.
// (Currently we do not use only_pool=true, but this code is retained for the future)
return true;
}
@@ -3759,6 +3914,17 @@ bool S3fsCurl::CopyMultipartPostComplete()
return true;
}
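// [NOTE]
// As with the retry callback, completion dispatches on partdata.fd:
// -1 means the part was a server-side copy, otherwise it was uploaded
// from the local file descriptor.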
bool S3fsCurl::MixMultipartPostComplete()
{
bool result;
if(-1 == partdata.fd){
result = CopyMultipartPostComplete();
}else{
result = UploadMultipartPostComplete();
}
return result;
}
int S3fsCurl::MultipartHeadRequest(const char* tpath, off_t size, headers_t& meta, bool is_copy)
{
int result;

View File

@@ -186,6 +186,7 @@ private:
//----------------------------------------------
// class S3fsCurl
//----------------------------------------------
class PageList;
class S3fsCurl;
// Prototype function for lazy setup options for curl handle
@@ -362,8 +363,10 @@ class S3fsCurl
static bool UploadMultipartPostCallback(S3fsCurl* s3fscurl);
static bool CopyMultipartPostCallback(S3fsCurl* s3fscurl);
static bool MixMultipartPostCallback(S3fsCurl* s3fscurl);
static S3fsCurl* UploadMultipartPostRetryCallback(S3fsCurl* s3fscurl);
static S3fsCurl* CopyMultipartPostRetryCallback(S3fsCurl* s3fscurl);
static S3fsCurl* MixMultipartPostRetryCallback(S3fsCurl* s3fscurl);
static S3fsCurl* ParallelGetObjectRetryCallback(S3fsCurl* s3fscurl);
// lazy functions for set curl options
@@ -399,12 +402,14 @@ class S3fsCurl
int CopyMultipartPostSetup(const char* from, const char* to, int part_num, const std::string& upload_id, headers_t& meta);
bool UploadMultipartPostComplete();
bool CopyMultipartPostComplete();
bool MixMultipartPostComplete();
public:
// class methods
static bool InitS3fsCurl(const char* MimeFile = NULL);
static bool DestroyS3fsCurl(void);
static int ParallelMultipartUploadRequest(const char* tpath, headers_t& meta, int fd);
static int ParallelMixMultipartUploadRequest(const char* tpath, headers_t& meta, int fd, const PageList& pagelist);
static int ParallelGetObjectRequest(const char* tpath, int fd, off_t start, ssize_t size);
static bool CheckIAMCredentialUpdate(void);

View File

@@ -382,9 +382,11 @@ bool PageList::IsPageLoaded(off_t start, off_t size) const
return true;
}
bool PageList::SetPageLoadedStatus(off_t start, off_t size, bool is_loaded, bool is_modified, bool is_compress)
bool PageList::SetPageLoadedStatus(off_t start, off_t size, PageList::page_status pstatus, bool is_compress)
{
off_t now_size = Size();
off_t now_size = Size();
bool is_loaded = (PAGE_LOAD_MODIFIED == pstatus || PAGE_LOADED == pstatus);
bool is_modified = (PAGE_LOAD_MODIFIED == pstatus || PAGE_MODIFIED == pstatus);
if(now_size <= start){
if(now_size < start){
@@ -507,6 +509,263 @@ int PageList::GetUnloadedPages(fdpage_list_t& unloaded_list, off_t start, off_t
return unloaded_list.size();
}
// [NOTE]
// This method is called in advance when mixing POST and COPY in a multipart upload.
// The minimum size of each part must be 5 MB, and any data area below this size
// must be downloaded from S3.
// This method checks the current PageList status and returns the areas that need
// to be downloaded so that each part is at least 5 MB.
//
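// For example (hypothetical layout): a 2MB modified page followed by a
// 4MB unmodified and unloaded page cannot form a 5MB part by itself, so
// part of the unmodified range is merged into the modified part and its
// unloaded portion is returned in dlpages for downloading.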
bool PageList::GetLoadPageListForMultipartUpload(fdpage_list_t& dlpages)
{
// compress before this processing
if(!Compress()){
return false;
}
bool is_prev_modified_page = false;
off_t accumulated_bytes = 0;
off_t last_modified_bytes = 0;
for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
if(iter->modified){
// this is modified page
if(is_prev_modified_page){
// in case of continuous modified page
accumulated_bytes += iter->bytes;
}else{
// previous page is unmodified page
// check whether the unmodified page bytes exceed the minimum size(5MB)
if(static_cast<const off_t>(MIN_MULTIPART_SIZE) <= accumulated_bytes){
// over minimum size
accumulated_bytes = iter->bytes; // reset accumulated size
}else{
// less than minimum size(5MB)
// the previous unmodified page needs to be loaded if it is not loaded yet,
// and that page will be included in the consecutive modified pages.
PageList::RawGetUnloadPageList(dlpages, (iter->offset - accumulated_bytes), accumulated_bytes);
accumulated_bytes += last_modified_bytes + iter->bytes; // this page size and last modified page size are accumulated
last_modified_bytes = 0;
}
is_prev_modified_page = true;
}
}else{
// this is unmodified page
if(!is_prev_modified_page){
// in case of continuous unmodified page
accumulated_bytes += iter->bytes;
}else{
// previous page is modified page
// check whether the modified page bytes exceed the minimum size(5MB)
if(static_cast<const off_t>(MIN_MULTIPART_SIZE) <= accumulated_bytes){
// over minimum size
last_modified_bytes = accumulated_bytes; // backup last modified page size
accumulated_bytes = iter->bytes; // set new accumulated size(this page size)
is_prev_modified_page = false;
}else{
// less than minimum size(5MB)
// this unmodified page needs to be loaded if it is not loaded yet,
// and this page will be included in the consecutive modified pages.
if((static_cast<const off_t>(MIN_MULTIPART_SIZE) - accumulated_bytes) <= iter->bytes){
// Split off the missing size from this page for the preceding modified page.
if(!iter->loaded){
// because this page is not loaded
fdpage dlpage(iter->offset, (iter->bytes - (static_cast<const off_t>(MIN_MULTIPART_SIZE) - accumulated_bytes))); // don't care for loaded/modified flag
dlpages.push_back(dlpage);
}
last_modified_bytes = static_cast<const off_t>(MIN_MULTIPART_SIZE); // backup last modified page size
accumulated_bytes = iter->bytes - (static_cast<const off_t>(MIN_MULTIPART_SIZE) - accumulated_bytes); // set rest bytes to accumulated size
is_prev_modified_page = false;
}else{
// assign all of this page's bytes to the preceding modified page,
// though it is still not enough for the minimum size.
if(!iter->loaded){
// because this page is not loaded
fdpage dlpage(iter->offset, iter->bytes); // don't care for loaded/modified flag
dlpages.push_back(dlpage);
}
accumulated_bytes += iter->bytes; // add all bytes to accumulated size
}
}
}
}
}
// compress dlpages
bool is_first = true;
for(fdpage_list_t::iterator dliter = dlpages.begin(); dliter != dlpages.end(); ){
if(is_first){
is_first = false;
++dliter;
continue;
}
fdpage_list_t::iterator biter = dliter;
--biter;
if((biter->offset + biter->bytes) == dliter->offset){
biter->bytes += dliter->bytes;
dliter = dlpages.erase(dliter);
}else{
++dliter;
}
}
return true;
}
// [NOTE]
// This static method assumes that it is called only from GetLoadPageListForMultipartUpload.
// If you want exclusive control, do it in GetLoadPageListForMultipartUpload,
// not in this method.
//
bool PageList::RawGetUnloadPageList(fdpage_list_t& dlpages, off_t offset, off_t size)
{
for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
if((iter->offset + iter->bytes) <= offset){
continue;
}else if((offset + size) <= iter->offset){
break;
}else{
if(!iter->loaded && !iter->modified){
fdpage dlpage(iter->offset, iter->bytes); // don't care for loaded/modified flag
dlpages.push_back(dlpage);
}
}
}
return true;
}
bool PageList::GetMultipartSizeList(fdpage_list_t& mplist, off_t partsize) const
{
if(!mplist.empty()){
return false;
}
// temporary page list
PageList tmpPageObj(*this);
if(!tmpPageObj.Compress(true)){ // compress by modified flag
return false;
}
// [NOTE]
// Set the modified flag on page list areas so that each modified area reaches
// the minimum size.
// This process needs to match the GetLoadPageListForMultipartUpload method exactly.
//
// [FIXME]
// Factor the common processing of GetLoadPageListForMultipartUpload and this
// method into one method.
//
bool is_first = true;
bool is_prev_modified_page = false;
off_t accumulated_bytes = 0;
off_t last_modified_bytes = 0;
fdpage_list_t::iterator iter;
for(iter = tmpPageObj.pages.begin(); iter != tmpPageObj.pages.end(); ++iter){
if(is_first){
is_prev_modified_page = iter->modified;
is_first = false;
}
if(iter->modified){
// this is modified page
if(is_prev_modified_page){
// in case of continuous modified page
accumulated_bytes += iter->bytes;
}else{
// previous page is unmodified page
// check whether the unmodified page bytes exceed the minimum size(5MB)
if(static_cast<const off_t>(MIN_MULTIPART_SIZE) <= accumulated_bytes){
// over minimum size
accumulated_bytes = iter->bytes; // reset accumulated size
}else{
// less than minimum size(5MB)
// set the modified flag on the previous unmodified page.
fdpage_list_t::iterator biter = iter;
--biter;
biter->loaded = true;
biter->modified = true;
accumulated_bytes += last_modified_bytes + iter->bytes; // this page size and last modified page size are accumulated
last_modified_bytes = 0;
}
is_prev_modified_page = true;
}
}else{
// this is unmodified page
if(!is_prev_modified_page){
// in case of continuous unmodified page
accumulated_bytes += iter->bytes;
}else{
// previous page is modified page
// check whether the modified page bytes exceed the minimum size(5MB)
if(static_cast<const off_t>(MIN_MULTIPART_SIZE) <= accumulated_bytes){
// over minimum size
last_modified_bytes = accumulated_bytes; // backup last modified page size
accumulated_bytes = iter->bytes; // set new accumulated size(this page size)
is_prev_modified_page = false;
}else{
// less than minimum size(5MB)
// set the modified flag on this unmodified page.
if((static_cast<const off_t>(MIN_MULTIPART_SIZE) - accumulated_bytes) <= iter->bytes){
// Split off the missing size from this page for the preceding modified page.
fdpage newpage(iter->offset, (static_cast<const off_t>(MIN_MULTIPART_SIZE) - accumulated_bytes), true, true);
iter->bytes -= (static_cast<const off_t>(MIN_MULTIPART_SIZE) - accumulated_bytes);
iter->offset += (static_cast<const off_t>(MIN_MULTIPART_SIZE) - accumulated_bytes);
tmpPageObj.pages.insert(iter, newpage);
last_modified_bytes = static_cast<const off_t>(MIN_MULTIPART_SIZE); // backup last modified page size
accumulated_bytes = iter->bytes; // set rest bytes to accumulated size
is_prev_modified_page = false;
}else{
// assign all of this page's bytes to the preceding modified page,
// though it is still not enough for the minimum size.
accumulated_bytes += iter->bytes; // add all bytes to accumulated size
}
}
}
}
}
// recompress
if(!tmpPageObj.Compress(true)){ // compress by modified flag
return false;
}
// normalization for uploading parts
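// [NOTE]
// Modified ranges become upload parts and are split at the configured
// part size; unmodified ranges become copy parts, which the copy api
// accepts up to 5GB each.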
for(iter = tmpPageObj.pages.begin(); iter != tmpPageObj.pages.end(); ++iter){
off_t start = iter->offset;
off_t remains = iter->bytes;
while(0 < remains){
off_t onesize;
if(iter->modified){
// Upload part: this page must be between 5MB and partsize
onesize = std::min(remains, partsize);
}else{
// Copy part: this page must be between 5MB and 5GB
onesize = std::min(remains, static_cast<off_t>(FIVE_GB));
}
fdpage page(start, onesize, iter->loaded, iter->modified);
mplist.push_back(page);
start += onesize;
remains -= onesize;
}
}
return true;
}
bool PageList::IsModified(void) const
{
for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
@@ -616,7 +875,12 @@ bool PageList::Serialize(CacheFileStat& file, bool is_output)
is_modified = (1 == s3fs_strtoofft(part.c_str()) ? true : false);
}
// add new area
SetPageLoadedStatus(offset, size, is_loaded, is_modified);
PageList::page_status pstatus =
( is_loaded && is_modified ? PageList::PAGE_LOAD_MODIFIED :
!is_loaded && is_modified ? PageList::PAGE_MODIFIED :
is_loaded && !is_modified ? PageList::PAGE_LOADED : PageList::PAGE_NOT_LOAD_MODIFIED );
SetPageLoadedStatus(offset, size, pstatus);
}
delete[] ptmp;
if(is_err){
@@ -649,6 +913,15 @@ void PageList::Dump()
//------------------------------------------------
// FdEntity class methods
//------------------------------------------------
bool FdEntity::mixmultipart = true;
bool FdEntity::SetNoMixMultipart(void)
{
bool old = mixmultipart;
mixmultipart = false;
return old;
}
int FdEntity::FillFile(int fd, unsigned char byte, off_t size, off_t start)
{
unsigned char bytes[1024 * 32]; // 32kb
@@ -1265,7 +1538,7 @@ int FdEntity::Load(off_t start, off_t size, bool lock_already_held)
break;
}
// Set loaded flag
pagelist.SetPageLoadedStatus(iter->offset, iter->bytes, true, false);
pagelist.SetPageLoadedStatus(iter->offset, iter->bytes, PageList::PAGE_LOADED);
}
PageList::FreeList(unloaded_list);
}
@@ -1532,8 +1805,10 @@ int FdEntity::RowFlush(const char* tpath, bool force_sync)
// check disk space
if(ReserveDiskSpace(restsize)){
// enough disk space
// Load all uninitialized area
result = Load(/*start=*/ 0, /*size=*/ 0, /*lock_already_held=*/ true);
// Load all uninitialized area(no mix multipart uploading)
if(!FdEntity::mixmultipart){
result = Load(/*start=*/ 0, /*size=*/ 0, /*lock_already_held=*/ true);
}
FdManager::FreeReservedDiskSpace(restsize);
if(0 != result){
S3FS_PRN_ERR("failed to upload all area(errno=%d)", result);
@@ -1588,8 +1863,37 @@ int FdEntity::RowFlush(const char* tpath, bool force_sync)
}
if(pagelist.Size() >= S3fsCurl::GetMultipartSize() && !nomultipart){
result = S3fsCurl::ParallelMultipartUploadRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, fd);
if(FdEntity::mixmultipart){
// multipart uploading can use copy api
// Each part must be 5MB or more; if a part would be less than 5MB,
// download the missing range first.
fdpage_list_t dlpages;
if(!pagelist.GetLoadPageListForMultipartUpload(dlpages)){
S3FS_PRN_ERR("something error occurred during getting download pagelist.");
return -1;
}
for(fdpage_list_t::const_iterator iter = dlpages.begin(); iter != dlpages.end(); ++iter){
if(0 != (result = Load(iter->offset, iter->bytes, true))){
S3FS_PRN_ERR("failed to get parts(start=%lld, size=%lld) before uploading.", static_cast<long long int>(iter->offset), static_cast<long long int>(iter->bytes));
return result;
}
}
// multipart uploading with copy api
result = S3fsCurl::ParallelMixMultipartUploadRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, fd, pagelist);
}else{
// multipart uploading not using copy api
result = S3fsCurl::ParallelMultipartUploadRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, fd);
}
}else{
// If there are unloaded pages, they are loaded here.
if(0 != (result = Load(/*start=*/ 0, /*size=*/ 0, /*lock_already_held=*/ true))){
S3FS_PRN_ERR("failed to load parts before uploading object(%d)", result);
return result;
}
S3fsCurl s3fscurl(true);
result = s3fscurl.PutRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, fd);
}
@@ -1667,7 +1971,7 @@ ssize_t FdEntity::Read(char* bytes, off_t start, size_t size, bool force_load)
AutoLock auto_lock(&fdent_data_lock);
if(force_load){
pagelist.SetPageLoadedStatus(start, size, false, false);
pagelist.SetPageLoadedStatus(start, size, PageList::PAGE_NOT_LOAD_MODIFIED);
}
ssize_t rsize;
@@ -1737,7 +2041,7 @@ ssize_t FdEntity::Write(const char* bytes, off_t start, size_t size)
return -EIO;
}
// add new area
pagelist.SetPageLoadedStatus(pagelist.Size(), start - pagelist.Size(), false, true);
pagelist.SetPageLoadedStatus(pagelist.Size(), start - pagelist.Size(), PageList::PAGE_MODIFIED);
}
int result = 0;
@@ -1750,9 +2054,12 @@ ssize_t FdEntity::Write(const char* bytes, off_t start, size_t size)
// enough disk space
// Load uninitialized area which starts from 0 to (start + size) before writing.
if(0 < start){
result = Load(0, start, /*lock_already_held=*/ true);
if(!FdEntity::mixmultipart){
if(0 < start){
result = Load(0, start, /*lock_already_held=*/ true);
}
}
FdManager::FreeReservedDiskSpace(restsize);
if(0 != result){
S3FS_PRN_ERR("failed to load uninitialized area before writing(errno=%d)", result);
@@ -1782,15 +2089,17 @@ ssize_t FdEntity::Write(const char* bytes, off_t start, size_t size)
return -errno;
}
if(0 < wsize){
pagelist.SetPageLoadedStatus(start, wsize, true, true);
pagelist.SetPageLoadedStatus(start, wsize, PageList::PAGE_LOAD_MODIFIED);
}
// Load uninitialized area which starts from (start + size) to EOF after writing.
if(pagelist.Size() > start + static_cast<off_t>(size)){
result = Load(start + size, pagelist.Size(), /*lock_already_held=*/ true);
if(0 != result){
S3FS_PRN_ERR("failed to load uninitialized area after writing(errno=%d)", result);
return static_cast<ssize_t>(result);
if(!FdEntity::mixmultipart){
if(pagelist.Size() > start + static_cast<off_t>(size)){
result = Load(start + size, pagelist.Size(), /*lock_already_held=*/ true);
if(0 != result){
S3FS_PRN_ERR("failed to load uninitialized area after writing(errno=%d)", result);
return static_cast<ssize_t>(result);
}
}
}

View File

@@ -81,6 +81,14 @@ class PageList
private:
fdpage_list_t pages;
public:
enum page_status{
PAGE_NOT_LOAD_MODIFIED = 0,
PAGE_LOADED,
PAGE_MODIFIED,
PAGE_LOAD_MODIFIED
};
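// [NOTE]
// These four values cover the loaded/modified combinations that
// SetPageLoadedStatus() previously took as two separate bool arguments.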
private:
void Clear(void);
bool Compress(bool force_modified = false);
@@ -99,10 +107,12 @@ class PageList
bool Resize(off_t size, bool is_loaded, bool is_modified);
bool IsPageLoaded(off_t start = 0, off_t size = 0) const; // size=0 is checking to end of list
bool SetPageLoadedStatus(off_t start, off_t size, bool is_loaded = true, bool is_modified = false, bool is_compress = true);
bool SetPageLoadedStatus(off_t start, off_t size, PageList::page_status pstatus = PAGE_LOADED, bool is_compress = true);
bool FindUnloadedPage(off_t start, off_t& resstart, off_t& ressize) const;
off_t GetTotalUnloadedPageSize(off_t start = 0, off_t size = 0) const; // size=0 is checking to end of list
int GetUnloadedPages(fdpage_list_t& unloaded_list, off_t start = 0, off_t size = 0) const; // size=0 is checking to end of list
bool GetLoadPageListForMultipartUpload(fdpage_list_t& dlpages);
bool GetMultipartSizeList(fdpage_list_t& mplist, off_t partsize) const;
bool IsModified(void) const;
bool ClearAllModified(void);
@@ -117,6 +127,8 @@ class PageList
class FdEntity
{
private:
static bool mixmultipart; // whether multipart uploading can use copy api.
pthread_mutex_t fdent_lock;
bool is_lock_init;
int refcnt; // reference count
@@ -146,6 +158,8 @@ class FdEntity
bool SetAllStatusUnloaded(void) { return SetAllStatus(false); }
public:
static bool SetNoMixMultipart(void);
explicit FdEntity(const char* tpath = NULL, const char* cpath = NULL);
~FdEntity();

View File

@@ -4912,6 +4912,10 @@ static int my_fuse_opt_proc(void* data, const char* arg, int key, struct fuse_ar
noxmlns = true;
return 0;
}
if(0 == strcmp(arg, "nomixupload")){
FdEntity::SetNoMixMultipart();
return 0;
}
if(0 == strcmp(arg, "nocopyapi")){
nocopyapi = true;
return 0;
@@ -5347,6 +5351,11 @@ int main(int argc, char* argv[])
exit(exitcode);
}
// Check multipart / copy api for mix multipart uploading
if(nomultipart || nocopyapi || norenameapi){
FdEntity::SetNoMixMultipart();
}
// check free disk space
if(!FdManager::IsSafeDiskSpace(NULL, S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount())){
S3FS_PRN_EXIT("There is no enough disk space for used as cache(or temporary) directory by s3fs.");

View File

@@ -1333,6 +1333,13 @@ void show_help ()
" This option should not be specified now, because s3fs looks up\n"
" xmlns automatically after v1.66.\n"
"\n"
" nomixupload (disable copy in multipart uploads)\n"
" Disable to use PUT (copy api) when multipart uploading large size objects.\n"
" By default, when doing multipart upload, the range of unchanged data\n"
" will use PUT (copy api) whenever possible.\n"
" When nocopyapi or norenameapi is specified, use of PUT (copy api) is\n"
" invalidated even if this option is not specified.\n"
"\n"
" nocopyapi (for other incomplete compatibility object storage)\n"
" For a distributed object storage which is compatibility S3\n"
" API without PUT (copy api).\n"

View File

@@ -356,6 +356,89 @@ function test_multipart_copy {
rm_test_file "${BIG_FILE}-copy"
}
function test_multipart_mix {
describe "Testing multi-part mix ..."
if [ `uname` = "Darwin" ]; then
cat /dev/null > $BIG_FILE
fi
dd if=/dev/urandom of="/tmp/${BIG_FILE}" bs=$BIG_FILE_LENGTH seek=0 count=1
dd if="/tmp/${BIG_FILE}" of="${BIG_FILE}" bs=$BIG_FILE_LENGTH seek=0 count=1
# (1) Edit the middle of an existing file
# modify directly(seek 7.5MB offset)
# In the case of nomultipart and nocopyapi this has no special meaning,
# but the files are copied because copying leaves no local cache.
#
cp /tmp/${BIG_FILE} /tmp/${BIG_FILE}-mix
cp ${BIG_FILE} ${BIG_FILE}-mix
MODIFY_START_BLOCK=$((15*1024*1024/2/4))
echo -n "0123456789ABCDEF" | dd of="${BIG_FILE}-mix" bs=4 count=4 seek=$MODIFY_START_BLOCK conv=notrunc
echo -n "0123456789ABCDEF" | dd of="/tmp/${BIG_FILE}-mix" bs=4 count=4 seek=$MODIFY_START_BLOCK conv=notrunc
# Verify contents of file
echo "Comparing test file (1)"
if ! cmp "/tmp/${BIG_FILE}-mix" "${BIG_FILE}-mix"
then
return 1
fi
# (2) Write to an area beyond the size of the existing file
# modify directly(over file end offset)
#
cp /tmp/${BIG_FILE} /tmp/${BIG_FILE}-mix
cp ${BIG_FILE} ${BIG_FILE}-mix
OVER_FILE_BLOCK_POS=$((26*1024*1024/4))
echo -n "0123456789ABCDEF" | dd of="${BIG_FILE}-mix" bs=4 count=4 seek=$OVER_FILE_BLOCK_POS conv=notrunc
echo -n "0123456789ABCDEF" | dd of="/tmp/${BIG_FILE}-mix" bs=4 count=4 seek=$OVER_FILE_BLOCK_POS conv=notrunc
# Verify contents of file
echo "Comparing test file (2)"
if ! cmp "/tmp/${BIG_FILE}-mix" "${BIG_FILE}-mix"
then
return 1
fi
# (3) Writing from the 0th byte
#
cp /tmp/${BIG_FILE} /tmp/${BIG_FILE}-mix
cp ${BIG_FILE} ${BIG_FILE}-mix
echo -n "0123456789ABCDEF" | dd of="${BIG_FILE}-mix" bs=4 count=4 seek=0 conv=notrunc
echo -n "0123456789ABCDEF" | dd of="/tmp/${BIG_FILE}-mix" bs=4 count=4 seek=0 conv=notrunc
# Verify contents of file
echo "Comparing test file (3)"
if ! cmp "/tmp/${BIG_FILE}-mix" "${BIG_FILE}-mix"
then
return 1
fi
# (4) Write to the area within 5MB from the top
# modify directly(seek 1MB offset)
#
cp /tmp/${BIG_FILE} /tmp/${BIG_FILE}-mix
cp ${BIG_FILE} ${BIG_FILE}-mix
MODIFY_START_BLOCK=$((1*1024*1024))
echo -n "0123456789ABCDEF" | dd of="${BIG_FILE}-mix" bs=4 count=4 seek=$MODIFY_START_BLOCK conv=notrunc
echo -n "0123456789ABCDEF" | dd of="/tmp/${BIG_FILE}-mix" bs=4 count=4 seek=$MODIFY_START_BLOCK conv=notrunc
# Verify contents of file
echo "Comparing test file (4)"
if ! cmp "/tmp/${BIG_FILE}-mix" "${BIG_FILE}-mix"
then
return 1
fi
rm -f "/tmp/${BIG_FILE}"
rm -f "/tmp/${BIG_FILE}-mix"
rm_test_file "${BIG_FILE}"
rm_test_file "${BIG_FILE}-mix"
}
function test_special_characters {
describe "Testing special characters ..."
@@ -585,6 +668,7 @@ function add_all_tests {
add_tests test_rename_before_close
add_tests test_multipart_upload
add_tests test_multipart_copy
add_tests test_multipart_mix
add_tests test_special_characters
add_tests test_symlink
add_tests test_extended_attributes