/* * s3fs - FUSE-based file system backed by Amazon S3 * * Copyright(C) 2007 Takeshi Nakatani * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common.h" #include "fdcache.h" #include "s3fs.h" #include "s3fs_util.h" #include "string_util.h" #include "curl.h" using namespace std; //------------------------------------------------ // Symbols //------------------------------------------------ static const int MAX_MULTIPART_CNT = 10 * 1000; // S3 multipart max count // // For cache directory top path // #if defined(P_tmpdir) #define TMPFILE_DIR_0PATH P_tmpdir #else #define TMPFILE_DIR_0PATH "/tmp" #endif //------------------------------------------------ // Local utility function //------------------------------------------------ // Wrapped s3fs_strtoofft() // // PageList::Serialize method uses s3fs_strtoofft function, // but s3fs_strtoofft throws an exception, so wrap it in // this function. // PageList::Serialize works as 0 when the read data is invalid. // static off_t wrap_strtoofft(const char* str) { off_t result; if(str){ try{ result = s3fs_strtoofft(str); }catch(std::exception &e){ S3FS_PRN_WARN("something error is occurred in convert string to off_t, so return 0 as default."); result = 0; } }else{ S3FS_PRN_WARN("parameter is null, so return 0 as default."); result = 0; } return result; } //------------------------------------------------ // CacheFileStat class methods //------------------------------------------------ bool CacheFileStat::MakeCacheFileStatPath(const char* path, string& sfile_path, bool is_create_dir) { // make stat dir top path( "//..stat" ) string top_path = FdManager::GetCacheDir(); top_path += "/."; top_path += bucket; top_path += ".stat"; if(is_create_dir){ int result; if(0 != (result = mkdirp(top_path + mydirname(path), 0777))){ S3FS_PRN_ERR("failed to create dir(%s) by errno(%d).", path, result); return false; } } if(!path || '\0' == path[0]){ sfile_path = top_path; }else{ sfile_path = top_path + SAFESTRPTR(path); } return true; } bool CacheFileStat::CheckCacheFileStatTopDir() { if(!FdManager::IsCacheDir()){ return true; } // make stat dir top path( "//..stat" ) string top_path = FdManager::GetCacheDir(); top_path += "/."; top_path += bucket; top_path += ".stat"; return check_exist_dir_permission(top_path.c_str()); } bool CacheFileStat::DeleteCacheFileStat(const char* path) { if(!path || '\0' == path[0]){ return false; } // stat path string sfile_path; if(!CacheFileStat::MakeCacheFileStatPath(path, sfile_path, false)){ S3FS_PRN_ERR("failed to create cache stat file path(%s)", path); return false; } if(0 != unlink(sfile_path.c_str())){ if(ENOENT == errno){ S3FS_PRN_DBG("failed to delete file(%s): errno=%d", path, errno); }else{ S3FS_PRN_ERR("failed to delete file(%s): errno=%d", path, errno); } return false; } return true; } // [NOTE] // If remove stat file directory, it should do before removing // file cache directory. // bool CacheFileStat::DeleteCacheFileStatDirectory() { string top_path = FdManager::GetCacheDir(); if(top_path.empty() || bucket.empty()){ return true; } top_path += "/."; top_path += bucket; top_path += ".stat"; return delete_files_in_dir(top_path.c_str(), true); } bool CacheFileStat::RenameCacheFileStat(const char* oldpath, const char* newpath) { if(!oldpath || '\0' == oldpath[0] || !newpath || '\0' == newpath[0]){ return false; } // stat path string old_filestat; string new_filestat; if(!CacheFileStat::MakeCacheFileStatPath(oldpath, old_filestat, false) || !CacheFileStat::MakeCacheFileStatPath(newpath, new_filestat, false)){ return false; } // check new stat path struct stat st; if(0 == stat(new_filestat.c_str(), &st)){ // new stat path is existed, then unlink it. if(-1 == unlink(new_filestat.c_str())){ S3FS_PRN_ERR("failed to unlink new cache file stat path(%s) by errno(%d).", new_filestat.c_str(), errno); return false; } } // check old stat path if(0 != stat(old_filestat.c_str(), &st)){ // old stat path is not existed, then nothing to do any more. return true; } // link and unlink if(-1 == link(old_filestat.c_str(), new_filestat.c_str())){ S3FS_PRN_ERR("failed to link old cache file stat path(%s) to new cache file stat path(%s) by errno(%d).", old_filestat.c_str(), new_filestat.c_str(), errno); return false; } if(-1 == unlink(old_filestat.c_str())){ S3FS_PRN_ERR("failed to unlink old cache file stat path(%s) by errno(%d).", old_filestat.c_str(), errno); return false; } return true; } //------------------------------------------------ // CacheFileStat methods //------------------------------------------------ CacheFileStat::CacheFileStat(const char* tpath) : path(""), fd(-1) { if(tpath && '\0' != tpath[0]){ SetPath(tpath, true); } } CacheFileStat::~CacheFileStat() { Release(); } bool CacheFileStat::SetPath(const char* tpath, bool is_open) { if(!tpath || '\0' == tpath[0]){ return false; } if(!Release()){ // could not close old stat file. return false; } path = tpath; if(!is_open){ return true; } return Open(); } bool CacheFileStat::Open() { if(path.empty()){ return false; } if(-1 != fd){ // already opened return true; } // stat path string sfile_path; if(!CacheFileStat::MakeCacheFileStatPath(path.c_str(), sfile_path, true)){ S3FS_PRN_ERR("failed to create cache stat file path(%s)", path.c_str()); return false; } // open if(-1 == (fd = open(sfile_path.c_str(), O_CREAT|O_RDWR, 0600))){ S3FS_PRN_ERR("failed to open cache stat file path(%s) - errno(%d)", path.c_str(), errno); return false; } // lock if(-1 == flock(fd, LOCK_EX)){ S3FS_PRN_ERR("failed to lock cache stat file(%s) - errno(%d)", path.c_str(), errno); close(fd); fd = -1; return false; } // seek top if(0 != lseek(fd, 0, SEEK_SET)){ S3FS_PRN_ERR("failed to lseek cache stat file(%s) - errno(%d)", path.c_str(), errno); flock(fd, LOCK_UN); close(fd); fd = -1; return false; } S3FS_PRN_DBG("file locked(%s - %s)", path.c_str(), sfile_path.c_str()); return true; } bool CacheFileStat::Release() { if(-1 == fd){ // already release return true; } // unlock if(-1 == flock(fd, LOCK_UN)){ S3FS_PRN_ERR("failed to unlock cache stat file(%s) - errno(%d)", path.c_str(), errno); return false; } S3FS_PRN_DBG("file unlocked(%s)", path.c_str()); if(-1 == close(fd)){ S3FS_PRN_ERR("failed to close cache stat file(%s) - errno(%d)", path.c_str(), errno); return false; } fd = -1; return true; } //------------------------------------------------ // PageList methods //------------------------------------------------ void PageList::FreeList(fdpage_list_t& list) { list.clear(); } PageList::PageList(off_t size, bool is_loaded, bool is_modified) { Init(size, is_loaded, is_modified); } PageList::PageList(const PageList& other) { for(fdpage_list_t::const_iterator iter = other.pages.begin(); iter != other.pages.end(); ++iter){ pages.push_back(*iter); } } PageList::~PageList() { Clear(); } void PageList::Clear() { PageList::FreeList(pages); } bool PageList::Init(off_t size, bool is_loaded, bool is_modified) { Clear(); fdpage page(0, size, is_loaded, is_modified); pages.push_back(page); return true; } off_t PageList::Size() const { if(pages.empty()){ return 0; } fdpage_list_t::const_reverse_iterator riter = pages.rbegin(); return riter->next(); } bool PageList::Compress(bool force_modified) { bool is_first = true; bool is_last_loaded = false; bool is_last_modified = false; for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ){ if(is_first){ is_first = false; is_last_loaded = force_modified ? true : iter->loaded; is_last_modified = iter->modified; ++iter; }else{ if(is_last_modified == iter->modified){ if(force_modified || is_last_loaded == iter->loaded){ fdpage_list_t::iterator biter = iter; --biter; biter->bytes += iter->bytes; iter = pages.erase(iter); }else{ is_last_loaded = iter->loaded; is_last_modified = iter->modified; ++iter; } }else{ is_last_loaded = force_modified ? true : iter->loaded; is_last_modified = iter->modified; ++iter; } } } return true; } bool PageList::Parse(off_t new_pos) { for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(new_pos == iter->offset){ // nothing to do return true; }else if(iter->offset < new_pos && new_pos < iter->next()){ fdpage page(iter->offset, new_pos - iter->offset, iter->loaded, false); iter->bytes -= (new_pos - iter->offset); iter->offset = new_pos; pages.insert(iter, page); return true; } } return false; } bool PageList::Resize(off_t size, bool is_loaded, bool is_modified) { off_t total = Size(); if(0 == total){ Init(size, is_loaded, is_modified); }else if(total < size){ // add new area fdpage page(total, (size - total), is_loaded, is_modified); pages.push_back(page); }else if(size < total){ // cut area for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ){ if(iter->next() <= size){ ++iter; }else{ if(size <= iter->offset){ iter = pages.erase(iter); }else{ iter->bytes = size - iter->offset; } } } }else{ // total == size // nothing to do } // compress area return Compress(); } bool PageList::IsPageLoaded(off_t start, off_t size) const { for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(iter->end() < start){ continue; } if(!iter->loaded){ return false; } if(0 != size && start + size <= iter->next()){ break; } } return true; } bool PageList::SetPageLoadedStatus(off_t start, off_t size, PageList::page_status pstatus, bool is_compress) { off_t now_size = Size(); bool is_loaded = (PAGE_LOAD_MODIFIED == pstatus || PAGE_LOADED == pstatus); bool is_modified = (PAGE_LOAD_MODIFIED == pstatus || PAGE_MODIFIED == pstatus); if(now_size <= start){ if(now_size < start){ // add Resize(start, false, is_modified); // set modified flag from now end pos to specified start pos. } Resize(start + size, is_loaded, is_modified); }else if(now_size <= start + size){ // cut Resize(start, false, false); // not changed loaded/modified flags in existing area. // add Resize(start + size, is_loaded, is_modified); }else{ // start-size are inner pages area // parse "start", and "start + size" position Parse(start); Parse(start + size); // set loaded flag for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(iter->end() < start){ continue; }else if(start + size <= iter->offset){ break; }else{ iter->loaded = is_loaded; iter->modified = is_modified; } } } // compress area return (is_compress ? Compress() : true); } bool PageList::FindUnloadedPage(off_t start, off_t& resstart, off_t& ressize) const { for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(start <= iter->end()){ if(!iter->loaded && !iter->modified){ // Do not load unloaded and modified areas resstart = iter->offset; ressize = iter->bytes; return true; } } } return false; } off_t PageList::GetTotalUnloadedPageSize(off_t start, off_t size) const { off_t restsize = 0; off_t next = start + size; for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(iter->next() <= start){ continue; } if(next <= iter->offset){ break; } if(iter->loaded || iter->modified){ continue; } off_t tmpsize; if(iter->offset <= start){ if(iter->next() <= next){ tmpsize = (iter->next() - start); }else{ tmpsize = next - start; // = size } }else{ if(iter->next() <= next){ tmpsize = iter->next() - iter->offset; // = iter->bytes }else{ tmpsize = next - iter->offset; } } restsize += tmpsize; } return restsize; } int PageList::GetUnloadedPages(fdpage_list_t& unloaded_list, off_t start, off_t size) const { // If size is 0, it means loading to end. if(0 == size){ if(start < Size()){ size = Size() - start; } } off_t next = start + size; for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(iter->next() <= start){ continue; } if(next <= iter->offset){ break; } if(iter->loaded || iter->modified){ continue; // already loaded or modified } // page area off_t page_start = max(iter->offset, start); off_t page_next = min(iter->next(), next); off_t page_size = page_next - page_start; // add list fdpage_list_t::reverse_iterator riter = unloaded_list.rbegin(); if(riter != unloaded_list.rend() && riter->next() == page_start){ // merge to before page riter->bytes += page_size; }else{ fdpage page(page_start, page_size, false, false); unloaded_list.push_back(page); } } return unloaded_list.size(); } // [NOTE] // This method is called in advance when mixing POST and COPY in multi-part upload. // The minimum size of each part must be 5 MB, and the data area below this must be // downloaded from S3. // This method checks the current PageList status and returns the area that needs // to be downloaded so that each part is at least 5 MB. // bool PageList::GetLoadPageListForMultipartUpload(fdpage_list_t& dlpages) { // compress before this processing if(!Compress()){ return false; } bool is_prev_modified_page = false; off_t accumulated_bytes = 0; off_t last_modified_bytes = 0; for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(iter->modified){ // this is modified page if(is_prev_modified_page){ // in case of continuous modified page accumulated_bytes += iter->bytes; }else{ // previous page is unmodified page // check unmodified page bytes is over minimum size(5MB) if(static_cast(MIN_MULTIPART_SIZE) <= accumulated_bytes){ // over minimum size accumulated_bytes = iter->bytes; // reset accumulated size }else{ // less than minimum size(5MB) // the previous unmodified page needs to load, if it is not loaded. // and that page will be included in consecutive modified page. PageList::RawGetUnloadPageList(dlpages, (iter->offset - accumulated_bytes), accumulated_bytes); accumulated_bytes += last_modified_bytes + iter->bytes; // this page size and last modified page size are accumulated last_modified_bytes = 0; } is_prev_modified_page = true; } }else{ // this is unmodified page if(!is_prev_modified_page){ // in case of continuous unmodified page accumulated_bytes += iter->bytes; }else{ // previous page is modified page // check modified page bytes is over minimum size(5MB) if(static_cast(MIN_MULTIPART_SIZE) <= accumulated_bytes){ // over minimum size last_modified_bytes = accumulated_bytes; // backup last modified page size accumulated_bytes = iter->bytes; // set new accumulated size(this page size) is_prev_modified_page = false; }else{ // less than minimum size(5MB) // this unmodified page needs to load, if it is not loaded. // and this page will be included in consecutive modified page. if((static_cast(MIN_MULTIPART_SIZE) - accumulated_bytes) <= iter->bytes){ // Split the missing size from this page size for just before modified page. if(!iter->loaded){ // because this page is not loaded fdpage dlpage(iter->offset, (iter->bytes - (static_cast(MIN_MULTIPART_SIZE) - accumulated_bytes))); // don't care for loaded/modified flag dlpages.push_back(dlpage); } last_modified_bytes = static_cast(MIN_MULTIPART_SIZE); // backup last modified page size accumulated_bytes = iter->bytes - (static_cast(MIN_MULTIPART_SIZE) - accumulated_bytes); // set rest bytes to accumulated size is_prev_modified_page = false; }else{ // assign all this page sizes to just before modified page. // but still it is not enough for the minimum size. if(!iter->loaded){ // because this page is not loaded fdpage dlpage(iter->offset, iter->bytes); // don't care for loaded/modified flag dlpages.push_back(dlpage); } accumulated_bytes += iter->bytes; // add all bytes to accumulated size } } } } } // compress dlpages bool is_first = true; for(fdpage_list_t::iterator dliter = dlpages.begin(); dliter != dlpages.end(); ){ if(is_first){ is_first = false; ++dliter; continue; } fdpage_list_t::iterator biter = dliter; --biter; if((biter->offset + biter->bytes) == dliter->offset){ biter->bytes += dliter->bytes; dliter = dlpages.erase(dliter); }else{ ++dliter; } } return true; } // [NOTE] // This static method assumes that it is called only from GetLoadPageListForMultipartUpload. // If you want to exclusive control, please do with GetLoadPageListForMultipartUpload, // not with this method. // bool PageList::RawGetUnloadPageList(fdpage_list_t& dlpages, off_t offset, off_t size) { for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ if((iter->offset + iter->bytes) <= offset){ continue; }else if((offset + size) <= iter->offset){ break; }else{ if(!iter->loaded && !iter->modified){ fdpage dlpage(iter->offset, iter->bytes); // don't care for loaded/modified flag dlpages.push_back(dlpage); } } } return true; } bool PageList::GetMultipartSizeList(fdpage_list_t& mplist, off_t partsize) const { if(!mplist.empty()){ return false; } // temporary page list PageList tmpPageObj(*this); if(!tmpPageObj.Compress(true)){ // compress by modified flag return false; } // [NOTE] // Set the modified flag in page list to the minimum size. // This process needs to match the GetLoadPageListForMultipartUpload method exactly. // // [FIXME] // Make the common processing of GetLoadPageListForMultipartUpload and this method // to one method. // bool is_first = true; bool is_prev_modified_page = false; off_t accumulated_bytes = 0; off_t last_modified_bytes = 0; fdpage_list_t::iterator iter; for(iter = tmpPageObj.pages.begin(); iter != tmpPageObj.pages.end(); ++iter){ if(is_first){ is_prev_modified_page = iter->modified; is_first = false; } if(iter->modified){ // this is modified page if(is_prev_modified_page){ // in case of continuous modified page accumulated_bytes += iter->bytes; }else{ // previous page is unmodified page // check unmodified page bytes is over minimum size(5MB) if(static_cast(MIN_MULTIPART_SIZE) <= accumulated_bytes){ // over minimum size accumulated_bytes = iter->bytes; // reset accumulated size }else{ // less than minimum size(5MB) // the previous unmodified page is set modified flag. fdpage_list_t::iterator biter = iter; --biter; biter->loaded = true; biter->modified = true; accumulated_bytes += last_modified_bytes + iter->bytes; // this page size and last modified page size are accumulated last_modified_bytes = 0; } is_prev_modified_page = true; } }else{ // this is unmodified page if(!is_prev_modified_page){ // in case of continuous unmodified page accumulated_bytes += iter->bytes; }else{ // previous page is modified page // check modified page bytes is over minimum size(5MB) if(static_cast(MIN_MULTIPART_SIZE) <= accumulated_bytes){ // over minimum size last_modified_bytes = accumulated_bytes; // backup last modified page size accumulated_bytes = iter->bytes; // set new accumulated size(this page size) is_prev_modified_page = false; }else{ // less than minimum size(5MB) // this unmodified page is set modified flag. if((static_cast(MIN_MULTIPART_SIZE) - accumulated_bytes) <= iter->bytes){ // Split the missing size from this page size for just before modified page. fdpage newpage(iter->offset, (static_cast(MIN_MULTIPART_SIZE) - accumulated_bytes), true, true); iter->bytes -= (static_cast(MIN_MULTIPART_SIZE) - accumulated_bytes); iter->offset += (static_cast(MIN_MULTIPART_SIZE) - accumulated_bytes); tmpPageObj.pages.insert(iter, newpage); last_modified_bytes = static_cast(MIN_MULTIPART_SIZE); // backup last modified page size accumulated_bytes = iter->bytes; // set rest bytes to accumulated size is_prev_modified_page = false; }else{ // assign all this page sizes to just before modified page. // but still it is not enough for the minimum size. accumulated_bytes += iter->bytes; // add all bytes to accumulated size } } } } } // recompress if(!tmpPageObj.Compress(true)){ // compress by modified flag return false; } // normalization for uploading parts for(iter = tmpPageObj.pages.begin(); iter != tmpPageObj.pages.end(); ++iter){ off_t start = iter->offset; off_t remains = iter->bytes; while(0 < remains){ off_t onesize; if(iter->modified){ // Uploading parts, this page must be 5MB - partsize onesize = std::min(remains, partsize); }else{ // Not uploading parts, this page must be 5MB - 5GB onesize = std::min(remains, static_cast(FIVE_GB)); } fdpage page(start, onesize, iter->loaded, iter->modified); mplist.push_back(page); start += onesize; remains -= onesize; } } return true; } bool PageList::IsModified() const { for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(iter->modified){ return true; } } return false; } bool PageList::ClearAllModified() { for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){ if(iter->modified){ iter->modified = false; } } return Compress(); } bool PageList::Serialize(CacheFileStat& file, bool is_output, ino_t inode) { if(!file.Open()){ return false; } if(is_output){ // // put to file // ostringstream ssall; ssall << inode << ":" << Size(); for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){ ssall << "\n" << iter->offset << ":" << iter->bytes << ":" << (iter->loaded ? "1" : "0") << ":" << (iter->modified ? "1" : "0"); } if(-1 == ftruncate(file.GetFd(), 0)){ S3FS_PRN_ERR("failed to truncate file(to 0) for stats(%d)", errno); return false; } string strall = ssall.str(); if(0 >= pwrite(file.GetFd(), strall.c_str(), strall.length(), 0)){ S3FS_PRN_ERR("failed to write stats(%d)", errno); return false; } }else{ // // loading from file // struct stat st; memset(&st, 0, sizeof(struct stat)); if(-1 == fstat(file.GetFd(), &st)){ S3FS_PRN_ERR("fstat is failed. errno(%d)", errno); return false; } if(0 >= st.st_size){ // nothing Init(0, false, false); return true; } char* ptmp = new char[st.st_size + 1]; ptmp[st.st_size] = '\0'; // read from file if(0 >= pread(file.GetFd(), ptmp, st.st_size, 0)){ S3FS_PRN_ERR("failed to read stats(%d)", errno); delete[] ptmp; return false; } string oneline; istringstream ssall(ptmp); // loaded Clear(); // load head line(for size and inode) off_t total; ino_t cache_inode; // if this value is 0, it means old format. if(!getline(ssall, oneline, '\n')){ S3FS_PRN_ERR("failed to parse stats."); delete[] ptmp; return false; }else{ istringstream sshead(oneline); string strhead1; string strhead2; // get first part in head line. if(!getline(sshead, strhead1, ':')){ S3FS_PRN_ERR("failed to parse stats."); delete[] ptmp; return false; } // get second part in head line. if(!getline(sshead, strhead2, ':')){ // old head format is "\n" total = wrap_strtoofft(strhead1.c_str()); cache_inode = 0; }else{ // current head format is ":\n" total = wrap_strtoofft(strhead1.c_str()); cache_inode = static_cast(wrap_strtoofft(strhead2.c_str())); if(0 == cache_inode){ S3FS_PRN_ERR("wrong inode number in parsed cache stats."); delete[] ptmp; return false; } } } // check inode number if(0 != cache_inode && cache_inode != inode){ S3FS_PRN_ERR("differ inode and inode number in parsed cache stats."); delete[] ptmp; return false; } // load each part bool is_err = false; while(getline(ssall, oneline, '\n')){ string part; istringstream ssparts(oneline); // offset if(!getline(ssparts, part, ':')){ is_err = true; break; } off_t offset = wrap_strtoofft(part.c_str()); // size if(!getline(ssparts, part, ':')){ is_err = true; break; } off_t size = wrap_strtoofft(part.c_str()); // loaded if(!getline(ssparts, part, ':')){ is_err = true; break; } bool is_loaded = (1 == wrap_strtoofft(part.c_str()) ? true : false); bool is_modified; if(!getline(ssparts, part, ':')){ is_modified = false; // old version does not have this part. }else{ is_modified = (1 == wrap_strtoofft(part.c_str()) ? true : false); } // add new area PageList::page_status pstatus = ( is_loaded && is_modified ? PageList::PAGE_LOAD_MODIFIED : !is_loaded && is_modified ? PageList::PAGE_MODIFIED : is_loaded && !is_modified ? PageList::PAGE_LOADED : PageList::PAGE_NOT_LOAD_MODIFIED ); SetPageLoadedStatus(offset, size, pstatus); } delete[] ptmp; if(is_err){ S3FS_PRN_ERR("failed to parse stats."); Clear(); return false; } // check size if(total != Size()){ S3FS_PRN_ERR("different size(%lld - %lld).", static_cast(total), static_cast(Size())); Clear(); return false; } } return true; } void PageList::Dump() { int cnt = 0; S3FS_PRN_DBG("pages = {"); for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter, ++cnt){ S3FS_PRN_DBG(" [%08d] -> {%014lld - %014lld : %s / %s}", cnt, static_cast(iter->offset), static_cast(iter->bytes), iter->loaded ? "loaded" : "unloaded", iter->modified ? "modified" : "not modified"); } S3FS_PRN_DBG("}"); } //------------------------------------------------ // FdEntity class methods //------------------------------------------------ bool FdEntity::mixmultipart = true; bool FdEntity::SetNoMixMultipart() { bool old = mixmultipart; mixmultipart = false; return old; } int FdEntity::FillFile(int fd, unsigned char byte, off_t size, off_t start) { unsigned char bytes[1024 * 32]; // 32kb memset(bytes, byte, min(static_cast(sizeof(bytes)), size)); for(off_t total = 0, onewrote = 0; total < size; total += onewrote){ if(-1 == (onewrote = pwrite(fd, bytes, min(static_cast(sizeof(bytes)), size - total), start + total))){ S3FS_PRN_ERR("pwrite failed. errno(%d)", errno); return -errno; } } return 0; } // [NOTE] // If fd is wrong or something error is occurred, return 0. // The ino_t is allowed zero, but inode 0 is not realistic. // So this method returns 0 on error assuming the correct // inode is never 0. // The caller must have exclusive control. // ino_t FdEntity::GetInode(int fd) { if(-1 == fd){ S3FS_PRN_ERR("file descriptor is wrong."); return 0; } struct stat st; if(0 != fstat(fd, &st)){ S3FS_PRN_ERR("could not get stat for file descriptor(%d) by errno(%d).", fd, errno); return 0; } return st.st_ino; } //------------------------------------------------ // FdEntity methods //------------------------------------------------ FdEntity::FdEntity(const char* tpath, const char* cpath) : is_lock_init(false), refcnt(0), path(SAFESTRPTR(tpath)), fd(-1), pfile(NULL), inode(0), size_orgmeta(0), upload_id(""), mp_start(0), mp_size(0), cachepath(SAFESTRPTR(cpath)), mirrorpath("") { try{ pthread_mutexattr_t attr; pthread_mutexattr_init(&attr); #if S3FS_PTHREAD_ERRORCHECK pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); #endif pthread_mutex_init(&fdent_lock, &attr); pthread_mutex_init(&fdent_data_lock, &attr); is_lock_init = true; }catch(exception& e){ S3FS_PRN_CRIT("failed to init mutex"); } } FdEntity::~FdEntity() { Clear(); if(is_lock_init){ try{ pthread_mutex_destroy(&fdent_data_lock); pthread_mutex_destroy(&fdent_lock); }catch(exception& e){ S3FS_PRN_CRIT("failed to destroy mutex"); } is_lock_init = false; } } void FdEntity::Clear() { AutoLock auto_lock(&fdent_lock); AutoLock auto_data_lock(&fdent_data_lock); if(-1 != fd){ if(!cachepath.empty()){ // [NOTE] // Compare the inode of the existing cache file with the inode of // the cache file output by this object, and if they are the same, // serialize the pagelist. // ino_t cur_inode = GetInode(); if(0 != cur_inode && cur_inode == inode){ CacheFileStat cfstat(path.c_str()); if(!pagelist.Serialize(cfstat, true, inode)){ S3FS_PRN_WARN("failed to save cache stat file(%s).", path.c_str()); } } } if(pfile){ fclose(pfile); pfile = NULL; } fd = -1; inode = 0; if(!mirrorpath.empty()){ if(-1 == unlink(mirrorpath.c_str())){ S3FS_PRN_WARN("failed to remove mirror cache file(%s) by errno(%d).", mirrorpath.c_str(), errno); } mirrorpath.erase(); } } pagelist.Init(0, false, false); refcnt = 0; path = ""; cachepath = ""; } // [NOTE] // This method returns the inode of the file in cachepath. // The return value is the same as the class method GetInode(). // The caller must have exclusive control. // ino_t FdEntity::GetInode() { if(cachepath.empty()){ S3FS_PRN_INFO("cache file path is empty, then return inode as 0."); return 0; } struct stat st; if(0 != stat(cachepath.c_str(), &st)){ S3FS_PRN_INFO("could not get stat for file(%s) by errno(%d).", cachepath.c_str(), errno); return 0; } return st.st_ino; } void FdEntity::Close() { AutoLock auto_lock(&fdent_lock); S3FS_PRN_DBG("[path=%s][fd=%d][refcnt=%d]", path.c_str(), fd, (-1 != fd ? refcnt - 1 : refcnt)); if(-1 != fd){ if(0 < refcnt){ refcnt--; }else{ S3FS_PRN_EXIT("reference count underflow"); abort(); } if(0 == refcnt){ AutoLock auto_data_lock(&fdent_data_lock); if(!cachepath.empty()){ // [NOTE] // Compare the inode of the existing cache file with the inode of // the cache file output by this object, and if they are the same, // serialize the pagelist. // ino_t cur_inode = GetInode(); if(0 != cur_inode && cur_inode == inode){ CacheFileStat cfstat(path.c_str()); if(!pagelist.Serialize(cfstat, true, inode)){ S3FS_PRN_WARN("failed to save cache stat file(%s).", path.c_str()); } } } if(pfile){ fclose(pfile); pfile = NULL; } fd = -1; inode = 0; if(!mirrorpath.empty()){ if(-1 == unlink(mirrorpath.c_str())){ S3FS_PRN_WARN("failed to remove mirror cache file(%s) by errno(%d).", mirrorpath.c_str(), errno); } mirrorpath.erase(); } } } } int FdEntity::Dup(bool lock_already_held) { AutoLock auto_lock(&fdent_lock, lock_already_held ? AutoLock::ALREADY_LOCKED : AutoLock::NONE); S3FS_PRN_DBG("[path=%s][fd=%d][refcnt=%d]", path.c_str(), fd, (-1 != fd ? refcnt + 1 : refcnt)); if(-1 != fd){ refcnt++; } return fd; } // // Open mirror file which is linked cache file. // int FdEntity::OpenMirrorFile() { if(cachepath.empty()){ S3FS_PRN_ERR("cache path is empty, why come here"); return -EIO; } // make temporary directory string bupdir; if(!FdManager::MakeCachePath(NULL, bupdir, true, true)){ S3FS_PRN_ERR("could not make bup cache directory path or create it."); return -EIO; } // create seed generating mirror file name unsigned int seed = static_cast(time(NULL)); int urandom_fd; if(-1 != (urandom_fd = open("/dev/urandom", O_RDONLY))){ unsigned int rand_data; if(sizeof(rand_data) == read(urandom_fd, &rand_data, sizeof(rand_data))){ seed ^= rand_data; } close(urandom_fd); } // try to link mirror file while(true){ // make random(temp) file path // (do not care for threading, because allowed any value returned.) // char szfile[NAME_MAX + 1]; sprintf(szfile, "%x.tmp", rand_r(&seed)); mirrorpath = bupdir + "/" + szfile; // link mirror file to cache file if(0 == link(cachepath.c_str(), mirrorpath.c_str())){ break; } if(EEXIST != errno){ S3FS_PRN_ERR("could not link mirror file(%s) to cache file(%s) by errno(%d).", mirrorpath.c_str(), cachepath.c_str(), errno); return -errno; } ++seed; } // open mirror file int mirrorfd; if(-1 == (mirrorfd = open(mirrorpath.c_str(), O_RDWR))){ S3FS_PRN_ERR("could not open mirror file(%s) by errno(%d).", mirrorpath.c_str(), errno); return -errno; } return mirrorfd; } int FdEntity::Open(headers_t* pmeta, off_t size, time_t time, bool no_fd_lock_wait) { AutoLock auto_lock(&fdent_lock, no_fd_lock_wait ? AutoLock::NO_WAIT : AutoLock::NONE); S3FS_PRN_DBG("[path=%s][fd=%d][size=%lld][time=%lld]", path.c_str(), fd, static_cast(size), static_cast(time)); if (!auto_lock.isLockAcquired()) { // had to wait for fd lock, return return -EIO; } S3FS_PRN_DBG("[path=%s][fd=%d][size=%lld][time=%lld]", path.c_str(), fd, static_cast(size), static_cast(time)); AutoLock auto_data_lock(&fdent_data_lock); if(-1 != fd){ // already opened, needs to increment refcnt. Dup(/*lock_already_held=*/ true); // check only file size(do not need to save cfs and time. if(0 <= size && pagelist.Size() != size){ // truncate temporary file size if(-1 == ftruncate(fd, size)){ S3FS_PRN_ERR("failed to truncate temporary file(%d) by errno(%d).", fd, errno); if(0 < refcnt){ refcnt--; } return -EIO; } // resize page list if(!pagelist.Resize(size, false, false)){ S3FS_PRN_ERR("failed to truncate temporary file information(%d).", fd); if(0 < refcnt){ refcnt--; } return -EIO; } } // set original headers and set size. off_t new_size = (0 <= size ? size : size_orgmeta); if(pmeta){ orgmeta = *pmeta; new_size = get_size(orgmeta); } if(new_size < size_orgmeta){ size_orgmeta = new_size; } return 0; } bool need_save_csf = false; // need to save(reset) cache stat file bool is_truncate = false; // need to truncate if(!cachepath.empty()){ // using cache struct stat st; if(stat(cachepath.c_str(), &st) == 0){ if(st.st_mtime < time){ S3FS_PRN_DBG("cache file stale, removing: %s", cachepath.c_str()); if(unlink(cachepath.c_str()) != 0){ return (0 == errno ? -EIO : -errno); } } } // open cache and cache stat file, load page info. CacheFileStat cfstat(path.c_str()); // try to open cache file if( -1 != (fd = open(cachepath.c_str(), O_RDWR)) && 0 != (inode = FdEntity::GetInode(fd)) && pagelist.Serialize(cfstat, false, inode) ) { // succeed to open cache file and to load stats data memset(&st, 0, sizeof(struct stat)); if(-1 == fstat(fd, &st)){ S3FS_PRN_ERR("fstat is failed. errno(%d)", errno); fd = -1; inode = 0; return (0 == errno ? -EIO : -errno); } // check size, st_size, loading stat file if(-1 == size){ if(st.st_size != pagelist.Size()){ pagelist.Resize(st.st_size, false, false); need_save_csf = true; // need to update page info } size = st.st_size; }else{ if(size != pagelist.Size()){ pagelist.Resize(size, false, false); need_save_csf = true; // need to update page info } if(size != st.st_size){ is_truncate = true; } } }else{ if(-1 != fd){ close(fd); } inode = 0; // could not open cache file or could not load stats data, so initialize it. if(-1 == (fd = open(cachepath.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0600))){ S3FS_PRN_ERR("failed to open file(%s). errno(%d)", cachepath.c_str(), errno); // remove cache stat file if it is existed if(!CacheFileStat::DeleteCacheFileStat(path.c_str())){ if(ENOENT != errno){ S3FS_PRN_WARN("failed to delete current cache stat file(%s) by errno(%d), but continue...", path.c_str(), errno); } } return (0 == errno ? -EIO : -errno); } need_save_csf = true; // need to update page info inode = FdEntity::GetInode(fd); if(-1 == size){ size = 0; pagelist.Init(0, false, false); }else{ pagelist.Resize(size, false, false); is_truncate = true; } } // open mirror file int mirrorfd; if(0 >= (mirrorfd = OpenMirrorFile())){ S3FS_PRN_ERR("failed to open mirror file linked cache file(%s).", cachepath.c_str()); return (0 == mirrorfd ? -EIO : mirrorfd); } // switch fd close(fd); fd = mirrorfd; // make file pointer(for being same tmpfile) if(NULL == (pfile = fdopen(fd, "wb"))){ S3FS_PRN_ERR("failed to get fileno(%s). errno(%d)", cachepath.c_str(), errno); close(fd); fd = -1; inode = 0; return (0 == errno ? -EIO : -errno); } }else{ // not using cache inode = 0; // open temporary file if(NULL == (pfile = tmpfile()) || -1 ==(fd = fileno(pfile))){ S3FS_PRN_ERR("failed to open tmp file. err(%d)", errno); if(pfile){ fclose(pfile); pfile = NULL; } return (0 == errno ? -EIO : -errno); } if(-1 == size){ size = 0; pagelist.Init(0, false, false); }else{ pagelist.Resize(size, false, false); is_truncate = true; } } // truncate cache(tmp) file if(is_truncate){ if(0 != ftruncate(fd, size) || 0 != fsync(fd)){ S3FS_PRN_ERR("ftruncate(%s) or fsync returned err(%d)", cachepath.c_str(), errno); fclose(pfile); pfile = NULL; fd = -1; inode = 0; return (0 == errno ? -EIO : -errno); } } // reset cache stat file if(need_save_csf){ CacheFileStat cfstat(path.c_str()); if(!pagelist.Serialize(cfstat, true, inode)){ S3FS_PRN_WARN("failed to save cache stat file(%s), but continue...", path.c_str()); } } // init internal data refcnt = 1; // set original headers and size in it. if(pmeta){ orgmeta = *pmeta; size_orgmeta = get_size(orgmeta); }else{ orgmeta.clear(); size_orgmeta = 0; } // set mtime(set "x-amz-meta-mtime" in orgmeta) if(-1 != time){ if(0 != SetMtime(time, /*lock_already_held=*/ true)){ S3FS_PRN_ERR("failed to set mtime. errno(%d)", errno); fclose(pfile); pfile = NULL; fd = -1; inode = 0; return (0 == errno ? -EIO : -errno); } } return 0; } // [NOTE] // This method is called from only nocopyapi functions. // So we do not check disk space for this option mode, if there is no enough // disk space this method will be failed. // bool FdEntity::OpenAndLoadAll(headers_t* pmeta, off_t* size, bool force_load) { AutoLock auto_lock(&fdent_lock); int result; S3FS_PRN_INFO3("[path=%s][fd=%d]", path.c_str(), fd); if(-1 == fd){ if(0 != Open(pmeta)){ return false; } } AutoLock auto_data_lock(&fdent_data_lock); if(force_load){ SetAllStatusUnloaded(); } // // TODO: possibly do background for delay loading // if(0 != (result = Load(/*start=*/ 0, /*size=*/ 0, /*lock_already_held=*/ true))){ S3FS_PRN_ERR("could not download, result(%d)", result); return false; } if(size){ *size = pagelist.Size(); } return true; } // // Rename file path. // // This method sets the FdManager::fent map registration key to fentmapkey. // // [NOTE] // This method changes the file path of FdEntity. // Old file is deleted after linking to the new file path, and this works // without problem because the file descriptor is not affected even if the // cache file is open. // The mirror file descriptor is also the same. The mirror file path does // not need to be changed and will remain as it is. // bool FdEntity::RenamePath(const string& newpath, string& fentmapkey) { if(!cachepath.empty()){ // has cache path // make new cache path string newcachepath; if(!FdManager::MakeCachePath(newpath.c_str(), newcachepath, true)){ S3FS_PRN_ERR("failed to make cache path for object(%s).", newpath.c_str()); return false; } // link and unlink cache file if(-1 == link(cachepath.c_str(), newcachepath.c_str())){ S3FS_PRN_ERR("failed to link old cache path(%s) to new cache path(%s) by errno(%d).", cachepath.c_str(), newcachepath.c_str(), errno); return false; } if(-1 == unlink(cachepath.c_str())){ S3FS_PRN_ERR("failed to unlink old cache path(%s) by errno(%d).", cachepath.c_str(), errno); return false; } // link and unlink cache file stat if(!CacheFileStat::RenameCacheFileStat(path.c_str(), newpath.c_str())){ S3FS_PRN_ERR("failed to rename cache file stat(%s to %s).", path.c_str(), newpath.c_str()); return false; } fentmapkey = newpath; cachepath = newcachepath; }else{ // does not have cache path fentmapkey.erase(); FdManager::MakeRandomTempPath(newpath.c_str(), fentmapkey); } // set new path path = newpath; return true; } bool FdEntity::GetStats(struct stat& st, bool lock_already_held) { AutoLock auto_lock(&fdent_lock, lock_already_held ? AutoLock::ALREADY_LOCKED : AutoLock::NONE); if(-1 == fd){ return false; } memset(&st, 0, sizeof(struct stat)); if(-1 == fstat(fd, &st)){ S3FS_PRN_ERR("fstat failed. errno(%d)", errno); return false; } return true; } int FdEntity::SetCtime(time_t time) { if(-1 == time){ return 0; } AutoLock auto_lock(&fdent_lock); orgmeta["x-amz-meta-ctime"] = str(time); return 0; } int FdEntity::SetMtime(time_t time, bool lock_already_held) { AutoLock auto_lock(&fdent_lock, lock_already_held ? AutoLock::ALREADY_LOCKED : AutoLock::NONE); S3FS_PRN_INFO3("[path=%s][fd=%d][time=%lld]", path.c_str(), fd, static_cast(time)); if(-1 == time){ return 0; } if(-1 != fd){ struct timeval tv[2]; tv[0].tv_sec = time; tv[0].tv_usec= 0L; tv[1].tv_sec = tv[0].tv_sec; tv[1].tv_usec= 0L; if(-1 == futimes(fd, tv)){ S3FS_PRN_ERR("futimes failed. errno(%d)", errno); return -errno; } }else if(!cachepath.empty()){ // not opened file yet. struct utimbuf n_mtime; n_mtime.modtime = time; n_mtime.actime = time; if(-1 == utime(cachepath.c_str(), &n_mtime)){ S3FS_PRN_ERR("utime failed. errno(%d)", errno); return -errno; } } orgmeta["x-amz-meta-ctime"] = str(time); orgmeta["x-amz-meta-mtime"] = str(time); return 0; } bool FdEntity::UpdateCtime() { AutoLock auto_lock(&fdent_lock); struct stat st; if(!GetStats(st, /*lock_already_held=*/ true)){ return false; } orgmeta["x-amz-meta-ctime"] = str(st.st_ctime); return true; } bool FdEntity::UpdateMtime() { AutoLock auto_lock(&fdent_lock); struct stat st; if(!GetStats(st, /*lock_already_held=*/ true)){ return false; } orgmeta["x-amz-meta-ctime"] = str(st.st_ctime); orgmeta["x-amz-meta-mtime"] = str(st.st_mtime); return true; } bool FdEntity::GetSize(off_t& size) { AutoLock auto_lock(&fdent_lock); if(-1 == fd){ return false; } size = pagelist.Size(); return true; } bool FdEntity::SetMode(mode_t mode) { AutoLock auto_lock(&fdent_lock); orgmeta["x-amz-meta-mode"] = str(mode); return true; } bool FdEntity::SetUId(uid_t uid) { AutoLock auto_lock(&fdent_lock); orgmeta["x-amz-meta-uid"] = str(uid); return true; } bool FdEntity::SetGId(gid_t gid) { AutoLock auto_lock(&fdent_lock); orgmeta["x-amz-meta-gid"] = str(gid); return true; } bool FdEntity::SetContentType(const char* path) { if(!path){ return false; } AutoLock auto_lock(&fdent_lock); orgmeta["Content-Type"] = S3fsCurl::LookupMimeType(string(path)); return true; } bool FdEntity::SetAllStatus(bool is_loaded) { S3FS_PRN_INFO3("[path=%s][fd=%d][%s]", path.c_str(), fd, is_loaded ? "loaded" : "unloaded"); if(-1 == fd){ return false; } // [NOTE] // this method is only internal use, and calling after locking. // so do not lock now. // //AutoLock auto_lock(&fdent_lock); // get file size struct stat st; memset(&st, 0, sizeof(struct stat)); if(-1 == fstat(fd, &st)){ S3FS_PRN_ERR("fstat is failed. errno(%d)", errno); return false; } // Reinit pagelist.Init(st.st_size, is_loaded, false); return true; } int FdEntity::Load(off_t start, off_t size, bool lock_already_held) { AutoLock auto_lock(&fdent_lock, lock_already_held ? AutoLock::ALREADY_LOCKED : AutoLock::NONE); S3FS_PRN_DBG("[path=%s][fd=%d][offset=%lld][size=%lld]", path.c_str(), fd, static_cast(start), static_cast(size)); if(-1 == fd){ return -EBADF; } AutoLock auto_data_lock(&fdent_data_lock, lock_already_held ? AutoLock::ALREADY_LOCKED : AutoLock::NONE); int result = 0; // check loaded area & load fdpage_list_t unloaded_list; if(0 < pagelist.GetUnloadedPages(unloaded_list, start, size)){ for(fdpage_list_t::iterator iter = unloaded_list.begin(); iter != unloaded_list.end(); ++iter){ if(0 != size && start + size <= iter->offset){ // reached end break; } // check loading size off_t need_load_size = 0; if(iter->offset < size_orgmeta){ // original file size(on S3) is smaller than request. need_load_size = (iter->next() <= size_orgmeta ? iter->bytes : (size_orgmeta - iter->offset)); } // download if(S3fsCurl::GetMultipartSize() <= need_load_size && !nomultipart){ // parallel request result = S3fsCurl::ParallelGetObjectRequest(path.c_str(), fd, iter->offset, need_load_size); }else{ // single request if(0 < need_load_size){ S3fsCurl s3fscurl; result = s3fscurl.GetObjectRequest(path.c_str(), fd, iter->offset, need_load_size); }else{ result = 0; } } if(0 != result){ break; } // Set loaded flag pagelist.SetPageLoadedStatus(iter->offset, iter->bytes, PageList::PAGE_LOADED); } PageList::FreeList(unloaded_list); } return result; } // [NOTE] // At no disk space for caching object. // This method is downloading by dividing an object of the specified range // and uploading by multipart after finishing downloading it. // // [NOTICE] // Need to lock before calling this method. // int FdEntity::NoCacheLoadAndPost(off_t start, off_t size) { int result = 0; S3FS_PRN_INFO3("[path=%s][fd=%d][offset=%lld][size=%lld]", path.c_str(), fd, static_cast(start), static_cast(size)); if(-1 == fd){ return -EBADF; } // [NOTE] // This method calling means that the cache file is never used no more. // if(!cachepath.empty()){ // remove cache files(and cache stat file) FdManager::DeleteCacheFile(path.c_str()); // cache file path does not use no more. cachepath.erase(); mirrorpath.erase(); } // Change entity key in manager mapping FdManager::get()->ChangeEntityToTempPath(this, path.c_str()); // open temporary file FILE* ptmpfp; int tmpfd; if(NULL == (ptmpfp = tmpfile()) || -1 ==(tmpfd = fileno(ptmpfp))){ S3FS_PRN_ERR("failed to open tmp file. err(%d)", errno); if(ptmpfp){ fclose(ptmpfp); } return (0 == errno ? -EIO : -errno); } // loop uploading by multipart for(fdpage_list_t::iterator iter = pagelist.pages.begin(); iter != pagelist.pages.end(); ++iter){ if(iter->end() < start){ continue; } if(0 != size && start + size <= iter->offset){ break; } // download each multipart size(default 10MB) in unit for(off_t oneread = 0, totalread = (iter->offset < start ? start : 0); totalread < static_cast(iter->bytes); totalread += oneread){ int upload_fd = fd; off_t offset = iter->offset + totalread; oneread = min(static_cast(iter->bytes) - totalread, S3fsCurl::GetMultipartSize()); // check rest size is over minimum part size // // [NOTE] // If the final part size is smaller than 5MB, it is not allowed by S3 API. // For this case, if the previous part of the final part is not over 5GB, // we incorporate the final part to the previous part. If the previous part // is over 5GB, we want to even out the last part and the previous part. // if((iter->bytes - totalread - oneread) < MIN_MULTIPART_SIZE){ if(FIVE_GB < iter->bytes - totalread){ oneread = (iter->bytes - totalread) / 2; }else{ oneread = iter->bytes - totalread; } } if(!iter->loaded){ // // loading or initializing // upload_fd = tmpfd; // load offset & size size_t need_load_size = 0; if(size_orgmeta <= offset){ // all area is over of original size need_load_size = 0; }else{ if(size_orgmeta < (offset + oneread)){ // original file size(on S3) is smaller than request. need_load_size = size_orgmeta - offset; }else{ need_load_size = oneread; } } size_t over_size = oneread - need_load_size; // [NOTE] // truncate file to zero and set length to part offset + size // after this, file length is (offset + size), but file does not use any disk space. // if(-1 == ftruncate(tmpfd, 0) || -1 == ftruncate(tmpfd, (offset + oneread))){ S3FS_PRN_ERR("failed to truncate temporary file(%d).", tmpfd); result = -EIO; break; } // single area get request if(0 < need_load_size){ S3fsCurl s3fscurl; if(0 != (result = s3fscurl.GetObjectRequest(path.c_str(), tmpfd, offset, oneread))){ S3FS_PRN_ERR("failed to get object(start=%lld, size=%lld) for file(%d).", static_cast(offset), static_cast(oneread), tmpfd); break; } } // initialize fd without loading if(0 < over_size){ if(0 != (result = FdEntity::FillFile(tmpfd, 0, over_size, offset + need_load_size))){ S3FS_PRN_ERR("failed to fill rest bytes for fd(%d). errno(%d)", tmpfd, result); break; } } }else{ // already loaded area } // single area upload by multipart post if(0 != (result = NoCacheMultipartPost(upload_fd, offset, oneread))){ S3FS_PRN_ERR("failed to multipart post(start=%lld, size=%lld) for file(%d).", static_cast(offset), static_cast(oneread), upload_fd); break; } } if(0 != result){ break; } // set loaded flag if(!iter->loaded){ if(iter->offset < start){ fdpage page(iter->offset, start - iter->offset, iter->loaded, false); iter->bytes -= (start - iter->offset); iter->offset = start; pagelist.pages.insert(iter, page); } if(0 != size && start + size < iter->next()){ fdpage page(iter->offset, start + size - iter->offset, true, false); iter->bytes -= (start + size - iter->offset); iter->offset = start + size; pagelist.pages.insert(iter, page); }else{ iter->loaded = true; iter->modified = false; } } } if(0 == result){ // compress pagelist pagelist.Compress(); // fd data do empty if(-1 == ftruncate(fd, 0)){ S3FS_PRN_ERR("failed to truncate file(%d), but continue...", fd); } } // close temporary fclose(ptmpfp); return result; } // [NOTE] // At no disk space for caching object. // This method is starting multipart uploading. // int FdEntity::NoCachePreMultipartPost() { // initialize multipart upload values upload_id.erase(); etaglist.clear(); S3fsCurl s3fscurl(true); int result; if(0 != (result = s3fscurl.PreMultipartPostRequest(path.c_str(), orgmeta, upload_id, false))){ return result; } s3fscurl.DestroyCurlHandle(); return 0; } // [NOTE] // At no disk space for caching object. // This method is uploading one part of multipart. // int FdEntity::NoCacheMultipartPost(int tgfd, off_t start, off_t size) { if(-1 == tgfd || upload_id.empty()){ S3FS_PRN_ERR("Need to initialize for multipart post."); return -EIO; } S3fsCurl s3fscurl(true); return s3fscurl.MultipartUploadRequest(upload_id, path.c_str(), tgfd, start, size, etaglist); } // [NOTE] // At no disk space for caching object. // This method is finishing multipart uploading. // int FdEntity::NoCacheCompleteMultipartPost() { if(upload_id.empty() || etaglist.empty()){ S3FS_PRN_ERR("There is no upload id or etag list."); return -EIO; } S3fsCurl s3fscurl(true); int result; if(0 != (result = s3fscurl.CompleteMultipartPostRequest(path.c_str(), upload_id, etaglist))){ return result; } s3fscurl.DestroyCurlHandle(); // reset values upload_id.erase(); etaglist.clear(); mp_start = 0; mp_size = 0; return 0; } int FdEntity::RowFlush(const char* tpath, bool force_sync) { int result = 0; std::string tmppath; headers_t tmporgmeta; { AutoLock auto_lock(&fdent_lock); tmppath = path; tmporgmeta = orgmeta; } S3FS_PRN_INFO3("[tpath=%s][path=%s][fd=%d]", SAFESTRPTR(tpath), tmppath.c_str(), fd); if(-1 == fd){ return -EBADF; } AutoLock auto_lock(&fdent_data_lock); if(!force_sync && !pagelist.IsModified()){ // nothing to update. return 0; } // If there is no loading all of the area, loading all area. off_t restsize = pagelist.GetTotalUnloadedPageSize(); if(0 < restsize){ if(0 == upload_id.length()){ // check disk space if(ReserveDiskSpace(restsize)){ // enough disk space // Load all uninitialized area(no mix multipart uploading) if(!FdEntity::mixmultipart){ result = Load(/*start=*/ 0, /*size=*/ 0, /*lock_already_held=*/ true); } FdManager::FreeReservedDiskSpace(restsize); if(0 != result){ S3FS_PRN_ERR("failed to upload all area(errno=%d)", result); return static_cast(result); } }else{ // no enough disk space // upload all by multipart uploading if(0 != (result = NoCacheLoadAndPost())){ S3FS_PRN_ERR("failed to upload all area by multipart uploading(errno=%d)", result); return static_cast(result); } } }else{ // already start multipart uploading } } if(0 == upload_id.length()){ // normal uploading /* * Make decision to do multi upload (or not) based upon file size * * According to the AWS spec: * - 1 to 10,000 parts are allowed * - minimum size of parts is 5MB (expect for the last part) * * For our application, we will define minimum part size to be 10MB (10 * 2^20 Bytes) * minimum file size will be 64 GB - 2 ** 36 * * Initially uploads will be done serially * * If file is > 20MB, then multipart will kick in */ if(pagelist.Size() > MAX_MULTIPART_CNT * S3fsCurl::GetMultipartSize()){ // close f ? S3FS_PRN_ERR("Part count exceeds %d. Increase multipart size and try again.", MAX_MULTIPART_CNT); return -ENOTSUP; } // seek to head of file. if(0 != lseek(fd, 0, SEEK_SET)){ S3FS_PRN_ERR("lseek error(%d)", errno); return -errno; } // backup upload file size struct stat st; memset(&st, 0, sizeof(struct stat)); if(-1 == fstat(fd, &st)){ S3FS_PRN_ERR("fstat is failed by errno(%d), but continue...", errno); } if(pagelist.Size() >= S3fsCurl::GetMultipartSize() && !nomultipart){ if(FdEntity::mixmultipart){ // multipart uploading can use copy api // This is to ensure that each part is 5MB or more. // If the part is less than 5MB, download it. fdpage_list_t dlpages; if(!pagelist.GetLoadPageListForMultipartUpload(dlpages)){ S3FS_PRN_ERR("something error occurred during getting download pagelist."); return -1; } for(fdpage_list_t::const_iterator iter = dlpages.begin(); iter != dlpages.end(); ++iter){ if(0 != (result = Load(iter->offset, iter->bytes, true))){ S3FS_PRN_ERR("failed to get parts(start=%lld, size=%lld) before uploading.", static_cast(iter->offset), static_cast(iter->bytes)); return result; } } // multipart uploading with copy api result = S3fsCurl::ParallelMixMultipartUploadRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, fd, pagelist); }else{ // multipart uploading not using copy api result = S3fsCurl::ParallelMultipartUploadRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, fd); } }else{ // If there are unloaded pages, they are loaded at here. if(0 != (result = Load(/*start=*/ 0, /*size=*/ 0, /*lock_already_held=*/ true))){ S3FS_PRN_ERR("failed to load parts before uploading object(%d)", result); return result; } S3fsCurl s3fscurl(true); result = s3fscurl.PutRequest(tpath ? tpath : tmppath.c_str(), tmporgmeta, fd); } // seek to head of file. if(0 == result && 0 != lseek(fd, 0, SEEK_SET)){ S3FS_PRN_ERR("lseek error(%d)", errno); return -errno; } // reset uploaded file size size_orgmeta = st.st_size; }else{ // upload rest data if(0 < mp_size){ if(0 != (result = NoCacheMultipartPost(fd, mp_start, mp_size))){ S3FS_PRN_ERR("failed to multipart post(start=%lld, size=%lld) for file(%d).", static_cast(mp_start), static_cast(mp_size), fd); return result; } mp_start = 0; mp_size = 0; } // complete multipart uploading. if(0 != (result = NoCacheCompleteMultipartPost())){ S3FS_PRN_ERR("failed to complete(finish) multipart post for file(%d).", fd); return result; } // truncate file to zero if(-1 == ftruncate(fd, 0)){ // So the file has already been removed, skip error. S3FS_PRN_ERR("failed to truncate file(%d) to zero, but continue...", fd); } } if(0 == result){ pagelist.ClearAllModified(); } return result; } // [NOTICE] // Need to lock before calling this method. bool FdEntity::ReserveDiskSpace(off_t size) { if(FdManager::ReserveDiskSpace(size)){ return true; } if(!pagelist.IsModified()){ // try to clear all cache for this fd. pagelist.Init(pagelist.Size(), false, false); if(-1 == ftruncate(fd, 0) || -1 == ftruncate(fd, pagelist.Size())){ S3FS_PRN_ERR("failed to truncate temporary file(%d).", fd); return false; } if(FdManager::ReserveDiskSpace(size)){ return true; } } FdManager::get()->CleanupCacheDir(); return FdManager::ReserveDiskSpace(size); } ssize_t FdEntity::Read(char* bytes, off_t start, size_t size, bool force_load) { S3FS_PRN_DBG("[path=%s][fd=%d][offset=%lld][size=%zu]", path.c_str(), fd, static_cast(start), size); if(-1 == fd){ return -EBADF; } AutoLock auto_lock(&fdent_data_lock); if(force_load){ pagelist.SetPageLoadedStatus(start, size, PageList::PAGE_NOT_LOAD_MODIFIED); } ssize_t rsize; // check disk space if(0 < pagelist.GetTotalUnloadedPageSize(start, size)){ // load size(for prefetch) size_t load_size = size; if(start + static_cast(size) < pagelist.Size()){ ssize_t prefetch_max_size = max(static_cast(size), S3fsCurl::GetMultipartSize() * S3fsCurl::GetMaxParallelCount()); if(start + prefetch_max_size < pagelist.Size()){ load_size = prefetch_max_size; }else{ load_size = pagelist.Size() - start; } } if(!ReserveDiskSpace(load_size)){ S3FS_PRN_WARN("could not reserve disk space for pre-fetch download"); load_size = size; if(!ReserveDiskSpace(load_size)){ S3FS_PRN_ERR("could not reserve disk space for pre-fetch download"); return -ENOSPC; } } // Loading int result = 0; if(0 < size){ result = Load(start, load_size, /*lock_already_held=*/ true); } FdManager::FreeReservedDiskSpace(load_size); if(0 != result){ S3FS_PRN_ERR("could not download. start(%lld), size(%zu), errno(%d)", static_cast(start), size, result); return -EIO; } } // Reading if(-1 == (rsize = pread(fd, bytes, size, start))){ S3FS_PRN_ERR("pread failed. errno(%d)", errno); return -errno; } return rsize; } ssize_t FdEntity::Write(const char* bytes, off_t start, size_t size) { S3FS_PRN_DBG("[path=%s][fd=%d][offset=%lld][size=%zu]", path.c_str(), fd, static_cast(start), size); if(-1 == fd){ return -EBADF; } // check if not enough disk space left BEFORE locking fd if(FdManager::IsCacheDir() && !FdManager::IsSafeDiskSpace(NULL, size)){ FdManager::get()->CleanupCacheDir(); } AutoLock auto_lock(&fdent_data_lock); // check file size if(pagelist.Size() < start){ // grow file size if(-1 == ftruncate(fd, start)){ S3FS_PRN_ERR("failed to truncate temporary file(%d).", fd); return -EIO; } // add new area pagelist.SetPageLoadedStatus(pagelist.Size(), start - pagelist.Size(), PageList::PAGE_MODIFIED); } int result = 0; ssize_t wsize; if(0 == upload_id.length()){ // check disk space off_t restsize = pagelist.GetTotalUnloadedPageSize(0, start) + size; if(ReserveDiskSpace(restsize)){ // enough disk space // Load uninitialized area which starts from 0 to (start + size) before writing. if(!FdEntity::mixmultipart){ if(0 < start){ result = Load(0, start, /*lock_already_held=*/ true); } } FdManager::FreeReservedDiskSpace(restsize); if(0 != result){ S3FS_PRN_ERR("failed to load uninitialized area before writing(errno=%d)", result); return static_cast(result); } }else{ // no enough disk space if(0 != (result = NoCachePreMultipartPost())){ S3FS_PRN_ERR("failed to switch multipart uploading with no cache(errno=%d)", result); return static_cast(result); } // start multipart uploading if(0 != (result = NoCacheLoadAndPost(0, start))){ S3FS_PRN_ERR("failed to load uninitialized area and multipart uploading it(errno=%d)", result); return static_cast(result); } mp_start = start; mp_size = 0; } }else{ // already start multipart uploading } // Writing if(-1 == (wsize = pwrite(fd, bytes, size, start))){ S3FS_PRN_ERR("pwrite failed. errno(%d)", errno); return -errno; } if(0 < wsize){ pagelist.SetPageLoadedStatus(start, wsize, PageList::PAGE_LOAD_MODIFIED); } // Load uninitialized area which starts from (start + size) to EOF after writing. if(!FdEntity::mixmultipart){ if(pagelist.Size() > start + static_cast(size)){ result = Load(start + size, pagelist.Size(), /*lock_already_held=*/ true); if(0 != result){ S3FS_PRN_ERR("failed to load uninitialized area after writing(errno=%d)", result); return static_cast(result); } } } // check multipart uploading if(0 < upload_id.length()){ mp_size += wsize; if(S3fsCurl::GetMultipartSize() <= mp_size){ // over one multipart size if(0 != (result = NoCacheMultipartPost(fd, mp_start, mp_size))){ S3FS_PRN_ERR("failed to multipart post(start=%lld, size=%lld) for file(%d).", static_cast(mp_start), static_cast(mp_size), fd); return result; } // [NOTE] // truncate file to zero and set length to part offset + size // after this, file length is (offset + size), but file does not use any disk space. // if(-1 == ftruncate(fd, 0) || -1 == ftruncate(fd, (mp_start + mp_size))){ S3FS_PRN_ERR("failed to truncate file(%d).", fd); return -EIO; } mp_start += mp_size; mp_size = 0; } } return wsize; } //------------------------------------------------ // FdManager symbol //------------------------------------------------ // [NOTE] // NOCACHE_PATH_PREFIX symbol needs for not using cache mode. // Now s3fs I/F functions in s3fs.cpp has left the processing // to FdManager and FdEntity class. FdManager class manages // the list of local file stat and file descriptor in conjunction // with the FdEntity class. // When s3fs is not using local cache, it means FdManager must // return new temporary file descriptor at each opening it. // Then FdManager caches fd by key which is dummy file path // instead of real file path. // This process may not be complete, but it is easy way can // be realized. // #define NOCACHE_PATH_PREFIX_FORM " __S3FS_UNEXISTED_PATH_%lx__ / " // important space words for simply //------------------------------------------------ // FdManager class variable //------------------------------------------------ FdManager FdManager::singleton; pthread_mutex_t FdManager::fd_manager_lock; pthread_mutex_t FdManager::cache_cleanup_lock; pthread_mutex_t FdManager::reserved_diskspace_lock; bool FdManager::is_lock_init(false); string FdManager::cache_dir; bool FdManager::check_cache_dir_exist(false); off_t FdManager::free_disk_space = 0; //------------------------------------------------ // FdManager class methods //------------------------------------------------ bool FdManager::SetCacheDir(const char* dir) { if(!dir || '\0' == dir[0]){ cache_dir = ""; }else{ cache_dir = dir; } return true; } bool FdManager::DeleteCacheDirectory() { if(FdManager::cache_dir.empty()){ return true; } string cache_path; if(!FdManager::MakeCachePath(NULL, cache_path, false)){ return false; } if(!delete_files_in_dir(cache_path.c_str(), true)){ return false; } string mirror_path = FdManager::cache_dir + "/." + bucket + ".mirror"; if(!delete_files_in_dir(mirror_path.c_str(), true)){ return false; } return true; } int FdManager::DeleteCacheFile(const char* path) { S3FS_PRN_INFO3("[path=%s]", SAFESTRPTR(path)); if(!path){ return -EIO; } if(FdManager::cache_dir.empty()){ return 0; } string cache_path; if(!FdManager::MakeCachePath(path, cache_path, false)){ return 0; } int result = 0; if(0 != unlink(cache_path.c_str())){ if(ENOENT == errno){ S3FS_PRN_DBG("failed to delete file(%s): errno=%d", path, errno); }else{ S3FS_PRN_ERR("failed to delete file(%s): errno=%d", path, errno); } result = -errno; } if(!CacheFileStat::DeleteCacheFileStat(path)){ if(ENOENT == errno){ S3FS_PRN_DBG("failed to delete stat file(%s): errno=%d", path, errno); }else{ S3FS_PRN_ERR("failed to delete stat file(%s): errno=%d", path, errno); } if(0 != errno){ result = -errno; }else{ result = -EIO; } } return result; } bool FdManager::MakeCachePath(const char* path, string& cache_path, bool is_create_dir, bool is_mirror_path) { if(FdManager::cache_dir.empty()){ cache_path = ""; return true; } string resolved_path(FdManager::cache_dir); if(!is_mirror_path){ resolved_path += "/"; resolved_path += bucket; }else{ resolved_path += "/."; resolved_path += bucket; resolved_path += ".mirror"; } if(is_create_dir){ int result; if(0 != (result = mkdirp(resolved_path + mydirname(path), 0777))){ S3FS_PRN_ERR("failed to create dir(%s) by errno(%d).", path, result); return false; } } if(!path || '\0' == path[0]){ cache_path = resolved_path; }else{ cache_path = resolved_path + SAFESTRPTR(path); } return true; } bool FdManager::CheckCacheTopDir() { if(FdManager::cache_dir.empty()){ return true; } string toppath(FdManager::cache_dir + "/" + bucket); return check_exist_dir_permission(toppath.c_str()); } bool FdManager::MakeRandomTempPath(const char* path, string& tmppath) { char szBuff[64]; sprintf(szBuff, NOCACHE_PATH_PREFIX_FORM, random()); // worry for performance, but maybe don't worry. tmppath = szBuff; tmppath += path ? path : ""; return true; } bool FdManager::SetCheckCacheDirExist(bool is_check) { bool old = FdManager::check_cache_dir_exist; FdManager::check_cache_dir_exist = is_check; return old; } bool FdManager::CheckCacheDirExist() { if(!FdManager::check_cache_dir_exist){ return true; } if(FdManager::cache_dir.empty()){ return true; } // check the directory struct stat st; if(0 != stat(cache_dir.c_str(), &st)){ S3FS_PRN_ERR("could not access to cache directory(%s) by errno(%d).", cache_dir.c_str(), errno); return false; } if(!S_ISDIR(st.st_mode)){ S3FS_PRN_ERR("the cache directory(%s) is not directory.", cache_dir.c_str()); return false; } return true; } off_t FdManager::GetEnsureFreeDiskSpace() { AutoLock auto_lock(&FdManager::reserved_diskspace_lock); return FdManager::free_disk_space; } off_t FdManager::SetEnsureFreeDiskSpace(off_t size) { AutoLock auto_lock(&FdManager::reserved_diskspace_lock); off_t old = FdManager::free_disk_space; FdManager::free_disk_space = size; return old; } off_t FdManager::GetFreeDiskSpace(const char* path) { struct statvfs vfsbuf; string ctoppath; if(!FdManager::cache_dir.empty()){ ctoppath = FdManager::cache_dir + "/"; ctoppath = get_exist_directory_path(ctoppath); // existed directory if(ctoppath != "/"){ ctoppath += "/"; } }else{ ctoppath = TMPFILE_DIR_0PATH "/"; } if(path && '\0' != *path){ ctoppath += path; }else{ ctoppath += "."; } if(-1 == statvfs(ctoppath.c_str(), &vfsbuf)){ S3FS_PRN_ERR("could not get vfs stat by errno(%d)", errno); return 0; } return (vfsbuf.f_bavail * vfsbuf.f_frsize); } bool FdManager::IsSafeDiskSpace(const char* path, off_t size) { off_t fsize = FdManager::GetFreeDiskSpace(path); return size + FdManager::GetEnsureFreeDiskSpace() <= fsize; } //------------------------------------------------ // FdManager methods //------------------------------------------------ FdManager::FdManager() { if(this == FdManager::get()){ pthread_mutexattr_t attr; pthread_mutexattr_init(&attr); #if S3FS_PTHREAD_ERRORCHECK pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); #endif try{ pthread_mutex_init(&FdManager::fd_manager_lock, &attr); pthread_mutex_init(&FdManager::cache_cleanup_lock, &attr); pthread_mutex_init(&FdManager::reserved_diskspace_lock, &attr); FdManager::is_lock_init = true; }catch(exception& e){ FdManager::is_lock_init = false; S3FS_PRN_CRIT("failed to init mutex"); } }else{ abort(); } } FdManager::~FdManager() { if(this == FdManager::get()){ for(fdent_map_t::iterator iter = fent.begin(); fent.end() != iter; ++iter){ FdEntity* ent = (*iter).second; delete ent; } fent.clear(); if(FdManager::is_lock_init){ try{ pthread_mutex_destroy(&FdManager::fd_manager_lock); pthread_mutex_destroy(&FdManager::cache_cleanup_lock); pthread_mutex_destroy(&FdManager::reserved_diskspace_lock); }catch(exception& e){ S3FS_PRN_CRIT("failed to init mutex"); } FdManager::is_lock_init = false; } }else{ abort(); } } FdEntity* FdManager::GetFdEntity(const char* path, int existfd) { S3FS_PRN_INFO3("[path=%s][fd=%d]", SAFESTRPTR(path), existfd); if(!path || '\0' == path[0]){ return NULL; } AutoLock auto_lock(&FdManager::fd_manager_lock); fdent_map_t::iterator iter = fent.find(string(path)); if(fent.end() != iter && (-1 == existfd || (*iter).second->GetFd() == existfd)){ iter->second->Dup(); return (*iter).second; } if(-1 != existfd){ for(iter = fent.begin(); iter != fent.end(); ++iter){ if((*iter).second && (*iter).second->GetFd() == existfd){ // found opened fd in map if(0 == strcmp((*iter).second->GetPath(), path)){ iter->second->Dup(); return (*iter).second; } // found fd, but it is used another file(file descriptor is recycled) // so returns NULL. break; } } } return NULL; } FdEntity* FdManager::Open(const char* path, headers_t* pmeta, off_t size, time_t time, bool force_tmpfile, bool is_create, bool no_fd_lock_wait) { S3FS_PRN_DBG("[path=%s][size=%lld][time=%lld]", SAFESTRPTR(path), static_cast(size), static_cast(time)); if(!path || '\0' == path[0]){ return NULL; } bool close = false; FdEntity* ent; AutoLock auto_lock(&FdManager::fd_manager_lock); // search in mapping by key(path) fdent_map_t::iterator iter = fent.find(string(path)); if(fent.end() == iter && !force_tmpfile && !FdManager::IsCacheDir()){ // If the cache directory is not specified, s3fs opens a temporary file // when the file is opened. // Then if it could not find a entity in map for the file, s3fs should // search a entity in all which opened the temporary file. // for(iter = fent.begin(); iter != fent.end(); ++iter){ if((*iter).second && (*iter).second->IsOpen() && 0 == strcmp((*iter).second->GetPath(), path)){ break; // found opened fd in mapping } } } if(fent.end() != iter){ // found ent = (*iter).second; ent->Dup(); if(ent->IsModified()){ // If the file is being modified, it will not be resized. size = -1; } close = true; }else if(is_create){ // not found string cache_path; if(!force_tmpfile && !FdManager::MakeCachePath(path, cache_path, true)){ S3FS_PRN_ERR("failed to make cache path for object(%s).", path); return NULL; } // make new obj ent = new FdEntity(path, cache_path.c_str()); if(!cache_path.empty()){ // using cache fent[string(path)] = ent; }else{ // not using cache, so the key of fdentity is set not really existing path. // (but not strictly unexisting path.) // // [NOTE] // The reason why this process here, please look at the definition of the // comments of NOCACHE_PATH_PREFIX_FORM symbol. // string tmppath; FdManager::MakeRandomTempPath(path, tmppath); fent[tmppath] = ent; } }else{ return NULL; } // open if(0 != ent->Open(pmeta, size, time, no_fd_lock_wait)){ if(close){ ent->Close(); } return NULL; } if(close){ ent->Close(); } return ent; } FdEntity* FdManager::ExistOpen(const char* path, int existfd, bool ignore_existfd) { S3FS_PRN_DBG("[path=%s][fd=%d][ignore_existfd=%s]", SAFESTRPTR(path), existfd, ignore_existfd ? "true" : "false"); // search by real path FdEntity* ent = Open(path, NULL, -1, -1, false, false); if(!ent && (ignore_existfd || (-1 != existfd))){ // search from all fdentity because of not using cache. AutoLock auto_lock(&FdManager::fd_manager_lock); for(fdent_map_t::iterator iter = fent.begin(); iter != fent.end(); ++iter){ if((*iter).second && (*iter).second->IsOpen() && (ignore_existfd || ((*iter).second->GetFd() == existfd))){ // found opened fd in map if(0 == strcmp((*iter).second->GetPath(), path)){ ent = (*iter).second; ent->Dup(); }else{ // found fd, but it is used another file(file descriptor is recycled) // so returns NULL. } break; } } } return ent; } void FdManager::Rename(const std::string &from, const std::string &to) { AutoLock auto_lock(&FdManager::fd_manager_lock); fdent_map_t::iterator iter = fent.find(from); if(fent.end() == iter && !FdManager::IsCacheDir()){ // If the cache directory is not specified, s3fs opens a temporary file // when the file is opened. // Then if it could not find a entity in map for the file, s3fs should // search a entity in all which opened the temporary file. // for(iter = fent.begin(); iter != fent.end(); ++iter){ if((*iter).second && (*iter).second->IsOpen() && 0 == strcmp((*iter).second->GetPath(), from.c_str())){ break; // found opened fd in mapping } } } if(fent.end() != iter){ // found S3FS_PRN_DBG("[from=%s][to=%s]", from.c_str(), to.c_str()); FdEntity* ent = (*iter).second; // retrieve old fd entity from map fent.erase(iter); // rename path and caches in fd entity string fentmapkey; if(!ent->RenamePath(to, fentmapkey)){ S3FS_PRN_ERR("Failed to rename FdEntity obejct for %s to %s", from.c_str(), to.c_str()); return; } // set new fd entity to map fent[fentmapkey] = ent; } } bool FdManager::Close(FdEntity* ent) { S3FS_PRN_DBG("[ent->file=%s][ent->fd=%d]", ent ? ent->GetPath() : "", ent ? ent->GetFd() : -1); if(!ent){ return true; // returns success } AutoLock auto_lock(&FdManager::fd_manager_lock); for(fdent_map_t::iterator iter = fent.begin(); iter != fent.end(); ++iter){ if((*iter).second == ent){ ent->Close(); if(!ent->IsOpen()){ // remove found entity from map. fent.erase(iter++); // check another key name for entity value to be on the safe side for(; iter != fent.end(); ){ if((*iter).second == ent){ fent.erase(iter++); }else{ ++iter; } } delete ent; } return true; } } return false; } bool FdManager::ChangeEntityToTempPath(FdEntity* ent, const char* path) { AutoLock auto_lock(&FdManager::fd_manager_lock); for(fdent_map_t::iterator iter = fent.begin(); iter != fent.end(); ){ if((*iter).second == ent){ fent.erase(iter++); string tmppath; FdManager::MakeRandomTempPath(path, tmppath); fent[tmppath] = ent; }else{ ++iter; } } return false; } void FdManager::CleanupCacheDir() { //S3FS_PRN_DBG("cache cleanup requested"); if(!FdManager::IsCacheDir()){ return; } AutoLock auto_lock_no_wait(&FdManager::cache_cleanup_lock, AutoLock::NO_WAIT); if(auto_lock_no_wait.isLockAcquired()){ //S3FS_PRN_DBG("cache cleanup started"); CleanupCacheDirInternal(""); //S3FS_PRN_DBG("cache cleanup ended"); }else{ // wait for other thread to finish cache cleanup AutoLock auto_lock(&FdManager::cache_cleanup_lock); } } void FdManager::CleanupCacheDirInternal(const std::string &path) { DIR* dp; struct dirent* dent; std::string abs_path = cache_dir + "/" + bucket + path; if(NULL == (dp = opendir(abs_path.c_str()))){ S3FS_PRN_ERR("could not open cache dir(%s) - errno(%d)", abs_path.c_str(), errno); return; } for(dent = readdir(dp); dent; dent = readdir(dp)){ if(0 == strcmp(dent->d_name, "..") || 0 == strcmp(dent->d_name, ".")){ continue; } string fullpath = abs_path; fullpath += "/"; fullpath += dent->d_name; struct stat st; if(0 != lstat(fullpath.c_str(), &st)){ S3FS_PRN_ERR("could not get stats of file(%s) - errno(%d)", fullpath.c_str(), errno); closedir(dp); return; } string next_path = path + "/" + dent->d_name; if(S_ISDIR(st.st_mode)){ CleanupCacheDirInternal(next_path); }else{ AutoLock auto_lock(&FdManager::fd_manager_lock, AutoLock::NO_WAIT); if (!auto_lock.isLockAcquired()) { S3FS_PRN_ERR("could not get fd_manager_lock when clean up file(%s)", next_path.c_str()); continue; } fdent_map_t::iterator iter = fent.find(next_path); if(fent.end() == iter) { S3FS_PRN_DBG("cleaned up: %s", next_path.c_str()); FdManager::DeleteCacheFile(next_path.c_str()); } } } closedir(dp); } bool FdManager::ReserveDiskSpace(off_t size) { if(IsSafeDiskSpace(NULL, size)){ AutoLock auto_lock(&FdManager::reserved_diskspace_lock); free_disk_space += size; return true; } return false; } void FdManager::FreeReservedDiskSpace(off_t size) { AutoLock auto_lock(&FdManager::reserved_diskspace_lock); free_disk_space -= size; } /* * Local variables: * tab-width: 4 * c-basic-offset: 4 * End: * vim600: noet sw=4 ts=4 fdm=marker * vim<600: noet sw=4 ts=4 */