Added enable_content_md5 option

1) Adds enable_content_md5 option
   When s3fs uploads a large object (over 20MB), it always checks the ETag (MD5) in each multipart response.
   But for small objects, s3fs does not check the MD5.
   This new option enables MD5 checking of the uploaded object.
   If the "enable_content_md5" option is specified, s3fs puts the object with a "Content-MD5" header.

   MD5 checking is not enabled by default, because it increases the user's CPU usage somewhat.
   (The default value may be changed in the future.)



git-svn-id: http://s3fs.googlecode.com/svn/trunk@423 df820570-a93a-0410-bd06-b72b767a4274
This commit is contained in:
ggtakec@gmail.com 2013-05-16 02:02:55 +00:00
parent 715b837a2b
commit 9da497af45
5 changed files with 128 additions and 49 deletions

View File

@ -91,6 +91,12 @@ sets the url to use to access Amazon S3. If you want to use HTTPS, then you can
.TP
\fB\-o\fR nomultipart - disable multipart uploads
.TP
\fB\-o\fR enable_content_md5 (default is disabled)
verifies data uploaded without multipart by means of the Content-MD5 header.
When enabled, s3fs sends a "Content-MD5" header when uploading an object without multipart posting.
Enabling this option has some impact on s3fs performance when uploading small objects.
Because s3fs always checks the MD5 when uploading large objects, this option does not affect large objects.
.TP
\fB\-o\fR noxmlns - disable registering xml name space.
disable registering xml name space for response of ListBucketResult and ListVersionsResult etc. Default name space is looked up from "http://s3.amazonaws.com/doc/2006-03-01".
This option should not be specified now, because s3fs looks up xmlns automatically after v1.66.

View File

@ -233,7 +233,7 @@ int curl_delete(const char *path)
headers.append("Content-Type: ");
if(public_bucket.substr(0,1) != "1"){
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("DELETE", "", date, headers.get(), resource));
calc_signature("DELETE", "", "", date, headers.get(), resource));
}
my_url = prepare_url(url.c_str());
curl = create_curl_handle();
@ -269,7 +269,7 @@ int curl_get_headers(const char *path, headers_t &meta)
headers.append("Content-Type: ");
if(public_bucket.substr(0,1) != "1") {
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("HEAD", "", date, headers.get(), resource));
calc_signature("HEAD", "", "", date, headers.get(), resource));
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers.get());
string my_url = prepare_url(url.c_str());
@ -334,7 +334,7 @@ CURL *create_head_handle(head_data *request_data)
if(public_bucket.substr(0,1) != "1") {
request_data->requestHeaders = curl_slist_append(
request_data->requestHeaders, string("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("HEAD", "", date, request_data->requestHeaders, resource)).c_str());
calc_signature("HEAD", "", "", date, request_data->requestHeaders, resource)).c_str());
}
curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, request_data->requestHeaders);
@ -616,7 +616,7 @@ int my_curl_progress(void *clientp, double dltotal, double dlnow, double ultotal
* @param date e.g., get_date()
* @param resource e.g., "/pub"
*/
string calc_signature(string method, string content_type, string date, curl_slist* headers, string resource)
string calc_signature(string method, string strMD5, string content_type, string date, curl_slist* headers, string resource)
{
int ret;
int bytes_written;
@ -626,7 +626,7 @@ string calc_signature(string method, string content_type, string date, curl_slis
string Signature;
string StringToSign;
StringToSign += method + "\n";
StringToSign += "\n"; // md5
StringToSign += strMD5 + "\n"; // md5
StringToSign += content_type + "\n";
StringToSign += date + "\n";
int count = 0;
@ -766,32 +766,82 @@ void locate_bundle(void)
return;
}
string md5sum(int fd)
// Builds the value for a "Content-MD5" request header: the base64 encoding
// of the raw MD5 digest of the content readable from fd.
//
// @param fd  descriptor handed to md5hexsum() to compute the digest
// @return    the base64 text, or an empty string when the digest cannot be
//            computed or the base64 encoding fails
string GetContentMD5(int fd)
{
  unsigned char* md5hex = md5hexsum(fd);
  if(NULL == md5hex){
    return string("");
  }

  // Either allocation can fail; pushing or writing through a NULL BIO
  // would crash, so bail out before building the chain.
  BIO* b64  = BIO_new(BIO_f_base64());
  BIO* bmem = BIO_new(BIO_s_mem());
  if(NULL == b64 || NULL == bmem){
    BIO_free(b64);   // BIO_free() ignores a NULL argument
    BIO_free(bmem);
    free(md5hex);
    return string("");
  }
  b64 = BIO_push(b64, bmem);

  // The digest buffer is no longer needed once it has been handed to the
  // filter, whether or not the write succeeded.
  int written = BIO_write(b64, md5hex, MD5_DIGEST_LENGTH);
  free(md5hex);
  if(MD5_DIGEST_LENGTH != written || 1 != BIO_flush(b64)){
    BIO_free_all(b64);
    return string("");
  }

  BUF_MEM* bptr = NULL;
  BIO_get_mem_ptr(b64, &bptr);

  string Signature;
  if(NULL != bptr && 0 < bptr->length){
    // Drop the final byte: the base64 filter ends its output with a
    // newline, which must not be part of the header value.
    Signature.assign(bptr->data, bptr->length - 1);
  }
  BIO_free_all(b64);

  return Signature;
}
// Computes the MD5 digest of everything readable from fd, starting at
// offset 0.
//
// Despite the name, the returned bytes are the raw digest written by
// MD5_Final(), not hexadecimal text; md5sum() does the hex conversion.
//
// @param fd  descriptor to read; its offset is moved to 0 before reading
//            and again before returning
// @return    a malloc()ed buffer of MD5_DIGEST_LENGTH bytes that the caller
//            must free(), or NULL when either lseek() fails
unsigned char* md5hexsum(int fd)
{
MD5_CTX c;
char buf[512];
char hexbuf[3];
char buf[512];
ssize_t bytes;
char md5[2 * MD5_DIGEST_LENGTH + 1];
unsigned char *result = (unsigned char *) malloc(MD5_DIGEST_LENGTH);
unsigned char* result = (unsigned char*)malloc(MD5_DIGEST_LENGTH);
// seek to top of file.
// NOTE(review): result has already been allocated above and is not freed on
// this early return; the malloc() result is also never checked for NULL
// before MD5_Final() writes into it.
if(-1 == lseek(fd, 0, SEEK_SET)){
return NULL;
}
memset(buf, 0, 512);
MD5_Init(&c);
// The loop stops on both end-of-file (0) and a read error (-1).
// NOTE(review): a read error is therefore not reported; the digest of the
// bytes read so far is returned as if it covered the whole file.
while((bytes = read(fd, buf, 512)) > 0) {
MD5_Update(&c, buf, bytes);
memset(buf, 0, 512);
}
MD5_Final(result, &c);
// Rewind so the caller can read the content again from the start.
if(-1 == lseek(fd, 0, SEEK_SET)){
free(result);
return NULL;
}
return result;
}
// Returns the MD5 digest of fd's content as lowercase hexadecimal text
// (two characters per digest byte).
//
// @param fd  descriptor passed to md5hexsum() to compute the raw digest
// @return    the hex string, or an empty string when md5hexsum() returns NULL
string md5sum(int fd)
{
char md5[2 * MD5_DIGEST_LENGTH + 1];
char hexbuf[3];
unsigned char* md5hex;
if(NULL == (md5hex = md5hexsum(fd))){
return string("");
}
// Zero the whole buffer so strncat() starts from an empty, terminated string.
memset(md5, 0, 2 * MD5_DIGEST_LENGTH + 1);
for(int i = 0; i < MD5_DIGEST_LENGTH; i++) {
snprintf(hexbuf, 3, "%02x", result[i]);
snprintf(hexbuf, 3, "%02x", md5hex[i]);
strncat(md5, hexbuf, 2);
}
free(result);
lseek(fd, 0, 0);
// The digest buffer came from md5hexsum() and is released here.
free(md5hex);
return string(md5);
}

View File

@ -120,9 +120,11 @@ size_t WriteMemoryCallback(void *ptr, size_t blockSize, size_t numBlocks, void *
size_t read_callback(void *ptr, size_t size, size_t nmemb, void *userp);
int my_curl_progress(
void *clientp, double dltotal, double dlnow, double ultotal, double ulnow);
std::string calc_signature(
std::string method, std::string content_type, std::string date, curl_slist* headers, std::string resource);
std::string calc_signature(std::string method, std::string strMD5, std::string content_type,
std::string date, curl_slist* headers, std::string resource);
void locate_bundle(void);
std::string GetContentMD5(int fd);
unsigned char* md5hexsum(int fd);
std::string md5sum(int fd);
bool InitMimeType(const char* file);
std::string lookupMimeType(std::string);

View File

@ -105,6 +105,7 @@ static bool noxmlns = false;
static bool nocopyapi = false;
static bool norenameapi = false;
static bool nonempty = false;
static bool content_md5 = false;
// if .size()==0 then local file cache is disabled
static std::string use_cache;
@ -653,7 +654,7 @@ static int get_local_fd(const char* path) {
headers.append("Content-Type: ");
if(public_bucket.substr(0,1) != "1") {
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("GET", "", date, headers.get(), resource));
calc_signature("GET", "", "", date, headers.get(), resource));
}
curl = create_curl_handle();
@ -746,12 +747,13 @@ static int put_headers(const char *path, headers_t meta) {
}
}
if(use_rrs.substr(0,1) == "1")
if(use_rrs.substr(0,1) == "1"){
headers.append("x-amz-storage-class:REDUCED_REDUNDANCY");
if(public_bucket.substr(0,1) != "1")
}
if(public_bucket.substr(0,1) != "1"){
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("PUT", ContentType, date, headers.get(), resource));
calc_signature("PUT", "", ContentType, date, headers.get(), resource));
}
curl = create_curl_handle();
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&body);
@ -889,7 +891,11 @@ static int put_local_fd_small_file(const char* path, headers_t meta, int fd) {
int result;
BodyData body;
auto_curl_slist headers;
string date = get_date();
string date = get_date();
string strMD5;
if(content_md5){
strMD5 = GetContentMD5(fd);
}
curl = create_curl_handle();
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&body);
@ -908,6 +914,10 @@ static int put_local_fd_small_file(const char* path, headers_t meta, int fd) {
headers.append("Date: " + date);
meta["x-amz-acl"] = default_acl;
if(content_md5){
headers.append("Content-MD5: " + strMD5);
}
for (headers_t::iterator iter = meta.begin(); iter != meta.end(); ++iter) {
string key = (*iter).first;
string value = (*iter).second;
@ -919,12 +929,13 @@ static int put_local_fd_small_file(const char* path, headers_t meta, int fd) {
headers.append(key + ":" + value);
}
if(use_rrs.substr(0,1) == "1")
if(use_rrs.substr(0,1) == "1"){
headers.append("x-amz-storage-class:REDUCED_REDUNDANCY");
if(public_bucket.substr(0,1) != "1")
}
if(public_bucket.substr(0,1) != "1"){
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("PUT", ContentType, date, headers.get(), resource));
calc_signature("PUT", strMD5, ContentType, date, headers.get(), resource));
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers.get());
@ -935,8 +946,9 @@ static int put_local_fd_small_file(const char* path, headers_t meta, int fd) {
result = my_curl_easy_perform(curl, &body, NULL, f);
destroy_curl_handle(curl);
if(result != 0)
if(result != 0){
return result;
}
return 0;
}
@ -1207,10 +1219,10 @@ static string initiate_multipart_upload(const char *path, off_t size, headers_t
slist = curl_slist_append(slist, "x-amz-storage-class:REDUCED_REDUNDANCY");
if(public_bucket.substr(0,1) != "1") {
auth.assign("Authorization: AWS ");
auth.append(AWSAccessKeyId);
auth.append(":");
auth.append(calc_signature("POST", ctype_data, raw_date, slist, resource));
auth.assign("Authorization: AWS ");
auth.append(AWSAccessKeyId);
auth.append(":");
auth.append(calc_signature("POST", "", ctype_data, raw_date, slist, resource));
slist = curl_slist_append(slist, auth.c_str());
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, slist);
@ -1330,7 +1342,7 @@ static int complete_multipart_upload(const char *path, string upload_id,
auth.assign("Authorization: AWS ");
auth.append(AWSAccessKeyId);
auth.append(":");
auth.append(calc_signature("POST", "", raw_date, slist, resource));
auth.append(calc_signature("POST", "", "", raw_date, slist, resource));
slist = curl_slist_append(slist, auth.c_str());
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, slist);
@ -1421,7 +1433,7 @@ static string upload_part(const char *path, const char *source, int part_number,
auth.assign("Authorization: AWS ");
auth.append(AWSAccessKeyId);
auth.append(":");
auth.append(calc_signature("PUT", "", raw_date, slist, resource));
auth.append(calc_signature("PUT", "", "", raw_date, slist, resource));
slist = curl_slist_append(slist, auth.c_str());
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, slist);
@ -1498,12 +1510,13 @@ static string copy_part(const char *from, const char *to, int part_number, strin
headers.append(key + ":" + value);
}
if(use_rrs.substr(0,1) == "1")
if(use_rrs.substr(0,1) == "1"){
headers.append("x-amz-storage-class:REDUCED_REDUNDANCY");
if(public_bucket.substr(0,1) != "1")
}
if(public_bucket.substr(0,1) != "1"){
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("PUT", ContentType, date, headers.get(), resource));
calc_signature("PUT", "", ContentType, date, headers.get(), resource));
}
curl = create_curl_handle();
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&body);
@ -1560,11 +1573,11 @@ static int list_multipart_uploads(void) {
slist = curl_slist_append(slist, date.c_str());
slist = curl_slist_append(slist, "Accept:");
if (public_bucket.substr(0,1) != "1") {
auth.assign("Authorization: AWS ");
auth.append(AWSAccessKeyId);
auth.append(":");
auth.append(calc_signature("GET", "", raw_date, slist, resource));
if(public_bucket.substr(0,1) != "1"){
auth.assign("Authorization: AWS ");
auth.append(AWSAccessKeyId);
auth.append(":");
auth.append(calc_signature("GET", "", "", raw_date, slist, resource));
slist = curl_slist_append(slist, auth.c_str());
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, slist);
@ -1662,9 +1675,10 @@ static int create_file_object(const char *path, mode_t mode, uid_t uid, gid_t gi
headers.append("x-amz-meta-mode:" + str(mode));
headers.append("x-amz-meta-mtime:" + str(time(NULL)));
headers.append("x-amz-meta-uid:" + str(uid));
if(public_bucket.substr(0,1) != "1")
if(public_bucket.substr(0,1) != "1"){
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("PUT", contentType, date, headers.get(), resource));
calc_signature("PUT", "", contentType, date, headers.get(), resource));
}
curl = create_curl_handle();
curl_easy_setopt(curl, CURLOPT_UPLOAD, true); // HTTP PUT
@ -1768,7 +1782,7 @@ static int create_directory_object(const char *path, mode_t mode, time_t time, u
}
if (public_bucket.substr(0,1) != "1") {
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("PUT", "application/x-directory", date, headers.get(), resource));
calc_signature("PUT", "", "application/x-directory", date, headers.get(), resource));
}
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers.get());
@ -3078,7 +3092,7 @@ static int list_bucket(const char *path, S3ObjList& head, const char* delimiter)
headers.append("ContentType: ");
if(public_bucket.substr(0,1) != "1") {
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("GET", "", date, headers.get(), resource + "/"));
calc_signature("GET", "", "", date, headers.get(), resource + "/"));
}
curl = create_curl_handle();
@ -3623,7 +3637,7 @@ static int s3fs_check_service(void) {
headers.append("Date: " + date);
if (public_bucket.substr(0,1) != "1") {
headers.append("Authorization: AWS " + AWSAccessKeyId + ":" +
calc_signature("GET", "", date, headers.get(), resource));
calc_signature("GET", "", "", date, headers.get(), resource));
} else {
return EXIT_SUCCESS;
}
@ -4185,6 +4199,10 @@ static int my_fuse_opt_proc(void *data, const char *arg, int key, struct fuse_ar
norenameapi = true;
return 0;
}
if(strstr(arg, "enable_content_md5") != 0) {
content_md5 = true;
return 0;
}
if (strstr(arg, "url=") != 0) {
host = strchr(arg, '=') + 1;
// strip the trailing '/', if any, off the end of the host

View File

@ -708,6 +708,9 @@ void show_help (void)
"\n"
" nomultipart - disable multipart uploads\n"
"\n"
" enable_content_md5 (default is disable)\n"
" - verifying uploaded object without multipart by content-md5 header.\n"
"\n"
" noxmlns - disable registing xml name space.\n"
" disable registing xml name space for response of \n"
" ListBucketResult and ListVersionsResult etc. Default name \n"