Merge pull request #1067 from m-holger/pl_buff

Reduce copying of Buffer contents
This commit is contained in:
Jay Berkenbilt 2023-12-16 11:31:11 -05:00 committed by GitHub
commit b0b6d9f21f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 140 additions and 66 deletions

View File

@ -24,6 +24,7 @@
#include <cstddef>
#include <memory>
#include <string>
class Buffer
{
@ -35,11 +36,15 @@ class Buffer
// object is destroyed.
QPDF_DLL
Buffer(size_t size);
QPDF_DLL
Buffer(std::string&& content);
// Create a Buffer object whose memory is owned by the caller and will not be freed when the
// Buffer is destroyed.
QPDF_DLL
Buffer(unsigned char* buf, size_t size);
QPDF_DLL
Buffer(std::string& content);
[[deprecated("Move Buffer or use Buffer::copy instead")]] QPDF_DLL Buffer(Buffer const&);
[[deprecated("Move Buffer or use Buffer::copy instead")]] QPDF_DLL Buffer&
@ -75,8 +80,10 @@ class Buffer
private:
Members(size_t size, unsigned char* buf, bool own_memory);
Members(std::string&& content);
Members(Members const&) = delete;
std::string str;
bool own_memory;
size_t size;
unsigned char* buf;

View File

@ -33,7 +33,7 @@
#include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785)
#include <memory>
#include <vector>
#include <string>
class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
{
@ -64,6 +64,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
QPDF_DLL
void getMallocBuffer(unsigned char** buf, size_t* len);
// Same as getBuffer but returns the result as a string.
QPDF_DLL
std::string getString();
private:
class QPDF_DLL_PRIVATE Members
{
@ -78,7 +82,7 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
Members(Members const&) = delete;
bool ready{true};
std::vector<unsigned char> data;
std::string data;
};
std::shared_ptr<Members> m;

View File

@ -1133,7 +1133,7 @@ class QPDF
Pipeline*& pipeline,
QPDFObjGen const& og,
QPDFObjectHandle& stream_dict,
std::vector<std::shared_ptr<Pipeline>>& heap);
std::unique_ptr<Pipeline>& heap);
// Methods to support object copying
void reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top);

View File

@ -27,6 +27,14 @@ Buffer::Members::Members(size_t size, unsigned char* buf, bool own_memory) :
}
}
Buffer::Members::Members(std::string&& content) :
str(std::move(content)),
own_memory(false),
size(str.size()),
buf(reinterpret_cast<unsigned char*>(str.data()))
{
}
Buffer::Members::~Members()
{
if (this->own_memory) {
@ -44,11 +52,21 @@ Buffer::Buffer(size_t size) :
{
}
Buffer::Buffer(std::string&& content) :
m(new Members(std::move(content)))
{
}
Buffer::Buffer(unsigned char* buf, size_t size) :
m(new Members(size, buf, false))
{
}
Buffer::Buffer(std::string& content) :
m(new Members(content.size(), reinterpret_cast<unsigned char*>(content.data()), false))
{
}
Buffer::Buffer(Buffer const& rhs)
{
assert(test_mode);

View File

@ -19,7 +19,7 @@ Pl_Buffer::~Pl_Buffer() // NOLINT (modernize-use-equals-default)
void
Pl_Buffer::write(unsigned char const* buf, size_t len)
{
m->data.insert(m->data.end(), buf, buf + len);
m->data.append(reinterpret_cast<char const*>(buf), len);
m->ready = false;
if (getNext(true)) {
@ -42,17 +42,22 @@ Pl_Buffer::getBuffer()
if (!m->ready) {
throw std::logic_error("Pl_Buffer::getBuffer() called when not ready");
}
auto size = m->data.size();
auto* b = new Buffer(size);
if (size > 0) {
unsigned char* p = b->getBuffer();
memcpy(p, m->data.data(), size);
}
auto* b = new Buffer(std::move(m->data));
m->data.clear();
return b;
}
std::string
Pl_Buffer::getString()
{
if (!m->ready) {
throw std::logic_error("Pl_Buffer::getString() called when not ready");
}
auto s = std::move(m->data);
m->data.clear();
return s;
}
std::shared_ptr<Buffer>
Pl_Buffer::getBufferSharedPointer()
{

View File

@ -2413,29 +2413,23 @@ QPDF::pipeStreamData(
bool suppress_warnings,
bool will_retry)
{
std::vector<std::shared_ptr<Pipeline>> to_delete;
std::unique_ptr<Pipeline> to_delete;
if (encp->encrypted) {
decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);
}
bool attempted_finish = false;
bool success = false;
try {
file->seek(offset, SEEK_SET);
char buf[10240];
while (length > 0) {
size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length);
size_t len = file->read(buf, to_read);
if (len == 0) {
throw damagedPDF(
file, "", file->getLastOffset(), "unexpected EOF reading stream data");
}
length -= len;
pipeline->write(buf, len);
auto buf = std::make_unique<char[]>(length);
if (auto read = file->read(buf.get(), length); read != length) {
throw damagedPDF(
file, "", offset + toO(read), "unexpected EOF reading stream data");
}
pipeline->write(buf.get(), length);
attempted_finish = true;
pipeline->finish();
success = true;
return true;
} catch (QPDFExc& e) {
if (!suppress_warnings) {
qpdf_for_warning.warn(e);
@ -2458,8 +2452,7 @@ QPDF::pipeStreamData(
file,
"",
file->getLastOffset(),
"stream will be re-processed without"
" filtering to avoid data loss"));
"stream will be re-processed without filtering to avoid data loss"));
}
}
}
@ -2470,7 +2463,7 @@ QPDF::pipeStreamData(
// ignore
}
}
return success;
return false ;
}
bool

View File

@ -584,8 +584,7 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
ResourceReplacer rr(dr_map, rf.getNamesByResourceType());
Pl_Buffer buf_pl("filtered DA");
da_stream.filterAsContents(&rr, &buf_pl);
auto buf = buf_pl.getBufferSharedPointer();
std::string new_da(reinterpret_cast<char*>(buf->getBuffer()), buf->getSize());
std::string new_da = buf_pl.getString();
obj.replaceKey("/DA", QPDFObjectHandle::newString(new_da));
}

View File

@ -1708,8 +1708,7 @@ QPDFObjectHandle::pipeContentStreams(
need_newline = (lc.getLastChar() != static_cast<unsigned char>('\n'));
QTC::TC("qpdf", "QPDFObjectHandle need_newline", need_newline ? 0 : 1);
}
std::unique_ptr<Buffer> b(buf.getBuffer());
p->write(b->getBuffer(), b->getSize());
p->writeString(buf.getString());
p->finish();
}

View File

@ -175,14 +175,11 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
size_t len = image_data.length();
if (len >= this->min_size) {
QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
Pl_Buffer b("image_data");
b.writeString(image_data);
b.finish();
QPDFObjectHandle dict = convertIIDict(QPDFObjectHandle::parse(dict_str));
dict.replaceKey("/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(len)));
std::string name = resources.getUniqueResourceName("/IIm", this->min_suffix);
QPDFObjectHandle image =
QPDFObjectHandle::newStream(this->qpdf, b.getBufferSharedPointer());
QPDFObjectHandle::newStream(this->qpdf, std::make_shared<Buffer>(std::move(image_data)));
image.replaceDict(dict);
resources.getKey("/XObject").replaceKey(name, image);
write(name);

View File

@ -1579,10 +1579,7 @@ QPDFWriter::unparseObject(
m->cur_data_key.length());
pl.writeString(val);
pl.finish();
auto buf = bufpl.getBufferSharedPointer();
val = QPDF_String(
std::string(reinterpret_cast<char*>(buf->getBuffer()), buf->getSize()))
.unparse(true);
val = QPDF_String(bufpl.getString()).unparse(true);
} else {
auto tmp_ph = QUtil::make_unique_cstr(val);
char* tmp = tmp_ph.get();

View File

@ -216,29 +216,28 @@ QPDF_Stream::getStreamJSON(
auto dict = this->stream_dict;
JSON result = JSON::makeDictionary();
if (json_data != qpdf_sj_none) {
std::shared_ptr<Buffer> buf;
Pl_Discard discard;
Pl_Buffer buf_pl{"stream data"};
// buf_pl contains valid data and is ready for retrieval of the data.
bool buf_pl_ready = false;
bool filtered = false;
bool filter = (decode_level != qpdf_dl_none);
for (int attempt = 1; attempt <= 2; ++attempt) {
Pl_Discard discard;
std::shared_ptr<Pl_Buffer> buf_pl;
Pipeline* data_pipeline = nullptr;
Pipeline* data_pipeline = &discard;
if (json_data == qpdf_sj_file) {
// We need to capture the data to write
buf_pl = std::make_shared<Pl_Buffer>("stream data");
data_pipeline = buf_pl.get();
} else {
data_pipeline = &discard;
data_pipeline = &buf_pl;
}
bool succeeded =
pipeStreamData(data_pipeline, &filtered, 0, decode_level, false, (attempt == 1));
if ((!succeeded) || (filter && (!filtered))) {
if (!succeeded || (filter && !filtered)) {
// Try again
filter = false;
decode_level = qpdf_dl_none;
buf_pl.getString(); // reset buf_pl
} else {
if (buf_pl.get()) {
buf = buf_pl->getBufferSharedPointer();
if (json_data == qpdf_sj_file) {
buf_pl_ready = true;
}
break;
}
@ -252,10 +251,10 @@ QPDF_Stream::getStreamJSON(
}
if (json_data == qpdf_sj_file) {
result.addDictionaryMember("datafile", JSON::makeString(data_filename));
if (!buf.get()) {
if (!buf_pl_ready) {
throw std::logic_error("QPDF_Stream: failed to get stream data in json file mode");
}
p->write(buf->getBuffer(), buf->getSize());
p->writeString(buf_pl.getString());
} else if (json_data == qpdf_sj_inline) {
result.addDictionaryMember(
"data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));

View File

@ -229,13 +229,11 @@ process_with_aes(
aes.writeString(data);
}
aes.finish();
auto bufp = buffer.getBufferSharedPointer();
if (outlength == 0) {
outlength = bufp->getSize();
return buffer.getString();
} else {
outlength = std::min(outlength, bufp->getSize());
return buffer.getString().substr(0, outlength);
}
return {reinterpret_cast<char*>(bufp->getBuffer()), outlength};
}
static std::string
@ -1021,8 +1019,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og)
key.length());
pl.writeString(str);
pl.finish();
auto buf = bufpl.getBufferSharedPointer();
str = std::string(reinterpret_cast<char*>(buf->getBuffer()), buf->getSize());
str = bufpl.getString();
} else {
QTC::TC("qpdf", "QPDF_encryption rc4 decode string");
size_t vlen = str.length();
@ -1041,6 +1038,9 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og)
}
}
// Prepend a decryption pipeline to 'pipeline'. The decryption pipeline (returned as
// 'decrypt_pipeline' must be owned by the caller to ensure that it stays alive while the pipeline
// is in use.
void
QPDF::decryptStream(
std::shared_ptr<EncryptionParameters> encp,
@ -1049,7 +1049,7 @@ QPDF::decryptStream(
Pipeline*& pipeline,
QPDFObjGen const& og,
QPDFObjectHandle& stream_dict,
std::vector<std::shared_ptr<Pipeline>>& heap)
std::unique_ptr<Pipeline>& decrypt_pipeline)
{
std::string type;
if (stream_dict.getKey("/Type").isName()) {
@ -1085,8 +1085,7 @@ QPDF::decryptStream(
crypt_params.getKey("/Name").isName()) {
QTC::TC("qpdf", "QPDF_encrypt crypt array");
method = interpretCF(encp, crypt_params.getKey("/Name"));
method_source = "stream's Crypt "
"decode parameters (array)";
method_source = "stream's Crypt decode parameters (array)";
}
}
}
@ -1135,10 +1134,9 @@ QPDF::decryptStream(
}
}
std::string key = getKeyForObject(encp, og, use_aes);
std::shared_ptr<Pipeline> new_pipeline;
if (use_aes) {
QTC::TC("qpdf", "QPDF_encryption aes decode stream");
new_pipeline = std::make_shared<Pl_AES_PDF>(
decrypt_pipeline = std::make_unique<Pl_AES_PDF>(
"AES stream decryption",
pipeline,
false,
@ -1146,14 +1144,13 @@ QPDF::decryptStream(
key.length());
} else {
QTC::TC("qpdf", "QPDF_encryption rc4 decode stream");
new_pipeline = std::make_shared<Pl_RC4>(
decrypt_pipeline = std::make_unique<Pl_RC4>(
"RC4 stream decryption",
pipeline,
QUtil::unsigned_char_pointer(key),
toI(key.length()));
}
pipeline = new_pipeline.get();
heap.push_back(new_pipeline);
pipeline = decrypt_pipeline.get();
}
void

View File

@ -35,6 +35,21 @@ main()
assert(bc2p != bc1p);
assert(bc2p[0] == 'R');
assert(bc2p[1] == 'W');
// Test Buffer(std:string&&)
Buffer bc3("QW");
unsigned char* bc3p = bc3.getBuffer();
Buffer bc4(bc3.copy());
bc3p[0] = 'R';
unsigned char* bc4p = bc4.getBuffer();
assert(bc4p != bc3p);
assert(bc4p[0] == 'Q');
assert(bc4p[1] == 'W');
bc4 = bc3.copy();
bc4p = bc4.getBuffer();
assert(bc4p != bc3p);
assert(bc4p[0] == 'R');
assert(bc4p[1] == 'W');
}
#ifdef _MSC_VER
@ -62,6 +77,37 @@ main()
assert(bc2p != bc1p);
assert(bc2p[0] == 'R');
assert(bc2p[1] == 'W');
// Test Buffer(std:string&&)
Buffer bc3("QW");
unsigned char* bc3p = bc3.getBuffer();
Buffer bc4(bc3);
bc3p[0] = 'R';
unsigned char* bc4p = bc4.getBuffer();
assert(bc4p != bc3p);
assert(bc4p[0] == 'Q');
assert(bc4p[1] == 'W');
bc4 = bc3;
bc4p = bc4.getBuffer();
assert(bc4p != bc3p);
assert(bc2p[0] == 'R');
assert(bc2p[1] == 'W');
// Test Buffer(std:string&)
std::string s{"QW"};
Buffer bc5(s);
unsigned char* bc5p = bc5.getBuffer();
Buffer bc6(bc5);
bc5p[0] = 'R';
unsigned char* bc6p = bc6.getBuffer();
assert(bc6p != bc5p);
assert(bc6p[0] == 'Q');
assert(bc6p[1] == 'W');
bc6 = bc5;
bc6p = bc6.getBuffer();
assert(bc6p != bc5p);
assert(bc2p[0] == 'R');
assert(bc2p[1] == 'W');
}
#if (defined(__GNUC__) || defined(__clang__))
# pragma GCC diagnostic pop
@ -82,6 +128,19 @@ main()
Buffer bm3 = std::move(bm2);
unsigned char* bm3p = bm3.getBuffer();
assert(bm3p == bm2p);
// Test Buffer(dtd::string&&)
Buffer bm4("QW");
unsigned char* bm4p = bm4.getBuffer();
Buffer bm5(std::move(bm4));
bm4p[0] = 'R';
unsigned char* bm5p = bm5.getBuffer();
assert(bm5p == bm4p);
assert(bm5p[0] == 'R');
Buffer bm6 = std::move(bm5);
unsigned char* bm6p = bm6.getBuffer();
assert(bm6p == bm5p);
}
try {