diff --git a/ChangeLog b/ChangeLog index 0fa9d10b..7b1f63c0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2018-08-12 Jay Berkenbilt + + * Rewrite the internals of Pl_Buffer to be much more efficient in + use of memory at a very slight performance cost. The old + implementation could cause memory usage to go out of control for + files with large images compressed using the TIFF predictor. + Fixes #228. + 2018-08-05 Jay Berkenbilt * Bug fix: end of line characters were not properly handled inside diff --git a/TODO b/TODO index 3c17bafb..2c8f779a 100644 --- a/TODO +++ b/TODO @@ -31,6 +31,16 @@ Soon - See ../misc/broken-files +Next ABI +======== + +Do these things next time we have to break binary compatibility + + * Pl_Buffer's internal structure is not right for what it does. It + was modified for greater efficiency, but it was done in a way that + preserved binary compatibility, so the implementation is a bit + convoluted. + Lexical ======= @@ -72,6 +82,8 @@ directory or that are otherwise not publicly accessible. This includes things sent to me by email that are specifically not public. Even so, I find it useful to make reference to them in this list + * Pl_TIFFPredictor is pretty slow. + * Some test cases on bad fails fail because qpdf is unable to find the root dictionary when it fails to read the trailer. Recovery could find the root dictionary and even the info dictionary in diff --git a/libqpdf/Pl_Buffer.cc b/libqpdf/Pl_Buffer.cc index 45c0c862..1ca9292d 100644 --- a/libqpdf/Pl_Buffer.cc +++ b/libqpdf/Pl_Buffer.cc @@ -17,11 +17,32 @@ Pl_Buffer::~Pl_Buffer() void Pl_Buffer::write(unsigned char* buf, size_t len) { - Buffer* b = new Buffer(len); - memcpy(b->getBuffer(), buf, len); - this->data.push_back(b); + PointerHolder cur_buf; + size_t cur_size = 0; + if (! this->data.empty()) + { + cur_buf = this->data.back(); + cur_size = cur_buf->getSize(); + } + size_t left = cur_size - this->total_size; + if (left < len) + { + size_t new_size = std::max(this->total_size + len, 2 * cur_size); + Buffer* b = new Buffer(new_size); + if (cur_buf.getPointer()) + { + memcpy(b->getBuffer(), cur_buf->getBuffer(), this->total_size); + } + this->data.clear(); + cur_buf = b; + this->data.push_back(cur_buf); + } + if (len) + { + memcpy(cur_buf->getBuffer() + this->total_size, buf, len); + this->total_size += len; + } this->ready = false; - this->total_size += len; if (getNext(true)) { @@ -49,17 +70,13 @@ Pl_Buffer::getBuffer() Buffer* b = new Buffer(this->total_size); unsigned char* p = b->getBuffer(); - while (! this->data.empty()) + if (! this->data.empty()) { - PointerHolder bp = this->data.front(); - this->data.pop_front(); - size_t bytes = bp->getSize(); - memcpy(p, bp->getBuffer(), bytes); - p += bytes; - this->total_size -= bytes; + PointerHolder bp = this->data.back(); + this->data.clear(); + memcpy(p, bp->getBuffer(), this->total_size); } - - assert(this->total_size == 0); + this->total_size = 0; this->ready = false; return b;