2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-03 19:00:51 +00:00

Let optimize() filter stream parameters instead of making them direct

Also removes preclusion of stream references in stream parameters of
filterable streams and reduces write times by about 8% by eliminating
an extra traversal of the objects.
This commit is contained in:
Jay Berkenbilt 2020-12-25 09:51:20 -05:00
parent 1a62cce940
commit 858c7b89bc
5 changed files with 53 additions and 114 deletions

View File

@ -1,3 +1,11 @@
2020-12-25 Jay Berkenbilt <ejb@ql.org>
* Refactor write code to eliminate an extra full traversal of
objects in the file and to remove assumptions that preclude stream
references from appearing in /DecodeParms of filterable streams.
This results in an approximately 8% reduction in write
times.
2020-12-23 Jay Berkenbilt <ejb@ql.org> 2020-12-23 Jay Berkenbilt <ejb@ql.org>
* Allow library users to provide their own decoders for stream * Allow library users to provide their own decoders for stream

View File

@ -2452,115 +2452,36 @@ QPDFWriter::getTrimmedTrailer()
void void
QPDFWriter::prepareFileForWrite() QPDFWriter::prepareFileForWrite()
{ {
// Do a traversal of the entire PDF file structure replacing all // Make document extension level information direct as required by
// indirect objects that QPDFWriter wants to be direct. This // the spec.
// includes stream lengths, stream filtering parameters, and
// document extension level information.
this->m->pdf.fixDanglingReferences(true); this->m->pdf.fixDanglingReferences(true);
std::list<QPDFObjectHandle> queue; QPDFObjectHandle root = this->m->pdf.getRoot();
queue.push_back(getTrimmedTrailer()); for (auto const& key: root.getKeys())
std::set<int> visited;
while (! queue.empty())
{ {
QPDFObjectHandle node = queue.front(); QPDFObjectHandle oh = root.getKey(key);
queue.pop_front(); if ((key == "/Extensions") && (oh.isDictionary()))
if (node.isIndirect()) {
{ bool extensions_indirect = false;
if (visited.count(node.getObjectID()) > 0) if (oh.isIndirect())
{
continue;
}
indicateProgress(false, false);
visited.insert(node.getObjectID());
}
if (node.isArray())
{
int nitems = node.getArrayNItems();
for (int i = 0; i < nitems; ++i)
{
QPDFObjectHandle oh = node.getArrayItem(i);
if (! oh.isScalar())
{
queue.push_back(oh);
}
}
}
else if (node.isDictionary() || node.isStream())
{
bool is_stream = false;
bool is_root = false;
bool filterable = false;
QPDFObjectHandle dict = node;
if (node.isStream())
{
is_stream = true;
dict = node.getDict();
// See whether we are able to filter this stream.
filterable = node.pipeStreamData(
0, 0, this->m->stream_decode_level, true);
}
else if (this->m->pdf.getRoot().getObjectID() == node.getObjectID())
{ {
is_root = true; QTC::TC("qpdf", "QPDFWriter make Extensions direct");
extensions_indirect = true;
oh = oh.shallowCopy();
root.replaceKey(key, oh);
} }
if (oh.hasKey("/ADBE"))
std::set<std::string> keys = dict.getKeys(); {
for (std::set<std::string>::iterator iter = keys.begin(); QPDFObjectHandle adbe = oh.getKey("/ADBE");
iter != keys.end(); ++iter) if (adbe.isIndirect())
{
std::string const& key = *iter;
QPDFObjectHandle oh = dict.getKey(key);
bool add_to_queue = true;
if (is_stream)
{ {
if (oh.isIndirect() && QTC::TC("qpdf", "QPDFWriter make ADBE direct",
((key == "/Length") || extensions_indirect ? 0 : 1);
(filterable && adbe.makeDirect();
((key == "/Filter") || oh.replaceKey("/ADBE", adbe);
(key == "/DecodeParms")))))
{
QTC::TC("qpdf", "QPDFWriter make stream key direct");
add_to_queue = false;
oh.makeDirect();
dict.replaceKey(key, oh);
}
} }
else if (is_root) }
{ }
if ((key == "/Extensions") && (oh.isDictionary()))
{
bool extensions_indirect = false;
if (oh.isIndirect())
{
QTC::TC("qpdf", "QPDFWriter make Extensions direct");
extensions_indirect = true;
add_to_queue = false;
oh = oh.shallowCopy();
dict.replaceKey(key, oh);
}
if (oh.hasKey("/ADBE"))
{
QPDFObjectHandle adbe = oh.getKey("/ADBE");
if (adbe.isIndirect())
{
QTC::TC("qpdf", "QPDFWriter make ADBE direct",
extensions_indirect ? 0 : 1);
adbe.makeDirect();
oh.replaceKey("/ADBE", adbe);
}
}
}
}
if (add_to_queue)
{
queue.push_back(oh);
}
}
}
} }
} }
@ -2737,14 +2658,11 @@ QPDFWriter::write()
{ {
doWriteSetup(); doWriteSetup();
// Set up progress reporting. We spent about equal amounts of time // Set up progress reporting. For linearized files, we write two
// preparing and writing one pass. To get a rough estimate of // passes. events_expected is an approximation, but it's good
// progress, we track handling of indirect objects. For linearized // enough for progress reporting, which is mostly a guess anyway.
// files, we write two passes. events_expected is an
// approximation, but it's good enough for progress reporting,
// which is mostly a guess anyway.
this->m->events_expected = QIntC::to_int( this->m->events_expected = QIntC::to_int(
this->m->pdf.getObjectCount() * (this->m->linearized ? 3 : 2)); this->m->pdf.getObjectCount() * (this->m->linearized ? 2 : 1));
prepareFileForWrite(); prepareFileForWrite();
@ -3138,8 +3056,21 @@ QPDFWriter::writeLinearized()
discardGeneration(this->m->object_to_object_stream, discardGeneration(this->m->object_to_object_stream,
this->m->object_to_object_stream_no_gen); this->m->object_to_object_stream_no_gen);
bool need_xref_stream = (! this->m->object_to_object_stream.empty()); auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
this->m->pdf.optimize(this->m->object_to_object_stream_no_gen); bool compress_stream;
bool is_metadata;
if (willFilterStream(stream, compress_stream, is_metadata, nullptr))
{
return 2;
}
else
{
return 1;
}
};
this->m->pdf.optimize(this->m->object_to_object_stream_no_gen,
true, skip_stream_parameters);
std::vector<QPDFObjectHandle> part4; std::vector<QPDFObjectHandle> part4;
std::vector<QPDFObjectHandle> part6; std::vector<QPDFObjectHandle> part6;
@ -3173,6 +3104,7 @@ QPDFWriter::writeLinearized()
int after_second_half = 1 + second_half_uncompressed; int after_second_half = 1 + second_half_uncompressed;
this->m->next_objid = after_second_half; this->m->next_objid = after_second_half;
int second_half_xref = 0; int second_half_xref = 0;
bool need_xref_stream = (! this->m->object_to_object_stream.empty());
if (need_xref_stream) if (need_xref_stream)
{ {
second_half_xref = this->m->next_objid++; second_half_xref = this->m->next_objid++;

View File

@ -234,7 +234,6 @@ QPDFWriter extra header text no newline 0
QPDFWriter extra header text add newline 0 QPDFWriter extra header text add newline 0
QPDF bogus 0 offset 0 QPDF bogus 0 offset 0
QPDF global offset 0 QPDF global offset 0
QPDFWriter make stream key direct 0
QPDFWriter copy V5 0 QPDFWriter copy V5 0
QPDFWriter increasing extension level 0 QPDFWriter increasing extension level 0
QPDFWriter make Extensions direct 0 QPDFWriter make Extensions direct 0

View File

@ -716,7 +716,7 @@ my @bug_tests = (
["99b", "object 0", 2], ["99b", "object 0", 2],
["100", "xref reconstruction loop", 2], ["100", "xref reconstruction loop", 2],
["101", "resolve for exception text", 2], ["101", "resolve for exception text", 2],
["117", "other infinite loop", 2], ["117", "other infinite loop", 3],
["118", "other infinite loop", 2], ["118", "other infinite loop", 2],
["119", "other infinite loop", 3], ["119", "other infinite loop", 3],
["120", "other infinite loop", 3], ["120", "other infinite loop", 3],

View File

@ -13,4 +13,4 @@ WARNING: issue-117.pdf (object 7 0, offset 1791): unknown token while reading ob
WARNING: issue-117.pdf (object 7 0, offset 1267): /Length key in stream dictionary is not an integer WARNING: issue-117.pdf (object 7 0, offset 1267): /Length key in stream dictionary is not an integer
WARNING: issue-117.pdf (object 7 0, offset 1418): attempting to recover stream length WARNING: issue-117.pdf (object 7 0, offset 1418): attempting to recover stream length
WARNING: issue-117.pdf (object 7 0, offset 1418): recovered stream length: 347 WARNING: issue-117.pdf (object 7 0, offset 1418): recovered stream length: 347
attempt to make a stream into a direct object qpdf: operation succeeded with warnings; resulting file may have some problems