mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Let optimize filter stream parameters instead of making them direct
Also removes preclusion of stream references in stream parameters of filterable streams and reduces write times by about 8% by eliminating an extra traversal of the objects.
This commit is contained in:
parent
1a62cce940
commit
858c7b89bc
@ -1,3 +1,11 @@
|
||||
2020-12-25 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Refactor write code to eliminate an extra full traversal of
|
||||
objects in the file and to remove assumptions that preclude stream
|
||||
references from appearing in /DecodeParms of filterable streams.
|
||||
This results in an approximately 8% performance reduction in write
|
||||
times.
|
||||
|
||||
2020-12-23 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Allow library users to provide their own decoders for stream
|
||||
|
@ -2452,84 +2452,14 @@ QPDFWriter::getTrimmedTrailer()
|
||||
void
|
||||
QPDFWriter::prepareFileForWrite()
|
||||
{
|
||||
// Do a traversal of the entire PDF file structure replacing all
|
||||
// indirect objects that QPDFWriter wants to be direct. This
|
||||
// includes stream lengths, stream filtering parameters, and
|
||||
// document extension level information.
|
||||
// Make document extension level information direct as required by
|
||||
// the spec.
|
||||
|
||||
this->m->pdf.fixDanglingReferences(true);
|
||||
std::list<QPDFObjectHandle> queue;
|
||||
queue.push_back(getTrimmedTrailer());
|
||||
std::set<int> visited;
|
||||
|
||||
while (! queue.empty())
|
||||
{
|
||||
QPDFObjectHandle node = queue.front();
|
||||
queue.pop_front();
|
||||
if (node.isIndirect())
|
||||
{
|
||||
if (visited.count(node.getObjectID()) > 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
indicateProgress(false, false);
|
||||
visited.insert(node.getObjectID());
|
||||
}
|
||||
|
||||
if (node.isArray())
|
||||
{
|
||||
int nitems = node.getArrayNItems();
|
||||
for (int i = 0; i < nitems; ++i)
|
||||
{
|
||||
QPDFObjectHandle oh = node.getArrayItem(i);
|
||||
if (! oh.isScalar())
|
||||
{
|
||||
queue.push_back(oh);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (node.isDictionary() || node.isStream())
|
||||
{
|
||||
bool is_stream = false;
|
||||
bool is_root = false;
|
||||
bool filterable = false;
|
||||
QPDFObjectHandle dict = node;
|
||||
if (node.isStream())
|
||||
{
|
||||
is_stream = true;
|
||||
dict = node.getDict();
|
||||
// See whether we are able to filter this stream.
|
||||
filterable = node.pipeStreamData(
|
||||
0, 0, this->m->stream_decode_level, true);
|
||||
}
|
||||
else if (this->m->pdf.getRoot().getObjectID() == node.getObjectID())
|
||||
{
|
||||
is_root = true;
|
||||
}
|
||||
|
||||
std::set<std::string> keys = dict.getKeys();
|
||||
for (std::set<std::string>::iterator iter = keys.begin();
|
||||
iter != keys.end(); ++iter)
|
||||
{
|
||||
std::string const& key = *iter;
|
||||
QPDFObjectHandle oh = dict.getKey(key);
|
||||
bool add_to_queue = true;
|
||||
if (is_stream)
|
||||
{
|
||||
if (oh.isIndirect() &&
|
||||
((key == "/Length") ||
|
||||
(filterable &&
|
||||
((key == "/Filter") ||
|
||||
(key == "/DecodeParms")))))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter make stream key direct");
|
||||
add_to_queue = false;
|
||||
oh.makeDirect();
|
||||
dict.replaceKey(key, oh);
|
||||
}
|
||||
}
|
||||
else if (is_root)
|
||||
QPDFObjectHandle root = this->m->pdf.getRoot();
|
||||
for (auto const& key: root.getKeys())
|
||||
{
|
||||
QPDFObjectHandle oh = root.getKey(key);
|
||||
if ((key == "/Extensions") && (oh.isDictionary()))
|
||||
{
|
||||
bool extensions_indirect = false;
|
||||
@ -2537,9 +2467,8 @@ QPDFWriter::prepareFileForWrite()
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter make Extensions direct");
|
||||
extensions_indirect = true;
|
||||
add_to_queue = false;
|
||||
oh = oh.shallowCopy();
|
||||
dict.replaceKey(key, oh);
|
||||
root.replaceKey(key, oh);
|
||||
}
|
||||
if (oh.hasKey("/ADBE"))
|
||||
{
|
||||
@ -2554,14 +2483,6 @@ QPDFWriter::prepareFileForWrite()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (add_to_queue)
|
||||
{
|
||||
queue.push_back(oh);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -2737,14 +2658,11 @@ QPDFWriter::write()
|
||||
{
|
||||
doWriteSetup();
|
||||
|
||||
// Set up progress reporting. We spent about equal amounts of time
|
||||
// preparing and writing one pass. To get a rough estimate of
|
||||
// progress, we track handling of indirect objects. For linearized
|
||||
// files, we write two passes. events_expected is an
|
||||
// approximation, but it's good enough for progress reporting,
|
||||
// which is mostly a guess anyway.
|
||||
// Set up progress reporting. For linearized files, we write two
|
||||
// passes. events_expected is an approximation, but it's good
|
||||
// enough for progress reporting, which is mostly a guess anyway.
|
||||
this->m->events_expected = QIntC::to_int(
|
||||
this->m->pdf.getObjectCount() * (this->m->linearized ? 3 : 2));
|
||||
this->m->pdf.getObjectCount() * (this->m->linearized ? 2 : 1));
|
||||
|
||||
prepareFileForWrite();
|
||||
|
||||
@ -3138,8 +3056,21 @@ QPDFWriter::writeLinearized()
|
||||
discardGeneration(this->m->object_to_object_stream,
|
||||
this->m->object_to_object_stream_no_gen);
|
||||
|
||||
bool need_xref_stream = (! this->m->object_to_object_stream.empty());
|
||||
this->m->pdf.optimize(this->m->object_to_object_stream_no_gen);
|
||||
auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
|
||||
bool compress_stream;
|
||||
bool is_metadata;
|
||||
if (willFilterStream(stream, compress_stream, is_metadata, nullptr))
|
||||
{
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
this->m->pdf.optimize(this->m->object_to_object_stream_no_gen,
|
||||
true, skip_stream_parameters);
|
||||
|
||||
std::vector<QPDFObjectHandle> part4;
|
||||
std::vector<QPDFObjectHandle> part6;
|
||||
@ -3173,6 +3104,7 @@ QPDFWriter::writeLinearized()
|
||||
int after_second_half = 1 + second_half_uncompressed;
|
||||
this->m->next_objid = after_second_half;
|
||||
int second_half_xref = 0;
|
||||
bool need_xref_stream = (! this->m->object_to_object_stream.empty());
|
||||
if (need_xref_stream)
|
||||
{
|
||||
second_half_xref = this->m->next_objid++;
|
||||
|
@ -234,7 +234,6 @@ QPDFWriter extra header text no newline 0
|
||||
QPDFWriter extra header text add newline 0
|
||||
QPDF bogus 0 offset 0
|
||||
QPDF global offset 0
|
||||
QPDFWriter make stream key direct 0
|
||||
QPDFWriter copy V5 0
|
||||
QPDFWriter increasing extension level 0
|
||||
QPDFWriter make Extensions direct 0
|
||||
|
@ -716,7 +716,7 @@ my @bug_tests = (
|
||||
["99b", "object 0", 2],
|
||||
["100", "xref reconstruction loop", 2],
|
||||
["101", "resolve for exception text", 2],
|
||||
["117", "other infinite loop", 2],
|
||||
["117", "other infinite loop", 3],
|
||||
["118", "other infinite loop", 2],
|
||||
["119", "other infinite loop", 3],
|
||||
["120", "other infinite loop", 3],
|
||||
|
@ -13,4 +13,4 @@ WARNING: issue-117.pdf (object 7 0, offset 1791): unknown token while reading ob
|
||||
WARNING: issue-117.pdf (object 7 0, offset 1267): /Length key in stream dictionary is not an integer
|
||||
WARNING: issue-117.pdf (object 7 0, offset 1418): attempting to recover stream length
|
||||
WARNING: issue-117.pdf (object 7 0, offset 1418): recovered stream length: 347
|
||||
attempt to make a stream into a direct object
|
||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
||||
|
Loading…
Reference in New Issue
Block a user