mirror of
https://github.com/qpdf/qpdf.git
synced 2025-04-02 14:41:50 +00:00
Let optimize() filter out stream parameters instead of making them direct
Also removes the restriction that precluded stream references in the stream parameters of filterable streams, and reduces write times by about 8% by eliminating an extra traversal of the objects.
This commit is contained in:
parent
1a62cce940
commit
858c7b89bc
@ -1,3 +1,11 @@
|
|||||||
|
2020-12-25 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Refactor write code to eliminate an extra full traversal of
|
||||||
|
objects in the file and to remove assumptions that preclude stream
|
||||||
|
references from appearing in /DecodeParms of filterable streams.
|
||||||
|
This results in an approximately 8% reduction in write
|
||||||
|
times.
|
||||||
|
|
||||||
2020-12-23 Jay Berkenbilt <ejb@ql.org>
|
2020-12-23 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Allow library users to provide their own decoders for stream
|
* Allow library users to provide their own decoders for stream
|
||||||
|
@ -2452,84 +2452,14 @@ QPDFWriter::getTrimmedTrailer()
|
|||||||
void
|
void
|
||||||
QPDFWriter::prepareFileForWrite()
|
QPDFWriter::prepareFileForWrite()
|
||||||
{
|
{
|
||||||
// Do a traversal of the entire PDF file structure replacing all
|
// Make document extension level information direct as required by
|
||||||
// indirect objects that QPDFWriter wants to be direct. This
|
// the spec.
|
||||||
// includes stream lengths, stream filtering parameters, and
|
|
||||||
// document extension level information.
|
|
||||||
|
|
||||||
this->m->pdf.fixDanglingReferences(true);
|
this->m->pdf.fixDanglingReferences(true);
|
||||||
std::list<QPDFObjectHandle> queue;
|
QPDFObjectHandle root = this->m->pdf.getRoot();
|
||||||
queue.push_back(getTrimmedTrailer());
|
for (auto const& key: root.getKeys())
|
||||||
std::set<int> visited;
|
|
||||||
|
|
||||||
while (! queue.empty())
|
|
||||||
{
|
|
||||||
QPDFObjectHandle node = queue.front();
|
|
||||||
queue.pop_front();
|
|
||||||
if (node.isIndirect())
|
|
||||||
{
|
|
||||||
if (visited.count(node.getObjectID()) > 0)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
indicateProgress(false, false);
|
|
||||||
visited.insert(node.getObjectID());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (node.isArray())
|
|
||||||
{
|
|
||||||
int nitems = node.getArrayNItems();
|
|
||||||
for (int i = 0; i < nitems; ++i)
|
|
||||||
{
|
|
||||||
QPDFObjectHandle oh = node.getArrayItem(i);
|
|
||||||
if (! oh.isScalar())
|
|
||||||
{
|
|
||||||
queue.push_back(oh);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (node.isDictionary() || node.isStream())
|
|
||||||
{
|
|
||||||
bool is_stream = false;
|
|
||||||
bool is_root = false;
|
|
||||||
bool filterable = false;
|
|
||||||
QPDFObjectHandle dict = node;
|
|
||||||
if (node.isStream())
|
|
||||||
{
|
|
||||||
is_stream = true;
|
|
||||||
dict = node.getDict();
|
|
||||||
// See whether we are able to filter this stream.
|
|
||||||
filterable = node.pipeStreamData(
|
|
||||||
0, 0, this->m->stream_decode_level, true);
|
|
||||||
}
|
|
||||||
else if (this->m->pdf.getRoot().getObjectID() == node.getObjectID())
|
|
||||||
{
|
|
||||||
is_root = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::set<std::string> keys = dict.getKeys();
|
|
||||||
for (std::set<std::string>::iterator iter = keys.begin();
|
|
||||||
iter != keys.end(); ++iter)
|
|
||||||
{
|
|
||||||
std::string const& key = *iter;
|
|
||||||
QPDFObjectHandle oh = dict.getKey(key);
|
|
||||||
bool add_to_queue = true;
|
|
||||||
if (is_stream)
|
|
||||||
{
|
|
||||||
if (oh.isIndirect() &&
|
|
||||||
((key == "/Length") ||
|
|
||||||
(filterable &&
|
|
||||||
((key == "/Filter") ||
|
|
||||||
(key == "/DecodeParms")))))
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDFWriter make stream key direct");
|
|
||||||
add_to_queue = false;
|
|
||||||
oh.makeDirect();
|
|
||||||
dict.replaceKey(key, oh);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (is_root)
|
|
||||||
{
|
{
|
||||||
|
QPDFObjectHandle oh = root.getKey(key);
|
||||||
if ((key == "/Extensions") && (oh.isDictionary()))
|
if ((key == "/Extensions") && (oh.isDictionary()))
|
||||||
{
|
{
|
||||||
bool extensions_indirect = false;
|
bool extensions_indirect = false;
|
||||||
@ -2537,9 +2467,8 @@ QPDFWriter::prepareFileForWrite()
|
|||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDFWriter make Extensions direct");
|
QTC::TC("qpdf", "QPDFWriter make Extensions direct");
|
||||||
extensions_indirect = true;
|
extensions_indirect = true;
|
||||||
add_to_queue = false;
|
|
||||||
oh = oh.shallowCopy();
|
oh = oh.shallowCopy();
|
||||||
dict.replaceKey(key, oh);
|
root.replaceKey(key, oh);
|
||||||
}
|
}
|
||||||
if (oh.hasKey("/ADBE"))
|
if (oh.hasKey("/ADBE"))
|
||||||
{
|
{
|
||||||
@ -2554,14 +2483,6 @@ QPDFWriter::prepareFileForWrite()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (add_to_queue)
|
|
||||||
{
|
|
||||||
queue.push_back(oh);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -2737,14 +2658,11 @@ QPDFWriter::write()
|
|||||||
{
|
{
|
||||||
doWriteSetup();
|
doWriteSetup();
|
||||||
|
|
||||||
// Set up progress reporting. We spent about equal amounts of time
|
// Set up progress reporting. For linearized files, we write two
|
||||||
// preparing and writing one pass. To get a rough estimate of
|
// passes. events_expected is an approximation, but it's good
|
||||||
// progress, we track handling of indirect objects. For linearized
|
// enough for progress reporting, which is mostly a guess anyway.
|
||||||
// files, we write two passes. events_expected is an
|
|
||||||
// approximation, but it's good enough for progress reporting,
|
|
||||||
// which is mostly a guess anyway.
|
|
||||||
this->m->events_expected = QIntC::to_int(
|
this->m->events_expected = QIntC::to_int(
|
||||||
this->m->pdf.getObjectCount() * (this->m->linearized ? 3 : 2));
|
this->m->pdf.getObjectCount() * (this->m->linearized ? 2 : 1));
|
||||||
|
|
||||||
prepareFileForWrite();
|
prepareFileForWrite();
|
||||||
|
|
||||||
@ -3138,8 +3056,21 @@ QPDFWriter::writeLinearized()
|
|||||||
discardGeneration(this->m->object_to_object_stream,
|
discardGeneration(this->m->object_to_object_stream,
|
||||||
this->m->object_to_object_stream_no_gen);
|
this->m->object_to_object_stream_no_gen);
|
||||||
|
|
||||||
bool need_xref_stream = (! this->m->object_to_object_stream.empty());
|
auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
|
||||||
this->m->pdf.optimize(this->m->object_to_object_stream_no_gen);
|
bool compress_stream;
|
||||||
|
bool is_metadata;
|
||||||
|
if (willFilterStream(stream, compress_stream, is_metadata, nullptr))
|
||||||
|
{
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
this->m->pdf.optimize(this->m->object_to_object_stream_no_gen,
|
||||||
|
true, skip_stream_parameters);
|
||||||
|
|
||||||
std::vector<QPDFObjectHandle> part4;
|
std::vector<QPDFObjectHandle> part4;
|
||||||
std::vector<QPDFObjectHandle> part6;
|
std::vector<QPDFObjectHandle> part6;
|
||||||
@ -3173,6 +3104,7 @@ QPDFWriter::writeLinearized()
|
|||||||
int after_second_half = 1 + second_half_uncompressed;
|
int after_second_half = 1 + second_half_uncompressed;
|
||||||
this->m->next_objid = after_second_half;
|
this->m->next_objid = after_second_half;
|
||||||
int second_half_xref = 0;
|
int second_half_xref = 0;
|
||||||
|
bool need_xref_stream = (! this->m->object_to_object_stream.empty());
|
||||||
if (need_xref_stream)
|
if (need_xref_stream)
|
||||||
{
|
{
|
||||||
second_half_xref = this->m->next_objid++;
|
second_half_xref = this->m->next_objid++;
|
||||||
|
@ -234,7 +234,6 @@ QPDFWriter extra header text no newline 0
|
|||||||
QPDFWriter extra header text add newline 0
|
QPDFWriter extra header text add newline 0
|
||||||
QPDF bogus 0 offset 0
|
QPDF bogus 0 offset 0
|
||||||
QPDF global offset 0
|
QPDF global offset 0
|
||||||
QPDFWriter make stream key direct 0
|
|
||||||
QPDFWriter copy V5 0
|
QPDFWriter copy V5 0
|
||||||
QPDFWriter increasing extension level 0
|
QPDFWriter increasing extension level 0
|
||||||
QPDFWriter make Extensions direct 0
|
QPDFWriter make Extensions direct 0
|
||||||
|
@ -716,7 +716,7 @@ my @bug_tests = (
|
|||||||
["99b", "object 0", 2],
|
["99b", "object 0", 2],
|
||||||
["100", "xref reconstruction loop", 2],
|
["100", "xref reconstruction loop", 2],
|
||||||
["101", "resolve for exception text", 2],
|
["101", "resolve for exception text", 2],
|
||||||
["117", "other infinite loop", 2],
|
["117", "other infinite loop", 3],
|
||||||
["118", "other infinite loop", 2],
|
["118", "other infinite loop", 2],
|
||||||
["119", "other infinite loop", 3],
|
["119", "other infinite loop", 3],
|
||||||
["120", "other infinite loop", 3],
|
["120", "other infinite loop", 3],
|
||||||
|
@ -13,4 +13,4 @@ WARNING: issue-117.pdf (object 7 0, offset 1791): unknown token while reading ob
|
|||||||
WARNING: issue-117.pdf (object 7 0, offset 1267): /Length key in stream dictionary is not an integer
|
WARNING: issue-117.pdf (object 7 0, offset 1267): /Length key in stream dictionary is not an integer
|
||||||
WARNING: issue-117.pdf (object 7 0, offset 1418): attempting to recover stream length
|
WARNING: issue-117.pdf (object 7 0, offset 1418): attempting to recover stream length
|
||||||
WARNING: issue-117.pdf (object 7 0, offset 1418): recovered stream length: 347
|
WARNING: issue-117.pdf (object 7 0, offset 1418): recovered stream length: 347
|
||||||
attempt to make a stream into a direct object
|
qpdf: operation succeeded with warnings; resulting file may have some problems
|
||||||
|
Loading…
x
Reference in New Issue
Block a user