mirror of
https://github.com/qpdf/qpdf.git
synced 2024-06-02 18:30:52 +00:00
Exclude unreferenced objects in object streams (fixes #520)
This commit is contained in:
parent
16c19e9424
commit
30ac51bc78
|
@ -1,3 +1,8 @@
|
||||||
|
2021-05-08 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Fix 11-year-old bug of leaving unreferenced objects in preserved
|
||||||
|
object streams. Fixes #520.
|
||||||
|
|
||||||
2021-04-17 Jay Berkenbilt <ejb@ql.org>
|
2021-04-17 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Portability fix: use tm_gmtoff rather than global timezone
|
* Portability fix: use tm_gmtoff rather than global timezone
|
||||||
|
|
3
TODO
3
TODO
|
@ -511,9 +511,6 @@ I find it useful to make reference to them in this list.
|
||||||
implemented, update the docs on crypto providers, which mention
|
implemented, update the docs on crypto providers, which mention
|
||||||
that this may happen in the future.
|
that this may happen in the future.
|
||||||
|
|
||||||
* See if we can avoid preserving unreferenced objects in object
|
|
||||||
streams even when preserving the object streams.
|
|
||||||
|
|
||||||
* Provide APIs for embedded files. See *attachments*.pdf in test
|
* Provide APIs for embedded files. See *attachments*.pdf in test
|
||||||
suite. The private method findAttachmentStreams finds at least
|
suite. The private method findAttachmentStreams finds at least
|
||||||
cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
|
cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
|
||||||
|
|
|
@ -2363,17 +2363,36 @@ QPDFWriter::preserveObjectStreams()
|
||||||
{
|
{
|
||||||
// Our object_to_object_stream map has to map ObjGen -> ObjGen
|
// Our object_to_object_stream map has to map ObjGen -> ObjGen
|
||||||
// since we may be generating object streams out of old objects
|
// since we may be generating object streams out of old objects
|
||||||
// that have generation numbers greater than zero. However in an
|
// that have generation numbers greater than zero. However in an
|
||||||
// existing PDF, all object stream objects and all objects in them
|
// existing PDF, all object stream objects and all objects in them
|
||||||
// must have generation 0 because the PDF spec does not provide
|
// must have generation 0 because the PDF spec does not provide
|
||||||
// any way to do otherwise.
|
// any way to do otherwise. This code filters out objects that are
|
||||||
|
// not allowed to be in object streams. In addition to removing
|
||||||
|
// objects that were erroneously included in object streams in the
|
||||||
|
// source PDF, it also prevents unreferenced objects from being
|
||||||
|
// included.
|
||||||
|
std::set<QPDFObjGen> eligible;
|
||||||
|
if (! this->m->preserve_unreferenced_objects)
|
||||||
|
{
|
||||||
|
std::vector<QPDFObjGen> eligible_v =
|
||||||
|
QPDF::Writer::getCompressibleObjGens(this->m->pdf);
|
||||||
|
eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end());
|
||||||
|
}
|
||||||
|
QTC::TC("qpdf", "QPDFWriter preserve object streams",
|
||||||
|
this->m->preserve_unreferenced_objects ? 0 : 1);
|
||||||
std::map<int, int> omap;
|
std::map<int, int> omap;
|
||||||
QPDF::Writer::getObjectStreamData(this->m->pdf, omap);
|
QPDF::Writer::getObjectStreamData(this->m->pdf, omap);
|
||||||
for (std::map<int, int>::iterator iter = omap.begin();
|
for (auto iter: omap)
|
||||||
iter != omap.end(); ++iter)
|
|
||||||
{
|
{
|
||||||
this->m->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =
|
QPDFObjGen og(iter.first, 0);
|
||||||
(*iter).second;
|
if (eligible.count(og) || this->m->preserve_unreferenced_objects)
|
||||||
|
{
|
||||||
|
this->m->object_to_object_stream[og] = iter.second;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDFWriter exclude from object stream");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5060,6 +5060,27 @@ print "\n";
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
-->
|
-->
|
||||||
|
<varlistentry>
|
||||||
|
<term>10.3.2: May 8, 2021</term>
|
||||||
|
<listitem>
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Bug Fixes
|
||||||
|
</para>
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
When generating a file while preserving object streams,
|
||||||
|
unreferenced objects are correctly removed unless
|
||||||
|
<option>--preserve-unreferenced</option> is specified.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term>10.3.1: March 11, 2021</term>
|
<term>10.3.1: March 11, 2021</term>
|
||||||
<listitem>
|
<listitem>
|
||||||
|
|
|
@ -592,3 +592,5 @@ QPDFAcroFormDocumentHelper AP parse error 0
|
||||||
qpdf copy fields not this file 0
|
qpdf copy fields not this file 0
|
||||||
qpdf copy fields non-first from orig 0
|
qpdf copy fields non-first from orig 0
|
||||||
QPDF resolve duplicated page in insert 0
|
QPDF resolve duplicated page in insert 0
|
||||||
|
QPDFWriter preserve object streams 1
|
||||||
|
QPDFWriter exclude from object stream 0
|
||||||
|
|
|
@ -986,7 +986,7 @@ my @bug_tests = (
|
||||||
["106", "zlib data error", 3],
|
["106", "zlib data error", 3],
|
||||||
["141a", "/W entry size 0", 2],
|
["141a", "/W entry size 0", 2],
|
||||||
["141b", "/W entry size 0", 2],
|
["141b", "/W entry size 0", 2],
|
||||||
["143", "self-referential ostream", 3],
|
["143", "self-referential ostream", 3, "--preserve-unreferenced"],
|
||||||
["146", "very deeply nested array", 2],
|
["146", "very deeply nested array", 2],
|
||||||
["147", "previously caused memory error", 2],
|
["147", "previously caused memory error", 2],
|
||||||
["148", "free memory on bad flate", 2],
|
["148", "free memory on bad flate", 2],
|
||||||
|
@ -996,14 +996,18 @@ my @bug_tests = (
|
||||||
["263", "empty xref stream", 2],
|
["263", "empty xref stream", 2],
|
||||||
["335a", "ozz-fuzz-12152", 2],
|
["335a", "ozz-fuzz-12152", 2],
|
||||||
["335b", "ozz-fuzz-14845", 2],
|
["335b", "ozz-fuzz-14845", 2],
|
||||||
["fuzz-16214", "stream in object stream", 3],
|
["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
|
||||||
# When adding to this list, consider adding to SEED_CORPUS_FILES
|
# When adding to this list, consider adding to SEED_CORPUS_FILES
|
||||||
# in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test.
|
# in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test.
|
||||||
);
|
);
|
||||||
$n_tests += scalar(@bug_tests);
|
$n_tests += scalar(@bug_tests);
|
||||||
foreach my $d (@bug_tests)
|
foreach my $d (@bug_tests)
|
||||||
{
|
{
|
||||||
my ($n, $description, $exit_status) = @$d;
|
my ($n, $description, $exit_status, $xargs) = @$d;
|
||||||
|
if (! defined $xargs)
|
||||||
|
{
|
||||||
|
$xargs = "";
|
||||||
|
}
|
||||||
if (-f "issue-$n.obfuscated")
|
if (-f "issue-$n.obfuscated")
|
||||||
{
|
{
|
||||||
# Some of the PDF files in the test suite trigger anti-virus
|
# Some of the PDF files in the test suite trigger anti-virus
|
||||||
|
@ -1025,7 +1029,7 @@ foreach my $d (@bug_tests)
|
||||||
{
|
{
|
||||||
my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n";
|
my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n";
|
||||||
$td->runtest($description,
|
$td->runtest($description,
|
||||||
{$td->COMMAND => "qpdf $base.pdf a.pdf"},
|
{$td->COMMAND => "qpdf $xargs $base.pdf a.pdf"},
|
||||||
{$td->FILE => "$base.out",
|
{$td->FILE => "$base.out",
|
||||||
$td->EXIT_STATUS => $exit_status},
|
$td->EXIT_STATUS => $exit_status},
|
||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
@ -1304,7 +1308,8 @@ $n_tests += 2;
|
||||||
# that in turn contains an indirect scalar (bug 2974522).
|
# that in turn contains an indirect scalar (bug 2974522).
|
||||||
$td->runtest("unreferenced indirect scalar",
|
$td->runtest("unreferenced indirect scalar",
|
||||||
{$td->COMMAND =>
|
{$td->COMMAND =>
|
||||||
"qpdf --qdf --static-id --object-streams=preserve" .
|
"qpdf --qdf --static-id --preserve-unreferenced" .
|
||||||
|
" --object-streams=preserve" .
|
||||||
" unreferenced-indirect-scalar.pdf a.qdf"},
|
" unreferenced-indirect-scalar.pdf a.qdf"},
|
||||||
{$td->STRING => "",
|
{$td->STRING => "",
|
||||||
$td->EXIT_STATUS => 0},
|
$td->EXIT_STATUS => 0},
|
||||||
|
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Loading…
Reference in New Issue
Block a user