Exclude unreferenced objects in object streams (fixes #520)

This commit is contained in:
Jay Berkenbilt 2021-05-08 09:07:59 -04:00
parent 16c19e9424
commit 30ac51bc78
9 changed files with 853 additions and 1642 deletions

View File

@ -1,3 +1,8 @@
2021-05-08 Jay Berkenbilt <ejb@ql.org>
* Fix 11-year-old bug of leaving unreferenced objects in preserved
object streams. Fixes #520.
2021-04-17 Jay Berkenbilt <ejb@ql.org>
* Portability fix: use tm_gmtoff rather than global timezone

3
TODO
View File

@ -511,9 +511,6 @@ I find it useful to make reference to them in this list.
implemented, update the docs on crypto providers, which mention
that this may happen in the future.
* See if we can avoid preserving unreferenced objects in object
streams even when preserving the object streams.
* Provide APIs for embedded files. See *attachments*.pdf in test
suite. The private method findAttachmentStreams finds at least
cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).

View File

@ -2363,17 +2363,36 @@ QPDFWriter::preserveObjectStreams()
{
// Our object_to_object_stream map has to map ObjGen -> ObjGen
// since we may be generating object streams out of old objects
// that have generation numbers greater than zero. However in an
// that have generation numbers greater than zero. However in an
// existing PDF, all object stream objects and all objects in them
// must have generation 0 because the PDF spec does not provide
// any way to do otherwise.
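// any way to do otherwise. This code filters out objects that are
// not allowed to be in object streams. In addition to removing
// objects that were erroneously included in object streams in the
// source PDF, it also prevents unreferenced objects from being
// included. The filtering is skipped entirely when
// preserve_unreferenced_objects is set, in which case every entry
// returned by getObjectStreamData is kept.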
std::set<QPDFObjGen> eligible;
if (! this->m->preserve_unreferenced_objects)
{
std::vector<QPDFObjGen> eligible_v =
QPDF::Writer::getCompressibleObjGens(this->m->pdf);
eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end());
}
QTC::TC("qpdf", "QPDFWriter preserve object streams",
this->m->preserve_unreferenced_objects ? 0 : 1);
std::map<int, int> omap;
QPDF::Writer::getObjectStreamData(this->m->pdf, omap);
for (std::map<int, int>::iterator iter = omap.begin();
iter != omap.end(); ++iter)
for (auto iter: omap)
{
this->m->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =
(*iter).second;
QPDFObjGen og(iter.first, 0);
if (eligible.count(og) || this->m->preserve_unreferenced_objects)
{
this->m->object_to_object_stream[og] = iter.second;
}
else
{
QTC::TC("qpdf", "QPDFWriter exclude from object stream");
}
}
}

View File

@ -5060,6 +5060,27 @@ print "\n";
</listitem>
</varlistentry>
-->
<varlistentry>
<term>10.3.2: May 8, 2021</term>
<listitem>
<itemizedlist>
<listitem>
<para>
Bug Fixes
</para>
<itemizedlist>
<listitem>
<para>
When generating a file while preserving object streams,
unreferenced objects contained in those object streams are
now correctly removed unless
<option>--preserve-unreferenced</option> is specified.
</para>
</listitem>
</itemizedlist>
</listitem>
</itemizedlist>
</listitem>
</varlistentry>
<varlistentry>
<term>10.3.1: March 11, 2021</term>
<listitem>

View File

@ -592,3 +592,5 @@ QPDFAcroFormDocumentHelper AP parse error 0
qpdf copy fields not this file 0
qpdf copy fields non-first from orig 0
QPDF resolve duplicated page in insert 0
QPDFWriter preserve object streams 1
QPDFWriter exclude from object stream 0

View File

@ -986,7 +986,7 @@ my @bug_tests = (
["106", "zlib data error", 3],
["141a", "/W entry size 0", 2],
["141b", "/W entry size 0", 2],
["143", "self-referential ostream", 3],
["143", "self-referential ostream", 3, "--preserve-unreferenced"],
["146", "very deeply nested array", 2],
["147", "previously caused memory error", 2],
["148", "free memory on bad flate", 2],
@ -996,14 +996,18 @@ my @bug_tests = (
["263", "empty xref stream", 2],
["335a", "ozz-fuzz-12152", 2],
["335b", "ozz-fuzz-14845", 2],
["fuzz-16214", "stream in object stream", 3],
["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
# When adding to this list, consider adding to SEED_CORPUS_FILES
# in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test.
);
$n_tests += scalar(@bug_tests);
foreach my $d (@bug_tests)
{
my ($n, $description, $exit_status) = @$d;
my ($n, $description, $exit_status, $xargs) = @$d;
if (! defined $xargs)
{
$xargs = "";
}
if (-f "issue-$n.obfuscated")
{
# Some of the PDF files in the test suite trigger anti-virus
@ -1025,7 +1029,7 @@ foreach my $d (@bug_tests)
{
my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n";
$td->runtest($description,
{$td->COMMAND => "qpdf $base.pdf a.pdf"},
{$td->COMMAND => "qpdf $xargs $base.pdf a.pdf"},
{$td->FILE => "$base.out",
$td->EXIT_STATUS => $exit_status},
$td->NORMALIZE_NEWLINES);
@ -1304,7 +1308,8 @@ $n_tests += 2;
# that in turn contains an indirect scalar (bug 2974522).
$td->runtest("unreferenced indirect scalar",
{$td->COMMAND =>
"qpdf --qdf --static-id --object-streams=preserve" .
"qpdf --qdf --static-id --preserve-unreferenced" .
" --object-streams=preserve" .
" unreferenced-indirect-scalar.pdf a.qdf"},
{$td->STRING => "",
$td->EXIT_STATUS => 0},

File diff suppressed because it is too large Load Diff