2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-02 18:30:52 +00:00

Exclude unreferenced objects in object streams (fixes #520)

This commit is contained in:
Jay Berkenbilt 2021-05-08 09:07:59 -04:00
parent 16c19e9424
commit 30ac51bc78
9 changed files with 853 additions and 1642 deletions

View File

@ -1,3 +1,8 @@
2021-05-08 Jay Berkenbilt <ejb@ql.org>
* Fix 11-year-old bug of leaving unreferenced objects in preserved
object streams. Fixes #520.
2021-04-17 Jay Berkenbilt <ejb@ql.org>
* Portability fix: use tm_gmtoff rather than global timezone

3
TODO
View File

@ -511,9 +511,6 @@ I find it useful to make reference to them in this list.
implemented, update the docs on crypto providers, which mention
that this may happen in the future.
* See if we can avoid preserving unreferenced objects in object
streams even when preserving the object streams.
* Provide APIs for embedded files. See *attachments*.pdf in test
suite. The private method findAttachmentStreams finds at least
cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).

View File

@ -2363,17 +2363,36 @@ QPDFWriter::preserveObjectStreams()
{ {
// Our object_to_object_stream map has to map ObjGen -> ObjGen // Our object_to_object_stream map has to map ObjGen -> ObjGen
// since we may be generating object streams out of old objects // since we may be generating object streams out of old objects
// that have generation numbers greater than zero. However in an // that have generation numbers greater than zero. However in an
// existing PDF, all object stream objects and all objects in them // existing PDF, all object stream objects and all objects in them
// must have generation 0 because the PDF spec does not provide // must have generation 0 because the PDF spec does not provide
// any way to do otherwise. // any way to do otherwise. This code filters out objects that are
// not allowed to be in object streams. In addition to removing
// objects that were erroneously included in object streams in the
// source PDF, it also prevents unreferenced objects from being
// included.
std::set<QPDFObjGen> eligible;
if (! this->m->preserve_unreferenced_objects)
{
std::vector<QPDFObjGen> eligible_v =
QPDF::Writer::getCompressibleObjGens(this->m->pdf);
eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end());
}
QTC::TC("qpdf", "QPDFWriter preserve object streams",
this->m->preserve_unreferenced_objects ? 0 : 1);
std::map<int, int> omap; std::map<int, int> omap;
QPDF::Writer::getObjectStreamData(this->m->pdf, omap); QPDF::Writer::getObjectStreamData(this->m->pdf, omap);
for (std::map<int, int>::iterator iter = omap.begin(); for (auto iter: omap)
iter != omap.end(); ++iter)
{ {
this->m->object_to_object_stream[QPDFObjGen((*iter).first, 0)] = QPDFObjGen og(iter.first, 0);
(*iter).second; if (eligible.count(og) || this->m->preserve_unreferenced_objects)
{
this->m->object_to_object_stream[og] = iter.second;
}
else
{
QTC::TC("qpdf", "QPDFWriter exclude from object stream");
}
} }
} }

View File

@ -5060,6 +5060,27 @@ print "\n";
</listitem> </listitem>
</varlistentry> </varlistentry>
--> -->
<varlistentry>
<term>10.3.2: May 8, 2021</term>
<listitem>
<itemizedlist>
<listitem>
<para>
Bug Fixes
</para>
<itemizedlist>
<listitem>
<para>
When generating a file while preserving object streams,
unreferenced objects are correctly removed unless
<option>--preserve-unreferenced</option> is specified.
</para>
</listitem>
</itemizedlist>
</listitem>
</itemizedlist>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term>10.3.1: March 11, 2021</term> <term>10.3.1: March 11, 2021</term>
<listitem> <listitem>

View File

@ -592,3 +592,5 @@ QPDFAcroFormDocumentHelper AP parse error 0
qpdf copy fields not this file 0 qpdf copy fields not this file 0
qpdf copy fields non-first from orig 0 qpdf copy fields non-first from orig 0
QPDF resolve duplicated page in insert 0 QPDF resolve duplicated page in insert 0
QPDFWriter preserve object streams 1
QPDFWriter exclude from object stream 0

View File

@ -986,7 +986,7 @@ my @bug_tests = (
["106", "zlib data error", 3], ["106", "zlib data error", 3],
["141a", "/W entry size 0", 2], ["141a", "/W entry size 0", 2],
["141b", "/W entry size 0", 2], ["141b", "/W entry size 0", 2],
["143", "self-referential ostream", 3], ["143", "self-referential ostream", 3, "--preserve-unreferenced"],
["146", "very deeply nested array", 2], ["146", "very deeply nested array", 2],
["147", "previously caused memory error", 2], ["147", "previously caused memory error", 2],
["148", "free memory on bad flate", 2], ["148", "free memory on bad flate", 2],
@ -996,14 +996,18 @@ my @bug_tests = (
["263", "empty xref stream", 2], ["263", "empty xref stream", 2],
["335a", "ozz-fuzz-12152", 2], ["335a", "ozz-fuzz-12152", 2],
["335b", "ozz-fuzz-14845", 2], ["335b", "ozz-fuzz-14845", 2],
["fuzz-16214", "stream in object stream", 3], ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
# When adding to this list, consider adding to SEED_CORPUS_FILES # When adding to this list, consider adding to SEED_CORPUS_FILES
# in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test. # in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test.
); );
$n_tests += scalar(@bug_tests); $n_tests += scalar(@bug_tests);
foreach my $d (@bug_tests) foreach my $d (@bug_tests)
{ {
my ($n, $description, $exit_status) = @$d; my ($n, $description, $exit_status, $xargs) = @$d;
if (! defined $xargs)
{
$xargs = "";
}
if (-f "issue-$n.obfuscated") if (-f "issue-$n.obfuscated")
{ {
# Some of the PDF files in the test suite trigger anti-virus # Some of the PDF files in the test suite trigger anti-virus
@ -1025,7 +1029,7 @@ foreach my $d (@bug_tests)
{ {
my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n"; my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n";
$td->runtest($description, $td->runtest($description,
{$td->COMMAND => "qpdf $base.pdf a.pdf"}, {$td->COMMAND => "qpdf $xargs $base.pdf a.pdf"},
{$td->FILE => "$base.out", {$td->FILE => "$base.out",
$td->EXIT_STATUS => $exit_status}, $td->EXIT_STATUS => $exit_status},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
@ -1304,7 +1308,8 @@ $n_tests += 2;
# that in turn contains an indirect scalar (bug 2974522). # that in turn contains an indirect scalar (bug 2974522).
$td->runtest("unreferenced indirect scalar", $td->runtest("unreferenced indirect scalar",
{$td->COMMAND => {$td->COMMAND =>
"qpdf --qdf --static-id --object-streams=preserve" . "qpdf --qdf --static-id --preserve-unreferenced" .
" --object-streams=preserve" .
" unreferenced-indirect-scalar.pdf a.qdf"}, " unreferenced-indirect-scalar.pdf a.qdf"},
{$td->STRING => "", {$td->STRING => "",
$td->EXIT_STATUS => 0}, $td->EXIT_STATUS => 0},

File diff suppressed because it is too large Load Diff