From 07f6c635a95d8c20040896646394c1e5a8a64784 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sun, 7 Jan 2024 17:05:35 -0500 Subject: [PATCH] Bug fix: treat old generations of reused objects as null --- ChangeLog | 5 ++ include/qpdf/QPDF.hh | 1 + libqpdf/QPDF.cc | 25 +++++++ qpdf/qtest/incremental.test | 26 ++++--- qpdf/qtest/linearization.test | 19 ++---- qpdf/qtest/object-stream.test | 6 +- qpdf/qtest/qpdf/incremental-2-xref.out | 2 +- qpdf/qtest/qpdf/incremental-3-out.qdf | 79 +++++++++++++++++++++ qpdf/qtest/qpdf/incremental-3-xref.out | 5 ++ qpdf/qtest/qpdf/incremental-3.pdf | 95 ++++++++++++++++++++++++++ 10 files changed, 234 insertions(+), 29 deletions(-) create mode 100644 qpdf/qtest/qpdf/incremental-3-out.qdf create mode 100644 qpdf/qtest/qpdf/incremental-3-xref.out create mode 100644 qpdf/qtest/qpdf/incremental-3.pdf diff --git a/ChangeLog b/ChangeLog index 0a57abea..64fe4422 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2024-01-07 Jay Berkenbilt + + * Bug fix: treat references to older generations of objects as + null. + 2024-01-06 Jay Berkenbilt * When recovering a file's xref table, attempt to find xref diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 5ae01770..9ab3ae3f 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1038,6 +1038,7 @@ class QPDF QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr const& obj); bool isCached(QPDFObjGen const& og); bool isUnresolved(QPDFObjGen const& og); + void removeObject(QPDFObjGen const& og); void updateCache( QPDFObjGen const& og, std::shared_ptr const& object, diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 3718eb7e..a089a6c2 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -706,6 +706,19 @@ QPDF::read_xref(qpdf_offset_t xref_offset) // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we // never depend on its being set. m->deleted_objects.clear(); + + // Make sure we keep only the highest generation for any object. + QPDFObjGen::set to_delete; + QPDFObjGen last_og; + for (auto const& og: m->xref_table) { + if (og.first.getObj() == last_og.getObj()) { + to_delete.emplace(last_og); + } + last_og = og.first; + } + for (auto const& og: to_delete) { + removeObject(og); + } } bool @@ -1978,6 +1991,18 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) updateCache(og, oh.getObj(), -1, -1); } +void +QPDF::removeObject(QPDFObjGen const& og) +{ + auto null = QPDFObjectHandle::newNull(); + m->xref_table.erase(og); + if (isCached(og)) { + // Take care of any object handles that may be floating around. + replaceObject(og, null); + } + m->obj_cache.erase(og); +} + void QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement) { diff --git a/qpdf/qtest/incremental.test b/qpdf/qtest/incremental.test index b1edb918..63a0f351 100644 --- a/qpdf/qtest/incremental.test +++ b/qpdf/qtest/incremental.test @@ -14,12 +14,8 @@ cleanup(); my $td = new TestDriver('incremental'); -my $n_tests = 6; +my $n_tests = 9; -# Since the beginning but discovered at the time of releasing 11.8.0: -# qpdf doesn't delete earlier generations of an object when they are -# reused. See also EXPECT_FAILURE in object-stream.test and -# linearization.test. $td->runtest("handle delete and reuse", {$td->COMMAND => "qpdf --qdf --static-id incremental-1.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}, @@ -27,11 +23,11 @@ $td->runtest("handle delete and reuse", $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "incremental-1-out.qdf"}, - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE); + $td->NORMALIZE_NEWLINES); $td->runtest("check xref", {$td->COMMAND => "qpdf --show-xref incremental-1.pdf"}, {$td->FILE => "incremental-1-xref.out", $td->EXIT_STATUS => 0}, - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE); + $td->NORMALIZE_NEWLINES); $td->runtest("handle delete and reuse", {$td->COMMAND => "qpdf --qdf --static-id incremental-2.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}, @@ -40,11 +36,23 @@ $td->runtest("check output", {$td->FILE => "a.pdf"}, # intentionally comparing incremental-2 with incremental-1-out {$td->FILE => "incremental-1-out.qdf"}, - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE); + $td->NORMALIZE_NEWLINES); $td->runtest("check xref", {$td->COMMAND => "qpdf --show-xref incremental-1.pdf"}, {$td->FILE => "incremental-2-xref.out", $td->EXIT_STATUS => 0}, - $td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE); + $td->NORMALIZE_NEWLINES); +$td->runtest("handle delete and don't reuse", + {$td->COMMAND => "qpdf --qdf --static-id incremental-3.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "incremental-3-out.qdf"}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check xref", + {$td->COMMAND => "qpdf --show-xref incremental-3.pdf"}, + {$td->FILE => "incremental-3-xref.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); cleanup(); $td->report($n_tests); diff --git a/qpdf/qtest/linearization.test b/qpdf/qtest/linearization.test index 02cd778a..c24bdd48 100644 --- a/qpdf/qtest/linearization.test +++ b/qpdf/qtest/linearization.test @@ -84,11 +84,6 @@ foreach my $base (@to_linearize) { foreach my $omode (qw(disable preserve generate)) { - my $xflags = 0; - if ($base eq 'gen1') - { - $xflags = $td->EXPECT_FAILURE; - } my $oarg = "-object-streams=$omode"; my $sdarg = ""; if (($base eq 'lin-special') || ($base eq 'object-stream')) @@ -100,13 +95,12 @@ foreach my $base (@to_linearize) {$td->COMMAND => "qpdf -linearize $oarg $sdarg" . " --static-id $base.pdf a.pdf"}, - {$td->STRING => "", $td->EXIT_STATUS => 0}, - $xflags); + {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check linearization", {$td->COMMAND => "qpdf --check-linearization a.pdf"}, {$td->STRING => "a.pdf: no linearization errors\n", $td->EXIT_STATUS => 0}, - $td->NORMALIZE_NEWLINES | $xflags); + $td->NORMALIZE_NEWLINES); # Relinearizing twice should produce identical results. We # have to do it twice because, if objects changed ordering # during the original linearization, the hint tables won't @@ -117,17 +111,14 @@ foreach my $base (@to_linearize) $td->runtest("relinearize $base 1", {$td->COMMAND => "qpdf -linearize $sdarg --static-id a.pdf b.pdf"}, - {$td->STRING => "", $td->EXIT_STATUS => 0}, - $xflags); + {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("relinearize $base 2", {$td->COMMAND => "qpdf -linearize $sdarg --static-id b.pdf c.pdf"}, - {$td->STRING => "", $td->EXIT_STATUS => 0}, - $xflags); + {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("compare files ($omode)", {$td->FILE => "b.pdf"}, - {$td->FILE => "c.pdf"}, - $xflags); + {$td->FILE => "c.pdf"}); if (($base eq 'lin-special') || ($base eq 'object-stream')) { $td->runtest("check $base ($omode)", diff --git a/qpdf/qtest/object-stream.test b/qpdf/qtest/object-stream.test index c9fa0664..28479ee3 100644 --- a/qpdf/qtest/object-stream.test +++ b/qpdf/qtest/object-stream.test @@ -82,13 +82,9 @@ $td->runtest("generate object streams for gen > 0", {$td->COMMAND => "qpdf --qdf --static-id" . " --object-streams=generate gen1.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); -# qpdf 11.8.0 -- it was discovered that qpdf was incorrectly handling -# references to older generations of reused objects in incrementally -# updated files. $td->runtest("check file", {$td->FILE => "a.pdf"}, - {$td->FILE => "gen1.qdf"}, - $td->EXPECT_FAILURE); + {$td->FILE => "gen1.qdf"}); $td->runtest("generate object streams for gen > 0", {$td->COMMAND => "qpdf --qdf --static-id" . diff --git a/qpdf/qtest/qpdf/incremental-2-xref.out b/qpdf/qtest/qpdf/incremental-2-xref.out index 6ff3fdee..43f4fddd 100644 --- a/qpdf/qtest/qpdf/incremental-2-xref.out +++ b/qpdf/qtest/qpdf/incremental-2-xref.out @@ -1,7 +1,7 @@ 1/0: uncompressed; offset = 9 2/0: uncompressed; offset = 63 3/0: uncompressed; offset = 1069 -4/0: uncompressed; offset = 307 +4/1: uncompressed; offset = 948 5/0: uncompressed; offset = 403 6/0: uncompressed; offset = 438 7/0: uncompressed; offset = 974 diff --git a/qpdf/qtest/qpdf/incremental-3-out.qdf b/qpdf/qtest/qpdf/incremental-3-out.qdf new file mode 100644 index 00000000..fcd4a3c3 --- /dev/null +++ b/qpdf/qtest/qpdf/incremental-3-out.qdf @@ -0,0 +1,79 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + >> + /ProcSet 5 0 R + >> + /Type /Page +>> +endobj + +%% Original object ID: 6 0 +4 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 5 0 +5 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 6 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000242 00000 n +0000000443 00000 n +0000000588 00000 n +trailer << + /Root 1 0 R + /Size 6 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] +>> +startxref +623 +%%EOF diff --git a/qpdf/qtest/qpdf/incremental-3-xref.out b/qpdf/qtest/qpdf/incremental-3-xref.out new file mode 100644 index 00000000..ebce256b --- /dev/null +++ b/qpdf/qtest/qpdf/incremental-3-xref.out @@ -0,0 +1,5 @@ +1/0: uncompressed; offset = 9 +2/0: uncompressed; offset = 63 +3/0: uncompressed; offset = 135 +5/0: uncompressed; offset = 403 +6/0: uncompressed; offset = 438 diff --git a/qpdf/qtest/qpdf/incremental-3.pdf b/qpdf/qtest/qpdf/incremental-3.pdf new file mode 100644 index 00000000..da1279d4 --- /dev/null +++ b/qpdf/qtest/qpdf/incremental-3.pdf @@ -0,0 +1,95 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R +>> +startxref +556 +%%EOF + +% Delete object 4 and increment generation +xref +0 1 +0000000004 65535 f +4 1 +0000000000 00001 f +trailer << + /Size 7 + /Root 1 0 R + /Prev 556 + /Gone 4 0 R +>> +startxref +807 +%%EOF