Bug fix: treat old generations of reused objects as null

This commit is contained in:
Jay Berkenbilt 2024-01-07 17:05:35 -05:00
parent 1d96af8b9f
commit 07f6c635a9
10 changed files with 234 additions and 29 deletions

View File

@ -1,3 +1,8 @@
2024-01-07 Jay Berkenbilt <ejb@ql.org>
* Bug fix: treat references to older generations of objects as
null.
2024-01-06 Jay Berkenbilt <ejb@ql.org>
* When recovering a file's xref table, attempt to find xref

View File

@ -1038,6 +1038,7 @@ class QPDF
QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
bool isCached(QPDFObjGen const& og);
bool isUnresolved(QPDFObjGen const& og);
void removeObject(QPDFObjGen const& og);
void updateCache(
QPDFObjGen const& og,
std::shared_ptr<QPDFObject> const& object,

View File

@ -706,6 +706,19 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
// We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
// never depend on its being set.
m->deleted_objects.clear();
// Make sure we keep only the highest generation for any object.
QPDFObjGen::set to_delete;
QPDFObjGen last_og;
for (auto const& og: m->xref_table) {
if (og.first.getObj() == last_og.getObj()) {
to_delete.emplace(last_og);
}
last_og = og.first;
}
for (auto const& og: to_delete) {
removeObject(og);
}
}
bool
@ -1978,6 +1991,18 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
updateCache(og, oh.getObj(), -1, -1);
}
// Forget the object identified by og: drop its xref_table entry, and if it is currently present
// in the object cache, replace the cached object with a null before evicting the cache entry.
void
QPDF::removeObject(QPDFObjGen const& og)
{
    m->xref_table.erase(og);
    if (isCached(og)) {
        // Object handles obtained earlier may still be floating around; swap a null in through
        // replaceObject so they are taken care of before the cache entry disappears.
        replaceObject(og, QPDFObjectHandle::newNull());
    }
    m->obj_cache.erase(og);
}
void
QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
{

View File

@ -14,12 +14,8 @@ cleanup();
my $td = new TestDriver('incremental');
my $n_tests = 6;
my $n_tests = 9;
# Since the beginning but discovered at the time of releasing 11.8.0:
# qpdf doesn't delete earlier generations of an object when they are
# reused. See also EXPECT_FAILURE in object-stream.test and
# linearization.test.
$td->runtest("handle delete and reuse",
{$td->COMMAND => "qpdf --qdf --static-id incremental-1.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
@ -27,11 +23,11 @@ $td->runtest("handle delete and reuse",
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "incremental-1-out.qdf"},
$td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE);
$td->NORMALIZE_NEWLINES);
$td->runtest("check xref",
{$td->COMMAND => "qpdf --show-xref incremental-1.pdf"},
{$td->FILE => "incremental-1-xref.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE);
$td->NORMALIZE_NEWLINES);
$td->runtest("handle delete and reuse",
{$td->COMMAND => "qpdf --qdf --static-id incremental-2.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
@ -40,11 +36,23 @@ $td->runtest("check output",
{$td->FILE => "a.pdf"},
# intentionally comparing incremental-2 with incremental-1-out
{$td->FILE => "incremental-1-out.qdf"},
$td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE);
$td->NORMALIZE_NEWLINES);
$td->runtest("check xref",
{$td->COMMAND => "qpdf --show-xref incremental-1.pdf"},
{$td->FILE => "incremental-2-xref.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES | $td->EXPECT_FAILURE);
$td->NORMALIZE_NEWLINES);
$td->runtest("handle delete and don't reuse",
{$td->COMMAND => "qpdf --qdf --static-id incremental-3.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "incremental-3-out.qdf"},
$td->NORMALIZE_NEWLINES);
$td->runtest("check xref",
{$td->COMMAND => "qpdf --show-xref incremental-3.pdf"},
{$td->FILE => "incremental-3-xref.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
cleanup();
$td->report($n_tests);

View File

@ -84,11 +84,6 @@ foreach my $base (@to_linearize)
{
foreach my $omode (qw(disable preserve generate))
{
my $xflags = 0;
if ($base eq 'gen1')
{
$xflags = $td->EXPECT_FAILURE;
}
my $oarg = "-object-streams=$omode";
my $sdarg = "";
if (($base eq 'lin-special') || ($base eq 'object-stream'))
@ -100,13 +95,12 @@ foreach my $base (@to_linearize)
{$td->COMMAND =>
"qpdf -linearize $oarg $sdarg" .
" --static-id $base.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$xflags);
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check linearization",
{$td->COMMAND => "qpdf --check-linearization a.pdf"},
{$td->STRING => "a.pdf: no linearization errors\n",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES | $xflags);
$td->NORMALIZE_NEWLINES);
# Relinearizing twice should produce identical results. We
# have to do it twice because, if objects changed ordering
# during the original linearization, the hint tables won't
@ -117,17 +111,14 @@ foreach my $base (@to_linearize)
$td->runtest("relinearize $base 1",
{$td->COMMAND =>
"qpdf -linearize $sdarg --static-id a.pdf b.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$xflags);
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("relinearize $base 2",
{$td->COMMAND =>
"qpdf -linearize $sdarg --static-id b.pdf c.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$xflags);
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("compare files ($omode)",
{$td->FILE => "b.pdf"},
{$td->FILE => "c.pdf"},
$xflags);
{$td->FILE => "c.pdf"});
if (($base eq 'lin-special') || ($base eq 'object-stream'))
{
$td->runtest("check $base ($omode)",

View File

@ -82,13 +82,9 @@ $td->runtest("generate object streams for gen > 0",
{$td->COMMAND => "qpdf --qdf --static-id" .
" --object-streams=generate gen1.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
# qpdf 11.8.0 -- it was discovered that qpdf was incorrectly handling
# references to older generations of reused objects in incrementally
# updated files.
$td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "gen1.qdf"},
$td->EXPECT_FAILURE);
{$td->FILE => "gen1.qdf"});
$td->runtest("generate object streams for gen > 0",
{$td->COMMAND => "qpdf --qdf --static-id" .

View File

@ -1,7 +1,7 @@
1/0: uncompressed; offset = 9
2/0: uncompressed; offset = 63
3/0: uncompressed; offset = 1069
4/0: uncompressed; offset = 307
4/1: uncompressed; offset = 948
5/0: uncompressed; offset = 403
6/0: uncompressed; offset = 438
7/0: uncompressed; offset = 974

View File

@ -0,0 +1,79 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 2 0
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
%% Original object ID: 3 0
3 0 obj
<<
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 4 0 R
>>
/ProcSet 5 0 R
>>
/Type /Page
>>
endobj
%% Original object ID: 6 0
4 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 5 0
5 0 obj
[
/PDF
/Text
]
endobj
xref
0 6
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000242 00000 n
0000000443 00000 n
0000000588 00000 n
trailer <<
/Root 1 0 R
/Size 6
/ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
>>
startxref
623
%%EOF

View File

@ -0,0 +1,5 @@
1/0: uncompressed; offset = 9
2/0: uncompressed; offset = 63
3/0: uncompressed; offset = 135
5/0: uncompressed; offset = 403
6/0: uncompressed; offset = 438

View File

@ -0,0 +1,95 @@
%PDF-1.3
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [
3 0 R
]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
[
/PDF
/Text
]
endobj
6 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00000 n
0000000063 00000 n
0000000135 00000 n
0000000307 00000 n
0000000403 00000 n
0000000438 00000 n
trailer <<
/Size 7
/Root 1 0 R
>>
startxref
556
%%EOF
% Delete object 4 and increment generation
xref
0 1
0000000004 65535 f
4 1
0000000000 00001 f
trailer <<
/Size 7
/Root 1 0 R
/Prev 556
/Gone 4 0 R
>>
startxref
807
%%EOF