2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 02:49:00 +00:00

Generalise last commit to xref parsing and reconstruction

Tests use a modified dangling-bad-xref.pdf.
This commit is contained in:
m-holger 2024-10-14 14:20:44 +01:00
parent 405f3765c5
commit 8753ffe335
6 changed files with 168 additions and 15 deletions

View File

@ -463,9 +463,8 @@ QPDF::fixDanglingReferences(bool force)
size_t
QPDF::getObjectCount()
{
// Returns the value of m->objects.last_id() passed through toS().
// This is described as the highest used indirect object number. An earlier description called it
// the next available indirect object number (used by makeIndirectObject); the two wordings
// disagree, so treat "highest used" as the intended meaning -- NOTE(review): confirm against
// Objects::last_id().
// NOTE(review): the statement that calling this method resolves all objects is not visible in
// this function; presumably any resolution happens inside last_id() -- confirm.
return toS(m->objects.last_id());
}

View File

@ -171,18 +171,28 @@ Xref_table::initialize()
initialized_ = true;
}
// Remove any dangling reference picked up while parsing the xref table.
// Remove any dangling reference picked up while parsing or reconstructing the xref table from the
// object table.
void
Xref_table::prepare_obj_table()
{
for (auto it = objects.table.begin(), end = objects.table.end(); it != end;) {
if (type(it->first, it->second.gen)) {
++it;
} else {
if (it->second.unconfirmed && !type(it->first, it->second.gen)) {
it->second.object->make_null();
it = objects.table.erase(it);
} else {
it->second.unconfirmed = false;
++it;
}
}
for (auto& [id_gen, obj]: objects.unconfirmed_objects) {
if (type(id_gen.first, id_gen.second)) {
objects.update_table(id_gen.first, id_gen.second, obj);
} else {
obj->make_null();
}
}
objects.unconfirmed_objects.clear();
}
void
@ -204,6 +214,8 @@ Xref_table::reconstruct(QPDFExc& e)
};
reconstructed_ = true;
bool called_during_resolve_attempt = initialized_;
initialized_ = false;
warn_damaged("file is damaged");
qpdf.warn(e);
@ -280,7 +292,7 @@ Xref_table::reconstruct(QPDFExc& e)
if (item.type() != 1) {
continue;
}
auto oh = objects.get(i, item.gen());
QPDFObjectHandle oh{objects.get_when_uncertain(i, item.gen())};
try {
if (!oh.isStreamOfType("/XRef")) {
continue;
@ -319,8 +331,11 @@ Xref_table::reconstruct(QPDFExc& e)
throw damaged_pdf("unable to find objects while recovering damaged file");
}
check_warnings();
if (!initialized_) {
initialized_ = true;
prepare_obj_table();
initialized_ = true;
if (!called_during_resolve_attempt) {
// We can't do the checks because we may try to resolve the object that triggered the
// reconstruction.
qpdf.getAllPages();
check_warnings();
if (qpdf.m->all_pages.empty()) {
@ -1552,7 +1567,7 @@ QPDFObject*
Objects::resolve(int id, int gen)
{
if (!unresolved(id, gen)) {
return get(id, gen).getObjectPtr();
return get_for_parser(id, gen, true).get();
}
auto og = QPDFObjGen(id, gen);
@ -1835,6 +1850,9 @@ std::shared_ptr<QPDFObject>
Objects::get_for_parser(int id, int gen, bool parse_pdf)
{
// This method is called by the parser and therefore must not resolve any objects.
if (!xref.initialized() && parse_pdf) {
return get_when_uncertain(id, gen);
}
auto iter = table.find(id);
if (iter != table.end() && iter->second.gen == gen) {
return iter->second.object;
@ -1843,7 +1861,7 @@ Objects::get_for_parser(int id, int gen, bool parse_pdf)
// id in table, different gen
return QPDF_Null::create();
}
if (xref.type(id, gen) || !xref.initialized()) {
if (xref.type(id, gen)) {
return table.insert({id, {gen, QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen))}})
.first->second.object;
}
@ -1869,7 +1887,9 @@ Objects::get_when_uncertain(int id, int gen)
e.gen = gen;
if (!xref.type(id, gen)) {
e.unconfirmed = true;
return e.object = QPDF_Null::create(&qpdf, QPDFObjGen(id, gen));
return e.object = xref.initialized()
? QPDF_Null::create(&qpdf, QPDFObjGen(id, gen))
: QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen));
} else {
return e.object = QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen));
}
@ -1889,7 +1909,8 @@ Objects::get_when_uncertain(int id, int gen)
if (auto& j = unconfirmed_objects[{id, gen}]) {
return j;
} else {
return j = QPDF_Null::create(&qpdf, QPDFObjGen(id, gen));
return j = xref.initialized() ? QPDF_Null::create(&qpdf, QPDFObjGen(id, gen))
: QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen));
}
}

View File

@ -14,7 +14,7 @@ cleanup();
my $td = new TestDriver('dangling-refs');
my @dangling = (qw(minimal dangling-refs dangling-bad-xref));
my @dangling = (qw(minimal dangling-refs dangling-bad-xref dangling-multiple-gen-bad-xref));
my $n_tests = 2 * scalar(@dangling);
foreach my $f (@dangling)

View File

@ -0,0 +1,15 @@
WARNING: dangling-multiple-gen-bad-xref.pdf: file is damaged
WARNING: dangling-multiple-gen-bad-xref.pdf (object 7 0, offset 10000): expected n n obj
WARNING: dangling-multiple-gen-bad-xref.pdf: Attempting to reconstruct cross-reference table
new object: 12 0 R
all objects
1 3 R
2 3 R
3 0 R
4 0 R
5 0 R
6 0 R
7 0 R
11 0 R
12 0 R
test 53 done

View File

@ -0,0 +1,118 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
1 3 obj
<<
/Pages 2 3 R
/Type /Catalog
/Dangling 8 0 R
/AlsoDangling [
9 0 R
<<
/yes 2 5 R
/no 10 0 R
/nope 8 0 R
>>
]
>>
endobj
2 3 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[
/PDF
/Text
]
endobj
11 0 obj
[
12 0 R
2 4 R
]
endobj
xref
0 8
0000000000 65535 f
0000000025 00003 n
0000000195 00003 n
0000000277 00000 n
0000000492 00000 n
0000000591 00000 n
0000000610 00000 n
0000010000 00000 n
trailer <<
/Dangle [
1 0 R
1 6 R
2 6 R
2 0 R
4 6 R
]
/Root 1 3 R
/Size 8
/ID [<7141a6cf32de469328cf0f51982b5f89><7141a6cf32de469328cf0f51982b5f89>]
>>
startxref
803
%%EOF