diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index e63045fc..0248bf47 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -463,9 +463,8 @@ QPDF::fixDanglingReferences(bool force) size_t QPDF::getObjectCount() { - // This method returns the next available indirect object number. makeIndirectObject uses it for - // this purpose. After fixDanglingReferences is called, all objects in the xref table will also - // be in obj_cache. + // This method returns the highest used indirect object number. Calling this method will resolve + // all objects. return toS(m->objects.last_id()); } diff --git a/libqpdf/QPDF_objects.cc b/libqpdf/QPDF_objects.cc index 509fa0c9..832de720 100644 --- a/libqpdf/QPDF_objects.cc +++ b/libqpdf/QPDF_objects.cc @@ -171,18 +171,28 @@ Xref_table::initialize() initialized_ = true; } -// Remove any dangling reference picked up while parsing the xref table. +// Remove any dangling reference picked up while parsing or reconstructing the xref table from the +// object table. void Xref_table::prepare_obj_table() { for (auto it = objects.table.begin(), end = objects.table.end(); it != end;) { - if (type(it->first, it->second.gen)) { - ++it; - } else { + if (it->second.unconfirmed && !type(it->first, it->second.gen)) { it->second.object->make_null(); it = objects.table.erase(it); + } else { + it->second.unconfirmed = false; + ++it; } } + for (auto& [id_gen, obj]: objects.unconfirmed_objects) { + if (type(id_gen.first, id_gen.second)) { + objects.update_table(id_gen.first, id_gen.second, obj); + } else { + obj->make_null(); + } + } + objects.unconfirmed_objects.clear(); } void @@ -204,6 +214,8 @@ Xref_table::reconstruct(QPDFExc& e) }; reconstructed_ = true; + bool called_during_resolve_attempt = initialized_; + initialized_ = false; warn_damaged("file is damaged"); qpdf.warn(e); @@ -280,7 +292,7 @@ Xref_table::reconstruct(QPDFExc& e) if (item.type() != 1) { continue; } - auto oh = objects.get(i, item.gen()); + QPDFObjectHandle oh{objects.get_when_uncertain(i, item.gen())}; try { if (!oh.isStreamOfType("/XRef")) { continue; @@ -319,8 +331,11 @@ Xref_table::reconstruct(QPDFExc& e) throw damaged_pdf("unable to find objects while recovering damaged file"); } check_warnings(); - if (!initialized_) { - initialized_ = true; + prepare_obj_table(); + initialized_ = true; + if (!called_during_resolve_attempt) { + // We can't do the checks because we may try to resolve the object that triggered the + // reconstruction. qpdf.getAllPages(); check_warnings(); if (qpdf.m->all_pages.empty()) { @@ -1552,7 +1567,7 @@ QPDFObject* Objects::resolve(int id, int gen) { if (!unresolved(id, gen)) { - return get(id, gen).getObjectPtr(); + return get_for_parser(id, gen, true).get(); } auto og = QPDFObjGen(id, gen); @@ -1835,6 +1850,9 @@ std::shared_ptr<QPDFObject> Objects::get_for_parser(int id, int gen, bool parse_pdf) { // This method is called by the parser and therefore must not resolve any objects. + if (!xref.initialized() && parse_pdf) { + return get_when_uncertain(id, gen); + } auto iter = table.find(id); if (iter != table.end() && iter->second.gen == gen) { return iter->second.object; @@ -1843,7 +1861,7 @@ Objects::get_for_parser(int id, int gen, bool parse_pdf) // id in table, different gen return QPDF_Null::create(); } - if (xref.type(id, gen) || !xref.initialized()) { + if (xref.type(id, gen)) { return table.insert({id, {gen, QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen))}}) .first->second.object; } @@ -1869,7 +1887,9 @@ Objects::get_when_uncertain(int id, int gen) e.gen = gen; if (!xref.type(id, gen)) { e.unconfirmed = true; - return e.object = QPDF_Null::create(&qpdf, QPDFObjGen(id, gen)); + return e.object = xref.initialized() + ? QPDF_Null::create(&qpdf, QPDFObjGen(id, gen)) + : QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen)); } else { return e.object = QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen)); } @@ -1889,7 +1909,8 @@ Objects::get_when_uncertain(int id, int gen) if (auto& j = unconfirmed_objects[{id, gen}]) { return j; } else { - return j = QPDF_Null::create(&qpdf, QPDFObjGen(id, gen)); + return j = xref.initialized() ? QPDF_Null::create(&qpdf, QPDFObjGen(id, gen)) + : QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen)); } } diff --git a/qpdf/qtest/dangling-refs.test b/qpdf/qtest/dangling-refs.test index 5a431b83..fb4df6eb 100644 --- a/qpdf/qtest/dangling-refs.test +++ b/qpdf/qtest/dangling-refs.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('dangling-refs'); -my @dangling = (qw(minimal dangling-refs dangling-bad-xref)); +my @dangling = (qw(minimal dangling-refs dangling-bad-xref dangling-multiple-gen-bad-xref)); my $n_tests = 2 * scalar(@dangling); foreach my $f (@dangling) diff --git a/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref-dangling-out.pdf b/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref-dangling-out.pdf new file mode 100644 index 00000000..2f96508d Binary files /dev/null and b/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref-dangling-out.pdf differ diff --git a/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref-dangling.out b/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref-dangling.out new file mode 100644 index 00000000..4fcf16e1 --- /dev/null +++ b/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref-dangling.out @@ -0,0 +1,15 @@ +WARNING: dangling-multiple-gen-bad-xref.pdf: file is damaged +WARNING: dangling-multiple-gen-bad-xref.pdf (object 7 0, offset 10000): expected n n obj +WARNING: dangling-multiple-gen-bad-xref.pdf: Attempting to reconstruct cross-reference table +new object: 12 0 R +all objects +1 3 R +2 3 R +3 0 R +4 0 R +5 0 R +6 0 R +7 0 R +11 0 R +12 0 R +test 53 done diff --git a/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref.pdf b/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref.pdf new file mode 100644 index 00000000..8a8db365 --- /dev/null +++ b/qpdf/qtest/qpdf/dangling-multiple-gen-bad-xref.pdf @@ -0,0 +1,118 @@ +%PDF-1.3 +%���� +%QDF-1.0 + +1 3 obj +<< + /Pages 2 3 R + /Type /Catalog + /Dangling 8 0 R + /AlsoDangling [ + 9 0 R + << + /yes 2 5 R + /no 10 0 R + /nope 8 0 R + >> + ] +>> +endobj + +2 3 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +7 0 obj +[ + /PDF + /Text +] +endobj + +11 0 obj +[ + 12 0 R + 2 4 R + ] +endobj + +xref +0 8 +0000000000 65535 f +0000000025 00003 n +0000000195 00003 n +0000000277 00000 n +0000000492 00000 n +0000000591 00000 n +0000000610 00000 n +0000010000 00000 n +trailer << + /Dangle [ + 1 0 R + 1 6 R + 2 6 R + 2 0 R + 4 6 R + ] + /Root 1 3 R + /Size 8 + /ID [<7141a6cf32de469328cf0f51982b5f89><7141a6cf32de469328cf0f51982b5f89>] +>> +startxref +803 +%%EOF