mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Attempt to find xref streams during recovery (fixes #1103)
This commit is contained in:
parent
8a24287c39
commit
2994f9cf4c
@ -1,3 +1,9 @@
|
||||
2024-01-06 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* When recovering a file's xref table, attempt to find xref
|
||||
streams if a traditional trailer dictionary is not found. Fixes
|
||||
#1103.
|
||||
|
||||
2024-01-05 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add --set-page-labels command-line argument and supporting API.
|
||||
|
@ -579,6 +579,38 @@ QPDF::reconstruct_xref(QPDFExc& e)
|
||||
}
|
||||
m->deleted_objects.clear();
|
||||
|
||||
if (!m->trailer.isInitialized()) {
|
||||
qpdf_offset_t max_offset{0};
|
||||
// If there are any xref streams, take the last one to appear.
|
||||
for (auto const& iter: m->xref_table) {
|
||||
auto entry = iter.second;
|
||||
if (entry.getType() != 1) {
|
||||
continue;
|
||||
}
|
||||
auto oh = getObjectByObjGen(iter.first);
|
||||
try {
|
||||
if (!oh.isStreamOfType("/XRef")) {
|
||||
continue;
|
||||
}
|
||||
} catch (std::exception&) {
|
||||
continue;
|
||||
}
|
||||
auto offset = entry.getOffset();
|
||||
if (offset > max_offset) {
|
||||
max_offset = offset;
|
||||
setTrailer(oh.getDict());
|
||||
}
|
||||
}
|
||||
if (max_offset > 0) {
|
||||
try {
|
||||
read_xref(max_offset);
|
||||
} catch (std::exception&) {
|
||||
throw damagedPDF("", 0, "error decoding candidate xref stream while recovering damaged file");
|
||||
}
|
||||
QTC::TC("qpdf", "QPDF recover xref stream");
|
||||
}
|
||||
}
|
||||
|
||||
if (!m->trailer.isInitialized()) {
|
||||
// We could check the last encountered object to see if it was an xref stream. If so, we
|
||||
// could try to get the trailer from there. This may make it possible to recover files with
|
||||
|
@ -67,6 +67,11 @@ Planned changes for future 12.x (subject to change):
|
||||
|
||||
- ``QPDFPageLabelDocumentHelper::pageLabelDict``
|
||||
|
||||
- Improve file recovery logic to better handle files with
|
||||
cross-reference streams. This should enable qpdf to recover some
|
||||
files for which it would previously have reported "unable to find
|
||||
trailer dictionary."
|
||||
|
||||
11.7.0: December 24, 2023
|
||||
- Bug fixes:
|
||||
|
||||
|
@ -689,3 +689,4 @@ QPDFPageObjectHelper used fallback without copying 0
|
||||
QPDF skipping cache for known unchecked object 0
|
||||
QPDF fix dangling triggered xref reconstruction 0
|
||||
QPDFPageDocumentHelper flatten resources missing or invalid 0
|
||||
QPDF recover xref stream 0
|
||||
|
@ -16,7 +16,7 @@ cleanup();
|
||||
|
||||
my $td = new TestDriver('object-stream');
|
||||
|
||||
my $n_tests = 3 + (36 * 4) + (12 * 2);
|
||||
my $n_tests = 5 + (36 * 4) + (12 * 2);
|
||||
my $n_compare_pdfs = 36;
|
||||
|
||||
for (my $n = 16; $n <= 19; ++$n)
|
||||
@ -87,5 +87,15 @@ $td->runtest("check file",
|
||||
{$td->FILE => "gen1.qdf"});
|
||||
|
||||
|
||||
# Recover a file with xref streams
|
||||
$td->runtest("recover file with xref stream",
|
||||
{$td->COMMAND => "qpdf --static-id --compress-streams=n" .
|
||||
" recover-xref-stream.pdf a.pdf"},
|
||||
{$td->FILE => "recover-xref-stream.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check file",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "recover-xref-stream-recovered.pdf"});
|
||||
|
||||
cleanup();
|
||||
$td->report(calc_ntests($n_tests, $n_compare_pdfs));
|
||||
|
@ -1,4 +1,6 @@
|
||||
WARNING: bad7.pdf: file is damaged
|
||||
WARNING: bad7.pdf (offset 698): expected trailer dictionary
|
||||
WARNING: bad7.pdf: Attempting to reconstruct cross-reference table
|
||||
WARNING: bad7.pdf (object 2 0, offset 128): expected endobj
|
||||
WARNING: bad7.pdf (object 4 0, offset 389): expected endobj
|
||||
bad7.pdf: unable to find trailer dictionary while recovering damaged file
|
||||
|
@ -2,4 +2,7 @@ WARNING: issue-146.pdf: file is damaged
|
||||
WARNING: issue-146.pdf: can't find startxref
|
||||
WARNING: issue-146.pdf: Attempting to reconstruct cross-reference table
|
||||
WARNING: issue-146.pdf (trailer, offset 695): ignoring excessively deeply nested data structure
|
||||
WARNING: issue-146.pdf (object 1 0, offset 92): expected endobj
|
||||
WARNING: issue-146.pdf (object 7 0, offset 146): unknown token while reading object; treating as string
|
||||
WARNING: issue-146.pdf (object 7 0, offset 168): expected endobj
|
||||
qpdf: issue-146.pdf: unable to find trailer dictionary while recovering damaged file
|
||||
|
@ -7,4 +7,9 @@ WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: s
|
||||
WARNING: issue-148.pdf: file is damaged
|
||||
WARNING: issue-148.pdf (offset 73): getStreamData called on unfilterable stream
|
||||
WARNING: issue-148.pdf: Attempting to reconstruct cross-reference table
|
||||
qpdf: issue-148.pdf: unable to find trailer dictionary while recovering damaged file
|
||||
WARNING: issue-148.pdf (xref stream: object 8 0, offset 26): stream dictionary lacks /Length key
|
||||
WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): attempting to recover stream length
|
||||
WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): recovered stream length: 2
|
||||
WARNING: issue-148.pdf (xref stream: object 8 0, offset 85): expected endobj
|
||||
WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect header check
|
||||
qpdf: issue-148.pdf: error decoding candidate xref stream while recovering damaged file
|
||||
|
@ -2,4 +2,5 @@ WARNING: issue-150.pdf: can't find PDF header
|
||||
WARNING: issue-150.pdf: file is damaged
|
||||
WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer
|
||||
WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table
|
||||
WARNING: issue-150.pdf (object 8 0): object has offset 0
|
||||
qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file
|
||||
|
@ -3,4 +3,6 @@ WARNING: issue-202.pdf: file is damaged
|
||||
WARNING: issue-202.pdf (offset 54769): expected trailer dictionary
|
||||
WARNING: issue-202.pdf: Attempting to reconstruct cross-reference table
|
||||
WARNING: issue-202.pdf (trailer, offset 55770): ignoring excessively deeply nested data structure
|
||||
WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Creator; last occurrence overrides earlier ones
|
||||
WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Producer; last occurrence overrides earlier ones
|
||||
qpdf: issue-202.pdf: unable to find trailer dictionary while recovering damaged file
|
||||
|
BIN
qpdf/qtest/qpdf/recover-xref-stream-recovered.pdf
Normal file
BIN
qpdf/qtest/qpdf/recover-xref-stream-recovered.pdf
Normal file
Binary file not shown.
5
qpdf/qtest/qpdf/recover-xref-stream.out
Normal file
5
qpdf/qtest/qpdf/recover-xref-stream.out
Normal file
@ -0,0 +1,5 @@
|
||||
WARNING: recover-xref-stream.pdf: file is damaged
|
||||
WARNING: recover-xref-stream.pdf: can't find startxref
|
||||
WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table
|
||||
WARNING: recover-xref-stream.pdf: reported number of objects (14) is not one plus the highest object number (15)
|
||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
BIN
qpdf/qtest/qpdf/recover-xref-stream.pdf
Normal file
BIN
qpdf/qtest/qpdf/recover-xref-stream.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user