mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Attempt to find xref streams during recovery (fixes #1103)
This commit is contained in:
parent
8a24287c39
commit
2994f9cf4c
@ -1,3 +1,9 @@
|
|||||||
|
2024-01-06 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* When recovering a file's xref table, attempt to find xref
|
||||||
|
streams if a traditional trailer dictionary is not found. Fixes
|
||||||
|
#1103.
|
||||||
|
|
||||||
2024-01-05 Jay Berkenbilt <ejb@ql.org>
|
2024-01-05 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Add --set-page-labels command-line argument and supporting API.
|
* Add --set-page-labels command-line argument and supporting API.
|
||||||
|
@ -579,6 +579,38 @@ QPDF::reconstruct_xref(QPDFExc& e)
|
|||||||
}
|
}
|
||||||
m->deleted_objects.clear();
|
m->deleted_objects.clear();
|
||||||
|
|
||||||
|
if (!m->trailer.isInitialized()) {
|
||||||
|
qpdf_offset_t max_offset{0};
|
||||||
|
// If there are any xref streams, take the last one to appear.
|
||||||
|
for (auto const& iter: m->xref_table) {
|
||||||
|
auto entry = iter.second;
|
||||||
|
if (entry.getType() != 1) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto oh = getObjectByObjGen(iter.first);
|
||||||
|
try {
|
||||||
|
if (!oh.isStreamOfType("/XRef")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} catch (std::exception&) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto offset = entry.getOffset();
|
||||||
|
if (offset > max_offset) {
|
||||||
|
max_offset = offset;
|
||||||
|
setTrailer(oh.getDict());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (max_offset > 0) {
|
||||||
|
try {
|
||||||
|
read_xref(max_offset);
|
||||||
|
} catch (std::exception&) {
|
||||||
|
throw damagedPDF("", 0, "error decoding candidate xref stream while recovering damaged file");
|
||||||
|
}
|
||||||
|
QTC::TC("qpdf", "QPDF recover xref stream");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!m->trailer.isInitialized()) {
|
if (!m->trailer.isInitialized()) {
|
||||||
// We could check the last encountered object to see if it was an xref stream. If so, we
|
// We could check the last encountered object to see if it was an xref stream. If so, we
|
||||||
// could try to get the trailer from there. This may make it possible to recover files with
|
// could try to get the trailer from there. This may make it possible to recover files with
|
||||||
|
@ -67,6 +67,11 @@ Planned changes for future 12.x (subject to change):
|
|||||||
|
|
||||||
- ``QPDFPageLabelDocumentHelper::pageLabelDict``
|
- ``QPDFPageLabelDocumentHelper::pageLabelDict``
|
||||||
|
|
||||||
|
- Improve file recovery logic to better handle files with
|
||||||
|
cross-reference streams. This should enable qpdf to recover some
|
||||||
|
files for which it would previously have reported "unable to find
|
||||||
|
trailer dictionary."
|
||||||
|
|
||||||
11.7.0: December 24, 2023
|
11.7.0: December 24, 2023
|
||||||
- Bug fixes:
|
- Bug fixes:
|
||||||
|
|
||||||
|
@ -689,3 +689,4 @@ QPDFPageObjectHelper used fallback without copying 0
|
|||||||
QPDF skipping cache for known unchecked object 0
|
QPDF skipping cache for known unchecked object 0
|
||||||
QPDF fix dangling triggered xref reconstruction 0
|
QPDF fix dangling triggered xref reconstruction 0
|
||||||
QPDFPageDocumentHelper flatten resources missing or invalid 0
|
QPDFPageDocumentHelper flatten resources missing or invalid 0
|
||||||
|
QPDF recover xref stream 0
|
||||||
|
@ -16,7 +16,7 @@ cleanup();
|
|||||||
|
|
||||||
my $td = new TestDriver('object-stream');
|
my $td = new TestDriver('object-stream');
|
||||||
|
|
||||||
my $n_tests = 3 + (36 * 4) + (12 * 2);
|
my $n_tests = 5 + (36 * 4) + (12 * 2);
|
||||||
my $n_compare_pdfs = 36;
|
my $n_compare_pdfs = 36;
|
||||||
|
|
||||||
for (my $n = 16; $n <= 19; ++$n)
|
for (my $n = 16; $n <= 19; ++$n)
|
||||||
@ -87,5 +87,15 @@ $td->runtest("check file",
|
|||||||
{$td->FILE => "gen1.qdf"});
|
{$td->FILE => "gen1.qdf"});
|
||||||
|
|
||||||
|
|
||||||
|
# Recover a file with xref streams
|
||||||
|
$td->runtest("recover file with xref stream",
|
||||||
|
{$td->COMMAND => "qpdf --static-id --compress-streams=n" .
|
||||||
|
" recover-xref-stream.pdf a.pdf"},
|
||||||
|
{$td->FILE => "recover-xref-stream.out", $td->EXIT_STATUS => 3},
|
||||||
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
$td->runtest("check file",
|
||||||
|
{$td->FILE => "a.pdf"},
|
||||||
|
{$td->FILE => "recover-xref-stream-recovered.pdf"});
|
||||||
|
|
||||||
cleanup();
|
cleanup();
|
||||||
$td->report(calc_ntests($n_tests, $n_compare_pdfs));
|
$td->report(calc_ntests($n_tests, $n_compare_pdfs));
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
WARNING: bad7.pdf: file is damaged
|
WARNING: bad7.pdf: file is damaged
|
||||||
WARNING: bad7.pdf (offset 698): expected trailer dictionary
|
WARNING: bad7.pdf (offset 698): expected trailer dictionary
|
||||||
WARNING: bad7.pdf: Attempting to reconstruct cross-reference table
|
WARNING: bad7.pdf: Attempting to reconstruct cross-reference table
|
||||||
|
WARNING: bad7.pdf (object 2 0, offset 128): expected endobj
|
||||||
|
WARNING: bad7.pdf (object 4 0, offset 389): expected endobj
|
||||||
bad7.pdf: unable to find trailer dictionary while recovering damaged file
|
bad7.pdf: unable to find trailer dictionary while recovering damaged file
|
||||||
|
@ -2,4 +2,7 @@ WARNING: issue-146.pdf: file is damaged
|
|||||||
WARNING: issue-146.pdf: can't find startxref
|
WARNING: issue-146.pdf: can't find startxref
|
||||||
WARNING: issue-146.pdf: Attempting to reconstruct cross-reference table
|
WARNING: issue-146.pdf: Attempting to reconstruct cross-reference table
|
||||||
WARNING: issue-146.pdf (trailer, offset 695): ignoring excessively deeply nested data structure
|
WARNING: issue-146.pdf (trailer, offset 695): ignoring excessively deeply nested data structure
|
||||||
|
WARNING: issue-146.pdf (object 1 0, offset 92): expected endobj
|
||||||
|
WARNING: issue-146.pdf (object 7 0, offset 146): unknown token while reading object; treating as string
|
||||||
|
WARNING: issue-146.pdf (object 7 0, offset 168): expected endobj
|
||||||
qpdf: issue-146.pdf: unable to find trailer dictionary while recovering damaged file
|
qpdf: issue-146.pdf: unable to find trailer dictionary while recovering damaged file
|
||||||
|
@ -7,4 +7,9 @@ WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: s
|
|||||||
WARNING: issue-148.pdf: file is damaged
|
WARNING: issue-148.pdf: file is damaged
|
||||||
WARNING: issue-148.pdf (offset 73): getStreamData called on unfilterable stream
|
WARNING: issue-148.pdf (offset 73): getStreamData called on unfilterable stream
|
||||||
WARNING: issue-148.pdf: Attempting to reconstruct cross-reference table
|
WARNING: issue-148.pdf: Attempting to reconstruct cross-reference table
|
||||||
qpdf: issue-148.pdf: unable to find trailer dictionary while recovering damaged file
|
WARNING: issue-148.pdf (xref stream: object 8 0, offset 26): stream dictionary lacks /Length key
|
||||||
|
WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): attempting to recover stream length
|
||||||
|
WARNING: issue-148.pdf (xref stream: object 8 0, offset 73): recovered stream length: 2
|
||||||
|
WARNING: issue-148.pdf (xref stream: object 8 0, offset 85): expected endobj
|
||||||
|
WARNING: issue-148.pdf (offset 73): error decoding stream data for object 8 0: stream inflate: inflate: data: incorrect header check
|
||||||
|
qpdf: issue-148.pdf: error decoding candidate xref stream while recovering damaged file
|
||||||
|
@ -2,4 +2,5 @@ WARNING: issue-150.pdf: can't find PDF header
|
|||||||
WARNING: issue-150.pdf: file is damaged
|
WARNING: issue-150.pdf: file is damaged
|
||||||
WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer
|
WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer
|
||||||
WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table
|
WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table
|
||||||
|
WARNING: issue-150.pdf (object 8 0): object has offset 0
|
||||||
qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file
|
qpdf: issue-150.pdf: unable to find trailer dictionary while recovering damaged file
|
||||||
|
@ -3,4 +3,6 @@ WARNING: issue-202.pdf: file is damaged
|
|||||||
WARNING: issue-202.pdf (offset 54769): expected trailer dictionary
|
WARNING: issue-202.pdf (offset 54769): expected trailer dictionary
|
||||||
WARNING: issue-202.pdf: Attempting to reconstruct cross-reference table
|
WARNING: issue-202.pdf: Attempting to reconstruct cross-reference table
|
||||||
WARNING: issue-202.pdf (trailer, offset 55770): ignoring excessively deeply nested data structure
|
WARNING: issue-202.pdf (trailer, offset 55770): ignoring excessively deeply nested data structure
|
||||||
|
WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Creator; last occurrence overrides earlier ones
|
||||||
|
WARNING: issue-202.pdf (object 222 0, offset 50101): dictionary has duplicated key /Producer; last occurrence overrides earlier ones
|
||||||
qpdf: issue-202.pdf: unable to find trailer dictionary while recovering damaged file
|
qpdf: issue-202.pdf: unable to find trailer dictionary while recovering damaged file
|
||||||
|
BIN
qpdf/qtest/qpdf/recover-xref-stream-recovered.pdf
Normal file
BIN
qpdf/qtest/qpdf/recover-xref-stream-recovered.pdf
Normal file
Binary file not shown.
5
qpdf/qtest/qpdf/recover-xref-stream.out
Normal file
5
qpdf/qtest/qpdf/recover-xref-stream.out
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
WARNING: recover-xref-stream.pdf: file is damaged
|
||||||
|
WARNING: recover-xref-stream.pdf: can't find startxref
|
||||||
|
WARNING: recover-xref-stream.pdf: Attempting to reconstruct cross-reference table
|
||||||
|
WARNING: recover-xref-stream.pdf: reported number of objects (14) is not one plus the highest object number (15)
|
||||||
|
qpdf: operation succeeded with warnings; resulting file may have some problems
|
BIN
qpdf/qtest/qpdf/recover-xref-stream.pdf
Normal file
BIN
qpdf/qtest/qpdf/recover-xref-stream.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user