From 47f4ebcdac39256b084cdbb91ff04392fca4bb8d Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 4 Nov 2020 07:46:46 -0500 Subject: [PATCH] Ignore unused field in xref entry, avoiding range error (fixes #482) --- ChangeLog | 6 ++++++ libqpdf/QPDF.cc | 23 ++++++++++++++++++++++- qpdf/qtest/qpdf.test | 8 +++++++- qpdf/qtest/qpdf/issue-150.out | 5 ++++- qpdf/qtest/qpdf/xref-range.out | 6 ++++++ qpdf/qtest/qpdf/xref-range.pdf | Bin 0 -> 722 bytes 6 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 qpdf/qtest/qpdf/xref-range.out create mode 100644 qpdf/qtest/qpdf/xref-range.pdf diff --git a/ChangeLog b/ChangeLog index 860e73a9..6e20fabb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2020-11-04 Jay Berkenbilt + + * Ignore the value of the offset/generation field in an xref entry + for a deleted object. Also attempt file recovery on lower-level + exceptions thrown while reading the xref table. Fixes #482. + 2020-10-31 Jay Berkenbilt * 10.0.3: release diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 62ad926c..a233f05e 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -425,7 +425,20 @@ QPDF::parse(char const* password) throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), "", 0, "can't find startxref"); } - read_xref(xref_offset); + try + { + read_xref(xref_offset); + } + catch (QPDFExc&) + { + throw; + } + catch (std::exception& e) + { + throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), "", 0, + std::string("error reading xref: ") + e.what()); + + } } catch (QPDFExc& e) { @@ -1250,6 +1263,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) // This is needed by checkLinearization() this->m->first_xref_item_offset = xref_offset; } + if (fields[0] == 0) + { + // Ignore fields[2], which we don't care about in this + // case. This works around the issue of some PDF files + // that put invalid values, like -1, here for deleted + // objects. + fields[2] = 0; + } insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); } diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 8254013b..1c136126 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -1496,7 +1496,7 @@ $td->runtest("detect loops in pages structure", show_ntests(); # ---------- $td->notify("--- Xref ---"); -$n_tests += 5; +$n_tests += 6; # Handle file with invalid xref table and object 0 as a regular object # (bug 3159950). @@ -1538,6 +1538,12 @@ $td->runtest("show number of pages", {$td->STRING => "20\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +# Issue 482 -- don't range check fields[2] for xref entry type 0. +$td->runtest("out of range in deleted object", + {$td->COMMAND => "qpdf --check xref-range.pdf"}, + {$td->FILE => "xref-range.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + show_ntests(); # ---------- $td->notify("--- Overwrite self ---"); diff --git a/qpdf/qtest/qpdf/issue-150.out b/qpdf/qtest/qpdf/issue-150.out index 862b4d1e..f45a0977 100644 --- a/qpdf/qtest/qpdf/issue-150.out +++ b/qpdf/qtest/qpdf/issue-150.out @@ -1,2 +1,5 @@ WARNING: issue-150.pdf: can't find PDF header -overflow/underflow converting 9900000000000000000 to 64-bit integer +WARNING: issue-150.pdf: file is damaged +WARNING: issue-150.pdf: error reading xref: overflow/underflow converting 9900000000000000000 to 64-bit integer +WARNING: issue-150.pdf: Attempting to reconstruct cross-reference table +issue-150.pdf: unable to find trailer dictionary while recovering damaged file diff --git a/qpdf/qtest/qpdf/xref-range.out b/qpdf/qtest/qpdf/xref-range.out new file mode 100644 index 00000000..cb72e559 --- /dev/null +++ b/qpdf/qtest/qpdf/xref-range.out @@ -0,0 +1,6 @@ +checking xref-range.pdf +PDF Version: 1.5 +File is not encrypted +File is not linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/xref-range.pdf b/qpdf/qtest/qpdf/xref-range.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8a1413d089ad7363b5508c1a23f5c6e00795b4d8 GIT binary patch literal 722 zcmY!laBZ^4|D>$ol3WFSpVYkck_-hS zBVz@9x6GW9)FL3AlUS1KlA4^K0#xXyU<#5cDlSnlGE=a#<0>vGN=?k=s+i;2AJ~1! zK%nLRU(wI}mt1oFCmdR--c->c_d~(%$*rX&N%$K$ zer(hXnWo$xRAe&W`Awc{#DqSDKbUXIx1XN z^8_6{_`aDgJae7GlLR-}tH!ghjI~XGM=ycdRvu%wQlD{+eR_gEjD!(+O z@J*fbqT@W4E~;Ygdl~vC$1Y9HOM!Y6$N~kOIZ_Zp1I@$&dzc{yQZd)VpnwbI#vukq zfgGO9o6hz&oN!Q7VLfq;f4dBwHa77ZH@229Gu~ zM6BhN=Hw{ohbu%Y7%CVkm?*?5=m+KJ10xM+LXd)faAsAif~A7Kr;9?gjiH%kvVnzx zfkCRVMY5%Ns$q(SnT3V1X>ziWxtUpNvK?U+u}DdQfq~&a5HK?E16d4A42%eh5yE4F qP|OgDg^@7`q=b?21c>6o@O5!XVo^y&QED2OiK!Wvs;aBM8y5i9nDS@< literal 0 HcmV?d00001