diff --git a/ChangeLog b/ChangeLog index 8732f22e..ac32ea25 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,15 @@ 2017-07-29 Jay Berkenbilt + * When passing multiple inspection arguments, run --check first, + and defer exit until after all the checks have been run. This + makes it possible to force operations such as --show-xref to be + delayed until after recovery attempts have been made. For example, + if you have a file with a syntactically valid xref table that has + some offsets that are incorrect, running qpdf --check --show-xref + on that file will first recover the xref and then dump the + recovered xref, while just running qpdf --show-xref will show the + xref table as present in the file. Fixes #42. + * When recovering stream length, indicate the recovered length. Fixes #44. diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 1882f7c9..ae365c70 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -1383,6 +1383,97 @@ int main(int argc, char* argv[]) } if (outfilename == 0) { + int exit_code = 0; + if (check) + { + // Code below may set okay to false but not to true. + // We assume okay until we prove otherwise but may + // continue to perform additional checks after finding + // errors. + bool okay = true; + std::cout << "checking " << infilename << std::endl; + try + { + int extension_level = pdf.getExtensionLevel(); + std::cout << "PDF Version: " << pdf.getPDFVersion(); + if (extension_level > 0) + { + std::cout << " extension level " + << pdf.getExtensionLevel(); + } + std::cout << std::endl; + ::show_encryption(pdf); + if (pdf.isLinearized()) + { + std::cout << "File is linearized\n"; + if (! pdf.checkLinearization()) + { + // any errors are reported by checkLinearization() + okay = false; + } + } + else + { + std::cout << "File is not linearized\n"; + } + + // Write the file to nowhere, uncompressing + // streams. This causes full file traversal and + // decoding of all streams we can decode. 
+ QPDFWriter w(pdf); + Pl_Discard discard; + w.setOutputPipeline(&discard); + w.setStreamDataMode(qpdf_s_uncompress); + w.write(); + + // Parse all content streams + std::vector pages = pdf.getAllPages(); + DiscardContents discard_contents; + int pageno = 0; + for (std::vector::iterator iter = + pages.begin(); + iter != pages.end(); ++iter) + { + ++pageno; + try + { + QPDFObjectHandle::parseContentStream( + (*iter).getKey("/Contents"), + &discard_contents); + } + catch (QPDFExc& e) + { + okay = false; + std::cout << "page " << pageno << ": " + << e.what() << std::endl; + } + } + } + catch (std::exception& e) + { + std::cout << e.what() << std::endl; + okay = false; + } + if (okay) + { + if (! pdf.getWarnings().empty()) + { + exit_code = EXIT_WARNING; + } + else + { + std::cout << "No syntax or stream encoding errors" + << " found; the file may still contain" + << std::endl + << "errors that qpdf cannot detect" + << std::endl; + } + } + else + { + exit_code = EXIT_ERROR; + } + } if (show_npages) { QTC::TC("qpdf", "qpdf npages"); @@ -1402,7 +1493,7 @@ int main(int argc, char* argv[]) } else { - exit(EXIT_ERROR); + exit_code = EXIT_ERROR; } } if (show_linearization) @@ -1435,7 +1526,7 @@ int main(int argc, char* argv[]) QTC::TC("qpdf", "qpdf unable to filter"); std::cerr << "Unable to filter stream data." << std::endl; - exit(EXIT_ERROR); + exit_code = EXIT_ERROR; } else { @@ -1512,96 +1603,10 @@ int main(int argc, char* argv[]) } } } - if (check) - { - // Code below may set okay to false but not to true. - // We assume okay until we prove otherwise but may - // continue to perform additional checks after finding - // errors. 
- bool okay = true; - std::cout << "checking " << infilename << std::endl; - try - { - int extension_level = pdf.getExtensionLevel(); - std::cout << "PDF Version: " << pdf.getPDFVersion(); - if (extension_level > 0) - { - std::cout << " extension level " - << pdf.getExtensionLevel(); - } - std::cout << std::endl; - ::show_encryption(pdf); - if (pdf.isLinearized()) - { - std::cout << "File is linearized\n"; - if (! pdf.checkLinearization()) - { - // any errors are reported by checkLinearization() - okay = false; - } - } - else - { - std::cout << "File is not linearized\n"; - } - - // Write the file no nowhere, uncompressing - // streams. This causes full file traversal and - // decoding of all streams we can decode. - QPDFWriter w(pdf); - Pl_Discard discard; - w.setOutputPipeline(&discard); - w.setStreamDataMode(qpdf_s_uncompress); - w.write(); - - // Parse all content streams - std::vector pages = pdf.getAllPages(); - DiscardContents discard_contents; - int pageno = 0; - for (std::vector::iterator iter = - pages.begin(); - iter != pages.end(); ++iter) - { - ++pageno; - try - { - QPDFObjectHandle::parseContentStream( - (*iter).getKey("/Contents"), - &discard_contents); - } - catch (QPDFExc& e) - { - okay = false; - std::cout << "page " << pageno << ": " - << e.what() << std::endl; - } - } - } - catch (std::exception& e) - { - std::cout << e.what() << std::endl; - okay = false; - } - if (okay) - { - if (! 
pdf.getWarnings().empty()) - { - exit(EXIT_WARNING); - } - else - { - std::cout << "No syntax or stream encoding errors" - << " found; the file may still contain" - << std::endl - << "errors that qpdf cannot detect" - << std::endl; - } - } - else - { - exit(EXIT_ERROR); - } - } + if (exit_code) + { + exit(exit_code); + } } else { diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 81c69025..031c33a9 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -206,7 +206,7 @@ $td->runtest("remove page we don't have", show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 91; +$n_tests += 93; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -628,6 +628,19 @@ $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "newline-before-endstream.pdf"}); +# Demonstrate show-xref after check and not after check to illustrate +# that it can dump the real xref or the recovered xref. +$td->runtest("dump bad xref", + {$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"}, + {$td->FILE => "bad-xref-entry.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("dump corrected bad xref", + {$td->COMMAND => "qpdf --check --show-xref bad-xref-entry.pdf"}, + {$td->FILE => "bad-xref-entry-corrected.out", + $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); + show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/bad-xref-entry-corrected.out b/qpdf/qtest/qpdf/bad-xref-entry-corrected.out new file mode 100644 index 00000000..258c18a7 --- /dev/null +++ b/qpdf/qtest/qpdf/bad-xref-entry-corrected.out @@ -0,0 +1,14 @@ +checking bad-xref-entry.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +WARNING: bad-xref-entry.pdf: file is damaged +WARNING: bad-xref-entry.pdf (object 5 0, file position 580): expected n n obj +WARNING: bad-xref-entry.pdf: Attempting to reconstruct cross-reference table +1/0: uncompressed; offset = 52 +2/0: uncompressed; offset = 133 +3/0: uncompressed; 
offset = 242 +4/0: uncompressed; offset = 484 +5/0: uncompressed; offset = 583 +6/0: uncompressed; offset = 629 +7/0: uncompressed; offset = 774 diff --git a/qpdf/qtest/qpdf/bad-xref-entry.out b/qpdf/qtest/qpdf/bad-xref-entry.out new file mode 100644 index 00000000..3690e02c --- /dev/null +++ b/qpdf/qtest/qpdf/bad-xref-entry.out @@ -0,0 +1,7 @@ +1/0: uncompressed; offset = 52 +2/0: uncompressed; offset = 133 +3/0: uncompressed; offset = 242 +4/0: uncompressed; offset = 484 +5/0: uncompressed; offset = 580 +6/0: uncompressed; offset = 629 +7/0: uncompressed; offset = 774 diff --git a/qpdf/qtest/qpdf/bad-xref-entry.pdf b/qpdf/qtest/qpdf/bad-xref-entry.pdf new file mode 100644 index 00000000..32df6674 --- /dev/null +++ b/qpdf/qtest/qpdf/bad-xref-entry.pdf @@ -0,0 +1,101 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 4 0 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +%% Original object ID: 6 0 +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 5 0 +7 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 8 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000242 00000 n +0000000484 00000 n +0000000580 00000 n +0000000629 00000 n +0000000774 00000 n +trailer << + /Root 1 0 R + /Size 8 + /ID [<2e68fbddcf3742fa64db89e66acd25d9><2e68fbddcf3742fa64db89e66acd25d9>] +>> +startxref +809 +%%EOF