From a5d8783f6793de05381fe399d4d0025d480b2aa3 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 25 Jan 2013 11:08:50 -0500 Subject: [PATCH] Improve qpdf --check Fix exit status for case of errors without warnings, continue after errors when possible, add test case for parsing a file with content stream errors on some but not all pages. --- ChangeLog | 10 +++--- libqpdf/QPDFObjectHandle.cc | 8 ++--- qpdf/qpdf.cc | 40 +++++++++++++++++----- qpdf/qtest/qpdf.test | 9 +++-- qpdf/qtest/qpdf/content-stream-errors.out | 7 ++++ qpdf/qtest/qpdf/content-stream-errors.pdf | Bin 0 -> 2889 bytes 6 files changed, 55 insertions(+), 19 deletions(-) create mode 100644 qpdf/qtest/qpdf/content-stream-errors.out create mode 100644 qpdf/qtest/qpdf/content-stream-errors.pdf diff --git a/ChangeLog b/ChangeLog index d7a61d1f..3473a31e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2013-01-25 Jay Berkenbilt + + * qpdf --check was exiting with status 0 in some rare cases even + when errors were found. It now always exits with one of the + document error codes (0 for success, 2 for errors, 3 for warnings). + 2013-01-24 Jay Berkenbilt * qpdf --check now does syntactic checks all pages' content @@ -5,10 +11,6 @@ errors are still not checked, and there are no plans to add semantic checks. - * Bug fix: in versions 4.0.0 and 4.0.1, qpdf --check wasn't doing - as full of a check for linearized files as for non-linearized - files. - 2013-01-22 Jay Berkenbilt * Add QPDFObjectHandle::getTypeCode(). This method returns a diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index fca8191d..1a7dfc73 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1012,10 +1012,10 @@ QPDFObjectHandle::parseInternal(PointerHolder input, } else if (! 
object.isInitialized()) { - throw std::logic_error( - "INTERNAL ERROR: uninitialized object (token = " + - QUtil::int_to_string(token.getType()) + - ", " + token.getValue() + ")"); + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), + object_description, + input->getLastOffset(), + "parse error while reading object"); } else { diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 102f4b7f..a416870a 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -1428,7 +1428,11 @@ int main(int argc, char* argv[]) } if (check) { - bool okay = false; + // Code below may set okay to false but not to true. + // We assume okay until we prove otherwise but may + // continue to perform additional checks after finding + // errors. + bool okay = true; std::cout << "checking " << infilename << std::endl; try { @@ -1444,8 +1448,11 @@ int main(int argc, char* argv[]) if (pdf.isLinearized()) { std::cout << "File is linearized\n"; - okay = pdf.checkLinearization(); - // any errors are reported by checkLinearization(). + if (! pdf.checkLinearization()) + { + // any errors are reported by checkLinearization() + okay = false; + } } else { @@ -1453,8 +1460,8 @@ int main(int argc, char* argv[]) } // Write the file no nowhere, uncompressing - // streams. This causes full file traversal - // and decoding of all streams we can decode. + // streams. This causes full file traversal and + // decoding of all streams we can decode. 
QPDFWriter w(pdf); Pl_Discard discard; w.setOutputPipeline(&discard); @@ -1464,19 +1471,30 @@ int main(int argc, char* argv[]) // Parse all content streams std::vector pages = pdf.getAllPages(); DiscardContents discard_contents; + int pageno = 0; for (std::vector::iterator iter = pages.begin(); iter != pages.end(); ++iter) { - QPDFObjectHandle::parseContentStream( - (*iter).getKey("/Contents"), &discard_contents); + ++pageno; + try + { + QPDFObjectHandle::parseContentStream( + (*iter).getKey("/Contents"), + &discard_contents); + } + catch (QPDFExc& e) + { + okay = false; + std::cout << "page " << pageno << ": " + << e.what() << std::endl; + } } - - okay = true; } catch (std::exception& e) { std::cout << e.what() << std::endl; + okay = false; } if (okay) { @@ -1493,6 +1511,10 @@ int main(int argc, char* argv[]) << std::endl; } } + else + { + exit(EXIT_ERROR); + } } } else diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 8d2b5cfc..f447bd83 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 59; +$n_tests += 60; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -435,7 +435,7 @@ $td->runtest("EOF terminating literal tokens", $td->NORMALIZE_NEWLINES); $td->runtest("EOF reading token", {$td->COMMAND => "qpdf --check eof-reading-token.pdf"}, - {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 0}, + {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 2}, $td->NORMALIZE_NEWLINES); $td->runtest("extra header text", {$td->COMMAND => "test_driver 32 minimal.pdf"}, @@ -478,6 +478,11 @@ $td->runtest("tokenize content streams", {$td->FILE => "tokenize-content-streams.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("content stream errors", + {$td->COMMAND => "qpdf --check content-stream-errors.pdf"}, + {$td->FILE => "content-stream-errors.out", + 
$td->EXIT_STATUS => 2}, + $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/content-stream-errors.out b/qpdf/qtest/qpdf/content-stream-errors.out new file mode 100644 index 00000000..a375f5e6 --- /dev/null +++ b/qpdf/qtest/qpdf/content-stream-errors.out @@ -0,0 +1,7 @@ +checking content-stream-errors.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +page 1: content stream object 7 0 (content, file position 52): parse error while reading object +page 3: content stream object 15 0 (stream data, file position 117): EOF found while reading inline image +page 4: content stream object 19 0 (content, file position 53): parse error while reading object diff --git a/qpdf/qtest/qpdf/content-stream-errors.pdf b/qpdf/qtest/qpdf/content-stream-errors.pdf new file mode 100644 index 0000000000000000000000000000000000000000..abbb7d79087c81322240ef29d7f253625868fc6f GIT binary patch literal 2889 zcmcgu&2G~`5T@d`7hYj5k>CL9S+C<*iYk$`p%s6jjnqTcgX64AP-<7Ug9^{Wi5KCq zxFE#L#y?FQep-n{NxVDW-I?#3na}Qw4@OV%nJYTqzJL8GI`8b%5dy)1boxQ``vO3Y z!%LHcBHIa1C!ekj$YEH7i}X_L?+cU6c(LFW&8}g(ObXDv zdjs>t>URJ}RaMXiqpGg_ zOJ;^ty0$suG=u9^`!GcfvR5X#EZ)P496Bv8G80~j!G!9XOI>N0#5DF5eI1z0X#9Ab z7GaS-fyoDPI1%R{)ATxLJt%iq%D|_xD^Jy8!TdGe2Vrh(d&wiS_-KlG6tdsLBuZ!V z2g|FSz>XrmX%8JnYTO6wk5k|HIgKMe3_V=LR{35 zk-)U+Qo{CP6LHBGt`cxR!rDaHC$pOjR@d4a?iGDnKu=j9p zZ%c7v)#8YLL;8Am?2PoPQ|?N-7P#I~CF#_v`~HYq1JKCPhMd`UUXI7_nU#M0C z+3g=VyU9!}98j)s>JM~1&-EaN+NP*(`2$H~^NH3C8wMoE8#XtfcG<9bL3?k{X{|$t z5v=b;RIR;;nyO=Co$6BcZrI#54%Di>PV2VK>$W(+E@}U!4?k$f(&G}`h$V2_>wZR&Q+MHbE%CR@Fe$&)mt_byxR{6v_4Hk@x7k^r3dF$yqr{MjsWrlzM& z&~su`VLUanXsX?KzaIoHt#Eo%-`D6&-MAZhuGX>cV&ny(w|}u}qcAIMIVsgsqSHBi HI}*PDhqAxH literal 0 HcmV?d00001