From a7e8b8c789ae43976cab5356a586183ae123a14d Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 24 Jan 2013 10:47:29 -0500 Subject: [PATCH] Have qpdf --check parse content streams Also move writing to null and parsing of content streams out of the wrong if block. --- ChangeLog | 11 +++++++++++ qpdf/qpdf.cc | 41 +++++++++++++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5cbfc738..d7a61d1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2013-01-24 Jay Berkenbilt + + * qpdf --check now does syntactic checks on all pages' content + streams as well as checking overall document structure. Semantic + errors are still not checked, and there are no plans to add + semantic checks. + + * Bug fix: in versions 4.0.0 and 4.0.1, qpdf --check wasn't doing + as full a check for linearized files as for non-linearized + files. + 2013-01-22 Jay Berkenbilt * Add QPDFObjectHandle::getTypeCode(). This method returns a diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 5a4c4089..102f4b7f 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -46,6 +46,14 @@ struct QPDFPageData std::vector selected_pages; }; +class DiscardContents: public QPDFObjectHandle::ParserCallbacks +{ + public: + virtual ~DiscardContents() {} + virtual void handleObject(QPDFObjectHandle) {} + virtual void handleEOF() {} +}; + // Note: let's not be too noisy about documenting the fact that this // software purposely fails to enforce the distinction between user // and owner passwords. A user password is sufficient to gain full @@ -1442,16 +1450,29 @@ int main(int argc, char* argv[]) else { std::cout << "File is not linearized\n"; - // Write the file no nowhere, uncompressing - // streams. This causes full file traversal - // and decoding of all streams we can decode. 
- QPDFWriter w(pdf); - Pl_Discard discard; - w.setOutputPipeline(&discard); - w.setStreamDataMode(qpdf_s_uncompress); - w.write(); - okay = true; - } + } + + // Write the file to nowhere, uncompressing + // streams. This causes full file traversal + // and decoding of all streams we can decode. + QPDFWriter w(pdf); + Pl_Discard discard; + w.setOutputPipeline(&discard); + w.setStreamDataMode(qpdf_s_uncompress); + w.write(); + + // Parse all content streams + std::vector pages = pdf.getAllPages(); + DiscardContents discard_contents; + for (std::vector::iterator iter = + pages.begin(); + iter != pages.end(); ++iter) + { + QPDFObjectHandle::parseContentStream( + (*iter).getKey("/Contents"), &discard_contents); + } + + okay = true; } catch (std::exception& e) {