From a7e8b8c789ae43976cab5356a586183ae123a14d Mon Sep 17 00:00:00 2001
From: Jay Berkenbilt <ejb@ql.org>
Date: Thu, 24 Jan 2013 10:47:29 -0500
Subject: [PATCH] Have qpdf --check parse content streams

Also move writing to null and parsing of content streams out of the
wrong if block.
---
 ChangeLog    | 11 +++++++++++
 qpdf/qpdf.cc | 41 +++++++++++++++++++++++++++++++----------
 2 files changed, 42 insertions(+), 10 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 5cbfc738..d7a61d1f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2013-01-24  Jay Berkenbilt  <ejb@ql.org>
+
+	* qpdf --check now does syntactic checks on all pages' content
+	streams as well as checking overall document structure.  Semantic
+	errors are still not checked, and there are no plans to add
+	semantic checks.
+
+	* Bug fix: in versions 4.0.0 and 4.0.1, qpdf --check did not check
+	linearized files as thoroughly as it checked non-linearized
+	files.
+
 2013-01-22  Jay Berkenbilt  <ejb@ql.org>
 
 	* Add QPDFObjectHandle::getTypeCode().  This method returns a
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index 5a4c4089..102f4b7f 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -46,6 +46,14 @@ struct QPDFPageData
     std::vector<int> selected_pages;
 };
 
+class DiscardContents: public QPDFObjectHandle::ParserCallbacks // no-op callbacks handed to parseContentStream by the --check code
+{
+  public:
+    virtual ~DiscardContents() {}
+    virtual void handleObject(QPDFObjectHandle) {} // each parsed object is intentionally ignored
+    virtual void handleEOF() {} // nothing to do at end of stream
+};
+
 // Note: let's not be too noisy about documenting the fact that this
 // software purposely fails to enforce the distinction between user
 // and owner passwords.  A user password is sufficient to gain full
@@ -1442,16 +1450,29 @@ int main(int argc, char* argv[])
 		    else
 		    {
 			std::cout << "File is not linearized\n";
-                        // Write the file no nowhere, uncompressing
-                        // streams.  This causes full file traversal
-                        // and decoding of all streams we can decode.
-                        QPDFWriter w(pdf);
-                        Pl_Discard discard;
-                        w.setOutputPipeline(&discard);
-                        w.setStreamDataMode(qpdf_s_uncompress);
-                        w.write();
-			okay = true;
-		    }
+                    }
+
+                    // Write the file to nowhere, uncompressing
+                    // streams.  This causes full file traversal
+                    // and decoding of all streams we can decode.
+                    QPDFWriter w(pdf);
+                    Pl_Discard discard;
+                    w.setOutputPipeline(&discard);
+                    w.setStreamDataMode(qpdf_s_uncompress);
+                    w.write();
+
+                    // Parse all content streams
+                    std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+                    DiscardContents discard_contents;
+                    for (std::vector<QPDFObjectHandle>::iterator iter =
+                             pages.begin();
+                         iter != pages.end(); ++iter)
+                    {
+                        QPDFObjectHandle::parseContentStream(
+                            (*iter).getKey("/Contents"), &discard_contents);
+                    }
+
+                    okay = true;
 		}
 		catch (std::exception& e)
 		{