From 599daddb47fc0340e48b02f7ba00ef86bfef8c45 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sun, 8 Mar 2009 19:00:19 +0000 Subject: [PATCH] decode streams on check, always exit abnormally when warnings are detected git-svn-id: svn+q:///qpdf/trunk@660 71b93d88-0707-0410-a8cf-f5a4172ac649 --- ChangeLog | 19 ++++++++++++++++ include/qpdf/QPDF.hh | 4 ++++ libqpdf/QPDF.cc | 49 +++++++++++++++++++++++++++++++++--------- manual/qpdf-manual.xml | 24 +++++++++++++-------- qpdf/qpdf.cc | 20 +++++++++++------ qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf.test | 33 ++++++++++++++++++++++------ 7 files changed, 118 insertions(+), 32 deletions(-) diff --git a/ChangeLog b/ChangeLog index 949b64be..d93ed079 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2009-03-08 Jay Berkenbilt + + * qpdf/fix-qdf (write_ostream): Adjust offsets while writing + object streams to account for changes in the length of the + dictionary and offset tables. + + * qpdf/qpdf.cc (main): In check mode, in addition to checking + structure of file, attempt to decode all stream data. + + * libqpdf/QPDFWriter.cc (QPDFWriter::writeObject): In QDF mode, + write a comment to the QDF file that indicates the object ID from + the original file. + + * libqpdf/QPDF.cc (QPDF::pipeStreamData): Issue a warning instead + of failing if there is a problem found while decoding stream. + + * qpdf/qpdf.cc: Exit with a status of 3 if warnings were found + regardless of what mode we're in. + 2009-02-21 Jay Berkenbilt * 2.0.4: release diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 6081d18b..5868268b 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -160,6 +160,10 @@ class QPDF // dictionaries) with direct objects. void flattenScalarReferences(); + // Decode all streams, discarding the output. Used to check + // correctness of stream encoding. 
+ void decodeStreams(); + // For QPDFWriter: // Remove /ID, /Encrypt, and /Prev keys from the trailer diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 0fe738f4..bcee47a4 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -1810,23 +1811,51 @@ QPDF::pipeStreamData(int objid, int generation, } } - this->file.seek(offset, SEEK_SET); - char buf[10240]; - while (length > 0) + try { - size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length); - size_t len = this->file.read(buf, to_read); - if (len == 0) + this->file.seek(offset, SEEK_SET); + char buf[10240]; + while (length > 0) { - throw QPDFExc(this->file.getName(), this->file.getLastOffset(), - "unexpected EOF reading stream data"); + size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length); + size_t len = this->file.read(buf, to_read); + if (len == 0) + { + throw QPDFExc(this->file.getName(), this->file.getLastOffset(), + "unexpected EOF reading stream data"); + } + length -= len; + pipeline->write((unsigned char*)buf, len); } - length -= len; - pipeline->write((unsigned char*)buf, len); + } + catch (QEXC::General& e) + { + QTC::TC("qpdf", "QPDF decoding error warning"); + warn(QPDFExc(this->file.getName(), this->file.getLastOffset(), + "error decoding stream data for object " + + QUtil::int_to_string(objid) + " " + + QUtil::int_to_string(generation) + ": " + e.unparse())); } pipeline->finish(); } +void +QPDF::decodeStreams() +{ + for (std::map::iterator iter = + this->xref_table.begin(); + iter != this->xref_table.end(); ++iter) + { + ObjGen const& og = (*iter).first; + QPDFObjectHandle obj = getObjectByID(og.obj, og.gen); + if (obj.isStream()) + { + Pl_Discard pl; + obj.pipeStreamData(&pl, true, false, false); + } + } +} + std::vector const& QPDF::getAllPages() { diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 996ac29e..6259bcfa 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -738,15 
+738,15 @@ make - Checks file structure and well as encryption and - linearization. A file for which - reports no errors may still have errors in stream data but - should otherwise be otherwise structurally sound. If - any errors, qpdf will exit with a - status of 2. There are some recoverable conditions that - detects. These are issued as - warnings instead of errors. If qpdf finds no errors but finds - warnings, it will exit with a status of 3 (as of + Checks file structure as well as encryption, linearization, + and encoding of stream data. A file for which + reports no errors may still have + errors in stream data content but should otherwise be + structurally sound. If any errors, + qpdf will exit with a status of 2. There are some recoverable + conditions that detects. These are + issued as warnings instead of errors. If qpdf finds no errors + but finds warnings, it will exit with a status of 3 (as of version 2.0.4). @@ -861,6 +861,12 @@ make special comments that make them easy to find. + + + Comments precede each object indicating the object number of the + corresponding object in the original file. + + diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 9a349902..279eef33 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -12,6 +12,9 @@ #include +static int const EXIT_ERROR = 2; +static int const EXIT_WARNING = 3; + static char const* whoami = 0; // Note: let's not be too noisy about documenting the fact that this @@ -158,7 +161,7 @@ void usage(std::string const& msg) << "Usage: " << whoami << " [options] infile outfile" << std::endl << "For detailed help, run " << whoami << " --help" << std::endl << std::endl; - exit(2); + exit(EXIT_ERROR); } static void show_encryption(QPDF& pdf) @@ -752,7 +755,7 @@ int main(int argc, char* argv[]) } else { - exit(2); + exit(EXIT_ERROR); } } if (show_linearization) @@ -777,7 +780,7 @@ int main(int argc, char* argv[]) QTC::TC("qpdf", "unable to filter"); std::cerr << "Unable to filter stream data." 
<< std::endl; - exit(2); + exit(EXIT_ERROR); } else { @@ -869,6 +872,8 @@ int main(int argc, char* argv[]) // traversal of file, so any structural errors // would be exposed. pdf.flattenScalarReferences(); + // Also explicitly decode all streams. + pdf.decodeStreams(); okay = true; } } @@ -880,8 +885,7 @@ int main(int argc, char* argv[]) { if (! pdf.getWarnings().empty()) { - // special exit status for warnings without errors - exit(3); + exit(EXIT_WARNING); } else { @@ -946,11 +950,15 @@ int main(int argc, char* argv[]) } w.write(); } + if (! pdf.getWarnings().empty()) + { + exit(EXIT_WARNING); + } } catch (std::exception& e) { std::cerr << e.what() << std::endl; - exit(2); + exit(EXIT_ERROR); } return 0; diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 93f850f4..3110f76a 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -117,3 +117,4 @@ QPDF piping xref stream from encrypted file 0 unable to filter 0 QPDF_String non-trivial UTF-16 0 QPDF xref overwrite object 0 +QPDF decoding error warning 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 7432edbd..0751afbd 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -183,7 +183,7 @@ for (my $i = 1; $i <= scalar(@badfiles); ++$i) $td->runtest("recover heifer file", {$td->COMMAND => "qpdf --static-id -qdf heifer.pdf a.pdf"}, {$td->FILE => "heifer.out", - $td->EXIT_STATUS => 0}, + $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); $td->runtest("check output", {$td->FILE => "a.pdf"}, @@ -206,7 +206,7 @@ $td->runtest("damaged replaced page contents", {$td->COMMAND => "qpdf --static-id -qdf" . 
" append-page-content-damaged.pdf a.pdf"}, {$td->FILE => "append-page-content-damaged.out", - $td->EXIT_STATUS => 0}, + $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); $td->runtest("check output", {$td->FILE => "a.pdf"}, @@ -282,7 +282,7 @@ check_pdf("no recompression", show_ntests(); # ---------- $td->notify("--- Object Stream Tests ---"); -$n_tests += 36 * 4; +$n_tests += 36 * 6; $n_compare_pdfs += 36; for (my $n = 16; $n <= 19; ++$n) @@ -294,7 +294,7 @@ for (my $n = 16; $n <= 19; ++$n) { foreach my $qdf ('-qdf', '', '-encrypt "" x 128 --') { - # 4 tests + 1 compare_pdfs + # 6 tests + 1 compare_pdfs $td->runtest("object stream mode", {$td->COMMAND => "qpdf --static-id $flags $qdf $in a.pdf"}, @@ -316,6 +316,12 @@ for (my $n = 16; $n <= 19; ++$n) $td->runtest("compare files", {$td->FILE => "a.qdf"}, {$td->FILE => "b.qdf"}); + $td->runtest("fix-qdf identity check", + {$td->COMMAND => "fix-qdf a.qdf >| b.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); + $td->runtest("compare files", + {$td->FILE => "a.qdf"}, + {$td->FILE => "b.qdf"}); } } flush_tiff_cache(); @@ -324,12 +330,14 @@ for (my $n = 16; $n <= 19; ++$n) show_ntests(); # ---------- $td->notify("--- Specific File Tests ---"); -$n_tests += 1; +$n_tests += 2; +$n_compare_pdfs += 1; # Special PDF files that caused problems at some point # This file is a PDF 1.1 file with /# as a name and with -# inconsistencies in its free table. +# inconsistencies in its free table. It also has LZW streams that +# happen to test boundary conditions in the LZW decoder. $td->runtest("old and complex", {$td->COMMAND => "qpdf --check old-and-complex.pdf"}, {$td->STRING => +("checking old-and-complex.pdf\n" . 
@@ -339,6 +347,12 @@ $td->runtest("old and complex", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("convert to qdf", + {$td->COMMAND => "qpdf --qdf old-and-complex.pdf a.qdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); + +compare_pdfs("old-and-complex.pdf", "a.qdf"); + show_ntests(); # ---------- $td->notify("--- Mutability Tests ---"); @@ -823,7 +837,7 @@ foreach my $file (@files) show_ntests(); # ---------- $td->notify("--- fix-qdf Tests ---"); -$n_tests += 2; +$n_tests += 4; for (my $n = 1; $n <= 2; ++$n) { @@ -831,6 +845,11 @@ for (my $n = 1; $n <= 2; ++$n) {$td->COMMAND => "fix-qdf fix$n.qdf"}, {$td->FILE => "fix$n.qdf.out", $td->EXIT_STATUS => 0}); + + $td->runtest("identity fix-qdf $n", + {$td->COMMAND => "fix-qdf fix$n.qdf.out"}, + {$td->FILE => "fix$n.qdf.out", + $td->EXIT_STATUS => 0}); } show_ntests();