From 532a4f3d60f6981b22beb32e6ff688ec41f87e26 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 2 Nov 2021 17:54:10 -0400 Subject: [PATCH] Detect recoverable but invalid zlib data streams (fixes #562) --- ChangeLog | 13 +++++++ include/qpdf/Pl_Flate.hh | 6 ++++ libqpdf/Pl_Flate.cc | 35 +++++++++++++++++-- libqpdf/QPDF_Stream.cc | 8 +++++ .../qpdf/form-filled-by-acrobat-warn.out | 4 +++ zlib-flate/qtest/missing-z-finish.in | 2 ++ zlib-flate/qtest/zf.test | 9 ++++- zlib-flate/zlib-flate.cc | 12 ++++++- 8 files changed, 84 insertions(+), 5 deletions(-) create mode 100644 qpdf/qtest/qpdf/form-filled-by-acrobat-warn.out create mode 100644 zlib-flate/qtest/missing-z-finish.in diff --git a/ChangeLog b/ChangeLog index fdef04ac..c64f18ae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,18 @@ 2021-11-02 Jay Berkenbilt + * zlib-flate: warn and exit with code 3 when there is corrupted + input data even when decompression is possible. We do this in the + zlib-flate CLI so that it can be more reliably used to test the + validity of zlib streams, but we don't warn by default in qpdf + itself because PDF files in the wild exist with this problem and + other readers appear to tolerate it. There is a PDF in the qpdf + test suite (form-filled-by-acrobat.pdf) that was written by a + version of Adobe Acrobat that exhibits this problem. Fixes #562. + + * Add Pl_Flate::setWarnCallback to make it possible to be notified + of data errors that are recoverable but still indicate invalid + data. + * Improve error reporting when someone forgets the -- after --pages. Fixes #555. diff --git a/include/qpdf/Pl_Flate.hh b/include/qpdf/Pl_Flate.hh index 9c06944c..2a1fe4d9 100644 --- a/include/qpdf/Pl_Flate.hh +++ b/include/qpdf/Pl_Flate.hh @@ -23,6 +23,7 @@ #define PL_FLATE_HH #include +#include class Pl_Flate: public Pipeline { @@ -52,9 +53,13 @@ class Pl_Flate: public Pipeline QPDF_DLL static void setCompressionLevel(int); + QPDF_DLL + void setWarnCallback(std::function callback); + private: void handleData(unsigned char* data, size_t len, int flush); void checkError(char const* prefix, int error_code); + void warn(char const*, int error_code); class Members { @@ -73,6 +78,7 @@ class Pl_Flate: public Pipeline action_e action; bool initialized; void* zdata; + std::function callback; }; PointerHolder m; diff --git a/libqpdf/Pl_Flate.cc b/libqpdf/Pl_Flate.cc index 1eca837e..ee07b6f1 100644 --- a/libqpdf/Pl_Flate.cc +++ b/libqpdf/Pl_Flate.cc @@ -71,6 +71,21 @@ Pl_Flate::~Pl_Flate() { } +void +Pl_Flate::setWarnCallback(std::function callback) +{ + this->m->callback = callback; +} + +void +Pl_Flate::warn(char const* msg, int code) +{ + if (this->m->callback != nullptr) + { + this->m->callback(msg, code); + } +} + void Pl_Flate::write(unsigned char* data, size_t len) { @@ -164,7 +179,14 @@ Pl_Flate::handleData(unsigned char* data, size_t len, int flush) // Probably shouldn't be able to happen, but possible as a // boundary condition: if the last call to inflate exactly // filled the output buffer, it's possible that the next - // call to inflate could have nothing to do. + // call to inflate could have nothing to do. There are PDF + // files in the wild that have this error (including at + // least one in qpdf's test suite). In some cases, we want + // to know about this, because it indicates incorrect + // compression, so call a callback if provided. + this->warn( + "input stream is complete but output may still be valid", + err); done = true; break; @@ -231,8 +253,15 @@ Pl_Flate::finish() } catch (std::exception& e) { - this->getNext()->finish(); - throw e; + try + { + this->getNext()->finish(); + } + catch (...) + { + // ignore secondary exception + } + throw std::runtime_error(e.what()); } this->getNext()->finish(); } diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index bc3b1b56..14659008 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -503,6 +503,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, { pipeline = decode_pipeline; } + Pl_Flate* flate = dynamic_cast(pipeline); + if (flate != nullptr) + { + flate->setWarnCallback([this](char const* msg, int code) { + warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(), + "", this->offset, msg)); + }); + } } } diff --git a/qpdf/qtest/qpdf/form-filled-by-acrobat-warn.out b/qpdf/qtest/qpdf/form-filled-by-acrobat-warn.out new file mode 100644 index 00000000..119ef0d2 --- /dev/null +++ b/qpdf/qtest/qpdf/form-filled-by-acrobat-warn.out @@ -0,0 +1,4 @@ +WARNING: form-filled-by-acrobat.pdf (offset 56091): input stream is complete but output may still be valid +WARNING: form-filled-by-acrobat.pdf (offset 56420): input stream is complete but output may still be valid +WARNING: form-filled-by-acrobat.pdf (offset 56657): input stream is complete but output may still be valid +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/zlib-flate/qtest/missing-z-finish.in b/zlib-flate/qtest/missing-z-finish.in new file mode 100644 index 00000000..848c9c5c --- /dev/null +++ b/zlib-flate/qtest/missing-z-finish.in @@ -0,0 +1,2 @@ +xڻ⻈Qţr#U?0&I%dJIYҒy!ԢTC=c=%Iiy龮A>iy%0vRz^gprz^ k'eZXY&4L*Iϴ0105400K*) 02475NMLu rM*N4202i3614252060I%EFf& Nr[kRr>I)Iə +A~VaΞVVnAVFQF~^cON&@ye!>_3եcyj*NݸwI7KQc䵅q [s \ No newline at end of file diff --git a/zlib-flate/qtest/zf.test b/zlib-flate/qtest/zf.test index 2b188eb2..2f89ff54 100644 --- a/zlib-flate/qtest/zf.test +++ b/zlib-flate/qtest/zf.test @@ -29,4 +29,11 @@ $td->runtest("error", $td->EXIT_STATUS => 2}, $td->NORMALIZE_NEWLINES); -$td->report(7); +$td->runtest("corrupted input", + {$td->COMMAND => "zlib-flate -uncompress < missing-z-finish.in"}, + {$td->REGEXP => + "input stream is complete but output may still be valid", + $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); + +$td->report(8); diff --git a/zlib-flate/zlib-flate.cc b/zlib-flate/zlib-flate.cc index 4d65bcc3..ae0fa4e1 100644 --- a/zlib-flate/zlib-flate.cc +++ b/zlib-flate/zlib-flate.cc @@ -76,6 +76,12 @@ int main(int argc, char* argv[]) PointerHolder out = new Pl_StdioFile("stdout", stdout); PointerHolder flate = new Pl_Flate("flate", out.getPointer(), action); + bool warn = false; + flate->setWarnCallback([&warn](char const* msg, int code) { + warn = true; + std::cerr << whoami << ": WARNING: zlib code " << code + << ", msg = " << msg << std::endl; + }); try { @@ -97,9 +103,13 @@ int main(int argc, char* argv[]) } catch (std::exception& e) { - std::cerr << e.what() << std::endl; + std::cerr << whoami << ": " << e.what() << std::endl; exit(2); } + if (warn) + { + exit(3); + } return 0; }