Detect recoverable but invalid zlib data streams (fixes #562)

This commit is contained in:
Jay Berkenbilt 2021-11-02 17:54:10 -04:00
parent c491d9f6af
commit 532a4f3d60
8 changed files with 84 additions and 5 deletions

View File

@ -1,5 +1,18 @@
2021-11-02 Jay Berkenbilt <ejb@ql.org>
* zlib-flate: warn and exit with code 3 when there is corrupted
input data even when decompression is possible. We do this in the
zlib-flate CLI so that it can be more reliably used to test the
validity of zlib streams, but we don't warn by default in qpdf
itself because PDF files in the wild exist with this problem and
other readers appear to tolerate it. There is a PDF in the qpdf
test suite (form-filled-by-acrobat.pdf) that was written by a
version of Adobe Acrobat that exhibits this problem. Fixes #562.
* Add Pl_Flate::setWarnCallback to make it possible to be notified
of data errors that are recoverable but still indicate invalid
data.
* Improve error reporting when someone forgets the -- after
--pages. Fixes #555.

View File

@ -23,6 +23,7 @@
#define PL_FLATE_HH
#include <qpdf/Pipeline.hh>
#include <functional>
class Pl_Flate: public Pipeline
{
@ -52,9 +53,13 @@ class Pl_Flate: public Pipeline
QPDF_DLL
static void setCompressionLevel(int);
QPDF_DLL
void setWarnCallback(std::function<void(char const*, int)> callback);
private:
void handleData(unsigned char* data, size_t len, int flush);
void checkError(char const* prefix, int error_code);
void warn(char const*, int error_code);
class Members
{
@ -73,6 +78,7 @@ class Pl_Flate: public Pipeline
action_e action;
bool initialized;
void* zdata;
std::function<void(char const*, int)> callback;
};
PointerHolder<Members> m;

View File

@ -71,6 +71,21 @@ Pl_Flate::~Pl_Flate()
{
}
void
Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback)
{
this->m->callback = callback;
}
void
Pl_Flate::warn(char const* msg, int code)
{
    // Forward the message and code to the handler installed with
    // setWarnCallback(). When no handler has been installed, the
    // warning is dropped silently.
    auto const& notify = this->m->callback;
    if (! notify)
    {
        return;
    }
    notify(msg, code);
}
void
Pl_Flate::write(unsigned char* data, size_t len)
{
@ -164,7 +179,14 @@ Pl_Flate::handleData(unsigned char* data, size_t len, int flush)
// Probably shouldn't be able to happen, but possible as a
// boundary condition: if the last call to inflate exactly
// filled the output buffer, it's possible that the next
// call to inflate could have nothing to do.
// call to inflate could have nothing to do. There are PDF
// files in the wild that have this error (including at
// least one in qpdf's test suite). In some cases, we want
// to know about this, because it indicates incorrect
// compression, so call a callback if provided.
this->warn(
"input stream is complete but output may still be valid",
err);
done = true;
break;
@ -231,8 +253,15 @@ Pl_Flate::finish()
}
catch (std::exception& e)
{
this->getNext()->finish();
throw e;
try
{
this->getNext()->finish();
}
catch (...)
{
// ignore secondary exception
}
throw std::runtime_error(e.what());
}
this->getNext()->finish();
}

View File

@ -503,6 +503,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
{
pipeline = decode_pipeline;
}
Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline);
if (flate != nullptr)
{
flate->setWarnCallback([this](char const* msg, int code) {
warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
"", this->offset, msg));
});
}
}
}

View File

@ -0,0 +1,4 @@
WARNING: form-filled-by-acrobat.pdf (offset 56091): input stream is complete but output may still be valid
WARNING: form-filled-by-acrobat.pdf (offset 56420): input stream is complete but output may still be valid
WARNING: form-filled-by-acrobat.pdf (offset 56657): input stream is complete but output may still be valid
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -0,0 +1,2 @@
xÚ»Ô⻈Q<EFBFBD>Å£úÙÑr#UÆ?—0&¹±I%ê¬ÕdJIY¶Òy!ãä²Ô¢TC=c=ƒä¼ÄÜ%Iiyé¾®AžŽ>Éiy%0vRz^—«£Ÿ<C5B8>¨gprz^ ˜ká¦ä'eëY&4®L*IÏ´0105¶400K*)Éò 0247´´Ð5NÊMLu rM*N®<4E>4202i3614´25·20Ð60I%E™Ff&†¦ Nrš[<5B>£Ÿ³kRr>ÐI™Å)ŽIÉ™
¡A~V¡aΞV†VnAVFæQžF~®æŽçÜcÛONùÿ&è@yí„øe!ÿ>ÈÛù_¾Õó3Õ¥cyçjéŒ*²çÍNݸwóâöI<C3B6>KÉì¾Qcäµ…q [³sÉ

View File

@ -29,4 +29,11 @@ $td->runtest("error",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
$td->report(7);
# Feed zlib-flate a stream that can be decompressed but is still invalid
# (fixture missing-z-finish.in). The tool is expected to print the
# warning text matched below and to exit with status 3.
$td->runtest("corrupted input",
{$td->COMMAND => "zlib-flate -uncompress < missing-z-finish.in"},
{$td->REGEXP =>
"input stream is complete but output may still be valid",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->report(8);

View File

@ -76,6 +76,12 @@ int main(int argc, char* argv[])
PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("stdout", stdout);
PointerHolder<Pl_Flate> flate =
new Pl_Flate("flate", out.getPointer(), action);
bool warn = false;
flate->setWarnCallback([&warn](char const* msg, int code) {
warn = true;
std::cerr << whoami << ": WARNING: zlib code " << code
<< ", msg = " << msg << std::endl;
});
try
{
@ -97,9 +103,13 @@ int main(int argc, char* argv[])
}
catch (std::exception& e)
{
std::cerr << e.what() << std::endl;
std::cerr << whoami << ": " << e.what() << std::endl;
exit(2);
}
if (warn)
{
exit(3);
}
return 0;
}