2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/Pl_Flate.hh>
|
2022-02-04 21:31:31 +00:00
|
|
|
|
2023-05-20 11:22:32 +00:00
|
|
|
#include <climits>
|
|
|
|
#include <cstring>
|
2022-04-02 21:14:10 +00:00
|
|
|
#include <zlib.h>
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2019-06-21 04:01:36 +00:00
|
|
|
#include <qpdf/QIntC.hh>
|
2022-04-02 21:14:10 +00:00
|
|
|
#include <qpdf/QUtil.hh>
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2024-07-28 18:53:46 +00:00
|
|
|
namespace
{
    // File-local cap shared by every Pl_Flate instance; assigned by Pl_Flate::setMemoryLimit().
    // handleData() only enforces it when it is non-zero.
    unsigned long long memory_limit = 0ULL;
} // namespace
|
|
|
|
|
2019-08-23 23:54:08 +00:00
|
|
|
// Compression level passed to deflateInit() when a deflating stream is first initialized. Shared by
// all instances and changed through setCompressionLevel().
int Pl_Flate::compression_level{Z_DEFAULT_COMPRESSION};
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
// Build the private state for one Pl_Flate: the output buffer of out_bufsize bytes and the z_stream
// that zlib operates on. The zlib stream itself is not initialized here; handleData() does that
// lazily on first use.
//
// Throws std::runtime_error if out_bufsize does not fit in an unsigned int, which is the widest
// buffer size zlib's avail_out field can describe.
Pl_Flate::Members::Members(size_t out_bufsize, action_e action) :
    out_bufsize(out_bufsize),
    action(action),
    initialized(false),
    zdata(nullptr)
{
    // Validate before acquiring anything. If a constructor throws, its destructor does not run, so
    // a z_stream allocated ahead of this check would never be deleted.
    if (out_bufsize > UINT_MAX) {
        throw std::runtime_error(
            "Pl_Flate: zlib doesn't support buffer sizes larger than unsigned int");
    }

    this->outbuf = QUtil::make_shared_array<unsigned char>(out_bufsize);
    // Indirect through zdata to reach the z_stream so we don't have to include zlib.h in
    // Pl_Flate.hh. This means people using shared library versions of qpdf don't have to have zlib
    // development files available, which particularly helps in a Windows environment.
    //
    // Value-initialize so that every field we don't assign below (for example msg, which
    // checkError() inspects) starts out zero/null rather than indeterminate.
    this->zdata = new z_stream();

    z_stream& zstream = *(static_cast<z_stream*>(this->zdata));
    // Null allocator hooks ask zlib to use its default allocation routines.
    zstream.zalloc = nullptr;
    zstream.zfree = nullptr;
    zstream.opaque = nullptr;
    // No input yet; all output goes to the start of our buffer.
    zstream.next_in = nullptr;
    zstream.avail_in = 0;
    zstream.next_out = this->outbuf.get();
    zstream.avail_out = QIntC::to_uint(out_bufsize);
}
|
|
|
|
|
2019-06-22 01:32:47 +00:00
|
|
|
// Release the zlib state (if the stream was ever initialized and not already ended by finish())
// and free the z_stream allocated by the constructor.
Pl_Flate::Members::~Members()
{
    auto* stream = static_cast<z_stream*>(this->zdata);
    if (this->initialized) {
        // Return values are deliberately ignored; nothing useful can be done with them here.
        if (action == a_deflate) {
            deflateEnd(stream);
        } else {
            inflateEnd(stream);
        }
    }
    delete stream;
    this->zdata = nullptr;
}
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
// Create a flate pipeline stage that compresses or decompresses (per action) and writes its output
// to next, using an internal output buffer of out_bufsize_int bytes.
//
// Throws std::logic_error if next is null. Note that the Members object is constructed in the
// initializer list, before that check is made.
Pl_Flate::Pl_Flate(
    char const* identifier, Pipeline* next, action_e action, unsigned int out_bufsize_int) :
    Pipeline(identifier, next),
    m(new Members(QIntC::to_size(out_bufsize_int), action))
{
    if (next == nullptr) {
        throw std::logic_error("Attempt to create Pl_Flate with nullptr as next");
    }
}
|
|
|
|
|
2023-06-01 13:47:36 +00:00
|
|
|
// Intentionally an empty, out-of-line, user-written destructor rather than "= default" in the
// header -- see QPDF_DLL_CLASS in README-maintainer.
Pl_Flate::~Pl_Flate() // NOLINT (modernize-use-equals-default)
{
}
|
|
|
|
|
2024-07-28 18:53:46 +00:00
|
|
|
void
|
|
|
|
Pl_Flate::setMemoryLimit(unsigned long long limit)
|
|
|
|
{
|
|
|
|
memory_limit = limit;
|
|
|
|
}
|
|
|
|
|
2021-11-02 21:54:10 +00:00
|
|
|
void
|
|
|
|
Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback)
|
|
|
|
{
|
2023-05-21 13:42:34 +00:00
|
|
|
m->callback = callback;
|
2021-11-02 21:54:10 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_Flate::warn(char const* msg, int code)
|
|
|
|
{
|
2023-05-21 13:42:34 +00:00
|
|
|
if (m->callback != nullptr) {
|
|
|
|
m->callback(msg, code);
|
2021-11-02 21:54:10 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
void
|
2022-05-03 21:43:07 +00:00
|
|
|
Pl_Flate::write(unsigned char const* data, size_t len)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2023-05-21 13:42:34 +00:00
|
|
|
if (m->outbuf == nullptr) {
|
2022-02-08 14:18:08 +00:00
|
|
|
throw std::logic_error(
|
2023-05-21 17:35:09 +00:00
|
|
|
this->identifier + ": Pl_Flate: write() called after finish() called");
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
2012-06-20 15:20:57 +00:00
|
|
|
|
2023-05-27 17:19:52 +00:00
|
|
|
// Write in chunks in case len is too big to fit in an int. Assume int is at least 32 bits.
|
2012-06-20 15:20:57 +00:00
|
|
|
static size_t const max_bytes = 1 << 30;
|
|
|
|
size_t bytes_left = len;
|
2022-05-03 21:43:07 +00:00
|
|
|
unsigned char const* buf = data;
|
2022-04-02 21:14:10 +00:00
|
|
|
while (bytes_left > 0) {
|
2022-02-08 14:18:08 +00:00
|
|
|
size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);
|
2023-05-21 17:35:09 +00:00
|
|
|
handleData(buf, bytes, (m->action == a_inflate ? Z_SYNC_FLUSH : Z_NO_FLUSH));
|
2022-02-08 14:18:08 +00:00
|
|
|
bytes_left -= bytes;
|
2012-06-20 15:20:57 +00:00
|
|
|
buf += bytes;
|
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2022-05-03 21:43:07 +00:00
|
|
|
Pl_Flate::handleData(unsigned char const* data, size_t len, int flush)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2022-04-02 21:14:10 +00:00
|
|
|
if (len > UINT_MAX) {
|
2023-05-27 17:19:52 +00:00
|
|
|
throw std::runtime_error("Pl_Flate: zlib doesn't support data blocks larger than int");
|
2019-06-21 04:01:36 +00:00
|
|
|
}
|
2023-05-21 13:42:34 +00:00
|
|
|
z_stream& zstream = *(static_cast<z_stream*>(m->zdata));
|
2023-05-27 17:19:52 +00:00
|
|
|
// zlib is known not to modify the data pointed to by next_in but doesn't declare the field
|
|
|
|
// value const unless compiled to do so.
|
2022-05-03 21:43:07 +00:00
|
|
|
zstream.next_in = const_cast<unsigned char*>(data);
|
2019-06-21 04:01:36 +00:00
|
|
|
zstream.avail_in = QIntC::to_uint(len);
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2023-05-21 13:42:34 +00:00
|
|
|
if (!m->initialized) {
|
2022-02-08 14:18:08 +00:00
|
|
|
int err = Z_OK;
|
2013-02-24 02:46:21 +00:00
|
|
|
|
2023-05-27 17:19:52 +00:00
|
|
|
// deflateInit and inflateInit are macros that use old-style casts.
|
2023-05-21 17:35:09 +00:00
|
|
|
#if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__))
|
2022-04-02 21:14:10 +00:00
|
|
|
# pragma GCC diagnostic push
|
|
|
|
# pragma GCC diagnostic ignored "-Wold-style-cast"
|
2013-02-24 02:46:21 +00:00
|
|
|
#endif
|
2023-05-21 13:42:34 +00:00
|
|
|
if (m->action == a_deflate) {
|
2022-02-08 14:18:08 +00:00
|
|
|
err = deflateInit(&zstream, compression_level);
|
2022-04-02 21:14:10 +00:00
|
|
|
} else {
|
2022-02-08 14:18:08 +00:00
|
|
|
err = inflateInit(&zstream);
|
|
|
|
}
|
2023-05-21 17:35:09 +00:00
|
|
|
#if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || defined(__clang__))
|
2022-04-02 21:14:10 +00:00
|
|
|
# pragma GCC diagnostic pop
|
2013-02-24 02:46:21 +00:00
|
|
|
#endif
|
|
|
|
|
2022-02-08 14:18:08 +00:00
|
|
|
checkError("Init", err);
|
2023-05-21 13:42:34 +00:00
|
|
|
m->initialized = true;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int err = Z_OK;
|
|
|
|
|
|
|
|
bool done = false;
|
2022-04-02 21:14:10 +00:00
|
|
|
while (!done) {
|
2023-05-21 13:42:34 +00:00
|
|
|
if (m->action == a_deflate) {
|
2022-02-08 14:18:08 +00:00
|
|
|
err = deflate(&zstream, flush);
|
2022-04-02 21:14:10 +00:00
|
|
|
} else {
|
2022-02-08 14:18:08 +00:00
|
|
|
err = inflate(&zstream, flush);
|
|
|
|
}
|
2023-05-21 13:42:34 +00:00
|
|
|
if ((m->action == a_inflate) && (err != Z_OK) && zstream.msg &&
|
2022-04-02 21:14:10 +00:00
|
|
|
(strcmp(zstream.msg, "incorrect data check") == 0)) {
|
2023-05-27 17:19:52 +00:00
|
|
|
// Other PDF readers ignore this specific error. Combining this with Z_SYNC_FLUSH
|
|
|
|
// enables qpdf to handle some broken zlib streams without losing data.
|
2018-03-03 16:35:01 +00:00
|
|
|
err = Z_STREAM_END;
|
|
|
|
}
|
2022-04-02 21:14:10 +00:00
|
|
|
switch (err) {
|
|
|
|
case Z_BUF_ERROR:
|
2023-05-27 17:19:52 +00:00
|
|
|
// Probably shouldn't be able to happen, but possible as a boundary condition: if the
|
|
|
|
// last call to inflate exactly filled the output buffer, it's possible that the next
|
|
|
|
// call to inflate could have nothing to do. There are PDF files in the wild that have
|
|
|
|
// this error (including at least one in qpdf's test suite). In some cases, we want to
|
|
|
|
// know about this, because it indicates incorrect compression, so call a callback if
|
|
|
|
// provided.
|
2023-05-21 17:35:09 +00:00
|
|
|
this->warn("input stream is complete but output may still be valid", err);
|
2022-02-08 14:18:08 +00:00
|
|
|
done = true;
|
|
|
|
break;
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
case Z_STREAM_END:
|
2022-02-08 14:18:08 +00:00
|
|
|
done = true;
|
|
|
|
// fall through
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
case Z_OK:
|
2022-02-08 14:18:08 +00:00
|
|
|
{
|
2022-04-02 21:14:10 +00:00
|
|
|
if ((zstream.avail_in == 0) && (zstream.avail_out > 0)) {
|
2023-05-27 17:19:52 +00:00
|
|
|
// There is nothing left to read, and there was sufficient buffer space to write
|
|
|
|
// everything we needed, so we're done for now.
|
2022-02-08 14:18:08 +00:00
|
|
|
done = true;
|
|
|
|
}
|
2023-05-21 17:35:09 +00:00
|
|
|
uLong ready = QIntC::to_ulong(m->out_bufsize - zstream.avail_out);
|
2022-04-02 21:14:10 +00:00
|
|
|
if (ready > 0) {
|
2024-09-17 23:12:44 +00:00
|
|
|
if (memory_limit && m->action != a_deflate) {
|
2024-07-28 18:53:46 +00:00
|
|
|
m->written += ready;
|
|
|
|
if (m->written > memory_limit) {
|
|
|
|
throw std::runtime_error("PL_Flate memory limit exceeded");
|
|
|
|
}
|
|
|
|
}
|
2024-08-07 16:20:19 +00:00
|
|
|
next()->write(m->outbuf.get(), ready);
|
2023-05-21 13:42:34 +00:00
|
|
|
zstream.next_out = m->outbuf.get();
|
|
|
|
zstream.avail_out = QIntC::to_uint(m->out_bufsize);
|
2022-02-08 14:18:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
default:
|
2022-02-08 14:18:08 +00:00
|
|
|
this->checkError("data", err);
|
|
|
|
break;
|
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_Flate::finish()
|
|
|
|
{
|
2024-08-09 10:08:30 +00:00
|
|
|
if (m->written > memory_limit) {
|
2024-09-17 23:12:44 +00:00
|
|
|
throw std::runtime_error("PL_Flate memory limit exceeded");
|
2024-08-09 10:08:30 +00:00
|
|
|
}
|
2022-04-02 21:14:10 +00:00
|
|
|
try {
|
2023-05-21 13:42:34 +00:00
|
|
|
if (m->outbuf.get()) {
|
|
|
|
if (m->initialized) {
|
|
|
|
z_stream& zstream = *(static_cast<z_stream*>(m->zdata));
|
2017-07-29 16:07:19 +00:00
|
|
|
unsigned char buf[1];
|
|
|
|
buf[0] = '\0';
|
|
|
|
handleData(buf, 0, Z_FINISH);
|
|
|
|
int err = Z_OK;
|
2023-05-21 13:42:34 +00:00
|
|
|
if (m->action == a_deflate) {
|
2017-07-29 16:07:19 +00:00
|
|
|
err = deflateEnd(&zstream);
|
2022-04-02 21:14:10 +00:00
|
|
|
} else {
|
2017-07-29 16:07:19 +00:00
|
|
|
err = inflateEnd(&zstream);
|
|
|
|
}
|
2023-05-21 13:42:34 +00:00
|
|
|
m->initialized = false;
|
2017-07-29 16:07:19 +00:00
|
|
|
checkError("End", err);
|
|
|
|
}
|
|
|
|
|
2023-05-21 13:42:34 +00:00
|
|
|
m->outbuf = nullptr;
|
2017-07-29 16:07:19 +00:00
|
|
|
}
|
2022-04-02 21:14:10 +00:00
|
|
|
} catch (std::exception& e) {
|
|
|
|
try {
|
2024-08-07 16:20:19 +00:00
|
|
|
next()->finish();
|
2022-04-02 21:14:10 +00:00
|
|
|
} catch (...) {
|
2021-11-02 21:54:10 +00:00
|
|
|
// ignore secondary exception
|
|
|
|
}
|
|
|
|
throw std::runtime_error(e.what());
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
2024-08-07 16:20:19 +00:00
|
|
|
next()->finish();
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2019-08-23 23:54:08 +00:00
|
|
|
void
|
|
|
|
Pl_Flate::setCompressionLevel(int level)
|
|
|
|
{
|
|
|
|
compression_level = level;
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
void
|
|
|
|
Pl_Flate::checkError(char const* prefix, int error_code)
|
|
|
|
{
|
2023-05-21 13:42:34 +00:00
|
|
|
z_stream& zstream = *(static_cast<z_stream*>(m->zdata));
|
2022-04-02 21:14:10 +00:00
|
|
|
if (error_code != Z_OK) {
|
2023-05-21 17:35:09 +00:00
|
|
|
char const* action_str = (m->action == a_deflate ? "deflate" : "inflate");
|
|
|
|
std::string msg = this->identifier + ": " + action_str + ": " + prefix + ": ";
|
2022-02-08 14:18:08 +00:00
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
if (zstream.msg) {
|
2022-02-08 14:18:08 +00:00
|
|
|
msg += zstream.msg;
|
2022-04-02 21:14:10 +00:00
|
|
|
} else {
|
|
|
|
switch (error_code) {
|
|
|
|
case Z_ERRNO:
|
2022-02-08 14:18:08 +00:00
|
|
|
msg += "zlib system error";
|
|
|
|
break;
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
case Z_STREAM_ERROR:
|
2022-02-08 14:18:08 +00:00
|
|
|
msg += "zlib stream error";
|
|
|
|
break;
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
case Z_DATA_ERROR:
|
2022-02-08 14:18:08 +00:00
|
|
|
msg += "zlib data error";
|
|
|
|
break;
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
case Z_MEM_ERROR:
|
2022-02-08 14:18:08 +00:00
|
|
|
msg += "zlib memory error";
|
|
|
|
break;
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
case Z_BUF_ERROR:
|
2022-02-08 14:18:08 +00:00
|
|
|
msg += "zlib buffer error";
|
|
|
|
break;
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
case Z_VERSION_ERROR:
|
2022-02-08 14:18:08 +00:00
|
|
|
msg += "zlib version error";
|
|
|
|
break;
|
|
|
|
|
2022-04-02 21:14:10 +00:00
|
|
|
default:
|
2023-05-21 17:35:09 +00:00
|
|
|
msg += std::string("zlib unknown error (") + std::to_string(error_code) + ")";
|
2022-02-08 14:18:08 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
throw std::runtime_error(msg);
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|