// qpdf/libqpdf/Pl_Flate.cc
// (328 lines, 7.8 KiB, C++)

#include <qpdf/Pl_Flate.hh>
#include <zlib.h>
#include <string.h>
#include <limits.h>
#include <qpdf/QUtil.hh>
#include <qpdf/QIntC.hh>
// Compression level handed to deflateInit() when a deflate stream is
// first initialized. Starts at zlib's Z_DEFAULT_COMPRESSION and is
// overwritten by Pl_Flate::setCompressionLevel().
int Pl_Flate::compression_level = Z_DEFAULT_COMPRESSION;
// Build the private state for one Pl_Flate: the output buffer that
// zlib writes into and the z_stream that tracks the codec state.
//
// out_bufsize: size in bytes of the output buffer; must fit in an
//              unsigned int because that is what zlib's avail_out holds.
// action:      a_deflate or a_inflate; selects which zlib calls the
//              rest of the class makes.
//
// Throws std::runtime_error if out_bufsize exceeds UINT_MAX.
Pl_Flate::Members::Members(size_t out_bufsize,
                           action_e action) :
    out_bufsize(out_bufsize),
    action(action),
    initialized(false),
    zdata(0)
{
    // Validate before acquiring anything held through a raw pointer.
    // When a constructor throws, the destructor of the object under
    // construction does not run, so a z_stream allocated before this
    // check would never be deleted.
    if (out_bufsize > UINT_MAX)
    {
        throw std::runtime_error(
            "Pl_Flate: zlib doesn't support buffer"
            " sizes larger than unsigned int");
    }

    this->outbuf = PointerHolder<unsigned char>(
        true, new unsigned char[out_bufsize]);

    // Indirect through zdata to reach the z_stream so we don't have
    // to include zlib.h in Pl_Flate.hh. This means people using
    // shared library versions of qpdf don't have to have zlib
    // development files available, which particularly helps in a
    // Windows environment.
    //
    // Value-initialize the stream so that no field is indeterminate;
    // in particular checkError() reads msg, which must be null until
    // zlib stores a message there.
    z_stream* stream = new z_stream();
    this->zdata = stream;

    z_stream& zstream = *stream;
    zstream.zalloc = 0;
    zstream.zfree = 0;
    zstream.opaque = 0;
    zstream.next_in = 0;
    zstream.avail_in = 0;
    zstream.next_out = this->outbuf.getPointer();
    // Cannot fail: the range was checked above.
    zstream.avail_out = QIntC::to_uint(out_bufsize);
}
// Release the zlib stream. If the codec was initialized and has not
// been ended yet, end it with the call matching the configured action
// before freeing the z_stream itself.
Pl_Flate::Members::~Members()
{
    z_stream* const stream = static_cast<z_stream*>(this->zdata);
    if (this->initialized)
    {
        if (action == a_deflate)
        {
            deflateEnd(stream);
        }
        else
        {
            inflateEnd(stream);
        }
    }
    delete stream;
    this->zdata = 0;
}
// Create a flate pipeline stage.
//
// identifier:      name passed to the Pipeline base class.
// next:            downstream pipeline, also passed to the base class.
// action:          a_deflate to compress, a_inflate to decompress.
// out_bufsize_int: size in bytes of the internal output buffer.
Pl_Flate::Pl_Flate(
    char const* identifier,
    Pipeline* next,
    action_e action,
    unsigned int out_bufsize_int) :
    Pipeline(identifier, next),
    m(new Members(QIntC::to_size(out_bufsize_int), action))
{
}
// Destructor. Performs no explicit cleanup of its own.
Pl_Flate::~Pl_Flate()
{
    // intentionally empty
}
void
Pl_Flate::setWarnCallback(std::function<void(char const*, int)> callback)
{
this->m->callback = callback;
}
// Forward a warning to the registered callback. Does nothing when no
// callback has been set.
void
Pl_Flate::warn(char const* msg, int code)
{
    if (! this->m->callback)
    {
        return;
    }
    this->m->callback(msg, code);
}
// Push len bytes starting at data through the codec. Inflate uses
// Z_SYNC_FLUSH and deflate uses Z_NO_FLUSH.
//
// Throws std::logic_error if called after finish() has released the
// output buffer.
void
Pl_Flate::write(unsigned char* data, size_t len)
{
    if (! this->m->outbuf.getPointer())
    {
        throw std::logic_error(
            this->identifier +
            ": Pl_Flate: write() called after finish() called");
    }

    // Feed the input in pieces so that no single piece is too large
    // for the integer counters used downstream. This assumes int is
    // at least 32 bits wide.
    static size_t const chunk_limit = 1 << 30;
    int const flush_mode =
        (this->m->action == a_inflate ? Z_SYNC_FLUSH : Z_NO_FLUSH);

    unsigned char* cursor = data;
    for (size_t remaining = len; remaining > 0; )
    {
        size_t chunk = remaining;
        if (chunk > chunk_limit)
        {
            chunk = chunk_limit;
        }
        handleData(cursor, chunk, flush_mode);
        remaining -= chunk;
        cursor += chunk;
    }
}
// Run one batch of input through zlib and forward whatever output it
// produces to the next pipeline.
//
// data:  start of the input bytes for this call.
// len:   number of input bytes; must not exceed UINT_MAX.
// flush: zlib flush mode, passed unchanged to deflate()/inflate().
//
// The zlib stream is initialized on the first call (deflateInit or
// inflateInit depending on the configured action). Throws
// std::runtime_error if len is too large, or via checkError() when
// zlib reports an error this function does not tolerate.
void
Pl_Flate::handleData(unsigned char* data, size_t len, int flush)
{
if (len > UINT_MAX)
{
throw std::runtime_error(
"Pl_Flate: zlib doesn't support data"
" blocks larger than int");
}
z_stream& zstream = *(static_cast<z_stream*>(this->m->zdata));
// Point zlib at the caller's input for this call.
zstream.next_in = data;
zstream.avail_in = QIntC::to_uint(len);
if (! this->m->initialized)
{
// First use of the stream: initialize it for the configured action.
int err = Z_OK;
// deflateInit and inflateInit are macros that use old-style
// casts.
#if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || \
defined(__clang__))
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wold-style-cast"
#endif
if (this->m->action == a_deflate)
{
err = deflateInit(&zstream, compression_level);
}
else
{
err = inflateInit(&zstream);
}
#if ((defined(__GNUC__) && ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406) || \
defined(__clang__))
# pragma GCC diagnostic pop
#endif
// Throws if initialization did not return Z_OK, in which case the
// stream is not marked as initialized.
checkError("Init", err);
this->m->initialized = true;
}
int err = Z_OK;
bool done = false;
// Keep calling zlib until the input is consumed with output space
// to spare, the stream ends, or an error stops processing.
while (! done)
{
if (this->m->action == a_deflate)
{
err = deflate(&zstream, flush);
}
else
{
err = inflate(&zstream, flush);
}
if ((this->m->action == a_inflate) && (err != Z_OK) && zstream.msg &&
(strcmp(zstream.msg, "incorrect data check") == 0))
{
// Other PDF readers ignore this specific error. Combining
// this with Z_SYNC_FLUSH enables qpdf to handle some
// broken zlib streams without losing data.
err = Z_STREAM_END;
}
switch (err)
{
case Z_BUF_ERROR:
// Probably shouldn't be able to happen, but possible as a
// boundary condition: if the last call to inflate exactly
// filled the output buffer, it's possible that the next
// call to inflate could have nothing to do. There are PDF
// files in the wild that have this error (including at
// least one in qpdf's test suite). In some cases, we want
// to know about this, because it indicates incorrect
// compression, so call a callback if provided.
this->warn(
"input stream is complete but output may still be valid",
err);
done = true;
break;
case Z_STREAM_END:
done = true;
// fall through
case Z_OK:
{
if ((zstream.avail_in == 0) &&
(zstream.avail_out > 0))
{
// There is nothing left to read, and there was
// sufficient buffer space to write everything we
// needed, so we're done for now.
done = true;
}
// Number of bytes zlib has placed in the output buffer since it
// was last handed downstream.
uLong ready =
QIntC::to_ulong(this->m->out_bufsize - zstream.avail_out);
if (ready > 0)
{
// Pass the produced bytes to the next pipeline, then rewind the
// output window so zlib can reuse the whole buffer.
this->getNext()->write(this->m->outbuf.getPointer(), ready);
zstream.next_out = this->m->outbuf.getPointer();
zstream.avail_out = QIntC::to_uint(this->m->out_bufsize);
}
}
break;
default:
// Any other code is never Z_OK here, so checkError() throws and
// the loop is left via the exception.
this->checkError("data", err);
break;
}
}
}
void
Pl_Flate::finish()
{
try
{
if (this->m->outbuf.getPointer())
{
if (this->m->initialized)
{
z_stream& zstream = *(static_cast<z_stream*>(this->m->zdata));
unsigned char buf[1];
buf[0] = '\0';
handleData(buf, 0, Z_FINISH);
int err = Z_OK;
if (this->m->action == a_deflate)
{
err = deflateEnd(&zstream);
}
else
{
err = inflateEnd(&zstream);
}
this->m->initialized = false;
checkError("End", err);
}
this->m->outbuf = 0;
}
}
catch (std::exception& e)
{
try
{
this->getNext()->finish();
}
catch (...)
{
// ignore secondary exception
}
throw std::runtime_error(e.what());
}
this->getNext()->finish();
}
// Store the compression level that is handed to deflateInit() when a
// deflate stream is initialized after this call.
void
Pl_Flate::setCompressionLevel(int level)
{
    Pl_Flate::compression_level = level;
}
void
Pl_Flate::checkError(char const* prefix, int error_code)
{
z_stream& zstream = *(static_cast<z_stream*>(this->m->zdata));
if (error_code != Z_OK)
{
char const* action_str =
(this->m->action == a_deflate ? "deflate" : "inflate");
std::string msg =
this->identifier + ": " + action_str + ": " + prefix + ": ";
if (zstream.msg)
{
msg += zstream.msg;
}
else
{
switch (error_code)
{
case Z_ERRNO:
msg += "zlib system error";
break;
case Z_STREAM_ERROR:
msg += "zlib stream error";
break;
case Z_DATA_ERROR:
msg += "zlib data error";
break;
case Z_MEM_ERROR:
msg += "zlib memory error";
break;
case Z_BUF_ERROR:
msg += "zlib buffer error";
break;
case Z_VERSION_ERROR:
msg += "zlib version error";
break;
default:
msg += std::string("zlib unknown error (") +
QUtil::int_to_string(error_code) + ")";
break;
}
}
throw std::runtime_error(msg);
}
}