mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-02 22:50:20 +00:00
Merge pull request #1230 from m-holger/clean-dct-fuzz-changes
Alternative clean dct fuzz changes
This commit is contained in:
commit
ce2deaf185
@ -131,10 +131,6 @@ if(FUTURE)
|
||||
add_compile_definitions(QPDF_FUTURE=1)
|
||||
endif()
|
||||
|
||||
if(OSS_FUZZ)
|
||||
add_compile_definitions(QPDF_OSS_FUZZ=1)
|
||||
endif()
|
||||
|
||||
enable_testing()
|
||||
set(RUN_QTEST perl ${qpdf_SOURCE_DIR}/run-qtest ${ENABLE_QTC_ARG})
|
||||
|
||||
|
41
ChangeLog
41
ChangeLog
@ -1,3 +1,44 @@
|
||||
2024-07-04 M Holger <m.holger@qpdf.org>
|
||||
|
||||
* Treat corrupt JPEG streams as unfilterable. This avoids them
|
||||
getting uncompressed when writing PDF files with decode level all.
|
||||
|
||||
2024-07-02 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add QPDF::setMaxWarnings to set the maximum of warnings before
|
||||
warning suppression.
|
||||
|
||||
* Add static option to Pl_DCT to limit memory usage of
|
||||
decompression. The option is generally exposed but is primarily
|
||||
intended to support fuzz tests, which have explicit memory limits
|
||||
that are smaller than what is commonly seen in the wild with PDF
|
||||
files.
|
||||
|
||||
* Add static option to Pl_DCT to control whether decompression of
|
||||
corrupt JPEG data is attempted.
|
||||
|
||||
2024-07-01 M Holger <m.holger@qpdf.org>
|
||||
|
||||
* Bug fix: certain invalid object streams caused the insertion of
|
||||
invalid entries into in the xref table.
|
||||
|
||||
2024-06-29 M Holger <m.holger@qpdf.org>
|
||||
|
||||
* Bug fix: in QPDFOutlineObjectHelper detect loops in the list of
|
||||
direct children of an outline item.
|
||||
|
||||
2024-06-27 M Holger <m.holger@qpdf.org>
|
||||
|
||||
* Add sanity check in QPDF xref table reconstruction to reject
|
||||
objects with impossibly large object id in order to improve
|
||||
handling of severely damaged PDF files.
|
||||
|
||||
2024-06-25 M Holger <m.holger@qpdf.org>
|
||||
|
||||
* Detect severely damaged PDF files early. After parsing the xref
|
||||
table in QPDF throw a damagedPDF exception if the root of the pages
|
||||
tree is not a dictionary.
|
||||
|
||||
2024-06-07 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* 11.9.1: release
|
||||
|
@ -26,8 +26,18 @@ FuzzHelper::FuzzHelper(unsigned char const* data, size_t size) :
|
||||
void
|
||||
FuzzHelper::doChecks()
|
||||
{
|
||||
// Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during
|
||||
// fuzzing is due to corrupt JPEG data which sometimes cannot be detected before
|
||||
// jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
|
||||
// occur legitimately and therefore must be allowed during normal operations.
|
||||
Pl_DCT::setMemoryLimit(1'000'000'000);
|
||||
|
||||
// Do not decompress corrupt data. This may cause extended runtime within jpeglib without
|
||||
// exercising additional code paths in qpdf.
|
||||
Pl_DCT::setThrowOnCorruptData(true);
|
||||
|
||||
Pl_Discard discard;
|
||||
Pl_DCT p("decode", &discard, 20'000'000);
|
||||
Pl_DCT p("decode", &discard);
|
||||
p.write(const_cast<unsigned char*>(data), size);
|
||||
p.finish();
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <qpdf/Buffer.hh>
|
||||
#include <qpdf/BufferInputSource.hh>
|
||||
#include <qpdf/Pl_DCT.hh>
|
||||
#include <qpdf/Pl_Discard.hh>
|
||||
#include <qpdf/QPDF.hh>
|
||||
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
|
||||
@ -56,6 +57,7 @@ FuzzHelper::getQpdf()
|
||||
auto is =
|
||||
std::shared_ptr<InputSource>(new BufferInputSource("fuzz input", &this->input_buffer));
|
||||
auto qpdf = QPDF::create();
|
||||
qpdf->setMaxWarnings(20);
|
||||
qpdf->processInputSource(is);
|
||||
return qpdf;
|
||||
}
|
||||
@ -171,6 +173,16 @@ FuzzHelper::testOutlines()
|
||||
void
|
||||
FuzzHelper::doChecks()
|
||||
{
|
||||
// Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during
|
||||
// fuzzing is due to corrupt JPEG data which sometimes cannot be detected before
|
||||
// jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
|
||||
// occur legitimately and therefore must be allowed during normal operations.
|
||||
Pl_DCT::setMemoryLimit(1'000'000'000);
|
||||
|
||||
// Do not decompress corrupt data. This may cause extended runtime within jpeglib without
|
||||
// exercising additional code paths in qpdf, and potentially causing counterproductive timeouts.
|
||||
Pl_DCT::setThrowOnCorruptData(true);
|
||||
|
||||
// Get as much coverage as possible in parts of the library that
|
||||
// might benefit from fuzzing.
|
||||
std::cerr << "\ninfo: starting testWrite\n";
|
||||
|
@ -34,20 +34,17 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
|
||||
QPDF_DLL
|
||||
Pl_DCT(char const* identifier, Pipeline* next);
|
||||
|
||||
// Constructor for decompressing image data. If corrupt_data_limit is non-zero and the data is
|
||||
// corrupt, only attempt to uncompress if the uncompressed size is less than corrupt_data_limit.
|
||||
// Limit the memory used by jpeglib when decompressing data.
|
||||
// NB This is a static option affecting all Pl_DCT instances.
|
||||
QPDF_DLL
|
||||
Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit);
|
||||
static void setMemoryLimit(long limit);
|
||||
|
||||
class QPDF_DLL_CLASS CompressConfig
|
||||
{
|
||||
public:
|
||||
// Treat corrupt data as a runtime error rather than attempting to decompress regardless. This
|
||||
// is the qpdf default behaviour. To attempt to decompress corrupt data set 'treat_as_error' to
|
||||
// false.
|
||||
// NB This is a static option affecting all Pl_DCT instances.
|
||||
QPDF_DLL
|
||||
CompressConfig() = default;
|
||||
QPDF_DLL
|
||||
virtual ~CompressConfig() = default;
|
||||
virtual void apply(jpeg_compress_struct*) = 0;
|
||||
};
|
||||
static void setThrowOnCorruptData(bool treat_as_error);
|
||||
|
||||
// Constructor for compressing image data
|
||||
QPDF_DLL
|
||||
@ -57,8 +54,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
|
||||
JDIMENSION image_width,
|
||||
JDIMENSION image_height,
|
||||
int components,
|
||||
J_COLOR_SPACE color_space,
|
||||
CompressConfig* config_callback = nullptr);
|
||||
J_COLOR_SPACE color_space);
|
||||
|
||||
QPDF_DLL
|
||||
~Pl_DCT() override;
|
||||
@ -83,6 +79,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
|
||||
public:
|
||||
QPDF_DLL
|
||||
~Members() = default;
|
||||
Members(Members const&) = delete;
|
||||
|
||||
private:
|
||||
// For compression
|
||||
@ -90,25 +87,18 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
|
||||
JDIMENSION image_width,
|
||||
JDIMENSION image_height,
|
||||
int components,
|
||||
J_COLOR_SPACE color_space,
|
||||
CompressConfig* config_callback);
|
||||
J_COLOR_SPACE color_space);
|
||||
// For decompression
|
||||
Members(size_t corrupt_data_limit);
|
||||
Members(Members const&) = delete;
|
||||
Members();
|
||||
|
||||
action_e action;
|
||||
Pl_Buffer buf;
|
||||
|
||||
// Used for decompression
|
||||
size_t corrupt_data_limit{0};
|
||||
|
||||
// Used for compression
|
||||
JDIMENSION image_width{0};
|
||||
JDIMENSION image_height{0};
|
||||
int components{1};
|
||||
J_COLOR_SPACE color_space{JCS_GRAYSCALE};
|
||||
|
||||
CompressConfig* config_callback{nullptr};
|
||||
};
|
||||
|
||||
std::shared_ptr<Members> m;
|
||||
|
@ -228,6 +228,10 @@ class QPDF
|
||||
QPDF_DLL
|
||||
void setSuppressWarnings(bool);
|
||||
|
||||
// Set the maximum number of warnings to output. Subsequent warnings are suppressed.
|
||||
QPDF_DLL
|
||||
void setMaxWarnings(int);
|
||||
|
||||
// By default, QPDF will try to recover if it finds certain types of errors in PDF files. If
|
||||
// turned off, it will throw an exception on the first such problem it finds without attempting
|
||||
// recovery.
|
||||
@ -1497,6 +1501,7 @@ class QPDF
|
||||
bool provided_password_is_hex_key{false};
|
||||
bool ignore_xref_streams{false};
|
||||
bool suppress_warnings{false};
|
||||
int max_warnings{0};
|
||||
bool attempt_recovery{true};
|
||||
bool check_mode{false};
|
||||
std::shared_ptr<EncryptionParameters> encp;
|
||||
|
2
job.sums
2
job.sums
@ -1,5 +1,5 @@
|
||||
# Generated by generate_auto_job
|
||||
CMakeLists.txt 456938b9debc4997f142ccfb13f3baf2517ae5855e1fe9b2ada1a0b8f7e4facf
|
||||
CMakeLists.txt 47752f33b17fa526d46fc608a25ad6b8c61feba9deb1bd659fddf93e6e08b102
|
||||
generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86
|
||||
include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
|
||||
include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
|
||||
|
@ -20,6 +20,9 @@ namespace
|
||||
jmp_buf jmpbuf;
|
||||
std::string msg;
|
||||
};
|
||||
|
||||
long memory_limit{0};
|
||||
bool throw_on_corrupt_data{true};
|
||||
} // namespace
|
||||
|
||||
static void
|
||||
@ -32,38 +35,39 @@ error_handler(j_common_ptr cinfo)
|
||||
longjmp(jerr->jmpbuf, 1);
|
||||
}
|
||||
|
||||
Pl_DCT::Members::Members(size_t corrupt_data_limit) :
|
||||
Pl_DCT::Members::Members() :
|
||||
action(a_decompress),
|
||||
buf("DCT compressed image"),
|
||||
corrupt_data_limit(corrupt_data_limit)
|
||||
buf("DCT compressed image")
|
||||
{
|
||||
}
|
||||
|
||||
Pl_DCT::Members::Members(
|
||||
JDIMENSION image_width,
|
||||
JDIMENSION image_height,
|
||||
int components,
|
||||
J_COLOR_SPACE color_space,
|
||||
CompressConfig* config_callback) :
|
||||
JDIMENSION image_width, JDIMENSION image_height, int components, J_COLOR_SPACE color_space) :
|
||||
action(a_compress),
|
||||
buf("DCT uncompressed image"),
|
||||
image_width(image_width),
|
||||
image_height(image_height),
|
||||
components(components),
|
||||
color_space(color_space),
|
||||
config_callback(config_callback)
|
||||
color_space(color_space)
|
||||
{
|
||||
}
|
||||
|
||||
Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next) :
|
||||
Pl_DCT(identifier, next, 0)
|
||||
Pipeline(identifier, next),
|
||||
m(new Members())
|
||||
{
|
||||
}
|
||||
|
||||
Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit) :
|
||||
Pipeline(identifier, next),
|
||||
m(new Members(corrupt_data_limit))
|
||||
void
|
||||
Pl_DCT::setMemoryLimit(long limit)
|
||||
{
|
||||
memory_limit = limit;
|
||||
}
|
||||
|
||||
void
|
||||
Pl_DCT::setThrowOnCorruptData(bool treat_as_error)
|
||||
{
|
||||
throw_on_corrupt_data = treat_as_error;
|
||||
}
|
||||
|
||||
Pl_DCT::Pl_DCT(
|
||||
@ -72,10 +76,9 @@ Pl_DCT::Pl_DCT(
|
||||
JDIMENSION image_width,
|
||||
JDIMENSION image_height,
|
||||
int components,
|
||||
J_COLOR_SPACE color_space,
|
||||
CompressConfig* config_callback) :
|
||||
J_COLOR_SPACE color_space) :
|
||||
Pipeline(identifier, next),
|
||||
m(new Members(image_width, image_height, components, color_space, config_callback))
|
||||
m(new Members(image_width, image_height, components, color_space))
|
||||
{
|
||||
}
|
||||
|
||||
@ -273,9 +276,6 @@ Pl_DCT::compress(void* cinfo_p, Buffer* b)
|
||||
cinfo->input_components = m->components;
|
||||
cinfo->in_color_space = m->color_space;
|
||||
jpeg_set_defaults(cinfo);
|
||||
if (m->config_callback) {
|
||||
m->config_callback->apply(cinfo);
|
||||
}
|
||||
|
||||
jpeg_start_compress(cinfo, TRUE);
|
||||
|
||||
@ -312,36 +312,32 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b)
|
||||
# pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
#ifdef QPDF_OSS_FUZZ
|
||||
// Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during
|
||||
// fuzzing is due to corrupt JPEG data which sometimes cannot be detected before
|
||||
// jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
|
||||
// occur legitimately and therefore must be allowed during normal operations.
|
||||
cinfo->mem->max_memory_to_use = 1'000'000'000;
|
||||
// For some corrupt files the memory used internally by libjpeg stays within the above limits
|
||||
// even though the size written to the next pipeline is significantly larger.
|
||||
m->corrupt_data_limit = 10'000'000;
|
||||
#endif
|
||||
if (memory_limit > 0) {
|
||||
cinfo->mem->max_memory_to_use = memory_limit;
|
||||
}
|
||||
|
||||
jpeg_buffer_src(cinfo, b);
|
||||
|
||||
(void)jpeg_read_header(cinfo, TRUE);
|
||||
if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) {
|
||||
throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt");
|
||||
}
|
||||
(void)jpeg_calc_output_dimensions(cinfo);
|
||||
unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components);
|
||||
if (cinfo->err->num_warnings == 0 || m->corrupt_data_limit == 0 ||
|
||||
(width * QIntC::to_uint(cinfo->output_height)) < m->corrupt_data_limit) {
|
||||
// err->num_warnings is the number of corrupt data warnings emitted.
|
||||
// err->msg_code could also be the code of an informational message.
|
||||
JSAMPARRAY buffer = (*cinfo->mem->alloc_sarray)(
|
||||
reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);
|
||||
JSAMPARRAY buffer =
|
||||
(*cinfo->mem->alloc_sarray)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);
|
||||
|
||||
(void)jpeg_start_decompress(cinfo);
|
||||
while (cinfo->output_scanline < cinfo->output_height) {
|
||||
while (cinfo->output_scanline < cinfo->output_height &&
|
||||
(!throw_on_corrupt_data || cinfo->err->num_warnings == 0)) {
|
||||
(void)jpeg_read_scanlines(cinfo, buffer, 1);
|
||||
getNext()->write(buffer[0], width * sizeof(buffer[0][0]));
|
||||
}
|
||||
(void)jpeg_finish_decompress(cinfo);
|
||||
} else {
|
||||
*QPDFLogger::defaultLogger()->getError() << "corrupt JPEG data ignored" << "\n";
|
||||
if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) {
|
||||
throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt");
|
||||
}
|
||||
getNext()->finish();
|
||||
}
|
||||
|
@ -331,6 +331,12 @@ QPDF::setSuppressWarnings(bool val)
|
||||
m->suppress_warnings = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::setMaxWarnings(int val)
|
||||
{
|
||||
m->suppress_warnings = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::setAttemptRecovery(bool val)
|
||||
{
|
||||
@ -500,13 +506,11 @@ QPDF::warn(QPDFExc const& e)
|
||||
{
|
||||
m->warnings.push_back(e);
|
||||
if (!m->suppress_warnings) {
|
||||
#ifdef QPDF_OSS_FUZZ
|
||||
if (m->warnings.size() > 20) {
|
||||
*m->log->getWarn() << "WARNING: too many warnings - additional warnings surpressed\n";
|
||||
if (m->max_warnings > 0 && m->warnings.size() > 20) {
|
||||
*m->log->getWarn() << "WARNING: too many warnings - additional warnings suppressed\n";
|
||||
m->suppress_warnings = true;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
*m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
|
||||
}
|
||||
}
|
||||
@ -1934,6 +1938,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
|
||||
continue;
|
||||
}
|
||||
if (num == obj_stream_number) {
|
||||
QTC::TC("qpdf", "QPDF ignore self-referential object stream");
|
||||
warn(damagedPDF(
|
||||
input,
|
||||
m->last_object_description,
|
||||
|
@ -14,21 +14,6 @@ usage()
|
||||
exit(2);
|
||||
}
|
||||
|
||||
class Callback: public Pl_DCT::CompressConfig
|
||||
{
|
||||
public:
|
||||
Callback() = default;
|
||||
~Callback() override = default;
|
||||
void apply(jpeg_compress_struct*) override;
|
||||
bool called{false};
|
||||
};
|
||||
|
||||
void
|
||||
Callback::apply(jpeg_compress_struct*)
|
||||
{
|
||||
this->called = true;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
@ -66,21 +51,12 @@ main(int argc, char* argv[])
|
||||
FILE* outfile = QUtil::safe_fopen(outfilename, "wb");
|
||||
Pl_StdioFile out("stdout", outfile);
|
||||
unsigned char buf[100];
|
||||
bool done = false;
|
||||
Callback callback;
|
||||
Pl_DCT dct("dct", &out, width, height, components, cs, &callback);
|
||||
while (!done) {
|
||||
size_t len = fread(buf, 1, sizeof(buf), infile);
|
||||
if (len <= 0) {
|
||||
done = true;
|
||||
} else {
|
||||
Pl_DCT dct("dct", &out, width, height, components, cs);
|
||||
while (size_t len = fread(buf, 1, sizeof(buf), infile)) {
|
||||
dct.write(buf, len);
|
||||
}
|
||||
}
|
||||
dct.finish();
|
||||
if (!callback.called) {
|
||||
std::cout << "Callback was not called" << std::endl;
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
fclose(outfile);
|
||||
return 0;
|
||||
|
@ -4,6 +4,7 @@ QPDF err wrong objid/generation 0
|
||||
QPDF check objid 1
|
||||
QPDF check generation 1
|
||||
QPDF check obj 1
|
||||
QPDF ignore self-referential object stream 0
|
||||
QPDF hint table length indirect 0
|
||||
QPDF hint table length direct 0
|
||||
QPDF P absent in lindict 0
|
||||
|
@ -16,7 +16,7 @@ cleanup();
|
||||
|
||||
my $td = new TestDriver('object-stream');
|
||||
|
||||
my $n_tests = 7 + (36 * 4) + (12 * 2);
|
||||
my $n_tests = 9 + (36 * 4) + (12 * 2);
|
||||
my $n_compare_pdfs = 36;
|
||||
|
||||
for (my $n = 16; $n <= 19; ++$n)
|
||||
@ -107,5 +107,16 @@ $td->runtest("check file",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "recover-xref-stream-recovered.pdf"});
|
||||
|
||||
# Self-referential object stream
|
||||
$td->runtest("self-referential object stream",
|
||||
{$td->COMMAND => "qpdf --static-id --qdf" .
|
||||
" object-stream-self-ref.pdf a.pdf"},
|
||||
{$td->FILE => "object-stream-self-ref.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check file",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "object-stream-self-ref.out.pdf"});
|
||||
|
||||
|
||||
cleanup();
|
||||
$td->report(calc_ntests($n_tests, $n_compare_pdfs));
|
||||
|
2
qpdf/qtest/qpdf/object-stream-self-ref.out
Normal file
2
qpdf/qtest/qpdf/object-stream-self-ref.out
Normal file
@ -0,0 +1,2 @@
|
||||
WARNING: object-stream-self-ref.pdf object stream 1 (object 1 0, offset 2): object stream claims to contain itself
|
||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
BIN
qpdf/qtest/qpdf/object-stream-self-ref.out.pdf
Normal file
BIN
qpdf/qtest/qpdf/object-stream-self-ref.out.pdf
Normal file
Binary file not shown.
BIN
qpdf/qtest/qpdf/object-stream-self-ref.pdf
Normal file
BIN
qpdf/qtest/qpdf/object-stream-self-ref.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user