From 65bd8bc57de1fa90c9c4994297e7eaf0d3795ed1 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 2 Jul 2024 09:04:53 -0400 Subject: [PATCH] Add DCT decompression config methods in favor of compile-time changes As a rule, we should avoid conditional compilation is it always causes code paths that are sometimes not even seen lexically by the compiler. Also, we want the actual code being fuzzed to be as close as possible to the real code. Conditional compilation is suitable to handle underlying system differences. Instead, favor configuration using callbacks or other methods that can be triggered in the places where they need to be exercised. --- CMakeLists.txt | 4 -- fuzz/dct_fuzzer.cc | 12 +++++- fuzz/qpdf_fuzzer.cc | 11 ++++++ include/qpdf/Pl_DCT.hh | 34 ++++++---------- job.sums | 2 +- libqpdf/Pl_DCT.cc | 84 +++++++++++++++++++--------------------- libqpdf/QPDF.cc | 1 + libtests/dct_compress.cc | 32 ++------------- 8 files changed, 79 insertions(+), 101 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 73ea22af..6caba84f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,10 +131,6 @@ if(FUTURE) add_compile_definitions(QPDF_FUTURE=1) endif() -if(OSS_FUZZ) - add_compile_definitions(QPDF_OSS_FUZZ=1) -endif() - enable_testing() set(RUN_QTEST perl ${qpdf_SOURCE_DIR}/run-qtest ${ENABLE_QTC_ARG}) diff --git a/fuzz/dct_fuzzer.cc b/fuzz/dct_fuzzer.cc index 54ca730c..2179a377 100644 --- a/fuzz/dct_fuzzer.cc +++ b/fuzz/dct_fuzzer.cc @@ -26,8 +26,18 @@ FuzzHelper::FuzzHelper(unsigned char const* data, size_t size) : void FuzzHelper::doChecks() { + // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during + // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before + // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally + // occur legitimately and therefore must be allowed during normal operations. + Pl_DCT::setMemoryLimit(1'000'000'000); + + // Do not decompress corrupt data. This may cause extended runtime within jpeglib without + // exercising additional code paths in qpdf. + Pl_DCT::setThrowOnCorruptData(true); + Pl_Discard discard; - Pl_DCT p("decode", &discard, 20'000'000); + Pl_DCT p("decode", &discard); p.write(const_cast(data), size); p.finish(); } diff --git a/fuzz/qpdf_fuzzer.cc b/fuzz/qpdf_fuzzer.cc index 814c59c4..d676e8cb 100644 --- a/fuzz/qpdf_fuzzer.cc +++ b/fuzz/qpdf_fuzzer.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -171,6 +172,16 @@ FuzzHelper::testOutlines() void FuzzHelper::doChecks() { + // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during + // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before + // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally + // occur legitimately and therefore must be allowed during normal operations. + Pl_DCT::setMemoryLimit(1'000'000'000); + + // Do not decompress corrupt data. This may cause extended runtime within jpeglib without + // exercising additional code paths in qpdf, and potentially causing counterproductive timeouts. + Pl_DCT::setThrowOnCorruptData(true); + // Get as much coverage as possible in parts of the library that // might benefit from fuzzing. std::cerr << "\ninfo: starting testWrite\n"; diff --git a/include/qpdf/Pl_DCT.hh b/include/qpdf/Pl_DCT.hh index 5d77db51..d6f05ce6 100644 --- a/include/qpdf/Pl_DCT.hh +++ b/include/qpdf/Pl_DCT.hh @@ -34,20 +34,15 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline QPDF_DLL Pl_DCT(char const* identifier, Pipeline* next); - // Constructor for decompressing image data. If corrupt_data_limit is non-zero and the data is - // corrupt, only attempt to uncompress if the uncompressed size is less than corrupt_data_limit. + // Limit the memory used by jpeglib when decompressing data. + // NB This is a static option affecting all Pl_DCT instances. QPDF_DLL - Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit); + static void setMemoryLimit(long limit); - class QPDF_DLL_CLASS CompressConfig - { - public: - QPDF_DLL - CompressConfig() = default; - QPDF_DLL - virtual ~CompressConfig() = default; - virtual void apply(jpeg_compress_struct*) = 0; - }; + // Treat corrupt data as a runtime error rather than attempting to decompress regardless. + // NB This is a static option affecting all Pl_DCT instances. + QPDF_DLL + static void setThrowOnCorruptData(bool treat_as_error); // Constructor for compressing image data QPDF_DLL @@ -57,8 +52,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline JDIMENSION image_width, JDIMENSION image_height, int components, - J_COLOR_SPACE color_space, - CompressConfig* config_callback = nullptr); + J_COLOR_SPACE color_space); QPDF_DLL ~Pl_DCT() override; @@ -83,6 +77,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline public: QPDF_DLL ~Members() = default; + Members(Members const&) = delete; private: // For compression @@ -90,25 +85,18 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline JDIMENSION image_width, JDIMENSION image_height, int components, - J_COLOR_SPACE color_space, - CompressConfig* config_callback); + J_COLOR_SPACE color_space); // For decompression - Members(size_t corrupt_data_limit); - Members(Members const&) = delete; + Members(); action_e action; Pl_Buffer buf; - // Used for decompression - size_t corrupt_data_limit{0}; - // Used for compression JDIMENSION image_width{0}; JDIMENSION image_height{0}; int components{1}; J_COLOR_SPACE color_space{JCS_GRAYSCALE}; - - CompressConfig* config_callback{nullptr}; }; std::shared_ptr m; diff --git a/job.sums b/job.sums index 5a0d8c4b..4f53c923 100644 --- a/job.sums +++ b/job.sums @@ -1,5 +1,5 @@ # Generated by generate_auto_job -CMakeLists.txt 456938b9debc4997f142ccfb13f3baf2517ae5855e1fe9b2ada1a0b8f7e4facf +CMakeLists.txt 47752f33b17fa526d46fc608a25ad6b8c61feba9deb1bd659fddf93e6e08b102 generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 diff --git a/libqpdf/Pl_DCT.cc b/libqpdf/Pl_DCT.cc index d2544ab0..2944a684 100644 --- a/libqpdf/Pl_DCT.cc +++ b/libqpdf/Pl_DCT.cc @@ -20,6 +20,9 @@ namespace jmp_buf jmpbuf; std::string msg; }; + + long memory_limit{0}; + bool throw_on_corrupt_data{false}; } // namespace static void @@ -32,38 +35,39 @@ error_handler(j_common_ptr cinfo) longjmp(jerr->jmpbuf, 1); } -Pl_DCT::Members::Members(size_t corrupt_data_limit) : +Pl_DCT::Members::Members() : action(a_decompress), - buf("DCT compressed image"), - corrupt_data_limit(corrupt_data_limit) + buf("DCT compressed image") { } Pl_DCT::Members::Members( - JDIMENSION image_width, - JDIMENSION image_height, - int components, - J_COLOR_SPACE color_space, - CompressConfig* config_callback) : + JDIMENSION image_width, JDIMENSION image_height, int components, J_COLOR_SPACE color_space) : action(a_compress), buf("DCT uncompressed image"), image_width(image_width), image_height(image_height), components(components), - color_space(color_space), - config_callback(config_callback) + color_space(color_space) { } Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next) : - Pl_DCT(identifier, next, 0) + Pipeline(identifier, next), + m(new Members()) { } -Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit) : - Pipeline(identifier, next), - m(new Members(corrupt_data_limit)) +void +Pl_DCT::setMemoryLimit(long limit) { + memory_limit = limit; +} + +void +Pl_DCT::setThrowOnCorruptData(bool treat_as_error) +{ + throw_on_corrupt_data = treat_as_error; } Pl_DCT::Pl_DCT( @@ -72,10 +76,9 @@ Pl_DCT::Pl_DCT( JDIMENSION image_width, JDIMENSION image_height, int components, - J_COLOR_SPACE color_space, - CompressConfig* config_callback) : + J_COLOR_SPACE color_space) : Pipeline(identifier, next), - m(new Members(image_width, image_height, components, color_space, config_callback)) + m(new Members(image_width, image_height, components, color_space)) { } @@ -273,9 +276,6 @@ Pl_DCT::compress(void* cinfo_p, Buffer* b) cinfo->input_components = m->components; cinfo->in_color_space = m->color_space; jpeg_set_defaults(cinfo); - if (m->config_callback) { - m->config_callback->apply(cinfo); - } jpeg_start_compress(cinfo, TRUE); @@ -312,36 +312,32 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b) # pragma GCC diagnostic pop #endif -#ifdef QPDF_OSS_FUZZ - // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during - // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before - // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally - // occur legitimately and therefore must be allowed during normal operations. - cinfo->mem->max_memory_to_use = 1'000'000'000; - // For some corrupt files the memory used internally by libjpeg stays within the above limits - // even though the size written to the next pipeline is significantly larger. - m->corrupt_data_limit = 10'000'000; -#endif + if (memory_limit > 0) { + cinfo->mem->max_memory_to_use = memory_limit; + } + jpeg_buffer_src(cinfo, b); (void)jpeg_read_header(cinfo, TRUE); + if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) { + throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt"); + } (void)jpeg_calc_output_dimensions(cinfo); unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components); - if (cinfo->err->num_warnings == 0 || m->corrupt_data_limit == 0 || - (width * QIntC::to_uint(cinfo->output_height)) < m->corrupt_data_limit) { - // err->num_warnings is the number of corrupt data warnings emitted. - // err->msg_code could also be the code of an informational message. - JSAMPARRAY buffer = (*cinfo->mem->alloc_sarray)( - reinterpret_cast(cinfo), JPOOL_IMAGE, width, 1); + // err->num_warnings is the number of corrupt data warnings emitted. + // err->msg_code could also be the code of an informational message. + JSAMPARRAY buffer = + (*cinfo->mem->alloc_sarray)(reinterpret_cast(cinfo), JPOOL_IMAGE, width, 1); - (void)jpeg_start_decompress(cinfo); - while (cinfo->output_scanline < cinfo->output_height) { - (void)jpeg_read_scanlines(cinfo, buffer, 1); - getNext()->write(buffer[0], width * sizeof(buffer[0][0])); - } - (void)jpeg_finish_decompress(cinfo); - } else { - *QPDFLogger::defaultLogger()->getError() << "corrupt JPEG data ignored" << "\n"; + (void)jpeg_start_decompress(cinfo); + while (cinfo->output_scanline < cinfo->output_height && + (!throw_on_corrupt_data || cinfo->err->num_warnings == 0)) { + (void)jpeg_read_scanlines(cinfo, buffer, 1); + getNext()->write(buffer[0], width * sizeof(buffer[0][0])); + } + (void)jpeg_finish_decompress(cinfo); + if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) { + throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt"); } getNext()->finish(); } diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 915518af..52fc1ce2 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -500,6 +500,7 @@ QPDF::warn(QPDFExc const& e) { m->warnings.push_back(e); if (!m->suppress_warnings) { +// QXXXQ #ifdef QPDF_OSS_FUZZ if (m->warnings.size() > 20) { *m->log->getWarn() << "WARNING: too many warnings - additional warnings surpressed\n"; diff --git a/libtests/dct_compress.cc b/libtests/dct_compress.cc index 8f7e0913..00428946 100644 --- a/libtests/dct_compress.cc +++ b/libtests/dct_compress.cc @@ -14,21 +14,6 @@ usage() exit(2); } -class Callback: public Pl_DCT::CompressConfig -{ - public: - Callback() = default; - ~Callback() override = default; - void apply(jpeg_compress_struct*) override; - bool called{false}; -}; - -void -Callback::apply(jpeg_compress_struct*) -{ - this->called = true; -} - int main(int argc, char* argv[]) { @@ -66,21 +51,12 @@ main(int argc, char* argv[]) FILE* outfile = QUtil::safe_fopen(outfilename, "wb"); Pl_StdioFile out("stdout", outfile); unsigned char buf[100]; - bool done = false; - Callback callback; - Pl_DCT dct("dct", &out, width, height, components, cs, &callback); - while (!done) { - size_t len = fread(buf, 1, sizeof(buf), infile); - if (len <= 0) { - done = true; - } else { - dct.write(buf, len); - } + Pl_DCT dct("dct", &out, width, height, components, cs); + while (size_t len = fread(buf, 1, sizeof(buf), infile)) { + dct.write(buf, len); } dct.finish(); - if (!callback.called) { - std::cout << "Callback was not called" << std::endl; - } + fclose(infile); fclose(outfile); return 0;