diff --git a/CMakeLists.txt b/CMakeLists.txt index 73ea22af..6caba84f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,10 +131,6 @@ if(FUTURE) add_compile_definitions(QPDF_FUTURE=1) endif() -if(OSS_FUZZ) - add_compile_definitions(QPDF_OSS_FUZZ=1) -endif() - enable_testing() set(RUN_QTEST perl ${qpdf_SOURCE_DIR}/run-qtest ${ENABLE_QTC_ARG}) diff --git a/ChangeLog b/ChangeLog index caec9409..aade7ea1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,44 @@ +2024-07-04 M Holger + + * Treat corrupt JPEG streams as unfilterable. This avoids them + getting uncompressed when writing PDF files with decode level all. + +2024-07-02 Jay Berkenbilt + + * Add QPDF::setMaxWarnings to set the maximum of warnings before + warning suppression. + + * Add static option to Pl_DCT to limit memory usage of + decompression. The option is generally exposed but is primarily + intended to support fuzz tests, which have explicit memory limits + that are smaller than what is commonly seen in the wild with PDF + files. + + * Add static option to Pl_DCT to control whether decompression of + corrupt JPEG data is attempted. + +2024-07-01 M Holger + + * Bug fix: certain invalid object streams caused the insertion of + invalid entries into in the xref table. + +2024-06-29 M Holger + + * Bug fix: in QPDFOutlineObjectHelper detect loops in the list of + direct children of an outline item. + +2024-06-27 M Holger + + * Add sanity check in QPDF xref table reconstruction to reject + objects with impossibly large object id in order to improve + handling of severely damaged PDF files. + +2024-06-25 M Holger + + * Detect severely damaged PDF files early. After parsing the xref + table in QPDF throw a damagedPDF exception if the root of the pages + tree is not a dictionary. + 2024-06-07 Jay Berkenbilt * 11.9.1: release diff --git a/fuzz/dct_fuzzer.cc b/fuzz/dct_fuzzer.cc index 54ca730c..2179a377 100644 --- a/fuzz/dct_fuzzer.cc +++ b/fuzz/dct_fuzzer.cc @@ -26,8 +26,18 @@ FuzzHelper::FuzzHelper(unsigned char const* data, size_t size) : void FuzzHelper::doChecks() { + // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during + // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before + // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally + // occur legitimately and therefore must be allowed during normal operations. + Pl_DCT::setMemoryLimit(1'000'000'000); + + // Do not decompress corrupt data. This may cause extended runtime within jpeglib without + // exercising additional code paths in qpdf. + Pl_DCT::setThrowOnCorruptData(true); + Pl_Discard discard; - Pl_DCT p("decode", &discard, 20'000'000); + Pl_DCT p("decode", &discard); p.write(const_cast(data), size); p.finish(); } diff --git a/fuzz/qpdf_fuzzer.cc b/fuzz/qpdf_fuzzer.cc index 814c59c4..20073d28 100644 --- a/fuzz/qpdf_fuzzer.cc +++ b/fuzz/qpdf_fuzzer.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -56,6 +57,7 @@ FuzzHelper::getQpdf() auto is = std::shared_ptr(new BufferInputSource("fuzz input", &this->input_buffer)); auto qpdf = QPDF::create(); + qpdf->setMaxWarnings(20); qpdf->processInputSource(is); return qpdf; } @@ -171,6 +173,16 @@ FuzzHelper::testOutlines() void FuzzHelper::doChecks() { + // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during + // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before + // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally + // occur legitimately and therefore must be allowed during normal operations. + Pl_DCT::setMemoryLimit(1'000'000'000); + + // Do not decompress corrupt data. This may cause extended runtime within jpeglib without + // exercising additional code paths in qpdf, and potentially causing counterproductive timeouts. + Pl_DCT::setThrowOnCorruptData(true); + // Get as much coverage as possible in parts of the library that // might benefit from fuzzing. std::cerr << "\ninfo: starting testWrite\n"; diff --git a/include/qpdf/Pl_DCT.hh b/include/qpdf/Pl_DCT.hh index 5d77db51..077a1f92 100644 --- a/include/qpdf/Pl_DCT.hh +++ b/include/qpdf/Pl_DCT.hh @@ -34,20 +34,17 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline QPDF_DLL Pl_DCT(char const* identifier, Pipeline* next); - // Constructor for decompressing image data. If corrupt_data_limit is non-zero and the data is - // corrupt, only attempt to uncompress if the uncompressed size is less than corrupt_data_limit. + // Limit the memory used by jpeglib when decompressing data. + // NB This is a static option affecting all Pl_DCT instances. QPDF_DLL - Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit); + static void setMemoryLimit(long limit); - class QPDF_DLL_CLASS CompressConfig - { - public: - QPDF_DLL - CompressConfig() = default; - QPDF_DLL - virtual ~CompressConfig() = default; - virtual void apply(jpeg_compress_struct*) = 0; - }; + // Treat corrupt data as a runtime error rather than attempting to decompress regardless. This + // is the qpdf default behaviour. To attempt to decompress corrupt data set 'treat_as_error' to + // false. + // NB This is a static option affecting all Pl_DCT instances. + QPDF_DLL + static void setThrowOnCorruptData(bool treat_as_error); // Constructor for compressing image data QPDF_DLL @@ -57,8 +54,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline JDIMENSION image_width, JDIMENSION image_height, int components, - J_COLOR_SPACE color_space, - CompressConfig* config_callback = nullptr); + J_COLOR_SPACE color_space); QPDF_DLL ~Pl_DCT() override; @@ -83,6 +79,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline public: QPDF_DLL ~Members() = default; + Members(Members const&) = delete; private: // For compression @@ -90,25 +87,18 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline JDIMENSION image_width, JDIMENSION image_height, int components, - J_COLOR_SPACE color_space, - CompressConfig* config_callback); + J_COLOR_SPACE color_space); // For decompression - Members(size_t corrupt_data_limit); - Members(Members const&) = delete; + Members(); action_e action; Pl_Buffer buf; - // Used for decompression - size_t corrupt_data_limit{0}; - // Used for compression JDIMENSION image_width{0}; JDIMENSION image_height{0}; int components{1}; J_COLOR_SPACE color_space{JCS_GRAYSCALE}; - - CompressConfig* config_callback{nullptr}; }; std::shared_ptr m; diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index c83bcb0b..0044e58a 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -228,6 +228,10 @@ class QPDF QPDF_DLL void setSuppressWarnings(bool); + // Set the maximum number of warnings to output. Subsequent warnings are suppressed. + QPDF_DLL + void setMaxWarnings(int); + // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If // turned off, it will throw an exception on the first such problem it finds without attempting // recovery. @@ -1497,6 +1501,7 @@ class QPDF bool provided_password_is_hex_key{false}; bool ignore_xref_streams{false}; bool suppress_warnings{false}; + int max_warnings{0}; bool attempt_recovery{true}; bool check_mode{false}; std::shared_ptr encp; diff --git a/job.sums b/job.sums index 5a0d8c4b..4f53c923 100644 --- a/job.sums +++ b/job.sums @@ -1,5 +1,5 @@ # Generated by generate_auto_job -CMakeLists.txt 456938b9debc4997f142ccfb13f3baf2517ae5855e1fe9b2ada1a0b8f7e4facf +CMakeLists.txt 47752f33b17fa526d46fc608a25ad6b8c61feba9deb1bd659fddf93e6e08b102 generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 diff --git a/libqpdf/Pl_DCT.cc b/libqpdf/Pl_DCT.cc index d2544ab0..0597fd09 100644 --- a/libqpdf/Pl_DCT.cc +++ b/libqpdf/Pl_DCT.cc @@ -20,6 +20,9 @@ namespace jmp_buf jmpbuf; std::string msg; }; + + long memory_limit{0}; + bool throw_on_corrupt_data{true}; } // namespace static void @@ -32,38 +35,39 @@ error_handler(j_common_ptr cinfo) longjmp(jerr->jmpbuf, 1); } -Pl_DCT::Members::Members(size_t corrupt_data_limit) : +Pl_DCT::Members::Members() : action(a_decompress), - buf("DCT compressed image"), - corrupt_data_limit(corrupt_data_limit) + buf("DCT compressed image") { } Pl_DCT::Members::Members( - JDIMENSION image_width, - JDIMENSION image_height, - int components, - J_COLOR_SPACE color_space, - CompressConfig* config_callback) : + JDIMENSION image_width, JDIMENSION image_height, int components, J_COLOR_SPACE color_space) : action(a_compress), buf("DCT uncompressed image"), image_width(image_width), image_height(image_height), components(components), - color_space(color_space), - config_callback(config_callback) + color_space(color_space) { } Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next) : - Pl_DCT(identifier, next, 0) + Pipeline(identifier, next), + m(new Members()) { } -Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit) : - Pipeline(identifier, next), - m(new Members(corrupt_data_limit)) +void +Pl_DCT::setMemoryLimit(long limit) { + memory_limit = limit; +} + +void +Pl_DCT::setThrowOnCorruptData(bool treat_as_error) +{ + throw_on_corrupt_data = treat_as_error; } Pl_DCT::Pl_DCT( @@ -72,10 +76,9 @@ Pl_DCT::Pl_DCT( JDIMENSION image_width, JDIMENSION image_height, int components, - J_COLOR_SPACE color_space, - CompressConfig* config_callback) : + J_COLOR_SPACE color_space) : Pipeline(identifier, next), - m(new Members(image_width, image_height, components, color_space, config_callback)) + m(new Members(image_width, image_height, components, color_space)) { } @@ -273,9 +276,6 @@ Pl_DCT::compress(void* cinfo_p, Buffer* b) cinfo->input_components = m->components; cinfo->in_color_space = m->color_space; jpeg_set_defaults(cinfo); - if (m->config_callback) { - m->config_callback->apply(cinfo); - } jpeg_start_compress(cinfo, TRUE); @@ -312,36 +312,32 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b) # pragma GCC diagnostic pop #endif -#ifdef QPDF_OSS_FUZZ - // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during - // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before - // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally - // occur legitimately and therefore must be allowed during normal operations. - cinfo->mem->max_memory_to_use = 1'000'000'000; - // For some corrupt files the memory used internally by libjpeg stays within the above limits - // even though the size written to the next pipeline is significantly larger. - m->corrupt_data_limit = 10'000'000; -#endif + if (memory_limit > 0) { + cinfo->mem->max_memory_to_use = memory_limit; + } + jpeg_buffer_src(cinfo, b); (void)jpeg_read_header(cinfo, TRUE); + if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) { + throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt"); + } (void)jpeg_calc_output_dimensions(cinfo); unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components); - if (cinfo->err->num_warnings == 0 || m->corrupt_data_limit == 0 || - (width * QIntC::to_uint(cinfo->output_height)) < m->corrupt_data_limit) { - // err->num_warnings is the number of corrupt data warnings emitted. - // err->msg_code could also be the code of an informational message. - JSAMPARRAY buffer = (*cinfo->mem->alloc_sarray)( - reinterpret_cast(cinfo), JPOOL_IMAGE, width, 1); + // err->num_warnings is the number of corrupt data warnings emitted. + // err->msg_code could also be the code of an informational message. + JSAMPARRAY buffer = + (*cinfo->mem->alloc_sarray)(reinterpret_cast(cinfo), JPOOL_IMAGE, width, 1); - (void)jpeg_start_decompress(cinfo); - while (cinfo->output_scanline < cinfo->output_height) { - (void)jpeg_read_scanlines(cinfo, buffer, 1); - getNext()->write(buffer[0], width * sizeof(buffer[0][0])); - } - (void)jpeg_finish_decompress(cinfo); - } else { - *QPDFLogger::defaultLogger()->getError() << "corrupt JPEG data ignored" << "\n"; + (void)jpeg_start_decompress(cinfo); + while (cinfo->output_scanline < cinfo->output_height && + (!throw_on_corrupt_data || cinfo->err->num_warnings == 0)) { + (void)jpeg_read_scanlines(cinfo, buffer, 1); + getNext()->write(buffer[0], width * sizeof(buffer[0][0])); + } + (void)jpeg_finish_decompress(cinfo); + if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) { + throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt"); } getNext()->finish(); } diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 915518af..451cdf70 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -331,6 +331,12 @@ QPDF::setSuppressWarnings(bool val) m->suppress_warnings = val; } +void +QPDF::setMaxWarnings(int val) +{ + m->suppress_warnings = val; +} + void QPDF::setAttemptRecovery(bool val) { @@ -500,13 +506,11 @@ QPDF::warn(QPDFExc const& e) { m->warnings.push_back(e); if (!m->suppress_warnings) { -#ifdef QPDF_OSS_FUZZ - if (m->warnings.size() > 20) { - *m->log->getWarn() << "WARNING: too many warnings - additional warnings surpressed\n"; + if (m->max_warnings > 0 && m->warnings.size() > 20) { + *m->log->getWarn() << "WARNING: too many warnings - additional warnings suppressed\n"; m->suppress_warnings = true; return; } -#endif *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; } } @@ -1934,6 +1938,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) continue; } if (num == obj_stream_number) { + QTC::TC("qpdf", "QPDF ignore self-referential object stream"); warn(damagedPDF( input, m->last_object_description, diff --git a/libtests/dct_compress.cc b/libtests/dct_compress.cc index 8f7e0913..00428946 100644 --- a/libtests/dct_compress.cc +++ b/libtests/dct_compress.cc @@ -14,21 +14,6 @@ usage() exit(2); } -class Callback: public Pl_DCT::CompressConfig -{ - public: - Callback() = default; - ~Callback() override = default; - void apply(jpeg_compress_struct*) override; - bool called{false}; -}; - -void -Callback::apply(jpeg_compress_struct*) -{ - this->called = true; -} - int main(int argc, char* argv[]) { @@ -66,21 +51,12 @@ main(int argc, char* argv[]) FILE* outfile = QUtil::safe_fopen(outfilename, "wb"); Pl_StdioFile out("stdout", outfile); unsigned char buf[100]; - bool done = false; - Callback callback; - Pl_DCT dct("dct", &out, width, height, components, cs, &callback); - while (!done) { - size_t len = fread(buf, 1, sizeof(buf), infile); - if (len <= 0) { - done = true; - } else { - dct.write(buf, len); - } + Pl_DCT dct("dct", &out, width, height, components, cs); + while (size_t len = fread(buf, 1, sizeof(buf), infile)) { + dct.write(buf, len); } dct.finish(); - if (!callback.called) { - std::cout << "Callback was not called" << std::endl; - } + fclose(infile); fclose(outfile); return 0; diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 3b82a8da..984b9ff9 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -4,6 +4,7 @@ QPDF err wrong objid/generation 0 QPDF check objid 1 QPDF check generation 1 QPDF check obj 1 +QPDF ignore self-referential object stream 0 QPDF hint table length indirect 0 QPDF hint table length direct 0 QPDF P absent in lindict 0 diff --git a/qpdf/qtest/object-stream.test b/qpdf/qtest/object-stream.test index 28479ee3..bed3fefa 100644 --- a/qpdf/qtest/object-stream.test +++ b/qpdf/qtest/object-stream.test @@ -16,7 +16,7 @@ cleanup(); my $td = new TestDriver('object-stream'); -my $n_tests = 7 + (36 * 4) + (12 * 2); +my $n_tests = 9 + (36 * 4) + (12 * 2); my $n_compare_pdfs = 36; for (my $n = 16; $n <= 19; ++$n) @@ -107,5 +107,16 @@ $td->runtest("check file", {$td->FILE => "a.pdf"}, {$td->FILE => "recover-xref-stream-recovered.pdf"}); +# Self-referential object stream +$td->runtest("self-referential object stream", + {$td->COMMAND => "qpdf --static-id --qdf" . + " object-stream-self-ref.pdf a.pdf"}, + {$td->FILE => "object-stream-self-ref.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check file", + {$td->FILE => "a.pdf"}, + {$td->FILE => "object-stream-self-ref.out.pdf"}); + + cleanup(); $td->report(calc_ntests($n_tests, $n_compare_pdfs)); diff --git a/qpdf/qtest/qpdf/object-stream-self-ref.out b/qpdf/qtest/qpdf/object-stream-self-ref.out new file mode 100644 index 00000000..84a2d3e3 --- /dev/null +++ b/qpdf/qtest/qpdf/object-stream-self-ref.out @@ -0,0 +1,2 @@ +WARNING: object-stream-self-ref.pdf object stream 1 (object 1 0, offset 2): object stream claims to contain itself +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/object-stream-self-ref.out.pdf b/qpdf/qtest/qpdf/object-stream-self-ref.out.pdf new file mode 100644 index 00000000..d6b78a05 Binary files /dev/null and b/qpdf/qtest/qpdf/object-stream-self-ref.out.pdf differ diff --git a/qpdf/qtest/qpdf/object-stream-self-ref.pdf b/qpdf/qtest/qpdf/object-stream-self-ref.pdf new file mode 100644 index 00000000..40ea11a0 Binary files /dev/null and b/qpdf/qtest/qpdf/object-stream-self-ref.pdf differ