2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-02 22:50:20 +00:00

Merge pull request #1230 from m-holger/clean-dct-fuzz-changes

Alternative clean dct fuzz changes
This commit is contained in:
m-holger 2024-07-06 06:10:27 +01:00 committed by GitHub
commit ce2deaf185
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 151 additions and 106 deletions

View File

@ -131,10 +131,6 @@ if(FUTURE)
add_compile_definitions(QPDF_FUTURE=1) add_compile_definitions(QPDF_FUTURE=1)
endif() endif()
if(OSS_FUZZ)
add_compile_definitions(QPDF_OSS_FUZZ=1)
endif()
enable_testing() enable_testing()
set(RUN_QTEST perl ${qpdf_SOURCE_DIR}/run-qtest ${ENABLE_QTC_ARG}) set(RUN_QTEST perl ${qpdf_SOURCE_DIR}/run-qtest ${ENABLE_QTC_ARG})

View File

@ -1,3 +1,44 @@
2024-07-04 M Holger <m.holger@qpdf.org>
* Treat corrupt JPEG streams as unfilterable. This avoids them
getting uncompressed when writing PDF files with decode level all.
2024-07-02 Jay Berkenbilt <ejb@ql.org>
* Add QPDF::setMaxWarnings to set the maximum number of warnings
output before further warnings are suppressed.
* Add static option to Pl_DCT to limit memory usage of
decompression. The option is generally exposed but is primarily
intended to support fuzz tests, which have explicit memory limits
that are smaller than what is commonly seen in the wild with PDF
files.
* Add static option to Pl_DCT to control whether decompression of
corrupt JPEG data is attempted.
2024-07-01 M Holger <m.holger@qpdf.org>
* Bug fix: certain invalid object streams caused the insertion of
invalid entries into the xref table.
2024-06-29 M Holger <m.holger@qpdf.org>
* Bug fix: in QPDFOutlineObjectHelper detect loops in the list of
direct children of an outline item.
2024-06-27 M Holger <m.holger@qpdf.org>
* Add sanity check in QPDF xref table reconstruction to reject
objects with impossibly large object id in order to improve
handling of severely damaged PDF files.
2024-06-25 M Holger <m.holger@qpdf.org>
* Detect severely damaged PDF files early. After parsing the xref
table in QPDF throw a damagedPDF exception if the root of the pages
tree is not a dictionary.
2024-06-07 Jay Berkenbilt <ejb@ql.org> 2024-06-07 Jay Berkenbilt <ejb@ql.org>
* 11.9.1: release * 11.9.1: release

View File

@ -26,8 +26,18 @@ FuzzHelper::FuzzHelper(unsigned char const* data, size_t size) :
void void
FuzzHelper::doChecks() FuzzHelper::doChecks()
{ {
// Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during
// fuzzing is due to corrupt JPEG data which sometimes cannot be detected before
// jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
// occur legitimately and therefore must be allowed during normal operations.
Pl_DCT::setMemoryLimit(1'000'000'000);
// Do not decompress corrupt data. This may cause extended runtime within jpeglib without
// exercising additional code paths in qpdf.
Pl_DCT::setThrowOnCorruptData(true);
Pl_Discard discard; Pl_Discard discard;
Pl_DCT p("decode", &discard, 20'000'000); Pl_DCT p("decode", &discard);
p.write(const_cast<unsigned char*>(data), size); p.write(const_cast<unsigned char*>(data), size);
p.finish(); p.finish();
} }

View File

@ -1,5 +1,6 @@
#include <qpdf/Buffer.hh> #include <qpdf/Buffer.hh>
#include <qpdf/BufferInputSource.hh> #include <qpdf/BufferInputSource.hh>
#include <qpdf/Pl_DCT.hh>
#include <qpdf/Pl_Discard.hh> #include <qpdf/Pl_Discard.hh>
#include <qpdf/QPDF.hh> #include <qpdf/QPDF.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh> #include <qpdf/QPDFAcroFormDocumentHelper.hh>
@ -56,6 +57,7 @@ FuzzHelper::getQpdf()
auto is = auto is =
std::shared_ptr<InputSource>(new BufferInputSource("fuzz input", &this->input_buffer)); std::shared_ptr<InputSource>(new BufferInputSource("fuzz input", &this->input_buffer));
auto qpdf = QPDF::create(); auto qpdf = QPDF::create();
qpdf->setMaxWarnings(20);
qpdf->processInputSource(is); qpdf->processInputSource(is);
return qpdf; return qpdf;
} }
@ -171,6 +173,16 @@ FuzzHelper::testOutlines()
void void
FuzzHelper::doChecks() FuzzHelper::doChecks()
{ {
// Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during
// fuzzing is due to corrupt JPEG data which sometimes cannot be detected before
// jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
// occur legitimately and therefore must be allowed during normal operations.
Pl_DCT::setMemoryLimit(1'000'000'000);
// Do not decompress corrupt data. This may cause extended runtime within jpeglib without
// exercising additional code paths in qpdf, and potentially causing counterproductive timeouts.
Pl_DCT::setThrowOnCorruptData(true);
// Get as much coverage as possible in parts of the library that // Get as much coverage as possible in parts of the library that
// might benefit from fuzzing. // might benefit from fuzzing.
std::cerr << "\ninfo: starting testWrite\n"; std::cerr << "\ninfo: starting testWrite\n";

View File

@ -34,20 +34,17 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
QPDF_DLL QPDF_DLL
Pl_DCT(char const* identifier, Pipeline* next); Pl_DCT(char const* identifier, Pipeline* next);
// Constructor for decompressing image data. If corrupt_data_limit is non-zero and the data is // Limit the memory used by jpeglib when decompressing data.
// corrupt, only attempt to uncompress if the uncompressed size is less than corrupt_data_limit. // NB This is a static option affecting all Pl_DCT instances.
QPDF_DLL QPDF_DLL
Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit); static void setMemoryLimit(long limit);
class QPDF_DLL_CLASS CompressConfig // Treat corrupt data as a runtime error rather than attempting to decompress regardless. This
{ // is the qpdf default behaviour. To attempt to decompress corrupt data set 'treat_as_error' to
public: // false.
QPDF_DLL // NB This is a static option affecting all Pl_DCT instances.
CompressConfig() = default; QPDF_DLL
QPDF_DLL static void setThrowOnCorruptData(bool treat_as_error);
virtual ~CompressConfig() = default;
virtual void apply(jpeg_compress_struct*) = 0;
};
// Constructor for compressing image data // Constructor for compressing image data
QPDF_DLL QPDF_DLL
@ -57,8 +54,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
JDIMENSION image_width, JDIMENSION image_width,
JDIMENSION image_height, JDIMENSION image_height,
int components, int components,
J_COLOR_SPACE color_space, J_COLOR_SPACE color_space);
CompressConfig* config_callback = nullptr);
QPDF_DLL QPDF_DLL
~Pl_DCT() override; ~Pl_DCT() override;
@ -83,6 +79,7 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
public: public:
QPDF_DLL QPDF_DLL
~Members() = default; ~Members() = default;
Members(Members const&) = delete;
private: private:
// For compression // For compression
@ -90,25 +87,18 @@ class QPDF_DLL_CLASS Pl_DCT: public Pipeline
JDIMENSION image_width, JDIMENSION image_width,
JDIMENSION image_height, JDIMENSION image_height,
int components, int components,
J_COLOR_SPACE color_space, J_COLOR_SPACE color_space);
CompressConfig* config_callback);
// For decompression // For decompression
Members(size_t corrupt_data_limit); Members();
Members(Members const&) = delete;
action_e action; action_e action;
Pl_Buffer buf; Pl_Buffer buf;
// Used for decompression
size_t corrupt_data_limit{0};
// Used for compression // Used for compression
JDIMENSION image_width{0}; JDIMENSION image_width{0};
JDIMENSION image_height{0}; JDIMENSION image_height{0};
int components{1}; int components{1};
J_COLOR_SPACE color_space{JCS_GRAYSCALE}; J_COLOR_SPACE color_space{JCS_GRAYSCALE};
CompressConfig* config_callback{nullptr};
}; };
std::shared_ptr<Members> m; std::shared_ptr<Members> m;

View File

@ -228,6 +228,10 @@ class QPDF
QPDF_DLL QPDF_DLL
void setSuppressWarnings(bool); void setSuppressWarnings(bool);
// Set the maximum number of warnings to output. Subsequent warnings are suppressed.
QPDF_DLL
void setMaxWarnings(int);
// By default, QPDF will try to recover if it finds certain types of errors in PDF files. If // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If
// turned off, it will throw an exception on the first such problem it finds without attempting // turned off, it will throw an exception on the first such problem it finds without attempting
// recovery. // recovery.
@ -1497,6 +1501,7 @@ class QPDF
bool provided_password_is_hex_key{false}; bool provided_password_is_hex_key{false};
bool ignore_xref_streams{false}; bool ignore_xref_streams{false};
bool suppress_warnings{false}; bool suppress_warnings{false};
int max_warnings{0};
bool attempt_recovery{true}; bool attempt_recovery{true};
bool check_mode{false}; bool check_mode{false};
std::shared_ptr<EncryptionParameters> encp; std::shared_ptr<EncryptionParameters> encp;

View File

@ -1,5 +1,5 @@
# Generated by generate_auto_job # Generated by generate_auto_job
CMakeLists.txt 456938b9debc4997f142ccfb13f3baf2517ae5855e1fe9b2ada1a0b8f7e4facf CMakeLists.txt 47752f33b17fa526d46fc608a25ad6b8c61feba9deb1bd659fddf93e6e08b102
generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86 generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86
include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42

View File

@ -20,6 +20,9 @@ namespace
jmp_buf jmpbuf; jmp_buf jmpbuf;
std::string msg; std::string msg;
}; };
long memory_limit{0};
bool throw_on_corrupt_data{true};
} // namespace } // namespace
static void static void
@ -32,38 +35,39 @@ error_handler(j_common_ptr cinfo)
longjmp(jerr->jmpbuf, 1); longjmp(jerr->jmpbuf, 1);
} }
Pl_DCT::Members::Members(size_t corrupt_data_limit) : Pl_DCT::Members::Members() :
action(a_decompress), action(a_decompress),
buf("DCT compressed image"), buf("DCT compressed image")
corrupt_data_limit(corrupt_data_limit)
{ {
} }
Pl_DCT::Members::Members( Pl_DCT::Members::Members(
JDIMENSION image_width, JDIMENSION image_width, JDIMENSION image_height, int components, J_COLOR_SPACE color_space) :
JDIMENSION image_height,
int components,
J_COLOR_SPACE color_space,
CompressConfig* config_callback) :
action(a_compress), action(a_compress),
buf("DCT uncompressed image"), buf("DCT uncompressed image"),
image_width(image_width), image_width(image_width),
image_height(image_height), image_height(image_height),
components(components), components(components),
color_space(color_space), color_space(color_space)
config_callback(config_callback)
{ {
} }
Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next) : Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next) :
Pl_DCT(identifier, next, 0) Pipeline(identifier, next),
m(new Members())
{ {
} }
Pl_DCT::Pl_DCT(char const* identifier, Pipeline* next, size_t corrupt_data_limit) : void
Pipeline(identifier, next), Pl_DCT::setMemoryLimit(long limit)
m(new Members(corrupt_data_limit))
{ {
memory_limit = limit;
}
// Set the file-scope throw_on_corrupt_data flag. The flag is shared by every Pl_DCT instance
// rather than stored per object. When it is true, decompress() throws std::runtime_error as soon
// as jpeglib has reported any corrupt-data warning; when false, decompression continues despite
// such warnings.
void
Pl_DCT::setThrowOnCorruptData(bool treat_as_error)
{
throw_on_corrupt_data = treat_as_error;
}
Pl_DCT::Pl_DCT( Pl_DCT::Pl_DCT(
@ -72,10 +76,9 @@ Pl_DCT::Pl_DCT(
JDIMENSION image_width, JDIMENSION image_width,
JDIMENSION image_height, JDIMENSION image_height,
int components, int components,
J_COLOR_SPACE color_space, J_COLOR_SPACE color_space) :
CompressConfig* config_callback) :
Pipeline(identifier, next), Pipeline(identifier, next),
m(new Members(image_width, image_height, components, color_space, config_callback)) m(new Members(image_width, image_height, components, color_space))
{ {
} }
@ -273,9 +276,6 @@ Pl_DCT::compress(void* cinfo_p, Buffer* b)
cinfo->input_components = m->components; cinfo->input_components = m->components;
cinfo->in_color_space = m->color_space; cinfo->in_color_space = m->color_space;
jpeg_set_defaults(cinfo); jpeg_set_defaults(cinfo);
if (m->config_callback) {
m->config_callback->apply(cinfo);
}
jpeg_start_compress(cinfo, TRUE); jpeg_start_compress(cinfo, TRUE);
@ -312,36 +312,32 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b)
# pragma GCC diagnostic pop # pragma GCC diagnostic pop
#endif #endif
#ifdef QPDF_OSS_FUZZ if (memory_limit > 0) {
// Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during cinfo->mem->max_memory_to_use = memory_limit;
// fuzzing is due to corrupt JPEG data which sometimes cannot be detected before }
// jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally
// occur legitimately and therefore must be allowed during normal operations.
cinfo->mem->max_memory_to_use = 1'000'000'000;
// For some corrupt files the memory used internally by libjpeg stays within the above limits
// even though the size written to the next pipeline is significantly larger.
m->corrupt_data_limit = 10'000'000;
#endif
jpeg_buffer_src(cinfo, b); jpeg_buffer_src(cinfo, b);
(void)jpeg_read_header(cinfo, TRUE); (void)jpeg_read_header(cinfo, TRUE);
if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) {
throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt");
}
(void)jpeg_calc_output_dimensions(cinfo); (void)jpeg_calc_output_dimensions(cinfo);
unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components); unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components);
if (cinfo->err->num_warnings == 0 || m->corrupt_data_limit == 0 || // err->num_warnings is the number of corrupt data warnings emitted.
(width * QIntC::to_uint(cinfo->output_height)) < m->corrupt_data_limit) { // err->msg_code could also be the code of an informational message.
// err->num_warnings is the number of corrupt data warnings emitted. JSAMPARRAY buffer =
// err->msg_code could also be the code of an informational message. (*cinfo->mem->alloc_sarray)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);
JSAMPARRAY buffer = (*cinfo->mem->alloc_sarray)(
reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);
(void)jpeg_start_decompress(cinfo); (void)jpeg_start_decompress(cinfo);
while (cinfo->output_scanline < cinfo->output_height) { while (cinfo->output_scanline < cinfo->output_height &&
(void)jpeg_read_scanlines(cinfo, buffer, 1); (!throw_on_corrupt_data || cinfo->err->num_warnings == 0)) {
getNext()->write(buffer[0], width * sizeof(buffer[0][0])); (void)jpeg_read_scanlines(cinfo, buffer, 1);
} getNext()->write(buffer[0], width * sizeof(buffer[0][0]));
(void)jpeg_finish_decompress(cinfo); }
} else { (void)jpeg_finish_decompress(cinfo);
*QPDFLogger::defaultLogger()->getError() << "corrupt JPEG data ignored" << "\n"; if (throw_on_corrupt_data && cinfo->err->num_warnings > 0) {
throw std::runtime_error("Pl_DCT::decompress: JPEG data is corrupt");
} }
getNext()->finish(); getNext()->finish();
} }

View File

@ -331,6 +331,12 @@ QPDF::setSuppressWarnings(bool val)
m->suppress_warnings = val; m->suppress_warnings = val;
} }
// Set the maximum number of warnings to output before further warnings are suppressed.
// The value is stored in m->max_warnings, which QPDF::warn consults; a value of 0 (the default)
// leaves the cap disabled because warn() only applies it when max_warnings > 0.
//
// This must not write to m->suppress_warnings: that member is a bool, so assigning any non-zero
// limit to it would silence every warning immediately while leaving max_warnings at 0.
void
QPDF::setMaxWarnings(int val)
{
    m->max_warnings = val;
}
void void
QPDF::setAttemptRecovery(bool val) QPDF::setAttemptRecovery(bool val)
{ {
@ -500,13 +506,11 @@ QPDF::warn(QPDFExc const& e)
{ {
m->warnings.push_back(e); m->warnings.push_back(e);
if (!m->suppress_warnings) { if (!m->suppress_warnings) {
#ifdef QPDF_OSS_FUZZ if (m->max_warnings > 0 && m->warnings.size() > 20) {
if (m->warnings.size() > 20) { *m->log->getWarn() << "WARNING: too many warnings - additional warnings suppressed\n";
*m->log->getWarn() << "WARNING: too many warnings - additional warnings surpressed\n";
m->suppress_warnings = true; m->suppress_warnings = true;
return; return;
} }
#endif
*m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n"; *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
} }
} }
@ -1934,6 +1938,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
continue; continue;
} }
if (num == obj_stream_number) { if (num == obj_stream_number) {
QTC::TC("qpdf", "QPDF ignore self-referential object stream");
warn(damagedPDF( warn(damagedPDF(
input, input,
m->last_object_description, m->last_object_description,

View File

@ -14,21 +14,6 @@ usage()
exit(2); exit(2);
} }
class Callback: public Pl_DCT::CompressConfig
{
public:
Callback() = default;
~Callback() override = default;
void apply(jpeg_compress_struct*) override;
bool called{false};
};
void
Callback::apply(jpeg_compress_struct*)
{
this->called = true;
}
int int
main(int argc, char* argv[]) main(int argc, char* argv[])
{ {
@ -66,21 +51,12 @@ main(int argc, char* argv[])
FILE* outfile = QUtil::safe_fopen(outfilename, "wb"); FILE* outfile = QUtil::safe_fopen(outfilename, "wb");
Pl_StdioFile out("stdout", outfile); Pl_StdioFile out("stdout", outfile);
unsigned char buf[100]; unsigned char buf[100];
bool done = false; Pl_DCT dct("dct", &out, width, height, components, cs);
Callback callback; while (size_t len = fread(buf, 1, sizeof(buf), infile)) {
Pl_DCT dct("dct", &out, width, height, components, cs, &callback); dct.write(buf, len);
while (!done) {
size_t len = fread(buf, 1, sizeof(buf), infile);
if (len <= 0) {
done = true;
} else {
dct.write(buf, len);
}
} }
dct.finish(); dct.finish();
if (!callback.called) {
std::cout << "Callback was not called" << std::endl;
}
fclose(infile); fclose(infile);
fclose(outfile); fclose(outfile);
return 0; return 0;

View File

@ -4,6 +4,7 @@ QPDF err wrong objid/generation 0
QPDF check objid 1 QPDF check objid 1
QPDF check generation 1 QPDF check generation 1
QPDF check obj 1 QPDF check obj 1
QPDF ignore self-referential object stream 0
QPDF hint table length indirect 0 QPDF hint table length indirect 0
QPDF hint table length direct 0 QPDF hint table length direct 0
QPDF P absent in lindict 0 QPDF P absent in lindict 0

View File

@ -16,7 +16,7 @@ cleanup();
my $td = new TestDriver('object-stream'); my $td = new TestDriver('object-stream');
my $n_tests = 7 + (36 * 4) + (12 * 2); my $n_tests = 9 + (36 * 4) + (12 * 2);
my $n_compare_pdfs = 36; my $n_compare_pdfs = 36;
for (my $n = 16; $n <= 19; ++$n) for (my $n = 16; $n <= 19; ++$n)
@ -107,5 +107,16 @@ $td->runtest("check file",
{$td->FILE => "a.pdf"}, {$td->FILE => "a.pdf"},
{$td->FILE => "recover-xref-stream-recovered.pdf"}); {$td->FILE => "recover-xref-stream-recovered.pdf"});
# Self-referential object stream
$td->runtest("self-referential object stream",
{$td->COMMAND => "qpdf --static-id --qdf" .
" object-stream-self-ref.pdf a.pdf"},
{$td->FILE => "object-stream-self-ref.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "object-stream-self-ref.out.pdf"});
cleanup(); cleanup();
$td->report(calc_ntests($n_tests, $n_compare_pdfs)); $td->report(calc_ntests($n_tests, $n_compare_pdfs));

View File

@ -0,0 +1,2 @@
WARNING: object-stream-self-ref.pdf object stream 1 (object 1 0, offset 2): object stream claims to contain itself
qpdf: operation succeeded with warnings; resulting file may have some problems

Binary file not shown.

Binary file not shown.