diff --git a/.idea/cmake.xml b/.idea/cmake.xml index f0a93aba..5762463a 100644 --- a/.idea/cmake.xml +++ b/.idea/cmake.xml @@ -2,7 +2,6 @@ - diff --git a/ChangeLog b/ChangeLog index e396a8f5..978c1bb4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,13 @@ 2024-09-20 Chao Li * Add C API qpdf_oh_free_buffer to release memory allocated by - stream data functions + stream data functions. + +2024-08-25 M Holger + + * Add new command-line arguments --remove-metadata and --remove-info + to exclude document metadata and information from the output PDF + file. Partially fixes #1145. 2024-08-06 M Holger diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt index 73d886c0..a4af9256 100644 --- a/fuzz/CMakeLists.txt +++ b/fuzz/CMakeLists.txt @@ -142,6 +142,10 @@ set(CORPUS_OTHER 70306b.fuzz 71624.fuzz 71689.fuzz + 99999a.fuzz + 99999b.fuzz + 99999c.fuzz + 99999d.fuzz ) set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus) diff --git a/fuzz/qpdf_extra/99999a.fuzz b/fuzz/qpdf_extra/99999a.fuzz new file mode 100644 index 00000000..026c7427 --- /dev/null +++ b/fuzz/qpdf_extra/99999a.fuzz @@ -0,0 +1,63 @@ +%PDF-1.5 +%€€€€ +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj +<< + /Count 6 Ri + 0K/ds [3 0 R] + /Type /Pages +>> +endobj +3 0 obj +<< + /Resources << + /Font << + /F1 5 0 R + >> + >> + /MediaBox [0 0 795 842] + /Parent 2 0 R + /Contents 4 0 R + /Type /Page +=> +endobj +4 0 obj +<<444444444444444444444444 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET +endstream +endobj +5 0 obj +<< + /Name /F1 + /BaseFont /Helvetica + /Type /Font + /Subtype /Type1 +>> +e„dobj +6 0 obj +<< /Length 6 0 R >> +stre444444444444444444444444444444<<>> +endobj +xref +0 8 +0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000130 00000 n +0000000269 00000 n +0000000362 00000 n +000000ÎËËÉßÏÏÏ00 n +0000000500 00000 n +trailer +<< + /Size 713115528178535 + /Root 1 0 R + /Info 7 0 R +>> +startxref +520 +%%EOF \ No newline at end of file diff --git a/fuzz/qpdf_extra/99999b.fuzz 
b/fuzz/qpdf_extra/99999b.fuzz new file mode 100644 index 00000000..288a6b5c Binary files /dev/null and b/fuzz/qpdf_extra/99999b.fuzz differ diff --git a/fuzz/qpdf_extra/99999c.fuzz b/fuzz/qpdf_extra/99999c.fuzz new file mode 100644 index 00000000..c856648f Binary files /dev/null and b/fuzz/qpdf_extra/99999c.fuzz differ diff --git a/fuzz/qpdf_extra/99999d.fuzz b/fuzz/qpdf_extra/99999d.fuzz new file mode 100644 index 00000000..4504412f Binary files /dev/null and b/fuzz/qpdf_extra/99999d.fuzz differ diff --git a/fuzz/qtest/fuzz.test b/fuzz/qtest/fuzz.test index 02dbc98a..df5318cf 100644 --- a/fuzz/qtest/fuzz.test +++ b/fuzz/qtest/fuzz.test @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; -my $n_qpdf_files = 79; # increment when adding new files +my $n_qpdf_files = 83; # increment when adding new files my @fuzzers = ( ['ascii85' => 1], diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index b26a9dcf..1c64dcb9 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -692,6 +692,8 @@ class QPDFJob bool optimize_images{false}; bool externalize_inline_images{false}; bool keep_inline_images{false}; + bool remove_info{false}; + bool remove_metadata{false}; bool remove_page_labels{false}; size_t oi_min_width{DEFAULT_OI_MIN_WIDTH}; size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT}; diff --git a/include/qpdf/auto_job_c_main.hh b/include/qpdf/auto_job_c_main.hh index 6c661404..bb584255 100644 --- a/include/qpdf/auto_job_c_main.hh +++ b/include/qpdf/auto_job_c_main.hh @@ -32,6 +32,8 @@ QPDF_DLL Config* progress(); QPDF_DLL Config* qdf(); QPDF_DLL Config* rawStreamData(); QPDF_DLL Config* recompressFlate(); +QPDF_DLL Config* removeInfo(); +QPDF_DLL Config* removeMetadata(); QPDF_DLL Config* removePageLabels(); QPDF_DLL Config* reportMemoryUsage(); QPDF_DLL Config* requiresPassword(); diff --git a/job.sums b/job.sums index 4f53c923..0ad8c664 100644 --- a/job.sums +++ b/job.sums @@ 
-4,17 +4,17 @@ generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5 -include/qpdf/auto_job_c_main.hh dbfc221d1533120d1aa9c361d8d2483dea5fcb1c0fd95144d98d305e64ed32a6 +include/qpdf/auto_job_c_main.hh 84f463237235b2c095b747a4f5dd00f109ee596a1c207b944efb296c0c568cae include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506 include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62 -job.yml 53cad86659db6722e8f415aacb19fc51ab81bb1589c3cb8f65ec893bb4bf5566 +job.yml 31935064eca625af7657b23f2f12c614d14751ec0b12702482b1768a04905d22 libqpdf/qpdf/auto_job_decl.hh 20d6affe1e260f5a1af4f1d82a820b933835440ff03020e877382da2e8dac6c6 -libqpdf/qpdf/auto_job_help.hh 74b2982771720927ce7be8f1690720ec65cb9989620493a0c154f50ba2c254e4 -libqpdf/qpdf/auto_job_init.hh 19d1da7c4c0c635bd1c5db8d5f17df8edad3442f8eba006adb075cec295fa158 +libqpdf/qpdf/auto_job_help.hh 1e9181f4729a22ff91ab54e2b4a82e6af0c57a8327efb222a4196adb609c1ade +libqpdf/qpdf/auto_job_init.hh e2a6bb87870c5522a01b15461c9fe909e360f5c7fed06e41acf13a125bd1d03e libqpdf/qpdf/auto_job_json_decl.hh 843892c8e8652a86b7eb573893ef24050b7f36fe313f7251874be5cd4cdbe3fd -libqpdf/qpdf/auto_job_json_init.hh a87256c082427ec0318223762472970b2eced535c0c8b0288d45c8cdaaf62f74 -libqpdf/qpdf/auto_job_schema.hh 5dac568dff39614e161a0af59a0f328f1e28edf69b96f08bb76fd592d51bb053 +libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb35e421d955fff481ba7d +libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 
-manual/cli.rst 94057baba9ecffb4ce19ae61c8fa507ef07209c280fccae97b283c3dfce834e0 -manual/qpdf.1 0ec05f1392c160165cdf6adada4de84c0de75bd2fb5762caff4e1372aacada4c +manual/cli.rst b7f37995f13346518ae7b2ea84836fba13b4da4e1f55be5f2a861f20dea0ccdb +manual/qpdf.1 59c26635017cba5d142ec3fcc4aebcb91e0cf1355d51365db84f48b21585ad8d manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b diff --git a/job.yml b/job.yml index 6f5c6fb7..13b61a4b 100644 --- a/job.yml +++ b/job.yml @@ -130,6 +130,8 @@ options: - qdf - raw-stream-data - recompress-flate + - remove-info + - remove-metadata - remove-page-labels - replace-input - report-memory-usage @@ -440,6 +442,8 @@ json: - Pages.file: Pages.password: range: + remove-info: + remove-metadata: remove-page-labels: report-memory-usage: rotate: diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index f263551a..5a38ec94 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -832,10 +832,6 @@ std::vector QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) { std::vector result; - qpdf_offset_t f1 = 0; - int f2 = 0; - char type = '\0'; - file->seek(start, SEEK_SET); while (true) { @@ -844,7 +840,7 @@ QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) auto [obj, num, offset] = result.emplace_back(subsection(line)); file->seek(offset, SEEK_SET); for (qpdf_offset_t i = obj; i - num < obj; ++i) { - if (!read_entry(f1, f2, type)) { + if (!std::get<0>(read_entry())) { QTC::TC("qpdf", "QPDF invalid xref entry"); throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")"); } @@ -890,9 +886,13 @@ QPDF::Xref_table::subsections(std::string& line) } } -bool -QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) +// Returns (success, f1, f2, type). +std::tuple +QPDF::Xref_table::read_bad_entry() { + qpdf_offset_t f1{0}; + int f2{0}; + char type{'\0'}; // Reposition after initial read attempt and reread. 
file->seek(file->getLastOffset(), SEEK_SET); auto line = file->readLine(30); @@ -910,7 +910,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) } // Require digit if (!QUtil::is_digit(*p)) { - return false; + return {false, 0, 0, '\0'}; } // Gather digits std::string f1_str; @@ -919,7 +919,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) } // Require space if (!QUtil::is_space(*p)) { - return false; + return {false, 0, 0, '\0'}; } if (QUtil::is_space(*(p + 1))) { QTC::TC("qpdf", "QPDF ignore first extra space in xref entry"); @@ -931,7 +931,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) } // Require digit if (!QUtil::is_digit(*p)) { - return false; + return {false, 0, 0, '\0'}; } // Gather digits std::string f2_str; @@ -940,7 +940,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) } // Require space if (!QUtil::is_space(*p)) { - return false; + return {false, 0, 0, '\0'}; } if (QUtil::is_space(*(p + 1))) { QTC::TC("qpdf", "QPDF ignore second extra space in xref entry"); @@ -953,7 +953,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) if ((*p == 'f') || (*p == 'n')) { type = *p; } else { - return false; + return {false, 0, 0, '\0'}; } if ((f1_str.length() != 10) || (f2_str.length() != 5)) { QTC::TC("qpdf", "QPDF ignore length error xref entry"); @@ -967,18 +967,23 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) f1 = QUtil::string_to_ll(f1_str.c_str()); f2 = QUtil::string_to_int(f2_str.c_str()); - return true; + return {true, f1, f2, type}; } // Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return -// result. -bool -QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type) +// result. Returns (success, f1, f2, type). 
+std::tuple +QPDF::Xref_table::read_entry() { + qpdf_offset_t f1{0}; + int f2{0}; + char type{'\0'}; std::array line; + f1 = 0; + f2 = 0; if (file->read(line.data(), 20) != 20) { // C++20: [[unlikely]] - return false; + return {false, 0, 0, '\0'}; } line[20] = '\0'; char const* p = line.data(); @@ -1002,7 +1007,7 @@ QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type) if (!QUtil::is_space(*p++)) { // Entry doesn't start with space or digit. // C++20: [[unlikely]] - return false; + return {false, 0, 0, '\0'}; } // Gather digits. NB No risk of overflow as 99'999 < max int. while (*p == '0') { @@ -1019,10 +1024,10 @@ QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type) // No test for valid line[19]. if (*(++p) && *(++p) && (*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) { // C++20: [[likely]] - return true; + return {true, f1, f2, type}; } } - return read_bad_entry(f1, f2, type); + return read_bad_entry(); } // Read a single cross-reference table section and associated trailer. 
@@ -1052,7 +1057,10 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) QTC::TC("qpdf", "QPDF trailer size not integer"); throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer"); } - + if (sz >= static_cast(max_id_)) { + QTC::TC("qpdf", "QPDF trailer size impossibly large"); + throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is impossibly large"); + } table.resize(sz); } @@ -1064,10 +1072,8 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) first_item_offset_ = file->tell(); } // For xref_table, these will always be small enough to be ints - qpdf_offset_t f1 = 0; - int f2 = 0; - char type = '\0'; - if (!read_entry(f1, f2, type)) { + auto [success, f1, f2, type] = read_entry(); + if (!success) { throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")"); } if (type == 'f') { @@ -1585,8 +1591,7 @@ QPDF::Xref_table::read_trailer() { qpdf_offset_t offset = file->tell(); bool empty = false; - auto object = - QPDFParser(*file, "trailer", tokenizer, nullptr, &qpdf, true).parse(empty, false); + auto object = QPDFParser(*file, "trailer", tokenizer, nullptr, &qpdf, true).parse(empty, false); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. 
diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 2433f242..ab57dd1c 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -471,6 +471,21 @@ QPDFJob::createQPDF() } handleUnderOverlay(pdf); handleTransformations(pdf); + if (m->remove_info) { + auto trailer = pdf.getTrailer(); + auto mod_date = trailer.getKey("/Info").getKeyIfDict("/ModDate"); + if (mod_date.isNull()) { + trailer.removeKey("/Info"); + } else { + auto info = trailer.replaceKeyAndGetNew( + "/Info", pdf.makeIndirectObject(QPDFObjectHandle::newDictionary())); + info.replaceKey("/ModDate", mod_date); + } + pdf.getRoot().removeKey("/Metadata"); + } + if (m->remove_metadata) { + pdf.getRoot().removeKey("/Metadata"); + } for (auto& foreign: page_heap) { if (foreign->anyWarnings()) { diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index 107abf7c..cf6eadd8 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -510,6 +510,20 @@ QPDFJob::Config::removeAttachment(std::string const& parameter) return this; } +QPDFJob::Config* +QPDFJob::Config::removeInfo() +{ + o.m->remove_info = true; + return this; +} + +QPDFJob::Config* +QPDFJob::Config::removeMetadata() +{ + o.m->remove_metadata = true; + return this; +} + QPDFJob::Config* QPDFJob::Config::removePageLabels() { diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index a9d3e04b..3f176c5d 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -469,13 +469,14 @@ QPDFParser::fixMissingKeys() bool QPDFParser::tooManyBadTokens() { - if (good_count <= 4) { - if (++bad_count > 5) { - warn("too many errors; giving up on reading object"); - return true; - } - } else { + if (--max_bad_count > 0 && good_count > 4) { + good_count = 0; bad_count = 1; + return false; + } + if (++bad_count > 5) { + warn("too many errors; giving up on reading object"); + return true; } good_count = 0; return false; diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index d48abd3e..7f7c6d9e 100644 --- 
a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -47,7 +47,7 @@ QPDFWordTokenFinder::check() // Find a word token matching the given string, preceded by a delimiter, and followed by a // delimiter or EOF. QPDFTokenizer tokenizer; - QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); + QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true, str.size() + 2); qpdf_offset_t pos = is.tell(); if (!(t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str))) { QTC::TC("qpdf", "QPDFTokenizer finder found wrong word"); diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index 1eecf975..7b92da25 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -83,9 +83,11 @@ class QPDFParser std::vector stack; StackFrame* frame; // Number of recent bad tokens. - int bad_count = 0; + int bad_count{0}; + // Number of bad tokens (remaining) before giving up. + int max_bad_count{15}; // Number of good tokens since last bad token. Irrelevant if bad_count == 0. - int good_count = 0; + int good_count{0}; // Start offset including any leading whitespace. qpdf_offset_t start; // Number of successive integer tokens. 
diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index b055763a..fa14fdc3 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -292,8 +292,8 @@ class QPDF::Xref_table std::vector subsections(std::string& line); std::vector bad_subsections(std::string& line, qpdf_offset_t offset); Subsection subsection(std::string const& line); - bool read_entry(qpdf_offset_t& f1, int& f2, char& type); - bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type); + std::tuple read_entry(); + std::tuple read_bad_entry(); // Methods to parse streams qpdf_offset_t read_stream(qpdf_offset_t offset); diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh index 79c32ceb..a7207a63 100644 --- a/libqpdf/qpdf/auto_job_help.hh +++ b/libqpdf/qpdf/auto_job_help.hh @@ -414,6 +414,13 @@ Don't optimize images whose area in pixels is below the specified value. )"); ap.addOptionHelp("--keep-inline-images", "modification", "exclude inline images from optimization", R"(Prevent inline images from being considered by --optimize-images. )"); +ap.addOptionHelp("--remove-info", "modification", "remove file information", R"(Exclude file information (except modification date) from the output file. +)"); +ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Exclude metadata from the output file. +)"); +} +static void add_help_5(QPDFArgParser& ap) +{ ap.addOptionHelp("--remove-page-labels", "modification", "remove explicit page numbers", R"(Exclude page labels (explicit page numbers) from the output file. )"); ap.addOptionHelp("--set-page-labels", "modification", "number pages for the entire document", R"(--set-page-labels label-spec ... -- @@ -460,9 +467,6 @@ iv, then the remaining pages with Arabic numerals starting with 1 and continuing sequentially until the end of the document. For additional examples, please consult the manual. 
)"); -} -static void add_help_5(QPDFArgParser& ap) -{ ap.addHelpTopic("encryption", "create encrypted files", R"(Create encrypted files. Usage: --encrypt \ @@ -641,6 +645,9 @@ ap.addOptionHelp("--force-R5", "encryption", "use unsupported R=5 encryption", R algorithm that existed only in Acrobat version IX. This option should not be used except for compatibility testing. )"); +} +static void add_help_6(QPDFArgParser& ap) +{ ap.addHelpTopic("page-selection", "select pages from one or more files", R"(Use the --pages option to select pages from multiple files. Usage: qpdf in.pdf --pages --file=input-file \ @@ -725,9 +732,6 @@ appearance: first underlays, then the original page, then overlays. Run qpdf --help=page-ranges for help with page ranges. )"); -} -static void add_help_6(QPDFArgParser& ap) -{ ap.addOptionHelp("--to", "overlay-underlay", "destination pages for underlay/overlay", R"(--to=page-range Specify the range of pages in the primary output to apply @@ -829,6 +833,9 @@ its terminating "--". To copy attachments from a password-protected file, use the --password option after the file name. )"); +} +static void add_help_7(QPDFArgParser& ap) +{ ap.addOptionHelp("--prefix", "copy-attachments", "key prefix for copying attachments", R"(--prefix=prefix Prepend a prefix to each key; may be needed if there are @@ -839,9 +846,6 @@ ap.addHelpTopic("inspection", "inspect PDF files", R"(These options provide tool the options in this section are specified, no output file may be given. )"); -} -static void add_help_7(QPDFArgParser& ap) -{ ap.addOptionHelp("--is-encrypted", "inspection", "silently test whether a file is encrypted", R"(Silently exit with a code indicating the file's encryption status: 0: the file is encrypted @@ -919,6 +923,9 @@ output as binary data. Get the key with --list-attachments. ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the JSON chapter in the qpdf manual for details. 
)"); +} +static void add_help_8(QPDFArgParser& ap) +{ ap.addOptionHelp("--json", "json", "show file in JSON format", R"(--json[=version] Generate a JSON representation of the file. This is described in @@ -932,9 +939,6 @@ Describe the format of the JSON output by writing to standard output a JSON object with the same keys and with values containing descriptive text. )"); -} -static void add_help_8(QPDFArgParser& ap) -{ ap.addOptionHelp("--json-key", "json", "limit which keys are in JSON output", R"(--json-key=key This option is repeatable. If given, only the specified diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh index ac42ea6a..5db11318 100644 --- a/libqpdf/qpdf/auto_job_init.hh +++ b/libqpdf/qpdf/auto_job_init.hh @@ -68,6 +68,8 @@ this->ap.addBare("progress", [this](){c_main->progress();}); this->ap.addBare("qdf", [this](){c_main->qdf();}); this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();}); this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); +this->ap.addBare("remove-info", [this](){c_main->removeInfo();}); +this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();}); this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); this->ap.addBare("report-memory-usage", [this](){c_main->reportMemoryUsage();}); diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh index ee4c7421..fa4c4089 100644 --- a/libqpdf/qpdf/auto_job_json_init.hh +++ b/libqpdf/qpdf/auto_job_json_init.hh @@ -412,6 +412,12 @@ addParameter([this](std::string const& p) { c_pages->range(p); }); popHandler(); // key: range popHandler(); // array: .pages[] popHandler(); // key: pages +pushKey("removeInfo"); +addBare([this]() { c_main->removeInfo(); }); +popHandler(); // key: removeInfo +pushKey("removeMetadata"); +addBare([this]() { c_main->removeMetadata(); }); +popHandler(); // key: removeMetadata 
pushKey("removePageLabels"); addBare([this]() { c_main->removePageLabels(); }); popHandler(); // key: removePageLabels diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh index df75a5e8..6854fd8c 100644 --- a/libqpdf/qpdf/auto_job_schema.hh +++ b/libqpdf/qpdf/auto_job_schema.hh @@ -145,6 +145,8 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ "range": "page range" } ], + "removeInfo": "remove file information", + "removeMetadata": "remove metadata", "removePageLabels": "remove explicit page numbers", "reportMemoryUsage": "best effort report of memory usage", "rotate": "rotate pages", diff --git a/manual/cli.rst b/manual/cli.rst index c11c2bf3..fee79454 100644 --- a/manual/cli.rst +++ b/manual/cli.rst @@ -1773,6 +1773,27 @@ Related Options Prevent inline images from being included in image optimization done by :qpdf:ref:`--optimize-images`. +.. qpdf:option:: --remove-info + + .. help: remove file information + + Exclude file information (except modification date) from the output file. + + Exclude file information (except modification date) from the output file by + omitting all entries (except ``/ModDate``) from the ``/Info`` dictionary in + the document trailer. + See also :qpdf:ref:`--remove-metadata`. + +.. qpdf:option:: --remove-metadata + + .. help: remove metadata + + Exclude metadata from the output file. + + Exclude metadata from the output file by omitting the ``/Metadata`` + dictionary in the document catalog. + See also :qpdf:ref:`--remove-info`. + .. qpdf:option:: --remove-page-labels .. help: remove explicit page numbers diff --git a/manual/qpdf.1 b/manual/qpdf.1 index ec7b851b..95c23be6 100644 --- a/manual/qpdf.1 +++ b/manual/qpdf.1 @@ -530,6 +530,12 @@ Don't optimize images whose area in pixels is below the specified value. .B --keep-inline-images \-\- exclude inline images from optimization Prevent inline images from being considered by --optimize-images. 
.TP +.B --remove-info \-\- remove file information +Exclude file information (except modification date) from the output file. +.TP +.B --remove-metadata \-\- remove metadata +Exclude metadata from the output file. +.TP .B --remove-page-labels \-\- remove explicit page numbers Exclude page labels (explicit page numbers) from the output file. .TP diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 09593c9e..6f8d5566 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -55,6 +55,7 @@ QPDF invalid xref entry 0 QPDF missing trailer 0 QPDF trailer lacks size 0 QPDF trailer size not integer 0 +QPDF trailer size impossibly large 0 QPDF trailer prev not integer 0 QPDFParser bad brace 0 QPDFParser bad brace in parseRemainder 0 diff --git a/qpdf/qtest/merge-and-split.test b/qpdf/qtest/merge-and-split.test index 33935e71..db18e87e 100644 --- a/qpdf/qtest/merge-and-split.test +++ b/qpdf/qtest/merge-and-split.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('merge-and-split'); -my $n_tests = 28; +my $n_tests = 34; # Select pages from the same file multiple times including selecting # twice from an encrypted file and specifying the password only the @@ -103,6 +103,39 @@ $td->runtest("check output", {$td->COMMAND => "qpdf-test-compare a.pdf remove-labels.pdf"}, {$td->FILE => "remove-labels.pdf", $td->EXIT_STATUS => 0}); +$td->runtest("remove metadata", + {$td->COMMAND => + "qpdf metadata-crypt-filter.pdf a.pdf" . + " --remove-metadata" . + " --decrypt" . + " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-metadata.pdf"}); + +$td->runtest("remove info (with moddate)", + {$td->COMMAND => + "qpdf remove-metadata.pdf a.pdf" . + " --remove-info" . + " --decrypt" . 
+ " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-info.pdf"}); + +$td->runtest("remove info (without moddate)", + {$td->COMMAND => + "qpdf remove-metadata-no-moddate.pdf a.pdf" . + " --remove-info" . + " --decrypt" . + " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-info-no-moddate.pdf"}); + $td->runtest("split with shared resources", {$td->COMMAND => "qpdf --qdf --static-id" . diff --git a/qpdf/qtest/qpdf/issue-fuzz.out b/qpdf/qtest/qpdf/issue-fuzz.out new file mode 100644 index 00000000..456485b2 --- /dev/null +++ b/qpdf/qtest/qpdf/issue-fuzz.out @@ -0,0 +1,19 @@ +WARNING: issue-fuzz.pdf: can't find PDF header +WARNING: issue-fuzz.pdf (xref table, offset 19): accepting invalid xref table entry +WARNING: issue-fuzz.pdf (trailer, offset 36): unknown token while reading object; treating as string +WARNING: issue-fuzz.pdf (trailer, offset 53): unexpected > +WARNING: issue-fuzz.pdf (trailer, offset 54): unknown token while reading object; treating as string +WARNING: issue-fuzz.pdf (trailer, offset 58): unknown token while reading object; treating as string +WARNING: issue-fuzz.pdf (trailer, offset 72): unknown token while reading object; treating as string +WARNING: issue-fuzz.pdf (trailer, offset 36): dictionary ended prematurely; using null as value for last key +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake1 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake2 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake3 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake4 +WARNING: issue-fuzz.pdf 
(trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake5 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake6 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake7 +WARNING: issue-fuzz.pdf: file is damaged +WARNING: issue-fuzz.pdf (trailer, offset 32): /Size key in trailer dictionary is impossibly large +WARNING: issue-fuzz.pdf: Attempting to reconstruct cross-reference table +qpdf: issue-fuzz.pdf: unable to find /Root dictionary diff --git a/qpdf/qtest/qpdf/issue-fuzz.pdf b/qpdf/qtest/qpdf/issue-fuzz.pdf new file mode 100644 index 00000000..288a6b5c Binary files /dev/null and b/qpdf/qtest/qpdf/issue-fuzz.pdf differ diff --git a/qpdf/qtest/qpdf/remove-info-no-moddate.pdf b/qpdf/qtest/qpdf/remove-info-no-moddate.pdf new file mode 100644 index 00000000..2fc0302c Binary files /dev/null and b/qpdf/qtest/qpdf/remove-info-no-moddate.pdf differ diff --git a/qpdf/qtest/qpdf/remove-info.pdf b/qpdf/qtest/qpdf/remove-info.pdf new file mode 100644 index 00000000..7b657ed2 Binary files /dev/null and b/qpdf/qtest/qpdf/remove-info.pdf differ diff --git a/qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf b/qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf new file mode 100644 index 00000000..797a099e Binary files /dev/null and b/qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf differ diff --git a/qpdf/qtest/qpdf/remove-metadata.pdf b/qpdf/qtest/qpdf/remove-metadata.pdf new file mode 100644 index 00000000..e42bf51a Binary files /dev/null and b/qpdf/qtest/qpdf/remove-metadata.pdf differ diff --git a/qpdf/qtest/specific-bugs.test b/qpdf/qtest/specific-bugs.test index 99a7e80b..428471bb 100644 --- a/qpdf/qtest/specific-bugs.test +++ b/qpdf/qtest/specific-bugs.test @@ -38,6 +38,7 @@ my @bug_tests = ( ["263", "empty xref stream", 2], ["335a", "ozz-fuzz-12152", 2], ["335b", "ozz-fuzz-14845", 2], + ["fuzz", "impossibly 
large trailer /Size"], # ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"], # When adding to this list, consider adding to CORPUS_FROM_TEST in # fuzz/CMakeLists.txt and updating the count in