diff --git a/ChangeLog b/ChangeLog index 6968781c..3b646eea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2024-08-25 M Holger + + * Add new command-line arguments --remove-metadata and --remove-info + to exclude document metadata and information from the output PDF + file. Partially fixes #1145. + 2024-08-06 M Holger * Bug fix: when writing real numbers as JSON ensure that they don't diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index b26a9dcf..1c64dcb9 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -692,6 +692,8 @@ class QPDFJob bool optimize_images{false}; bool externalize_inline_images{false}; bool keep_inline_images{false}; + bool remove_info{false}; + bool remove_metadata{false}; bool remove_page_labels{false}; size_t oi_min_width{DEFAULT_OI_MIN_WIDTH}; size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT}; diff --git a/include/qpdf/auto_job_c_main.hh b/include/qpdf/auto_job_c_main.hh index 6c661404..bb584255 100644 --- a/include/qpdf/auto_job_c_main.hh +++ b/include/qpdf/auto_job_c_main.hh @@ -32,6 +32,8 @@ QPDF_DLL Config* progress(); QPDF_DLL Config* qdf(); QPDF_DLL Config* rawStreamData(); QPDF_DLL Config* recompressFlate(); +QPDF_DLL Config* removeInfo(); +QPDF_DLL Config* removeMetadata(); QPDF_DLL Config* removePageLabels(); QPDF_DLL Config* reportMemoryUsage(); QPDF_DLL Config* requiresPassword(); diff --git a/job.sums b/job.sums index 4f53c923..0ad8c664 100644 --- a/job.sums +++ b/job.sums @@ -4,17 +4,17 @@ generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5 -include/qpdf/auto_job_c_main.hh dbfc221d1533120d1aa9c361d8d2483dea5fcb1c0fd95144d98d305e64ed32a6 +include/qpdf/auto_job_c_main.hh 
84f463237235b2c095b747a4f5dd00f109ee596a1c207b944efb296c0c568cae include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506 include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62 -job.yml 53cad86659db6722e8f415aacb19fc51ab81bb1589c3cb8f65ec893bb4bf5566 +job.yml 31935064eca625af7657b23f2f12c614d14751ec0b12702482b1768a04905d22 libqpdf/qpdf/auto_job_decl.hh 20d6affe1e260f5a1af4f1d82a820b933835440ff03020e877382da2e8dac6c6 -libqpdf/qpdf/auto_job_help.hh 74b2982771720927ce7be8f1690720ec65cb9989620493a0c154f50ba2c254e4 -libqpdf/qpdf/auto_job_init.hh 19d1da7c4c0c635bd1c5db8d5f17df8edad3442f8eba006adb075cec295fa158 +libqpdf/qpdf/auto_job_help.hh 1e9181f4729a22ff91ab54e2b4a82e6af0c57a8327efb222a4196adb609c1ade +libqpdf/qpdf/auto_job_init.hh e2a6bb87870c5522a01b15461c9fe909e360f5c7fed06e41acf13a125bd1d03e libqpdf/qpdf/auto_job_json_decl.hh 843892c8e8652a86b7eb573893ef24050b7f36fe313f7251874be5cd4cdbe3fd -libqpdf/qpdf/auto_job_json_init.hh a87256c082427ec0318223762472970b2eced535c0c8b0288d45c8cdaaf62f74 -libqpdf/qpdf/auto_job_schema.hh 5dac568dff39614e161a0af59a0f328f1e28edf69b96f08bb76fd592d51bb053 +libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb35e421d955fff481ba7d +libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 -manual/cli.rst 94057baba9ecffb4ce19ae61c8fa507ef07209c280fccae97b283c3dfce834e0 -manual/qpdf.1 0ec05f1392c160165cdf6adada4de84c0de75bd2fb5762caff4e1372aacada4c +manual/cli.rst b7f37995f13346518ae7b2ea84836fba13b4da4e1f55be5f2a861f20dea0ccdb +manual/qpdf.1 59c26635017cba5d142ec3fcc4aebcb91e0cf1355d51365db84f48b21585ad8d manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b diff --git a/job.yml b/job.yml index 6f5c6fb7..13b61a4b 100644 --- a/job.yml +++ b/job.yml @@ -130,6 +130,8 @@ options: - qdf - 
raw-stream-data - recompress-flate + - remove-info + - remove-metadata - remove-page-labels - replace-input - report-memory-usage @@ -440,6 +442,8 @@ json: - Pages.file: Pages.password: range: + remove-info: + remove-metadata: remove-page-labels: report-memory-usage: rotate: diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 2433f242..ab57dd1c 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -471,6 +471,21 @@ QPDFJob::createQPDF() } handleUnderOverlay(pdf); handleTransformations(pdf); + if (m->remove_info) { + auto trailer = pdf.getTrailer(); + auto mod_date = trailer.getKey("/Info").getKeyIfDict("/ModDate"); + if (mod_date.isNull()) { + trailer.removeKey("/Info"); + } else { + auto info = trailer.replaceKeyAndGetNew( + "/Info", pdf.makeIndirectObject(QPDFObjectHandle::newDictionary())); + info.replaceKey("/ModDate", mod_date); + } + pdf.getRoot().removeKey("/Metadata"); + } + if (m->remove_metadata) { + pdf.getRoot().removeKey("/Metadata"); + } for (auto& foreign: page_heap) { if (foreign->anyWarnings()) { diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index 107abf7c..cf6eadd8 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -510,6 +510,20 @@ QPDFJob::Config::removeAttachment(std::string const& parameter) return this; } +QPDFJob::Config* +QPDFJob::Config::removeInfo() +{ + o.m->remove_info = true; + return this; +} + +QPDFJob::Config* +QPDFJob::Config::removeMetadata() +{ + o.m->remove_metadata = true; + return this; +} + QPDFJob::Config* QPDFJob::Config::removePageLabels() { diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh index 79c32ceb..a7207a63 100644 --- a/libqpdf/qpdf/auto_job_help.hh +++ b/libqpdf/qpdf/auto_job_help.hh @@ -414,6 +414,13 @@ Don't optimize images whose area in pixels is below the specified value. 
)"); ap.addOptionHelp("--keep-inline-images", "modification", "exclude inline images from optimization", R"(Prevent inline images from being considered by --optimize-images. )"); +ap.addOptionHelp("--remove-info", "modification", "remove file information", R"(Exclude file information (except modification date) from the output file. +)"); +ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Exclude metadata from the output file. +)"); +} +static void add_help_5(QPDFArgParser& ap) +{ ap.addOptionHelp("--remove-page-labels", "modification", "remove explicit page numbers", R"(Exclude page labels (explicit page numbers) from the output file. )"); ap.addOptionHelp("--set-page-labels", "modification", "number pages for the entire document", R"(--set-page-labels label-spec ... -- @@ -460,9 +467,6 @@ iv, then the remaining pages with Arabic numerals starting with 1 and continuing sequentially until the end of the document. For additional examples, please consult the manual. )"); -} -static void add_help_5(QPDFArgParser& ap) -{ ap.addHelpTopic("encryption", "create encrypted files", R"(Create encrypted files. Usage: --encrypt \ @@ -641,6 +645,9 @@ ap.addOptionHelp("--force-R5", "encryption", "use unsupported R=5 encryption", R algorithm that existed only in Acrobat version IX. This option should not be used except for compatibility testing. )"); +} +static void add_help_6(QPDFArgParser& ap) +{ ap.addHelpTopic("page-selection", "select pages from one or more files", R"(Use the --pages option to select pages from multiple files. Usage: qpdf in.pdf --pages --file=input-file \ @@ -725,9 +732,6 @@ appearance: first underlays, then the original page, then overlays. Run qpdf --help=page-ranges for help with page ranges. 
)"); -} -static void add_help_6(QPDFArgParser& ap) -{ ap.addOptionHelp("--to", "overlay-underlay", "destination pages for underlay/overlay", R"(--to=page-range Specify the range of pages in the primary output to apply @@ -829,6 +833,9 @@ its terminating "--". To copy attachments from a password-protected file, use the --password option after the file name. )"); +} +static void add_help_7(QPDFArgParser& ap) +{ ap.addOptionHelp("--prefix", "copy-attachments", "key prefix for copying attachments", R"(--prefix=prefix Prepend a prefix to each key; may be needed if there are @@ -839,9 +846,6 @@ ap.addHelpTopic("inspection", "inspect PDF files", R"(These options provide tool the options in this section are specified, no output file may be given. )"); -} -static void add_help_7(QPDFArgParser& ap) -{ ap.addOptionHelp("--is-encrypted", "inspection", "silently test whether a file is encrypted", R"(Silently exit with a code indicating the file's encryption status: 0: the file is encrypted @@ -919,6 +923,9 @@ output as binary data. Get the key with --list-attachments. ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the JSON chapter in the qpdf manual for details. )"); +} +static void add_help_8(QPDFArgParser& ap) +{ ap.addOptionHelp("--json", "json", "show file in JSON format", R"(--json[=version] Generate a JSON representation of the file. This is described in @@ -932,9 +939,6 @@ Describe the format of the JSON output by writing to standard output a JSON object with the same keys and with values containing descriptive text. )"); -} -static void add_help_8(QPDFArgParser& ap) -{ ap.addOptionHelp("--json-key", "json", "limit which keys are in JSON output", R"(--json-key=key This option is repeatable. 
If given, only the specified diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh index ac42ea6a..5db11318 100644 --- a/libqpdf/qpdf/auto_job_init.hh +++ b/libqpdf/qpdf/auto_job_init.hh @@ -68,6 +68,8 @@ this->ap.addBare("progress", [this](){c_main->progress();}); this->ap.addBare("qdf", [this](){c_main->qdf();}); this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();}); this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); +this->ap.addBare("remove-info", [this](){c_main->removeInfo();}); +this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();}); this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); this->ap.addBare("report-memory-usage", [this](){c_main->reportMemoryUsage();}); diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh index ee4c7421..fa4c4089 100644 --- a/libqpdf/qpdf/auto_job_json_init.hh +++ b/libqpdf/qpdf/auto_job_json_init.hh @@ -412,6 +412,12 @@ addParameter([this](std::string const& p) { c_pages->range(p); }); popHandler(); // key: range popHandler(); // array: .pages[] popHandler(); // key: pages +pushKey("removeInfo"); +addBare([this]() { c_main->removeInfo(); }); +popHandler(); // key: removeInfo +pushKey("removeMetadata"); +addBare([this]() { c_main->removeMetadata(); }); +popHandler(); // key: removeMetadata pushKey("removePageLabels"); addBare([this]() { c_main->removePageLabels(); }); popHandler(); // key: removePageLabels diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh index df75a5e8..6854fd8c 100644 --- a/libqpdf/qpdf/auto_job_schema.hh +++ b/libqpdf/qpdf/auto_job_schema.hh @@ -145,6 +145,8 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ "range": "page range" } ], + "removeInfo": "remove file information", + "removeMetadata": "remove metadata", "removePageLabels": "remove explicit page numbers", 
"reportMemoryUsage": "best effort report of memory usage", "rotate": "rotate pages", diff --git a/manual/cli.rst b/manual/cli.rst index c11c2bf3..fee79454 100644 --- a/manual/cli.rst +++ b/manual/cli.rst @@ -1773,6 +1773,27 @@ Related Options Prevent inline images from being included in image optimization done by :qpdf:ref:`--optimize-images`. +.. qpdf:option:: --remove-info + + .. help: remove file information + + Exclude file information (except modification date) from the output file. + + Exclude file information (except modification date) from the output file by + omitting all entries (except ``/ModDate``) from the ``/Info`` dictionary in + the document trailer. + See also :qpdf:ref:`--remove-metadata`. + +.. qpdf:option:: --remove-metadata + + .. help: remove metadata + + Exclude metadata from the output file. + + Exclude metadata from the output file by omitting the ``/Metadata`` + dictionary in the document catalog. + See also :qpdf:ref:`--remove-info`. + .. qpdf:option:: --remove-page-labels .. help: remove explicit page numbers diff --git a/manual/qpdf.1 b/manual/qpdf.1 index ec7b851b..95c23be6 100644 --- a/manual/qpdf.1 +++ b/manual/qpdf.1 @@ -530,6 +530,12 @@ Don't optimize images whose area in pixels is below the specified value. .B --keep-inline-images \-\- exclude inline images from optimization Prevent inline images from being considered by --optimize-images. .TP +.B --remove-info \-\- remove file information +Exclude file information (except modification date) from the output file. +.TP +.B --remove-metadata \-\- remove metadata +Exclude metadata from the output file. +.TP .B --remove-page-labels \-\- remove explicit page numbers Exclude page labels (explicit page numbers) from the output file. 
.TP diff --git a/qpdf/qtest/merge-and-split.test b/qpdf/qtest/merge-and-split.test index 33935e71..db18e87e 100644 --- a/qpdf/qtest/merge-and-split.test +++ b/qpdf/qtest/merge-and-split.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('merge-and-split'); -my $n_tests = 28; +my $n_tests = 34; # Select pages from the same file multiple times including selecting # twice from an encrypted file and specifying the password only the @@ -103,6 +103,39 @@ $td->runtest("check output", {$td->COMMAND => "qpdf-test-compare a.pdf remove-labels.pdf"}, {$td->FILE => "remove-labels.pdf", $td->EXIT_STATUS => 0}); +$td->runtest("remove metadata", + {$td->COMMAND => + "qpdf metadata-crypt-filter.pdf a.pdf" . + " --remove-metadata" . + " --decrypt" . + " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-metadata.pdf"}); + +$td->runtest("remove info (with moddate)", + {$td->COMMAND => + "qpdf remove-metadata.pdf a.pdf" . + " --remove-info" . + " --decrypt" . + " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-info.pdf"}); + +$td->runtest("remove info (without moddate)", + {$td->COMMAND => + "qpdf remove-metadata-no-moddate.pdf a.pdf" . + " --remove-info" . + " --decrypt" . + " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-info-no-moddate.pdf"}); + $td->runtest("split with shared resources", {$td->COMMAND => "qpdf --qdf --static-id" . 
diff --git a/qpdf/qtest/qpdf/remove-info-no-moddate.pdf b/qpdf/qtest/qpdf/remove-info-no-moddate.pdf new file mode 100644 index 00000000..2fc0302c Binary files /dev/null and b/qpdf/qtest/qpdf/remove-info-no-moddate.pdf differ diff --git a/qpdf/qtest/qpdf/remove-info.pdf b/qpdf/qtest/qpdf/remove-info.pdf new file mode 100644 index 00000000..7b657ed2 Binary files /dev/null and b/qpdf/qtest/qpdf/remove-info.pdf differ diff --git a/qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf b/qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf new file mode 100644 index 00000000..797a099e Binary files /dev/null and b/qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf differ diff --git a/qpdf/qtest/qpdf/remove-metadata.pdf b/qpdf/qtest/qpdf/remove-metadata.pdf new file mode 100644 index 00000000..e42bf51a Binary files /dev/null and b/qpdf/qtest/qpdf/remove-metadata.pdf differ