2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-30 13:40:37 +00:00

Add new commands --remove-metadata and --remove-info

This commit is contained in:
m-holger 2024-08-25 11:47:04 +01:00
parent 0b3debaf86
commit 8cb9bce780
18 changed files with 138 additions and 21 deletions

View File

@ -1,3 +1,9 @@
2024-08-25 M Holger <m.holger@qpdf.org>
* Add new command-line arguments --remove-metadata and --remove-info
to exclude document metadata and information from the output PDF
file. Partially fixes #1145.
2024-08-06 M Holger <m.holger@qpdf.org>
* Bug fix: when writing real numbers as JSON ensure that they don't

View File

@ -692,6 +692,8 @@ class QPDFJob
bool optimize_images{false};
bool externalize_inline_images{false};
bool keep_inline_images{false};
bool remove_info{false};
bool remove_metadata{false};
bool remove_page_labels{false};
size_t oi_min_width{DEFAULT_OI_MIN_WIDTH};
size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT};

View File

@ -32,6 +32,8 @@ QPDF_DLL Config* progress();
QPDF_DLL Config* qdf();
QPDF_DLL Config* rawStreamData();
QPDF_DLL Config* recompressFlate();
QPDF_DLL Config* removeInfo();
QPDF_DLL Config* removeMetadata();
QPDF_DLL Config* removePageLabels();
QPDF_DLL Config* reportMemoryUsage();
QPDF_DLL Config* requiresPassword();

View File

@ -4,17 +4,17 @@ generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a
include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5
include/qpdf/auto_job_c_main.hh dbfc221d1533120d1aa9c361d8d2483dea5fcb1c0fd95144d98d305e64ed32a6
include/qpdf/auto_job_c_main.hh 84f463237235b2c095b747a4f5dd00f109ee596a1c207b944efb296c0c568cae
include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506
include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62
job.yml 53cad86659db6722e8f415aacb19fc51ab81bb1589c3cb8f65ec893bb4bf5566
job.yml 31935064eca625af7657b23f2f12c614d14751ec0b12702482b1768a04905d22
libqpdf/qpdf/auto_job_decl.hh 20d6affe1e260f5a1af4f1d82a820b933835440ff03020e877382da2e8dac6c6
libqpdf/qpdf/auto_job_help.hh 74b2982771720927ce7be8f1690720ec65cb9989620493a0c154f50ba2c254e4
libqpdf/qpdf/auto_job_init.hh 19d1da7c4c0c635bd1c5db8d5f17df8edad3442f8eba006adb075cec295fa158
libqpdf/qpdf/auto_job_help.hh 1e9181f4729a22ff91ab54e2b4a82e6af0c57a8327efb222a4196adb609c1ade
libqpdf/qpdf/auto_job_init.hh e2a6bb87870c5522a01b15461c9fe909e360f5c7fed06e41acf13a125bd1d03e
libqpdf/qpdf/auto_job_json_decl.hh 843892c8e8652a86b7eb573893ef24050b7f36fe313f7251874be5cd4cdbe3fd
libqpdf/qpdf/auto_job_json_init.hh a87256c082427ec0318223762472970b2eced535c0c8b0288d45c8cdaaf62f74
libqpdf/qpdf/auto_job_schema.hh 5dac568dff39614e161a0af59a0f328f1e28edf69b96f08bb76fd592d51bb053
libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb35e421d955fff481ba7d
libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c
manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
manual/cli.rst 94057baba9ecffb4ce19ae61c8fa507ef07209c280fccae97b283c3dfce834e0
manual/qpdf.1 0ec05f1392c160165cdf6adada4de84c0de75bd2fb5762caff4e1372aacada4c
manual/cli.rst b7f37995f13346518ae7b2ea84836fba13b4da4e1f55be5f2a861f20dea0ccdb
manual/qpdf.1 59c26635017cba5d142ec3fcc4aebcb91e0cf1355d51365db84f48b21585ad8d
manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b

View File

@ -130,6 +130,8 @@ options:
- qdf
- raw-stream-data
- recompress-flate
- remove-info
- remove-metadata
- remove-page-labels
- replace-input
- report-memory-usage
@ -440,6 +442,8 @@ json:
- Pages.file:
Pages.password:
range:
remove-info:
remove-metadata:
remove-page-labels:
report-memory-usage:
rotate:

View File

@ -471,6 +471,21 @@ QPDFJob::createQPDF()
}
handleUnderOverlay(pdf);
handleTransformations(pdf);
if (m->remove_info) {
auto trailer = pdf.getTrailer();
auto mod_date = trailer.getKey("/Info").getKeyIfDict("/ModDate");
if (mod_date.isNull()) {
trailer.removeKey("/Info");
} else {
auto info = trailer.replaceKeyAndGetNew(
"/Info", pdf.makeIndirectObject(QPDFObjectHandle::newDictionary()));
info.replaceKey("/ModDate", mod_date);
}
pdf.getRoot().removeKey("/Metadata");
}
if (m->remove_metadata) {
pdf.getRoot().removeKey("/Metadata");
}
for (auto& foreign: page_heap) {
if (foreign->anyWarnings()) {

View File

@ -510,6 +510,20 @@ QPDFJob::Config::removeAttachment(std::string const& parameter)
return this;
}
// Handler for the --remove-info option (JSON key "removeInfo"): turns on the
// job's remove_info flag, which asks for file information other than the
// modification date to be excluded from the output file. Returns this Config
// object.
QPDFJob::Config*
QPDFJob::Config::removeInfo()
{
    o.m->remove_info = true;
    return this;
}
// Handler for the --remove-metadata option (JSON key "removeMetadata"): turns
// on the job's remove_metadata flag, which asks for metadata to be excluded
// from the output file. Returns this Config object.
QPDFJob::Config*
QPDFJob::Config::removeMetadata()
{
    o.m->remove_metadata = true;
    return this;
}
QPDFJob::Config*
QPDFJob::Config::removePageLabels()
{

View File

@ -414,6 +414,13 @@ Don't optimize images whose area in pixels is below the specified value.
)");
ap.addOptionHelp("--keep-inline-images", "modification", "exclude inline images from optimization", R"(Prevent inline images from being considered by --optimize-images.
)");
ap.addOptionHelp("--remove-info", "modification", "remove file information", R"(Exclude file information (except modification date) from the output file.
)");
ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Exclude metadata from the output file.
)");
}
static void add_help_5(QPDFArgParser& ap)
{
ap.addOptionHelp("--remove-page-labels", "modification", "remove explicit page numbers", R"(Exclude page labels (explicit page numbers) from the output file.
)");
ap.addOptionHelp("--set-page-labels", "modification", "number pages for the entire document", R"(--set-page-labels label-spec ... --
@ -460,9 +467,6 @@ iv, then the remaining pages with Arabic numerals starting with
1 and continuing sequentially until the end of the document. For
additional examples, please consult the manual.
)");
}
static void add_help_5(QPDFArgParser& ap)
{
ap.addHelpTopic("encryption", "create encrypted files", R"(Create encrypted files. Usage:
--encrypt \
@ -641,6 +645,9 @@ ap.addOptionHelp("--force-R5", "encryption", "use unsupported R=5 encryption", R
algorithm that existed only in Acrobat version IX. This option
should not be used except for compatibility testing.
)");
}
static void add_help_6(QPDFArgParser& ap)
{
ap.addHelpTopic("page-selection", "select pages from one or more files", R"(Use the --pages option to select pages from multiple files. Usage:
qpdf in.pdf --pages --file=input-file \
@ -725,9 +732,6 @@ appearance: first underlays, then the original page, then overlays.
Run qpdf --help=page-ranges for help with page ranges.
)");
}
static void add_help_6(QPDFArgParser& ap)
{
ap.addOptionHelp("--to", "overlay-underlay", "destination pages for underlay/overlay", R"(--to=page-range
Specify the range of pages in the primary output to apply
@ -829,6 +833,9 @@ its terminating "--".
To copy attachments from a password-protected file, use
the --password option after the file name.
)");
}
static void add_help_7(QPDFArgParser& ap)
{
ap.addOptionHelp("--prefix", "copy-attachments", "key prefix for copying attachments", R"(--prefix=prefix
Prepend a prefix to each key; may be needed if there are
@ -839,9 +846,6 @@ ap.addHelpTopic("inspection", "inspect PDF files", R"(These options provide tool
the options in this section are specified, no output file may be
given.
)");
}
static void add_help_7(QPDFArgParser& ap)
{
ap.addOptionHelp("--is-encrypted", "inspection", "silently test whether a file is encrypted", R"(Silently exit with a code indicating the file's encryption status:
0: the file is encrypted
@ -919,6 +923,9 @@ output as binary data. Get the key with --list-attachments.
ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the
JSON chapter in the qpdf manual for details.
)");
}
static void add_help_8(QPDFArgParser& ap)
{
ap.addOptionHelp("--json", "json", "show file in JSON format", R"(--json[=version]
Generate a JSON representation of the file. This is described in
@ -932,9 +939,6 @@ Describe the format of the JSON output by writing to standard
output a JSON object with the same keys and with values
containing descriptive text.
)");
}
static void add_help_8(QPDFArgParser& ap)
{
ap.addOptionHelp("--json-key", "json", "limit which keys are in JSON output", R"(--json-key=key
This option is repeatable. If given, only the specified

View File

@ -68,6 +68,8 @@ this->ap.addBare("progress", [this](){c_main->progress();});
this->ap.addBare("qdf", [this](){c_main->qdf();});
this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();});
this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();});
this->ap.addBare("remove-info", [this](){c_main->removeInfo();});
this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();});
this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();});
this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput));
this->ap.addBare("report-memory-usage", [this](){c_main->reportMemoryUsage();});

View File

@ -412,6 +412,12 @@ addParameter([this](std::string const& p) { c_pages->range(p); });
popHandler(); // key: range
popHandler(); // array: .pages[]
popHandler(); // key: pages
pushKey("removeInfo");
addBare([this]() { c_main->removeInfo(); });
popHandler(); // key: removeInfo
pushKey("removeMetadata");
addBare([this]() { c_main->removeMetadata(); });
popHandler(); // key: removeMetadata
pushKey("removePageLabels");
addBare([this]() { c_main->removePageLabels(); });
popHandler(); // key: removePageLabels

View File

@ -145,6 +145,8 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({
"range": "page range"
}
],
"removeInfo": "remove file information",
"removeMetadata": "remove metadata",
"removePageLabels": "remove explicit page numbers",
"reportMemoryUsage": "best effort report of memory usage",
"rotate": "rotate pages",

View File

@ -1773,6 +1773,27 @@ Related Options
Prevent inline images from being included in image optimization
done by :qpdf:ref:`--optimize-images`.
.. qpdf:option:: --remove-info
.. help: remove file information
Exclude file information (except modification date) from the output file.
Exclude file information (except modification date) from the output file by
omitting all entries (except ``/ModDate``) from the ``/Info`` dictionary in
the document trailer.
See also :qpdf:ref:`--remove-metadata`.
.. qpdf:option:: --remove-metadata
.. help: remove metadata
Exclude metadata from the output file.
Exclude metadata from the output file by omitting the ``/Metadata``
dictionary in the document catalog.
See also :qpdf:ref:`--remove-info`.
.. qpdf:option:: --remove-page-labels
.. help: remove explicit page numbers

View File

@ -530,6 +530,12 @@ Don't optimize images whose area in pixels is below the specified value.
.B --keep-inline-images \-\- exclude inline images from optimization
Prevent inline images from being considered by --optimize-images.
.TP
.B --remove-info \-\- remove file information
Exclude file information (except modification date) from the output file.
.TP
.B --remove-metadata \-\- remove metadata
Exclude metadata from the output file.
.TP
.B --remove-page-labels \-\- remove explicit page numbers
Exclude page labels (explicit page numbers) from the output file.
.TP

View File

@ -14,7 +14,7 @@ cleanup();
my $td = new TestDriver('merge-and-split');
my $n_tests = 28;
my $n_tests = 34;
# Select pages from the same file multiple times including selecting
# twice from an encrypted file and specifying the password only the
@ -103,6 +103,39 @@ $td->runtest("check output",
{$td->COMMAND => "qpdf-test-compare a.pdf remove-labels.pdf"},
{$td->FILE => "remove-labels.pdf", $td->EXIT_STATUS => 0});
$td->runtest("remove metadata",
{$td->COMMAND =>
"qpdf metadata-crypt-filter.pdf a.pdf" .
" --remove-metadata" .
" --decrypt" .
" --static-id"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "remove-metadata.pdf"});
$td->runtest("remove info (with moddate)",
{$td->COMMAND =>
"qpdf remove-metadata.pdf a.pdf" .
" --remove-info" .
" --decrypt" .
" --static-id"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "remove-info.pdf"});
$td->runtest("remove info (without moddate)",
{$td->COMMAND =>
"qpdf remove-metadata-no-moddate.pdf a.pdf" .
" --remove-info" .
" --decrypt" .
" --static-id"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "remove-info-no-moddate.pdf"});
$td->runtest("split with shared resources",
{$td->COMMAND =>
"qpdf --qdf --static-id" .

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.