From 8cb9bce7806ef41afbc6b9d236d2343f82814da6 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sun, 25 Aug 2024 11:47:04 +0100 Subject: [PATCH 1/6] Add new commands --remove-metadata and --remove-info --- ChangeLog | 6 +++ include/qpdf/QPDFJob.hh | 2 + include/qpdf/auto_job_c_main.hh | 2 + job.sums | 16 ++++---- job.yml | 4 ++ libqpdf/QPDFJob.cc | 15 ++++++++ libqpdf/QPDFJob_config.cc | 14 +++++++ libqpdf/qpdf/auto_job_help.hh | 28 ++++++++------ libqpdf/qpdf/auto_job_init.hh | 2 + libqpdf/qpdf/auto_job_json_init.hh | 6 +++ libqpdf/qpdf/auto_job_schema.hh | 2 + manual/cli.rst | 21 +++++++++++ manual/qpdf.1 | 6 +++ qpdf/qtest/merge-and-split.test | 35 +++++++++++++++++- qpdf/qtest/qpdf/remove-info-no-moddate.pdf | Bin 0 -> 13365 bytes qpdf/qtest/qpdf/remove-info.pdf | Bin 0 -> 13455 bytes .../qtest/qpdf/remove-metadata-no-moddate.pdf | Bin 0 -> 13495 bytes qpdf/qtest/qpdf/remove-metadata.pdf | Bin 0 -> 13495 bytes 18 files changed, 138 insertions(+), 21 deletions(-) create mode 100644 qpdf/qtest/qpdf/remove-info-no-moddate.pdf create mode 100644 qpdf/qtest/qpdf/remove-info.pdf create mode 100644 qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf create mode 100644 qpdf/qtest/qpdf/remove-metadata.pdf diff --git a/ChangeLog b/ChangeLog index 6968781c..3b646eea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2024-08-25 M Holger + + * Add new command-line arguments --remove-metadata and --remove-info + to exclude document metadata and information from the output PDF + file. Partially fixes #1145. 
+ 2024-08-06 M Holger * Bug fix: when writing real numbers as JSON ensure that they don't diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index b26a9dcf..1c64dcb9 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -692,6 +692,8 @@ class QPDFJob bool optimize_images{false}; bool externalize_inline_images{false}; bool keep_inline_images{false}; + bool remove_info{false}; + bool remove_metadata{false}; bool remove_page_labels{false}; size_t oi_min_width{DEFAULT_OI_MIN_WIDTH}; size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT}; diff --git a/include/qpdf/auto_job_c_main.hh b/include/qpdf/auto_job_c_main.hh index 6c661404..bb584255 100644 --- a/include/qpdf/auto_job_c_main.hh +++ b/include/qpdf/auto_job_c_main.hh @@ -32,6 +32,8 @@ QPDF_DLL Config* progress(); QPDF_DLL Config* qdf(); QPDF_DLL Config* rawStreamData(); QPDF_DLL Config* recompressFlate(); +QPDF_DLL Config* removeInfo(); +QPDF_DLL Config* removeMetadata(); QPDF_DLL Config* removePageLabels(); QPDF_DLL Config* reportMemoryUsage(); QPDF_DLL Config* requiresPassword(); diff --git a/job.sums b/job.sums index 4f53c923..0ad8c664 100644 --- a/job.sums +++ b/job.sums @@ -4,17 +4,17 @@ generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 include/qpdf/auto_job_c_enc.hh 28446f3c32153a52afa239ea40503e6cc8ac2c026813526a349e0cd4ae17ddd5 -include/qpdf/auto_job_c_main.hh dbfc221d1533120d1aa9c361d8d2483dea5fcb1c0fd95144d98d305e64ed32a6 +include/qpdf/auto_job_c_main.hh 84f463237235b2c095b747a4f5dd00f109ee596a1c207b944efb296c0c568cae include/qpdf/auto_job_c_pages.hh 09ca15649cc94fdaf6d9bdae28a20723f2a66616bf15aa86d83df31051d82506 include/qpdf/auto_job_c_uo.hh 9c2f98a355858dd54d0bba444b73177a59c9e56833e02fa6406f429c07f39e62 -job.yml 
53cad86659db6722e8f415aacb19fc51ab81bb1589c3cb8f65ec893bb4bf5566 +job.yml 31935064eca625af7657b23f2f12c614d14751ec0b12702482b1768a04905d22 libqpdf/qpdf/auto_job_decl.hh 20d6affe1e260f5a1af4f1d82a820b933835440ff03020e877382da2e8dac6c6 -libqpdf/qpdf/auto_job_help.hh 74b2982771720927ce7be8f1690720ec65cb9989620493a0c154f50ba2c254e4 -libqpdf/qpdf/auto_job_init.hh 19d1da7c4c0c635bd1c5db8d5f17df8edad3442f8eba006adb075cec295fa158 +libqpdf/qpdf/auto_job_help.hh 1e9181f4729a22ff91ab54e2b4a82e6af0c57a8327efb222a4196adb609c1ade +libqpdf/qpdf/auto_job_init.hh e2a6bb87870c5522a01b15461c9fe909e360f5c7fed06e41acf13a125bd1d03e libqpdf/qpdf/auto_job_json_decl.hh 843892c8e8652a86b7eb573893ef24050b7f36fe313f7251874be5cd4cdbe3fd -libqpdf/qpdf/auto_job_json_init.hh a87256c082427ec0318223762472970b2eced535c0c8b0288d45c8cdaaf62f74 -libqpdf/qpdf/auto_job_schema.hh 5dac568dff39614e161a0af59a0f328f1e28edf69b96f08bb76fd592d51bb053 +libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb35e421d955fff481ba7d +libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 -manual/cli.rst 94057baba9ecffb4ce19ae61c8fa507ef07209c280fccae97b283c3dfce834e0 -manual/qpdf.1 0ec05f1392c160165cdf6adada4de84c0de75bd2fb5762caff4e1372aacada4c +manual/cli.rst b7f37995f13346518ae7b2ea84836fba13b4da4e1f55be5f2a861f20dea0ccdb +manual/qpdf.1 59c26635017cba5d142ec3fcc4aebcb91e0cf1355d51365db84f48b21585ad8d manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b diff --git a/job.yml b/job.yml index 6f5c6fb7..13b61a4b 100644 --- a/job.yml +++ b/job.yml @@ -130,6 +130,8 @@ options: - qdf - raw-stream-data - recompress-flate + - remove-info + - remove-metadata - remove-page-labels - replace-input - report-memory-usage @@ -440,6 +442,8 @@ json: - Pages.file: Pages.password: range: + remove-info: + remove-metadata: remove-page-labels: 
report-memory-usage: rotate: diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 55902865..9ed1685e 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -471,6 +471,21 @@ QPDFJob::createQPDF() } handleUnderOverlay(pdf); handleTransformations(pdf); + if (m->remove_info) { + auto trailer = pdf.getTrailer(); + auto mod_date = trailer.getKey("/Info").getKeyIfDict("/ModDate"); + if (mod_date.isNull()) { + trailer.removeKey("/Info"); + } else { + auto info = trailer.replaceKeyAndGetNew( + "/Info", pdf.makeIndirectObject(QPDFObjectHandle::newDictionary())); + info.replaceKey("/ModDate", mod_date); + } + pdf.getRoot().removeKey("/Metadata"); + } + if (m->remove_metadata) { + pdf.getRoot().removeKey("/Metadata"); + } for (auto& foreign: page_heap) { if (foreign->anyWarnings()) { diff --git a/libqpdf/QPDFJob_config.cc b/libqpdf/QPDFJob_config.cc index 107abf7c..cf6eadd8 100644 --- a/libqpdf/QPDFJob_config.cc +++ b/libqpdf/QPDFJob_config.cc @@ -510,6 +510,20 @@ QPDFJob::Config::removeAttachment(std::string const& parameter) return this; } +QPDFJob::Config* +QPDFJob::Config::removeInfo() +{ + o.m->remove_info = true; + return this; +} + +QPDFJob::Config* +QPDFJob::Config::removeMetadata() +{ + o.m->remove_metadata = true; + return this; +} + QPDFJob::Config* QPDFJob::Config::removePageLabels() { diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh index 79c32ceb..a7207a63 100644 --- a/libqpdf/qpdf/auto_job_help.hh +++ b/libqpdf/qpdf/auto_job_help.hh @@ -414,6 +414,13 @@ Don't optimize images whose area in pixels is below the specified value. )"); ap.addOptionHelp("--keep-inline-images", "modification", "exclude inline images from optimization", R"(Prevent inline images from being considered by --optimize-images. )"); +ap.addOptionHelp("--remove-info", "modification", "remove file information", R"(Exclude file information (except modification date) from the output file. 
+)"); +ap.addOptionHelp("--remove-metadata", "modification", "remove metadata", R"(Exclude metadata from the output file. +)"); +} +static void add_help_5(QPDFArgParser& ap) +{ ap.addOptionHelp("--remove-page-labels", "modification", "remove explicit page numbers", R"(Exclude page labels (explicit page numbers) from the output file. )"); ap.addOptionHelp("--set-page-labels", "modification", "number pages for the entire document", R"(--set-page-labels label-spec ... -- @@ -460,9 +467,6 @@ iv, then the remaining pages with Arabic numerals starting with 1 and continuing sequentially until the end of the document. For additional examples, please consult the manual. )"); -} -static void add_help_5(QPDFArgParser& ap) -{ ap.addHelpTopic("encryption", "create encrypted files", R"(Create encrypted files. Usage: --encrypt \ @@ -641,6 +645,9 @@ ap.addOptionHelp("--force-R5", "encryption", "use unsupported R=5 encryption", R algorithm that existed only in Acrobat version IX. This option should not be used except for compatibility testing. )"); +} +static void add_help_6(QPDFArgParser& ap) +{ ap.addHelpTopic("page-selection", "select pages from one or more files", R"(Use the --pages option to select pages from multiple files. Usage: qpdf in.pdf --pages --file=input-file \ @@ -725,9 +732,6 @@ appearance: first underlays, then the original page, then overlays. Run qpdf --help=page-ranges for help with page ranges. )"); -} -static void add_help_6(QPDFArgParser& ap) -{ ap.addOptionHelp("--to", "overlay-underlay", "destination pages for underlay/overlay", R"(--to=page-range Specify the range of pages in the primary output to apply @@ -829,6 +833,9 @@ its terminating "--". To copy attachments from a password-protected file, use the --password option after the file name. 
)"); +} +static void add_help_7(QPDFArgParser& ap) +{ ap.addOptionHelp("--prefix", "copy-attachments", "key prefix for copying attachments", R"(--prefix=prefix Prepend a prefix to each key; may be needed if there are @@ -839,9 +846,6 @@ ap.addHelpTopic("inspection", "inspect PDF files", R"(These options provide tool the options in this section are specified, no output file may be given. )"); -} -static void add_help_7(QPDFArgParser& ap) -{ ap.addOptionHelp("--is-encrypted", "inspection", "silently test whether a file is encrypted", R"(Silently exit with a code indicating the file's encryption status: 0: the file is encrypted @@ -919,6 +923,9 @@ output as binary data. Get the key with --list-attachments. ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the JSON chapter in the qpdf manual for details. )"); +} +static void add_help_8(QPDFArgParser& ap) +{ ap.addOptionHelp("--json", "json", "show file in JSON format", R"(--json[=version] Generate a JSON representation of the file. This is described in @@ -932,9 +939,6 @@ Describe the format of the JSON output by writing to standard output a JSON object with the same keys and with values containing descriptive text. )"); -} -static void add_help_8(QPDFArgParser& ap) -{ ap.addOptionHelp("--json-key", "json", "limit which keys are in JSON output", R"(--json-key=key This option is repeatable. 
If given, only the specified diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh index ac42ea6a..5db11318 100644 --- a/libqpdf/qpdf/auto_job_init.hh +++ b/libqpdf/qpdf/auto_job_init.hh @@ -68,6 +68,8 @@ this->ap.addBare("progress", [this](){c_main->progress();}); this->ap.addBare("qdf", [this](){c_main->qdf();}); this->ap.addBare("raw-stream-data", [this](){c_main->rawStreamData();}); this->ap.addBare("recompress-flate", [this](){c_main->recompressFlate();}); +this->ap.addBare("remove-info", [this](){c_main->removeInfo();}); +this->ap.addBare("remove-metadata", [this](){c_main->removeMetadata();}); this->ap.addBare("remove-page-labels", [this](){c_main->removePageLabels();}); this->ap.addBare("replace-input", b(&ArgParser::argReplaceInput)); this->ap.addBare("report-memory-usage", [this](){c_main->reportMemoryUsage();}); diff --git a/libqpdf/qpdf/auto_job_json_init.hh b/libqpdf/qpdf/auto_job_json_init.hh index ee4c7421..fa4c4089 100644 --- a/libqpdf/qpdf/auto_job_json_init.hh +++ b/libqpdf/qpdf/auto_job_json_init.hh @@ -412,6 +412,12 @@ addParameter([this](std::string const& p) { c_pages->range(p); }); popHandler(); // key: range popHandler(); // array: .pages[] popHandler(); // key: pages +pushKey("removeInfo"); +addBare([this]() { c_main->removeInfo(); }); +popHandler(); // key: removeInfo +pushKey("removeMetadata"); +addBare([this]() { c_main->removeMetadata(); }); +popHandler(); // key: removeMetadata pushKey("removePageLabels"); addBare([this]() { c_main->removePageLabels(); }); popHandler(); // key: removePageLabels diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh index df75a5e8..6854fd8c 100644 --- a/libqpdf/qpdf/auto_job_schema.hh +++ b/libqpdf/qpdf/auto_job_schema.hh @@ -145,6 +145,8 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({ "range": "page range" } ], + "removeInfo": "remove file information", + "removeMetadata": "remove metadata", "removePageLabels": "remove explicit page numbers", 
"reportMemoryUsage": "best effort report of memory usage", "rotate": "rotate pages", diff --git a/manual/cli.rst b/manual/cli.rst index c11c2bf3..fee79454 100644 --- a/manual/cli.rst +++ b/manual/cli.rst @@ -1773,6 +1773,27 @@ Related Options Prevent inline images from being included in image optimization done by :qpdf:ref:`--optimize-images`. +.. qpdf:option:: --remove-info + + .. help: remove file information + + Exclude file information (except modification date) from the output file. + + Exclude file information (except modification date) from the output file by + omitting all entries (except ``/ModDate``) from the ``/Info`` dictionary in + the document trailer. + See also :qpdf:ref:`--remove-metadata`. + +.. qpdf:option:: --remove-metadata + + .. help: remove metadata + + Exclude metadata from the output file. + + Exclude metadata from the output file by omitting the ``/Metadata`` + dictionary in the document catalog. + See also :qpdf:ref:`--remove-info`. + .. qpdf:option:: --remove-page-labels .. help: remove explicit page numbers diff --git a/manual/qpdf.1 b/manual/qpdf.1 index ec7b851b..95c23be6 100644 --- a/manual/qpdf.1 +++ b/manual/qpdf.1 @@ -530,6 +530,12 @@ Don't optimize images whose area in pixels is below the specified value. .B --keep-inline-images \-\- exclude inline images from optimization Prevent inline images from being considered by --optimize-images. .TP +.B --remove-info \-\- remove file information +Exclude file information (except modification date) from the output file. +.TP +.B --remove-metadata \-\- remove metadata +Exclude metadata from the output file. +.TP .B --remove-page-labels \-\- remove explicit page numbers Exclude page labels (explicit page numbers) from the output file. 
.TP diff --git a/qpdf/qtest/merge-and-split.test b/qpdf/qtest/merge-and-split.test index 33935e71..db18e87e 100644 --- a/qpdf/qtest/merge-and-split.test +++ b/qpdf/qtest/merge-and-split.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('merge-and-split'); -my $n_tests = 28; +my $n_tests = 34; # Select pages from the same file multiple times including selecting # twice from an encrypted file and specifying the password only the @@ -103,6 +103,39 @@ $td->runtest("check output", {$td->COMMAND => "qpdf-test-compare a.pdf remove-labels.pdf"}, {$td->FILE => "remove-labels.pdf", $td->EXIT_STATUS => 0}); +$td->runtest("remove metadata", + {$td->COMMAND => + "qpdf metadata-crypt-filter.pdf a.pdf" . + " --remove-metadata" . + " --decrypt" . + " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-metadata.pdf"}); + +$td->runtest("remove info (with moddate)", + {$td->COMMAND => + "qpdf remove-metadata.pdf a.pdf" . + " --remove-info" . + " --decrypt" . + " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-info.pdf"}); + +$td->runtest("remove info (without moddate)", + {$td->COMMAND => + "qpdf remove-metadata-no-moddate.pdf a.pdf" . + " --remove-info" . + " --decrypt" . + " --static-id"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "remove-info-no-moddate.pdf"}); + $td->runtest("split with shared resources", {$td->COMMAND => "qpdf --qdf --static-id" . 
diff --git a/qpdf/qtest/qpdf/remove-info-no-moddate.pdf b/qpdf/qtest/qpdf/remove-info-no-moddate.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2fc0302cfed53d48ecffcb7664fbf48faf188722 GIT binary patch literal 13365 zcmd5@&5ztf7Ux5V$P#y8VYfg;f@OnURd(4%VMNH#Vjv$w&+xH34M<3Q?7|HRPW>NnL%dg2ZkPS6R<|@QIfqWVy8Pa+UcLG~ z*RLw4cei_I_%o4r`io!x`k7f*i zzI1#vV+ZgPe%WPDKE<}SpeugcWBwFA3&yc6uJuUkJrwrfyw-&;tZZv5LidTv2Th}S z&{$h|H(vukQ^OorL6qtrWNFf%jRM`;Kp2JCdIdjp2rf4#(endr|{fAYZmh11(-5ElKH0nCkeZX9!Cf*Tv$7~#f>Fjj=I0%1if z!dMZ;iZE7$u_BBWVXO#a1>oXy(pZtkiZoWFu_BF?E6g{+#LSyHp~PPG7=$q8IYjRO za|zA+Z~pafYAvuO?!WU%@B!(x=PCwFGU8D#XC zEfzi7X8)-YuAMapFpj4LY;oEsYMe^3I8u;6fUxd*cbeZU)5F5ppXN8RcL5s!;U$vN z!V(NP<`TpC$+&<4Le840@y|~uH(r?B((5^33Nj>41;V+Q4~ALmFBYhoIKQ9ICdbo5 zm?k8E9@sbk;7pon*UM)*Io!)3-%Sc&Q-4xqMGnxMsdfT~b3;Ifc8a?S8e>X9+hpo0 zXq+Vq+RkoQL2F*4s11vBHmNm5QPhU?x{DeUn4-2++Sv@j45p|p=XVx$y~0_t4m(cP z`W1r4+VFpO^SZS#!n>e;jp%HKv{uHbn}y!@!F{0)+y?a*GBzX zOi_2dHtM|)Mcwh*sJCtub;oO?-f2?Q9j}dgV@y$Zyf*56JVC8GUK_RSKv8$RHfoiG zqV9NY)S?bW-SOI}H7JU@L+)Gh+ zyf$hPxk8PC{ux=x&^A+g!vbYgqdUj8aBAH-w)IHs&ZP)#MO8~gR`ES|l#K@&PUmcr|Zv>si`=N8@`p!)#O8AVzeblUVX zh^|Q58;*(`&R}{Y(1N$~Lw&+>As=5a-e&qx#+wz>JUjAk{b%O=+~57?eYqcqS5NKS zd;9Bue{|>h$DX+F&JU;V_&b03@{Qn|{Zn85GJW9Zd+z$_A@4yre6i2kV;C(*m$7*v zo8?-o_Fu|JH}hh6m@)swF#s5juQUIR;rQHmHmn*hWk+zf)8lX~b$qqZ2R(Q~0pHC* zg|=0l)o7g?eI0+%yIeyN2ggSPQeooZ!|0~Bm?@{X5m9+I6Qa*JkCG!|{jEuU8b^NKbj^FVfBaXF5xsqc%h+l_9!)nO$ z*xR9=v3PC9b2aAq^WMwU^UB<{_LwX3n(uh8P|quq*V>*d_L}!SD142Y1y>L@743?? 
z<_F$I>Um}QT6@eDf6WKpCF*%){#x5}MPTzI?=tngGJ&n_xni*SO>dWaUYWtx_57K$ zp8xK>NIAcE7!X#>A944GGzudwkYGWKG91h!SVG^}5 z4igTCz16}bYGn??TUe{ts{6kh{`nt+_dfsUjl1uf=Pg8{Hb!B>p|Eiak-(*#4bir6 zDD3?fB2gQoFyT^nq_Dch$)(A?eqYbhV3J@}NwG6z=nig}Vvh*3=frHD8JPB9~ z?m=}iv8)RjrP5-7;cYqy=G&9$03{3?P7}?h$z|AFUbWb4bm$lnS{XAt024DDAY@7% znx1E2)RkcEn7OQ5-aNEv*AC3Y%qF8nxok4a3{2LB=JIk0&H8PJ)<9!oW(U)jXQd>S zt_=;Ru3RQ1w56+~gl09pHZ(T3#M*g(Ynos4FsHp>@rS7>0@?H$TeRWj8~nq@i#Cbx z8I86Oh3!V0N@-hyb6)QU+(qnho{(p`#9H5YHVO$o9^>(1qhA_8oa=2ONC_4pVuD44 zxKC<4t5B?Lv?X{ArQRmtD{12i@gZBrCZmw}kTD+THGHIaU9sL425`-`*(PE#jxdD# z!OeaOZ|^qjC3x+l-WIjYTf`IMBa-;S(s(w)OJ|L?7>~~yZ7E(VueYfnB|enE3exBo zM#P6woaB{?Tb@l)V9>g~I0#6t#R1KOm`C(k5fd!2OySj{;Ul7Pjui^Jjeap*gKVQEU%KF}4(7eJgg=a&bP%q0&()W~aoc@Pm^<3Wr^ZjF8^ zi2;X>&9QMV@IbfzESE`heQ+MRc~wla;Rp^(@S1&pGQt02qhA{Kh9BloBbdLl%?{2g zHIOQkL6*jOaE+^Lki$`QsD$8oa452^vyz8AN`-773P;^m(AJEL%-6aWAK literal 0 HcmV?d00001 diff --git a/qpdf/qtest/qpdf/remove-info.pdf b/qpdf/qtest/qpdf/remove-info.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7b657ed23057479c75539f6cda6c429454c0fb9f GIT binary patch literal 13455 zcmd5@&yVE96^29+D@(bDK%j`S;^$yjm0h;cX2h~Xi-G+y^h|(cvIp<9XKB*Yz3KKW z3zu++A~w<&?B)|=cocce=4duP6a@($FwYsHg$-C10sJ7qt)vK!S z*cW6KB_9m4)=w-^GjM)CpG{7uM=(sNBRz<3e&8%L!>-rebaJ$x z7c4YW0Gs-g0#^{s;cBRBiNmoWq(d`xI!hXJN=e(v)YVLDrJ$_s@OG9p&Lqm($Vg`= zh}L1s+NfT4S>p_*tgV)Ic81^#rmU^#cb0Xt#96ftJFeFH8G_^*p2x86cXwX5R>sKG zsP3|EEsasfOQU|Eq%)-BwNXF+Qq~=>jrwt&vhH|o)X&9~b;oO?ei@>yJ6;?0yBlTQ z@!F_gH7V^bMs4v?)*Y{n+L5HJJ6;>Lfl66-yf$jTm#~gIUK_QGT(L$=e@j+Aw4Euv zVSzrXF`i>vIJNN{+j^vp=hB6?uBsIxYx!P0$;N|>@n^OB%sJT45O#^ztD`lki3Kz3 z=3|JLN}Jcxv$e+1vl8B9qz&oPKezPuf!#a6&REjYu+yQJQLMY9z2T_HVfUst0yB6g zKhk?Mm-F$>;!UQvbG%tG&9f8l&c9~fkNv%`-j)Y}c=g=w&+mTu;d}R%;NI@oZQ%T+L2kSE$EfL+kWLp|^<8w}#)1eXV2Fc56y|@(_&uogQ3; zn+(^>Jt1vmmsHL)2mh&Z@LTUenuEHPcc#GKRKQPrzox)-H||Vl?;#3YH`LA) 
z`0EPz_uj)4xW4yuroi7;z&-Cb6u53{ohk6YE8uJ1BNVvqOr0t4_Z9FP-lG(FQ%BseR~E0ekt_9@_dO_lPR)u|RVsP*|`i;}2~X_%<>`Q;WRQ5(}RQPZ&f79~*|(=bufus^pbiCS61@ch>5 zwd#Q{hEM%{@YhfOaqIs3=6Q>dz}c7$l`2t_uyKo!sEtXOs7ct{EkdF;CSjr`VehpG z$;Kq?Zf!Y#&>|$-FW88L-K{O>e|q~w!t`r>iEIw{ZB_5v_7EMuZ@X#jz zi!BpF+5Y0N)E+@;b+l17MgdZ$P|L{IRnu~=T$Vn=LvV1Y5sqN87$yU_E+&^vBcpU$ zEHOMz2g!VMG993WVbf`{>4sd!t*)zXt_j?X+trW^!gHqh+~lFv}U3tWB-1%Oy37+fL14V{&FY(>BjaODbKP8V+5#OiF4? zS4TAK z`sTF7_)_b%rMMl~Y~y$c+ier*u{qa;cpSlLQvu<};{;Zcb6tuhyxAsrNctfpZbmuR zMR?fGX;Ue|5J^lhWDpSy+0r)|;-Q5mhExH;ka0pWQY_WYdqYlhA4=T(ajuK-2)EOw zQi2gCF~NueeBy838woTdowg7U2{>&kAQ(}cP>d8ya`Rrr@raVsCULXHX^ZeMnbW3F zhn%*g<=zzEmz&qc0X}oPZFC;Qg07=j;s%9tZy1ra5JM8kbt)z6BTmqQcjBdZD7x8} z1a$o-JR$u^V4{+HL%LRz2oDfC7%e$X;*?^fG%sm@8v)IDa2df!MMCo$1;Uga|FHNd2l4M?F*8F4y8gtnMqQZ5ke-R5($07I5pMUXM&-*_wq_zhD literal 0 HcmV?d00001 diff --git a/qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf b/qpdf/qtest/qpdf/remove-metadata-no-moddate.pdf new file mode 100644 index 0000000000000000000000000000000000000000..797a099e565a5fda0e157f4db9595749f730905b GIT binary patch literal 13495 zcmd5@OOGVQ5ynOqtCF}IV?%;i@vw)g%&aF`k631CF|d!JXLt^KsGY7^3VOQM-8IYN zi#b@9ecka&(LU6ShWz2nJmM!9=nrF<{ zVLJTyHhjL5mg%^-1y4EIWP~9VR%myoSz6}BWH&7{_RQ`J!t*5e;D7A-QfzthY0rE5 zS%`75^*vo``#4M0)a?{!lahtZ9puwl$pVe+N(w)RHp;~iYBM;BrWxrm_qBY4lO3st zWA!yrU%98hVd4xK1YCX78N+o%xOUIAXRd8wpLR%5q3QQPU5!#B-R)n@9v=yPP z2yI1ZD*zOqliG^ZR;0EfwH2wYNNwc?bM!d}fF_>fZ>gzu;tqx<$d@3P6+ zcx-(Edph7!!*f-{r3UF<%kD11zn@Msm?4eLJs<$sQ??CCW^{V7_uk`!5tuMy565=cdGm_Fb=2L?es%f$^e8{NGc0UFyNRg3_Hc7 zgaHC~=&5mcrp4`-io1$1JQhNtM2SE+SF=%`c79@j>WOoQ*{nF59>X*t-weRM{(&=J zPrF*5)8hCbE19oT0GozIiHi(!xSnb&a5y&vw69aAx1cem6tqdEzD8Ox1x0P9x3{P< zlPGFUBfU)!#lsY}X1)HR#tf#Y4VU&dLokCWYD4EPq^Nsd817aYm2{Ag%M4C= z24fI|ce7)4hI1vG+$!H^>e$Dbm2f6=>fHU)%=vn_|K&&W&=ap;*n9Ba-#`82{)7SoYx!E)vQBF%dGZK^ z{oMid!b64|)tQqLvMVU(8iRk=F?iB>gvP*@^3K)qKWoQ7cYZ=0+iKjoI{vD5>^qNA z$F`t$u8#jvJN~8fQ|j10`#D#~|EV1hoS#w0wybroj{j9VzV19m9ox#(xjO!D?f7ly 
zaq4(el=7KHDXGJVW2JCzVjX{Gt>X`zC#d6!U(})7G7`n3{&h&yuZKL3oIUCpi`Pm+ zUfXj$=J|`xtJL$#+_kpnn!M(F&TG{3%H*}S=bF9dLk9|9t7Zi&v)9_5Yx-lqQJ^$VrQqLRXFky}PatD)y8{;rxjrr9MCJ8sjVZ!3DgAOJMH^yPY;;{ER zm?T`8!|b2^jzva*UcJ!;i{OLezVfUu_ifetZF7kZ_uFqW z?s=9RV7B>szinhZB?na6Ky@3;h-HhLXvHi)hC6AX2pCO3=v}vRuw>V*>^70Y-nF_q zhf@a!^~R2o4gb#y`2LIFaiu9ktfS<_&yRhA*cOK@X7j4SW}-vKh|sE-nE~jS;Q(O*n$Xld z3$3nrYsbuH-RkP0NxN=fI%Xyr4a#McS!H0nHZ+@;D`>`VH?$TS9WyhSt~{$Gsbp2;woxh2-l`@7TZ#DScS9dGfEg+Tyf?+IJ9;XPjb$0mz5iElWq zHa~!jYqbS<^wesL5|Ce8Z3z~%W*hes@+=py0^9xk5Z?1z&xR;~wc27lly0`c^=n9r zRvVAWIE2I!(CQc9m6%prgyMdyEy2r6%{J*J#E0w{oAha%W#|(hGR8yTrVroqh!3CB z*!vQxyY*}Um*3lM5sK-pehFUZZngzjh}vx;CO!fm-xyoZhCcBT#CYi1>=(k7dD6z= zUU9phjEE0NZ{j12;4cx{&&D0kCJ9*!5%+*xoA@G5^B@wq*KYNb5y2Aq30VtKNaGwu zc*VM9FUH+gt1XGi*kbOHSj7Ubnlzt{rB5)&K3%JE5ECqMM@-`wkL+4L5*nAp!?%HE zzl2jhNF-iGYW4Gd;v)%gkI?ED#?(jbk#Qsm9_coog$yTaAO41e0Y{P z)@v^KfDjzGaOT7>uvWfs;<`pcGB0yOGN=9rzF9r5&J`XGErel!t#|N?3Bk?jC}aK)=h-qorg_c+ z8>Zv;?!)JMX_1bn_uwfnn~X4|vKHFCS(X<0bh4io8GCO3Maj7m9R9~VP;!T>XE}fN zdFF$!|I?+ikF!)x-QM(KQZUW@K|Y%oEVRgOr0}z@qg)Q5GJ~UNno*qjujeD2?8rPE zo3DxaD!BcIi8EvnaP!S(jIa?A*1fQvg|#895n-)JYeiZsFkxs#S}W38k=BZ|R;0Be ztrcml08o5RSu4s~QPzsGR+P1(td%>=x3h(*J+I9b_G$(pfSk%8d?)1?{kMMn6E?XR zk85ARo(;IN@O%?-WkLG4vWLs?AEvVmX2@dmkMm*-d2le#=dfgUc3Jo$>#z?AVd1)I zs{zLGlz<&h7e!-7YZiqX+yTO->%&?0pv(^k<9L?cPk#nW8GwiqNoipP1{`yVVQ)Gq zV1U3Kc53{++4TO)(}#vIIMa|QQ6dq}&1{sXou4?McH;bDHlJS1PGOpmZw6rB{vcRj zr`@d2+4S@%D_CGt0Go!>0v8$Na5dGnz~S5w(1A^z-h#%MQqV4$`Wk7&6cn|a-rl0d zOroeQjr2A_3=dP(mi79J8Z(%pc3j%q48aVhs2%C|7Im}4IkpaZ&erA~g2Wo$+pzBU zH?KP@V`ytse^Gaq#<1t5(Y#R74C#4oH1EF@bF0t$SV@%|@Q0?s;uAx&uYs^V(=+5{kO#wb7_L6m`#Q zqYYmp|qnT0EJ+F;M;-jd0UK@=fNm2K_HW~qyqV9QZG`=c#d2>GQxAILOWH}5s{O8ub-!rQOd*%Mtv3n)^RsW8papZz3{+bg7>^ct?QlJwRtAX=TwF)2krXRnkE|E;2af z8H_;;-p@|W8P1Jta<6!snPVSsUci~mx%co7bMMpP;m7Z)V=mvga`5D%zy9w1$1gqo zoi9E9 z*xw&OFFa(pQ=T~)A-jZfsWJFx6@zEIr)Ui7Qr@LH{(I&4d*0Wm{&}|io;!*!5BpOshUPRsj^^C=9 zts!shxf=80CGR!rd2R06*mFf*iv#a<>UnMQ+Sqf&UW=gzg|Ah!!nN6JW6u?REl#|f 
z)brZ(wXx@lzZN6!7WKR~e{Jl!BCy4|cbj@%o4_{qTrt?UAJ+tV%Kf#Hj%>K zwY)lqQwIn2tFF#P4vB^AercgJ8bK*_bV0U40Yav(R)Mdxro&vREJKEu;NVgt+|pzb zBm<}}HkM5xqf}ZPFuYC&!F+o<8=-{Z!fB(~Hn|F0omU+;7aclAgjUAP4Zy|>2M80; zgl6VhT6KkQ9J4y>mRAp5+I0i7F>}f2P_CNHG6Um{q1Ac0gy#HqLu;Y2F>{0I%Cl0E zN;ZauQ&%pN657huQ9^T?UKbjdTXN&Pe>ls|Jjm(TQ>!gXKz?ntC0Nj!ZGtD{Ss`Hsw)+Jd-t$_|Y81d)Z8087H{0O) zHKawWO~hmzQeg>b^$YPzOsg$Ialh4;;N_)eo8k%ap*qH<0vczf1L8x)cnI9|5pYg? z1cJssP)OaaXG6IB-foLfOmFo|@H%(1EyP09Zj&+b5eE3i*m_n6#77w8p=-0BhAZ== zO~Ae4c0UynACTU}hmPPc5!%nj9nU5SSqqWiK(0-Ek)U}HN!)9<`l*OuiGqZzg-Fvl zM-g7JZrO`*x7BJ(VluW^a1yIn;#HI8v#|;Y<~X2hH4bBfCGLo69OIE)%SS@vl5l(* zX!c74<%2}wRiswGARs=H5cdeJembT;Vot`9BzUCTcos68tbOjidmYE;Q$3?z=O%z6#hEE zyhseEQ~WC*^Yc;uiwtTAbl7Jn*R&ofohqItQO3`NK2xx0!c#4!$oQ#DcdscCh%k{F q%1oR@Foz0qO6ou<7+#cwyZARZuuV&6g(*-lnpk*OufB47;QbG_zQ6VW literal 0 HcmV?d00001 From 06a2d955fceb45795e42ecfbc1e5c4ecfaa661a7 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 17 Sep 2024 11:09:19 +0100 Subject: [PATCH 2/6] In QPDFParser add a limit on total number of errors in one object Currently, QPDFParser gives up attempting to parse an object if 5 near-consecutive bad tokens are encountered. Add a limit of a total of 15 bad tokens in a single object before giving up. 
--- libqpdf/QPDFParser.cc | 13 +++++++------ libqpdf/qpdf/QPDFParser.hh | 6 ++++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index a9d3e04b..3f176c5d 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -469,13 +469,14 @@ QPDFParser::fixMissingKeys() bool QPDFParser::tooManyBadTokens() { - if (good_count <= 4) { - if (++bad_count > 5) { - warn("too many errors; giving up on reading object"); - return true; - } - } else { + if (--max_bad_count > 0 && good_count > 4) { + good_count = 0; bad_count = 1; + return false; + } + if (++bad_count > 5) { + warn("too many errors; giving up on reading object"); + return true; } good_count = 0; return false; diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index 1eecf975..7b92da25 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -83,9 +83,11 @@ class QPDFParser std::vector stack; StackFrame* frame; // Number of recent bad tokens. - int bad_count = 0; + int bad_count{0}; + // Number of bad tokens (remaining) before giving up. + int max_bad_count{15}; // Number of good tokens since last bad token. Irrelevant if bad_count == 0. - int good_count = 0; + int good_count{0}; // Start offset including any leading whitespace. qpdf_offset_t start; // Number of successive integer tokens. From 44a13951940def9699b2c169e8a8eabd35973934 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 20 Sep 2024 13:58:47 +0100 Subject: [PATCH 3/6] Refactor QPDF::Xref_table::read_entry and read_bad_entry Return results rather than using reference parameters. Fixes bug in #1272 where parameters were not reinitialized when calling read_bad_entry from read_entry. 
--- libqpdf/QPDF.cc | 54 +++++++++++++++++++----------------- libqpdf/qpdf/QPDF_private.hh | 4 +-- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index f263551a..e9802b52 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -832,10 +832,6 @@ std::vector QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) { std::vector result; - qpdf_offset_t f1 = 0; - int f2 = 0; - char type = '\0'; - file->seek(start, SEEK_SET); while (true) { @@ -844,7 +840,7 @@ QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) auto [obj, num, offset] = result.emplace_back(subsection(line)); file->seek(offset, SEEK_SET); for (qpdf_offset_t i = obj; i - num < obj; ++i) { - if (!read_entry(f1, f2, type)) { + if (!std::get<0>(read_entry())) { QTC::TC("qpdf", "QPDF invalid xref entry"); throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")"); } @@ -890,9 +886,13 @@ QPDF::Xref_table::subsections(std::string& line) } } -bool -QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) +// Returns (success, f1, f2, type). +std::tuple +QPDF::Xref_table::read_bad_entry() { + qpdf_offset_t f1{0}; + int f2{0}; + char type{'\0'}; // Reposition after initial read attempt and reread. 
file->seek(file->getLastOffset(), SEEK_SET); auto line = file->readLine(30); @@ -910,7 +910,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) } // Require digit if (!QUtil::is_digit(*p)) { - return false; + return {false, 0, 0, '\0'}; } // Gather digits std::string f1_str; @@ -919,7 +919,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) } // Require space if (!QUtil::is_space(*p)) { - return false; + return {false, 0, 0, '\0'}; } if (QUtil::is_space(*(p + 1))) { QTC::TC("qpdf", "QPDF ignore first extra space in xref entry"); @@ -931,7 +931,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) } // Require digit if (!QUtil::is_digit(*p)) { - return false; + return {false, 0, 0, '\0'}; } // Gather digits std::string f2_str; @@ -940,7 +940,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) } // Require space if (!QUtil::is_space(*p)) { - return false; + return {false, 0, 0, '\0'}; } if (QUtil::is_space(*(p + 1))) { QTC::TC("qpdf", "QPDF ignore second extra space in xref entry"); @@ -953,7 +953,7 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) if ((*p == 'f') || (*p == 'n')) { type = *p; } else { - return false; + return {false, 0, 0, '\0'}; } if ((f1_str.length() != 10) || (f2_str.length() != 5)) { QTC::TC("qpdf", "QPDF ignore length error xref entry"); @@ -967,18 +967,23 @@ QPDF::Xref_table::read_bad_entry(qpdf_offset_t& f1, int& f2, char& type) f1 = QUtil::string_to_ll(f1_str.c_str()); f2 = QUtil::string_to_int(f2_str.c_str()); - return true; + return {true, f1, f2, type}; } // Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return -// result. -bool -QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type) +// result. Returns (success, f1, f2, type). 
+std::tuple +QPDF::Xref_table::read_entry() { + qpdf_offset_t f1{0}; + int f2{0}; + char type{'\0'}; std::array line; + f1 = 0; + f2 = 0; if (file->read(line.data(), 20) != 20) { // C++20: [[unlikely]] - return false; + return {false, 0, 0, '\0'}; } line[20] = '\0'; char const* p = line.data(); @@ -1002,7 +1007,7 @@ QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type) if (!QUtil::is_space(*p++)) { // Entry doesn't start with space or digit. // C++20: [[unlikely]] - return false; + return {false, 0, 0, '\0'}; } // Gather digits. NB No risk of overflow as 99'999 < max int. while (*p == '0') { @@ -1019,10 +1024,10 @@ QPDF::Xref_table::read_entry(qpdf_offset_t& f1, int& f2, char& type) // No test for valid line[19]. if (*(++p) && *(++p) && (*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) { // C++20: [[likely]] - return true; + return {true, f1, f2, type}; } } - return read_bad_entry(f1, f2, type); + return read_bad_entry(); } // Read a single cross-reference table section and associated trailer. @@ -1064,10 +1069,8 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) first_item_offset_ = file->tell(); } // For xref_table, these will always be small enough to be ints - qpdf_offset_t f1 = 0; - int f2 = 0; - char type = '\0'; - if (!read_entry(f1, f2, type)) { + auto [success, f1, f2, type] = read_entry(); + if (!success) { throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")"); } if (type == 'f') { @@ -1585,8 +1588,7 @@ QPDF::Xref_table::read_trailer() { qpdf_offset_t offset = file->tell(); bool empty = false; - auto object = - QPDFParser(*file, "trailer", tokenizer, nullptr, &qpdf, true).parse(empty, false); + auto object = QPDFParser(*file, "trailer", tokenizer, nullptr, &qpdf, true).parse(empty, false); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. 
diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index b055763a..fa14fdc3 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -292,8 +292,8 @@ class QPDF::Xref_table std::vector subsections(std::string& line); std::vector bad_subsections(std::string& line, qpdf_offset_t offset); Subsection subsection(std::string const& line); - bool read_entry(qpdf_offset_t& f1, int& f2, char& type); - bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type); + std::tuple read_entry(); + std::tuple read_bad_entry(); // Methods to parse streams qpdf_offset_t read_stream(qpdf_offset_t offset); From 21f176d374dd229401b5de5bf8a4cb89b10e1731 Mon Sep 17 00:00:00 2001 From: m-holger Date: Fri, 20 Sep 2024 14:20:34 +0100 Subject: [PATCH 4/6] Add sanity check on trailer /Size entry --- libqpdf/QPDF.cc | 5 ++++- qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf/issue-fuzz.out | 19 +++++++++++++++++++ qpdf/qtest/qpdf/issue-fuzz.pdf | Bin 0 -> 81 bytes qpdf/qtest/specific-bugs.test | 1 + 5 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 qpdf/qtest/qpdf/issue-fuzz.out create mode 100644 qpdf/qtest/qpdf/issue-fuzz.pdf diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index e9802b52..5a38ec94 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1057,7 +1057,10 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) QTC::TC("qpdf", "QPDF trailer size not integer"); throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer"); } - + if (sz >= static_cast(max_id_)) { + QTC::TC("qpdf", "QPDF trailer size impossibly large"); + throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is impossibly large"); + } table.resize(sz); } diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index b66ba83f..25e4dd70 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -55,6 +55,7 @@ QPDF invalid xref entry 0 QPDF missing trailer 0 QPDF trailer lacks size 0 QPDF trailer size not integer 0 +QPDF trailer size 
impossibly large 0 QPDF trailer prev not integer 0 QPDFParser bad brace 0 QPDFParser bad brace in parseRemainder 0 diff --git a/qpdf/qtest/qpdf/issue-fuzz.out b/qpdf/qtest/qpdf/issue-fuzz.out new file mode 100644 index 00000000..456485b2 --- /dev/null +++ b/qpdf/qtest/qpdf/issue-fuzz.out @@ -0,0 +1,19 @@ +WARNING: issue-fuzz.pdf: can't find PDF header +WARNING: issue-fuzz.pdf (xref table, offset 19): accepting invalid xref table entry +WARNING: issue-fuzz.pdf (trailer, offset 36): unknown token while reading object; treating as string +WARNING: issue-fuzz.pdf (trailer, offset 53): unexpected > +WARNING: issue-fuzz.pdf (trailer, offset 54): unknown token while reading object; treating as string +WARNING: issue-fuzz.pdf (trailer, offset 58): unknown token while reading object; treating as string +WARNING: issue-fuzz.pdf (trailer, offset 72): unknown token while reading object; treating as string +WARNING: issue-fuzz.pdf (trailer, offset 36): dictionary ended prematurely; using null as value for last key +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake1 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake2 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake3 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake4 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake5 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake6 +WARNING: issue-fuzz.pdf (trailer, offset 36): expected dictionary key but found non-name object; inserting key /QPDFFake7 +WARNING: issue-fuzz.pdf: file is damaged +WARNING: issue-fuzz.pdf (trailer, offset 32): /Size key in trailer dictionary 
is impossibly large +WARNING: issue-fuzz.pdf: Attempting to reconstruct cross-reference table +qpdf: issue-fuzz.pdf: unable to find /Root dictionary diff --git a/qpdf/qtest/qpdf/issue-fuzz.pdf b/qpdf/qtest/qpdf/issue-fuzz.pdf new file mode 100644 index 0000000000000000000000000000000000000000..288a6b5c2898c8c9c44be73b6e0b3bdee5e2cb1b GIT binary patch literal 81 zcmZ={XDBX7EGnreN=@T6u;4M|;W1M%R7m40DN4-DNiE{yva!+C56-Mg Date: Fri, 20 Sep 2024 14:56:57 +0100 Subject: [PATCH 5/6] Add additional fuzz test cases --- fuzz/CMakeLists.txt | 3 ++ fuzz/qpdf_extra/99999a.fuzz | 63 ++++++++++++++++++++++++++++++++++++ fuzz/qpdf_extra/99999b.fuzz | Bin 0 -> 81 bytes fuzz/qpdf_extra/99999c.fuzz | Bin 0 -> 13650 bytes fuzz/qtest/fuzz.test | 2 +- 5 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 fuzz/qpdf_extra/99999a.fuzz create mode 100644 fuzz/qpdf_extra/99999b.fuzz create mode 100644 fuzz/qpdf_extra/99999c.fuzz diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt index 73d886c0..adb68cd4 100644 --- a/fuzz/CMakeLists.txt +++ b/fuzz/CMakeLists.txt @@ -142,6 +142,9 @@ set(CORPUS_OTHER 70306b.fuzz 71624.fuzz 71689.fuzz + 99999a.fuzz + 99999b.fuzz + 99999c.fuzz ) set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus) diff --git a/fuzz/qpdf_extra/99999a.fuzz b/fuzz/qpdf_extra/99999a.fuzz new file mode 100644 index 00000000..026c7427 --- /dev/null +++ b/fuzz/qpdf_extra/99999a.fuzz @@ -0,0 +1,63 @@ +%PDF-1.5 +%€€€€ +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj +<< + /Count 6 Ri + 0K/ds [3 0 R] + /Type /Pages +>> +endobj +3 0 obj +<< + /Resources << + /Font << + /F1 5 0 R + >> + >> + /MediaBox [0 0 795 842] + /Parent 2 0 R + /Contents 4 0 R + /Type /Page +=> +endobj +4 0 obj +<<444444444444444444444444 1 Tr /F1 30 Tf 350 750 Td (foobar) Tj ET +endstream +endobj +5 0 obj +<< + /Name /F1 + /BaseFont /Helvetica + /Type /Font + /Subtype /Type1 +>> +e„dobj +6 0 obj +<< /Length 6 0 R >> +stre444444444444444444444444444444<<>> +endobj +xref +0 8 
+0000000000 65535 f +0000000015 00000 n +0000000066 00000 n +0000000130 00000 n +0000000269 00000 n +0000000362 00000 n +000000ÎËËÉßÏÏÏ00 n +0000000500 00000 n +trailer +<< + /Size 713115528178535 + /Root 1 0 R + /Info 7 0 R +>> +startxref +520 +%%EOF \ No newline at end of file diff --git a/fuzz/qpdf_extra/99999b.fuzz b/fuzz/qpdf_extra/99999b.fuzz new file mode 100644 index 0000000000000000000000000000000000000000..288a6b5c2898c8c9c44be73b6e0b3bdee5e2cb1b GIT binary patch literal 81 zcmZ={XDBX7EGnreN=@T6u;4M|;W1M%R7m40DN4-DNiE{yva!+C56-Mg`qwNJR~7cfk4tZ z=l+yRlDqBh3_TE5V5{oZt$WWs=iGC?d(KfLn^}m_xDk$g`p+NzT{uJ&22qJuItr&! zB&@8|o37KYKXEIB!tFYt2@}3=lRZMIktCXFk|Opanq`;=wCwZ!mvnrRKuqS<=?yD*!sYTDo&x~2`itLxSfeR|@I z@ho+CJX;@rH<=jXfu1~rr>Ft_e3|*>kxtiXcbTN@OPA=SOByOvlZ=eK@uQGEG7{km zmk34v@<0Fhe?pNBrhjTE1R_Jd3#C#aLX??u+gtG34Z?1rA(JCI^vJ*sFzrV!mTTAzXe>I&*E;L25Ei*Co-+gr?G@N>!a|2}3txYGPbf$Jz2&Pp?f< zRkvDI)r6(0mZqv^621-iPT)ObOt$S+PRp*})hP3BCUj@7Kvx5XFir6XLzp`JK|6F& zb!gXI_@?O~;V7ye;R{|9mMg`tz?|j0+Z9YzK{Xizm4f@I%Xk;y0E)Rc&`cHtCbdOb zbt`pe)_WueVxu7U2{Eb0zO5vF3u&}nAVVsVl5;!WX&W^~Dmx=n+@Q8oV#v9K9+3}x z;ti>%T*0^qQQ|>1L?Q&6K-3{LDuI>E0ZV|K4zZ+)E8lwk%lVgcmeY_Q0(-PjWx53> zp^ss{Tsb2$TU_gyc76M^_$&|uWT-3@V2d2Gt18ixx<<_qxd!cEhuFiyBiM3;?KT3^ zErWsq%T;ip6uhO9KP`3HKHIKWYM_1Z6i5_aFKqHJcr>caG@Gz7l`U-QDPd0sfdcQe z*?smIet**qZ zmT=FXso_Xuetm(%7r-GnHCP0WlyG;gI3@QOcQH}cP8-NbmEB}PiU5B!c9SPc8*~87 z5U_w)!4Ap(4kmU8QyT07eh7z$K{9L*4f}vM=xbyzQa+lPJFIb1{1;&$BxztcYO0AO zd!Wl9IkA8UkQi-%MBp|w7uhrEWfIIDA@u8$H;4#WwCYvB!EAQi_0#S#fXfc_^Dk+F zIU;CUb-PXlybvsnnaMu*NG)O1k@LU`Y9v6p2u*4P5UK{C1n|Le7hH+5a10(Q-tx4pyo^cBV(({?+IYpKd&T|Bdfj z?|*Y->$kpOzJ2}R`@iw!4=#P<-MQG?Bc+*W_26yy$G`XQfArm7`3}? 
z_X_{^58r?K$;if6|LW<%ym;e*=y@4mBB4ZS(H^X{8}|Jy77y>t}*{qKDH zqd%nOFa5zt=%X+Gm-mZ*{U={bkG%1vktB^gL!c~36Ofq`joL;HK-aPr(0D)xeN#?N zhH4lirDuYUDtaRVo`_-gti#+8_2m`xL%=$8mB?W5KGb6F2gU5$Y0CWt7k$s^z?_A{ z(?*Hd%*(>2|j zf&uFv5<8zgB+^^&9s`V7>ijEUBoa%L!~{W9MG#T-{#`>27A%u`%4b89vj&g>sFGv| zDoxHeI3O`9O}RBPE)Wf8D6ZUROokHbV+sy@-U^ypA8cwqUdpORxN-3~)q23J=B8AAg=d(w0k8f0;d#W_{ zob53@W(t~h&RHCPh*W%Z4yh8JBA%r12RPCMcxnO0L~L2LRNc{4y#yGyRDnMO{@@w3 zbLhVOfS`JrIM6vfilTaT>g7YP4bgzOCZx)Rnj@$w)Btd; z63bEpLiVL9ge-}adw?q&P&PQmkRb8R3la$%8cAeZK|OCf`xFW;ORh=^nUxhAqTePA zkth%v#{_-0D`;puU?ZbYZViodaSPCGAM2#~5Ui8>FU2p-kPyFA7O_2o@)r;cxXk7l zqPq{vT!e0JWRUI`T1sDI1#9JwmMh>OS?+MI3c~{qk=y6?E<`TZR_%O(C69&hA*kgW z!*gW+e82TP*^4b?pUXwbHlYVn(1J#RiO-P45aaO8^S01ZC#au%57Fd>X%v!{x+!Vo ze7y*b0aphG>qS708+|lnJs{T({Un$D_@Q1iF2h60ZTI*+w_7heAkz6J z;L19LbCm6RbEes;`wajGoCe@rfKG6Nk;gsMQ*7Y+%Es&S@fcxa!U1{uXo#UKBxDcX zga8E~T|^q;qTeGZQ6B+H6XqY__zCni;=iP{T`!6M0-orF(&ujEWo7@PpzJS)zS1Xq zCVJ^)0z$yu=j#Hpn4S4vOq~yyUrIHNpl<%m3N4*MAC8RFz{yDJ07E~06takp{`qSG zdC^1yR<%fq&6d^(38>8EAiZwlhc`aO1rE6 zdU{ywkSY>8%v{L?K6!n7OTSTT#OCMS?oMQK)*Cx*Yn{&ZwS#zUH2z@m z{)xGm$&808%ihYUz3g0>-?}-yHM)Of?=F;D=KOprS#D0PnMbKrd*jyL73*^4WSpke zW_2=pb#i0VENy1Sua&3wL&;cWKQl8`-c>i!wWZauvB`2pTc5t_OzzxVYupScwXN9ga1N`I*vqV zGfU0`OSyfuu{HXjk=gW&ok@GLxRuUEi%ZnWm7~k_-l>}o)fSZKjYVhKWAcS`tq2wJ z%}HbqUbl5fhp8k9DX(c|o9t1DM^KjkYOX-mi@43c9P29_bwBkqQh z44@?O`3DL3*E`(ZE<0Pcr)%jsNRpNDV}XAR8Xwl1zJuHjGEJtSdx` zFg%DKd_m+ySVWemK7eTulVJG~bL^z)u54%88&@V1`}yep*vYuDannv5#O%p@bZYnV zc7Aq!>-xg#HRI;3+2YMys#eObO7@*vC-`_H$L;%2*&Ubvec}& zk03)0p||(yWjD7tOEk!Zuwnpo>b;W*i!29VDI~0TQHV%0VKE6Vf*<3!0J&)ss8Pg6 zdEy!Z{H!<2UxG$(g$rxkT!;W5Rx`keIv<8-O^FC-8hkcpnsP)Onq>k>#FIrRNSyjj zK8%Qdv$R7Sj(ZCb<~lbEsbnBS2pS`@kSzU;xonp5#?&|DsWCw;LXV~uw^YhnsGf-H zMiQ>scJa@TJY5LQknD(-y++BaKxmz^Tqr+NvaBI?5R>vKCeCO=Hk~Eh`?KSIS{Mp{ zfN>xzS$~1~M_FLOc*;nF!W&>OzhqmIUowEgE*Xp26=NN{Vgh1UTrB)D(8}b>TDjc$ zWp7YQKf^TV3x@jgvuFPDIGW~^^wv+4y=Bp)Tmlo5JbHif!;k)n^GI22_a*#$^%p%W 
zCl@J7GI)ZaL$G#0n6kl4J{zScC?4xW#SzI{PBPmZaLGvP9ZJ|kUYPLQ@`N*p+3cpb@^~*^N%P1!6Y_;ps=b*>y5? z!YxbR$O6@fO)`6S$p_V~xDKNbQhn9h1Nyx34LhWv%Jfzz)sFD2CKCTr;-1MCr3}^x z*0izh%^97UAyG9B5q%xj9oOIs_IzBUP(%XL2)t$($RU10lWvW9t#kgNlk0rr>(bqU z z3HUE;s<2U^h+ljV5;~nqv6U;x8G)-I$*pV)c*WsBGMyIkc*MBOlH8#;MwkzRG!x{W z`bk2b_Ef5$q>HMA$#}S6$U>c0`M!W#6*l1K#XT?ZV`n@XD(?CG&;tgBz{)@xl6}Eh z67OsQ=dhaCcQ;rbg6BYas$`fA6qTpfpq8XpfCzPeRHYOURzjBrh!}JnEL4>JDAaBF z{rcwM2IA)^hr_&!pd9bC=g8-?RtwIU9skA^%sMwxhc5*^hQ%bO# zd~L3+qe)g(Uq&N=-#<{>P+{}o$Nx0=YfOiSv*IujOMdkoj g>)MbeXu2M38!jVG7#FV%h7y36s11giR8bxLzi)vX#Q*>R literal 0 HcmV?d00001 diff --git a/fuzz/qtest/fuzz.test b/fuzz/qtest/fuzz.test index 02dbc98a..51a35532 100644 --- a/fuzz/qtest/fuzz.test +++ b/fuzz/qtest/fuzz.test @@ -11,7 +11,7 @@ my $td = new TestDriver('fuzz'); my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS"; -my $n_qpdf_files = 79; # increment when adding new files +my $n_qpdf_files = 82; # increment when adding new files my @fuzzers = ( ['ascii85' => 1], From 0aa6b67eea993fa75c8bb3c6548ec975f8383154 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 24 Sep 2024 01:32:32 +0100 Subject: [PATCH 6/6] In QPDFWordTokenFinder::check limit the token length Tokens longer than the target cannot be a match and therefore there is no need to read to the end of token. 
--- .idea/cmake.xml | 1 - fuzz/CMakeLists.txt | 1 + fuzz/qpdf_extra/99999d.fuzz | Bin 0 -> 5408 bytes fuzz/qtest/fuzz.test | 2 +- libqpdf/QPDFTokenizer.cc | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 fuzz/qpdf_extra/99999d.fuzz diff --git a/.idea/cmake.xml b/.idea/cmake.xml index f0a93aba..5762463a 100644 --- a/.idea/cmake.xml +++ b/.idea/cmake.xml @@ -2,7 +2,6 @@ - diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt index adb68cd4..a4af9256 100644 --- a/fuzz/CMakeLists.txt +++ b/fuzz/CMakeLists.txt @@ -145,6 +145,7 @@ set(CORPUS_OTHER 99999a.fuzz 99999b.fuzz 99999c.fuzz + 99999d.fuzz ) set(CORPUS_DIR ${CMAKE_CURRENT_BINARY_DIR}/qpdf_corpus) diff --git a/fuzz/qpdf_extra/99999d.fuzz b/fuzz/qpdf_extra/99999d.fuzz new file mode 100644 index 0000000000000000000000000000000000000000..4504412f604a5fa072b153bf5f71488eabc137a8 GIT binary patch literal 5408 zcmY#T_^%q^;-+h;XHpDfaH$@D)4zyIRn;@EEI&E1Br`wHN+G?tJTuSCBw0Z>#U-=2 zASbcXEx#x?vBcKU(!$)r0wElfTAZI#3ese2Xkr2sch1i(02-1sKPR)K(kHbnHAfj} zn}Tkkf^L8=P(Ikv%{2%}`}zAjh6cL=Sx$~2&K?T7#V)R4uFkH?1u1FeMVTe3KoS4a zl7i9_x6GVWF?^ za`D`;BjtA*3@3$he0AV<%Q+||b+D;w;$`JBG0v@lXUo~LsjgeKpZ~vc|NqC&wL5>G{~iBsb-=r4|6lL=@!#p@ z)ZP34RenEyxBl*bq4!tKT;t!?UBB)sod4A~Jmjy$|Ge(RIi2_a7yQ4=|9{i(_xt~y zpZ|XU@5gyN{=d7sP7BEV+y8&Z>UaBR|699D|Nq~A?U~=NhVwsl{v*6^*Z<}6W&d}z z@h{9Uers@dZ{-c!{9Dek;;Cv=``dSC{%+*AuMPiFs#f>?t9k$adv|jmEtKirb@Isk z@4x*|DD%h0w!E2Nc|@H>{`aD{Me;xYaMxDcIC6ZO&MCX!;#RFsCiW}VRh~G$-@Wcx zXe7U@?53~Z{pA&EHM$p6b%upr72CN#=!cNf6P@v@^3FXQ?I$ON}?-n+gTpp z&^#;sfW#}CYGm5Bas2$r@oHh0uJbo1-jCdy#4ksb9bItwHWyid0R+g>NrWb46qEId z&@nia2!KO{aJWE~*%&&yxH&pG8=06I8acWeyP7+j7`vI68@QTUm^hicnc6`W0;#JE zEC>!2SjJQ^gEAvNy=7n^rT8MG7U&udqFe;Z8UKAk@ni~BVsnKyDBT(B;Z1$u#LW2t zuNuOVIDPgH7%osr5EBG&s--|;v_wLRqajx!v3>jc70LaWOlZm&Es-EOh&m)NPzR<& zGQwUWjaE-26o8}E6J{xaTLfpoj#f|5Km&&m?f|4rW{5YKN2@2C`3Fg2*px`vsweuj zl957`>P(DW4DyzqejtW8K7o1U0rEWa>fE;PKke(;Q;+S9EX_-S4(g=lrGN&4K*K*E z+R)I%*nkTp_?J?k5X7Yb1cu;|B?!|TG{^*DS|FH~5GK%e6R0bI!%`uY1*yRCjPz7} 
z-_(@MM5p`;g=hnyw5g@3f`y4uEEiC+C^fG{!5FMtA2b?QT9lk>4AdJ?l%E`&S`w`f z9Pm@n_sj*F;uOmT9LUml&d)0WYARL$4S)rKgrNQhnF1LQQvh470JghWLEk$wrC1>v z?6M%PSOuU7r9ev!k+d4ZwL*0|Czd4U 1], diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index d48abd3e..7f7c6d9e 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -47,7 +47,7 @@ QPDFWordTokenFinder::check() // Find a word token matching the given string, preceded by a delimiter, and followed by a // delimiter or EOF. QPDFTokenizer tokenizer; - QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); + QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true, str.size() + 2); qpdf_offset_t pos = is.tell(); if (!(t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str))) { QTC::TC("qpdf", "QPDFTokenizer finder found wrong word");