From 3a1ff5ded9cf22e114991b5a49857b54f8e56b02 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 28 Jul 2017 19:18:57 -0400 Subject: [PATCH] Add option to preserve unreferenced objects --- ChangeLog | 11 ++- include/qpdf/QPDF.hh | 6 ++ include/qpdf/QPDFWriter.hh | 7 ++ libqpdf/QPDF.cc | 16 ++++ libqpdf/QPDFWriter.cc | 18 ++++ manual/qpdf-manual.xml | 21 +++++ qpdf/qpdf.cc | 10 ++ qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf.test | 18 ++++ qpdf/qtest/qpdf/unreferenced-dropped.pdf | Bin 0 -> 799 bytes qpdf/qtest/qpdf/unreferenced-objects.pdf | 105 +++++++++++++++++++++ qpdf/qtest/qpdf/unreferenced-preserved.pdf | Bin 0 -> 927 bytes 12 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 qpdf/qtest/qpdf/unreferenced-dropped.pdf create mode 100644 qpdf/qtest/qpdf/unreferenced-objects.pdf create mode 100644 qpdf/qtest/qpdf/unreferenced-preserved.pdf diff --git a/ChangeLog b/ChangeLog index 026833d4..1a565ff9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,16 @@ +2017-07-28 Jay Berkenbilt + + * Add --preserve-unreferenced command-line option and + setPreserveUnreferencedObjects method to QPDFWriter. This option + causes QPDFWriter to write all objects from the input file to the + output file regardless of whether the objects are referenced. + Objects are written to the output file in numerical order from the + input file. This option has no effect for linearized files. + 2017-07-27 Jay Berkenbilt * Add --precheck-streams command-line option and setStreamPrecheck - option to QPDFWriter to tell QPDFWriter to attempt decoding a + method to QPDFWriter to tell QPDFWriter to attempt decoding a stream fully before deciding whether to filter it or not. 
* Recover gracefully from streams that aren't filterable because diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index ef9ce597..ad8503dc 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -396,6 +396,12 @@ class QPDF QPDF_DLL void showXRefTable(); + // Returns a list of indirect objects for every object in the xref + // table. Useful for discovering objects that are not otherwise + // referenced. + QPDF_DLL + std::vector getAllObjects(); + // Optimization support -- see doc/optimization. Implemented in // QPDF_optimization.cc diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index 2687cce0..fd35fecd 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -155,6 +155,12 @@ class QPDFWriter QPDF_DLL void setPrecheckStreams(bool); + // Preserve unreferenced objects. The default behavior is to + // discard any object that is not visited during a traversal of + // the object structure from the trailer. + QPDF_DLL + void setPreserveUnreferencedObjects(bool); + // Set the minimum PDF version.
If the PDF version of the input // file (or previously set minimum version) is less than the // version passed to this method, the PDF version of the output @@ -427,6 +433,7 @@ class QPDFWriter qpdf_stream_data_e stream_data_mode; bool qdf_mode; bool precheck_streams; + bool preserve_unreferenced_objects; bool static_id; bool suppress_original_object_ids; bool direct_stream_lengths; diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 4d5bf67f..d82813d0 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -989,6 +989,22 @@ QPDF::showXRefTable() } } +std::vector +QPDF::getAllObjects() +{ + std::vector result; + for (std::map::iterator iter = + this->xref_table.begin(); + iter != this->xref_table.end(); ++iter) + { + + QPDFObjGen const& og = (*iter).first; + result.push_back(QPDFObjectHandle::Factory::newIndirect( + this, og.getObj(), og.getGen())); + } + return result; +} + void QPDF::setLastObjectDescription(std::string const& description, int objid, int generation) diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 59e306fc..01309f43 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -58,6 +58,7 @@ QPDFWriter::init() stream_data_mode = qpdf_s_compress; qdf_mode = false; precheck_streams = false; + preserve_unreferenced_objects = false; static_id = false; suppress_original_object_ids = false; direct_stream_lengths = true; @@ -183,6 +184,12 @@ QPDFWriter::setPrecheckStreams(bool val) this->precheck_streams = val; } +void +QPDFWriter::setPreserveUnreferencedObjects(bool val) +{ + this->preserve_unreferenced_objects = val; +} + void QPDFWriter::setMinimumPDFVersion(std::string const& version) { @@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard() writeHeader(); writeString(this->extra_header_text); + if (this->preserve_unreferenced_objects) + { + QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard"); + std::vector all = this->pdf.getAllObjects(); + for (std::vector::iterator iter = all.begin(); + iter != all.end(); ++iter) + { + 
enqueueObject(*iter); + } + } + // Put root first on queue. QPDFObjectHandle trailer = getTrimmedTrailer(); enqueueObject(trailer.getKey("/Root")); diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index cd35718d..18abc013 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -838,6 +838,27 @@ outfile.pdf + + + + + Tells qpdf to preserve objects that are not referenced when + writing the file. Ordinarily any object that is not referenced + in a traversal of the document from the trailer dictionary + will be discarded. This may be useful in working with some + damaged files or inspecting files with known unreferenced + objects. + + + This flag is ignored for linearized files and has the effect + of causing objects in the new file to be written in order by + object ID from the original file. This does not mean that + object numbers will be the same since qpdf may create stream + lengths as direct or indirect differently from the original + file, and the original file may have gaps in its numbering. 
+ + + diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 99cfd3a1..65a6de1e 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\ --object-streams=mode controls handing of object streams\n\ --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\ --precheck-streams precheck ability to decode streams\n\ +--preserve-unreferenced preserve unreferenced objects\n\ --qdf turns on \"QDF mode\" (below)\n\ --min-version=version sets the minimum PDF version of the output file\n\ --force-version=version forces this to be the PDF version of the output file\n\ @@ -1030,6 +1031,7 @@ int main(int argc, char* argv[]) bool ignore_xref_streams = false; bool qdf_mode = false; bool precheck_streams = false; + bool preserve_unreferenced_objects = false; std::string min_version; std::string force_version; @@ -1219,6 +1221,10 @@ int main(int argc, char* argv[]) { precheck_streams = true; } + else if (strcmp(arg, "preserve-unreferenced") == 0) + { + preserve_unreferenced_objects = true; + } else if (strcmp(arg, "min-version") == 0) { if (parameter == 0) @@ -1714,6 +1720,10 @@ int main(int argc, char* argv[]) { w.setPrecheckStreams(true); } + if (preserve_unreferenced_objects) + { + w.setPreserveUnreferencedObjects(true); + } if (normalize_set) { w.setContentNormalization(normalize); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index bf227c7a..c64c63ee 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -280,3 +280,4 @@ QPDFObjectHandle found fake 1 QPDFObjectHandle no val for last key 0 QPDF resolve failure to null 0 QPDFWriter precheck stream 0 +QPDFWriter preserve unreferenced standard 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index b61882b9..45ed8c46 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -743,6 +743,24 @@ $td->runtest("check output", {$td->FILE => "bad-data-precheck.pdf"}); show_ntests(); # ---------- +$td->notify("--- Preserve unreferenced 
objects ---"); +$n_tests += 4; + +$td->runtest("drop unused objects", + {$td->COMMAND => "qpdf --static-id unreferenced-objects.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "unreferenced-dropped.pdf"}); +$td->runtest("keep unused objects", + {$td->COMMAND => "qpdf --static-id --preserve-unreferenced" . + " unreferenced-objects.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "unreferenced-preserved.pdf"}); +show_ntests(); +# ---------- $td->notify("--- Copy Foreign Objects ---"); $n_tests += 7; diff --git a/qpdf/qtest/qpdf/unreferenced-dropped.pdf b/qpdf/qtest/qpdf/unreferenced-dropped.pdf new file mode 100644 index 0000000000000000000000000000000000000000..071d8d7d4e2bd1c79e71a0ae4f38ebc7933f419d GIT binary patch literal 799 zcmZWnOODe(5alDJV2L{v64?QEcf0+OA`50R77@RYL}C$T(XrdX$kRr42LdZDzztY& z1TMffBZR~X*25*(A*!8mj6!>1`*pplSFfr+9>#abU1s&a{QmUEBH%!|d}$2^V2{O0 z)<6-NfIWNrN`ieXv?$6IM3E&`Y68u5OE@mqN&^A=VV>472i8y*EqRx(qnhiEs@e zUH$y+!~5;Mo42lQfAqHY==RyqS?}yub>sZXrLUK-Z_%|2=7oPoW?S`0)Us*8ejtn2 zQs;>P`$S>MT&=)> +endobj + +1 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 1 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 9 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +7 0 obj +[ 8 0 R ] +endobj + +8 0 obj +/Potato +endobj + +9 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 10 +0000000000 65535 f +0000000079 00000 n +0000000025 00000 n +0000000161 00000 n +0000000376 00000 n +0000000475 00000 n +0000000494 00000 n 
+0000000612 00000 n +0000000638 00000 n +0000000662 00000 n +trailer << + /Root 2 0 R + /Size 10 + /ID [<5c2381b459937c988290150df782f1fd><5c2381b459937c988290150df782f1fd>] +>> +startxref +697 +%%EOF diff --git a/qpdf/qtest/qpdf/unreferenced-preserved.pdf b/qpdf/qtest/qpdf/unreferenced-preserved.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b92fff5d9f769e67ea5b343e375e1b1bacea230c GIT binary patch literal 927 zcmZWnOODe(5alDJV2L{v64?QEcf0+OA`50R5fQ)OL}C%mqGPv%QO1qz4g^+QfE%#l z2wZ?|MhJ-&tcOdmLsUC)9A)f<+pp_Yy?RyMQGalc+-Fwz+utw$ECLRw*=wuU1N&Gl z3k?M9$9Y=86d2yca0>S1{Tm7PNX%sgQDn&?H59r>-Q1aEaT9(lv{L^mYZY1+ zx&n8@c`DOf9H|wSb@25F1wW*w_(+r(h_dS3v8>dxOi+kXJWzO39|(ALKolXPRLNLs z2zN-^z@8jaxsmfk7IXay+yHat3oT3hE`*kSnW$76Ql(2N&aBnv%DRY0Ki{#5Ltk`; z*T4Sw^l|;*_MIE+U!Ao*ynFt8(mDTA-nzJU<@?o}YxHW%+`7iMRb997(FkvJ9EnQS zP1%p+;;qzqBEUW=P()tL!G4(+4~r^qfoI|jCo{mAjh8cBkID3k|C^TUT8qBZJV2_j*S~ak7(7;nPwLie5a;00+s+5`K0C6m5_khPa_(P0c=5mgvG;z8V`2vS;Rw`CXgV}N1=yCo* zVxM3S`!MbCgtCClTpotZPr@Lep+mTnW`003lBH3P5ts0gdT2!u`XR%3*Q1`xSiq