diff --git a/ChangeLog b/ChangeLog index 026833d4..1a565ff9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,16 @@ +2017-07-28 Jay Berkenbilt + + * Add --preserve-unreferenced command-line option and + setPreserveUnreferencedObjects method to QPDFWriter. This option + causes QPDFWriter to write all objects from the input file to the + output file regardless of whether the objects are referenced. + Objects are written to the output file in numerical order from the + input file. This option has no effect for linearized files. + 2017-07-27 Jay Berkenbilt * Add --precheck-streams command-line option and setStreamPrecheck - option to QPDFWriter to tell QPDFWriter to attempt decoding a + method to QPDFWriter to tell QPDFWriter to attempt decoding a stream fully before deciding whether to filter it or not. * Recover gracefully from streams that aren't filterable because diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index ef9ce597..ad8503dc 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -396,6 +396,12 @@ class QPDF QPDF_DLL void showXRefTable(); + // Returns a list of indirect objects for every object in the xref + // table. Useful for discovering objects that are not otherwise + // referenced. + QPDF_DLL + std::vector getAllObjects(); + // Optimization support -- see doc/optimization. Implemented in // QPDF_optimization.cc diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index 2687cce0..fd35fecd 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -155,6 +155,12 @@ class QPDFWriter QPDF_DLL void setPrecheckStreams(bool); + // Preserve unreferenced objects. The default behavior is to + // discard any object that is not visited during a traversal of + // the object structure from the trailer. + QPDF_DLL + void setPreserveUnreferencedObjects(bool); + // Set the minimum PDF version. 
If the PDF version of the input // file (or previously set minimum version) is less than the // version passed to this method, the PDF version of the output @@ -427,6 +433,7 @@ class QPDFWriter qpdf_stream_data_e stream_data_mode; bool qdf_mode; bool precheck_streams; + bool preserve_unreferenced_objects; bool static_id; bool suppress_original_object_ids; bool direct_stream_lengths; diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 4d5bf67f..d82813d0 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -989,6 +989,22 @@ QPDF::showXRefTable() } } +std::vector +QPDF::getAllObjects() +{ + std::vector result; + for (std::map::iterator iter = + this->xref_table.begin(); + iter != this->xref_table.end(); ++iter) + { + + QPDFObjGen const& og = (*iter).first; + result.push_back(QPDFObjectHandle::Factory::newIndirect( + this, og.getObj(), og.getGen())); + } + return result; +} + void QPDF::setLastObjectDescription(std::string const& description, int objid, int generation) diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 59e306fc..01309f43 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -58,6 +58,7 @@ QPDFWriter::init() stream_data_mode = qpdf_s_compress; qdf_mode = false; precheck_streams = false; + preserve_unreferenced_objects = false; static_id = false; suppress_original_object_ids = false; direct_stream_lengths = true; @@ -183,6 +184,12 @@ QPDFWriter::setPrecheckStreams(bool val) this->precheck_streams = val; } +void +QPDFWriter::setPreserveUnreferencedObjects(bool val) +{ + this->preserve_unreferenced_objects = val; +} + void QPDFWriter::setMinimumPDFVersion(std::string const& version) { @@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard() writeHeader(); writeString(this->extra_header_text); + if (this->preserve_unreferenced_objects) + { + QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard"); + std::vector all = this->pdf.getAllObjects(); + for (std::vector::iterator iter = all.begin(); + iter != all.end(); ++iter) + { + 
enqueueObject(*iter); + } + } + // Put root first on queue. QPDFObjectHandle trailer = getTrimmedTrailer(); enqueueObject(trailer.getKey("/Root")); diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index cd35718d..18abc013 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -838,6 +838,27 @@ outfile.pdf + + + + + Tells qpdf to preserve objects that are not referenced when + writing the file. Ordinarily any object that is not referenced + in a traversal of the document from the trailer dictionary + will be discarded. This may be useful in working with some + damaged files or inspecting files with known unreferenced + objects. + + + This flag is ignored for linearized files. For all other + files, it causes objects in the new file to be written in order by + object ID from the original file. This does not mean that + object numbers will be the same since qpdf may create stream + lengths as direct or indirect differently from the original + file, and the original file may have gaps in its numbering. 
+ + + diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 99cfd3a1..65a6de1e 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\ --object-streams=mode controls handing of object streams\n\ --ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\ --precheck-streams precheck ability to decode streams\n\ +--preserve-unreferenced preserve unreferenced objects\n\ --qdf turns on \"QDF mode\" (below)\n\ --min-version=version sets the minimum PDF version of the output file\n\ --force-version=version forces this to be the PDF version of the output file\n\ @@ -1030,6 +1031,7 @@ int main(int argc, char* argv[]) bool ignore_xref_streams = false; bool qdf_mode = false; bool precheck_streams = false; + bool preserve_unreferenced_objects = false; std::string min_version; std::string force_version; @@ -1219,6 +1221,10 @@ int main(int argc, char* argv[]) { precheck_streams = true; } + else if (strcmp(arg, "preserve-unreferenced") == 0) + { + preserve_unreferenced_objects = true; + } else if (strcmp(arg, "min-version") == 0) { if (parameter == 0) @@ -1714,6 +1720,10 @@ int main(int argc, char* argv[]) { w.setPrecheckStreams(true); } + if (preserve_unreferenced_objects) + { + w.setPreserveUnreferencedObjects(true); + } if (normalize_set) { w.setContentNormalization(normalize); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index bf227c7a..c64c63ee 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -280,3 +280,4 @@ QPDFObjectHandle found fake 1 QPDFObjectHandle no val for last key 0 QPDF resolve failure to null 0 QPDFWriter precheck stream 0 +QPDFWriter preserve unreferenced standard 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index b61882b9..45ed8c46 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -743,6 +743,24 @@ $td->runtest("check output", {$td->FILE => "bad-data-precheck.pdf"}); show_ntests(); # ---------- +$td->notify("--- Preserve unreferenced 
objects ---"); +$n_tests += 4; + +$td->runtest("drop unused objects", + {$td->COMMAND => "qpdf --static-id unreferenced-objects.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "unreferenced-dropped.pdf"}); +$td->runtest("keep unused objects", + {$td->COMMAND => "qpdf --static-id --preserve-unreferenced" . + " unreferenced-objects.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "unreferenced-preserved.pdf"}); +show_ntests(); +# ---------- $td->notify("--- Copy Foreign Objects ---"); $n_tests += 7; diff --git a/qpdf/qtest/qpdf/unreferenced-dropped.pdf b/qpdf/qtest/qpdf/unreferenced-dropped.pdf new file mode 100644 index 00000000..071d8d7d Binary files /dev/null and b/qpdf/qtest/qpdf/unreferenced-dropped.pdf differ diff --git a/qpdf/qtest/qpdf/unreferenced-objects.pdf b/qpdf/qtest/qpdf/unreferenced-objects.pdf new file mode 100644 index 00000000..a6ea90c8 --- /dev/null +++ b/qpdf/qtest/qpdf/unreferenced-objects.pdf @@ -0,0 +1,105 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +2 0 obj +<< + /Pages 1 0 R + /Type /Catalog +>> +endobj + +1 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 1 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 9 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +7 0 obj +[ 8 0 R ] +endobj + +8 0 obj +/Potato +endobj + +9 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 10 +0000000000 65535 f +0000000079 00000 n +0000000025 00000 n +0000000161 00000 n +0000000376 00000 n +0000000475 00000 n +0000000494 00000 n 
+0000000612 00000 n +0000000638 00000 n +0000000662 00000 n +trailer << + /Root 2 0 R + /Size 10 + /ID [<5c2381b459937c988290150df782f1fd><5c2381b459937c988290150df782f1fd>] +>> +startxref +697 +%%EOF diff --git a/qpdf/qtest/qpdf/unreferenced-preserved.pdf b/qpdf/qtest/qpdf/unreferenced-preserved.pdf new file mode 100644 index 00000000..b92fff5d Binary files /dev/null and b/qpdf/qtest/qpdf/unreferenced-preserved.pdf differ