Add option to preserve unreferenced objects

This commit is contained in:
Jay Berkenbilt 2017-07-28 19:18:57 -04:00
parent a94a729fee
commit 3a1ff5ded9
12 changed files with 212 additions and 1 deletions

View File

@ -1,7 +1,16 @@
2017-07-28 Jay Berkenbilt <ejb@ql.org>
* Add --preserve-unreferenced command-line option and
setPreserveUnreferencedObjects method to QPDFWriter. This option
causes QPDFWriter to write all objects from the input file to the
output file regardless of whether the objects are referenced.
Objects are written to the output file in numerical order from the
input file. This option has no effect for linearized files.
2017-07-27 Jay Berkenbilt <ejb@ql.org>
* Add --precheck-streams command-line option and setPrecheckStreams
method to QPDFWriter to tell QPDFWriter to attempt decoding a
stream fully before deciding whether to filter it or not.
* Recover gracefully from streams that aren't filterable because

View File

@ -396,6 +396,12 @@ class QPDF
QPDF_DLL
void showXRefTable();
// Returns a list of indirect objects for every object in the xref
// table. Useful for discovering objects that are not otherwise
// referenced.
QPDF_DLL
std::vector<QPDFObjectHandle> getAllObjects();
// Optimization support -- see doc/optimization. Implemented in
// QPDF_optimization.cc

View File

@ -155,6 +155,12 @@ class QPDFWriter
QPDF_DLL
void setPrecheckStreams(bool);
// Preserve unreferenced objects. The default behavior is to
// discard any object that is not visited during a traversal of
// the object structure from the trailer. When this is enabled,
// all objects from the input file are written to the output file
// regardless of whether they are referenced, in numerical order
// from the input file. This setting has no effect for linearized
// files.
QPDF_DLL
void setPreserveUnreferencedObjects(bool);
// Set the minimum PDF version. If the PDF version of the input
// file (or previously set minimum version) is less than the
// version passed to this method, the PDF version of the output
@ -427,6 +433,7 @@ class QPDFWriter
qpdf_stream_data_e stream_data_mode;
bool qdf_mode;
bool precheck_streams;
bool preserve_unreferenced_objects;
bool static_id;
bool suppress_original_object_ids;
bool direct_stream_lengths;

View File

@ -989,6 +989,22 @@ QPDF::showXRefTable()
}
}
std::vector<QPDFObjectHandle>
QPDF::getAllObjects()
{
std::vector<QPDFObjectHandle> result;
for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter =
this->xref_table.begin();
iter != this->xref_table.end(); ++iter)
{
QPDFObjGen const& og = (*iter).first;
result.push_back(QPDFObjectHandle::Factory::newIndirect(
this, og.getObj(), og.getGen()));
}
return result;
}
void
QPDF::setLastObjectDescription(std::string const& description,
int objid, int generation)

View File

@ -58,6 +58,7 @@ QPDFWriter::init()
stream_data_mode = qpdf_s_compress;
qdf_mode = false;
precheck_streams = false;
preserve_unreferenced_objects = false;
static_id = false;
suppress_original_object_ids = false;
direct_stream_lengths = true;
@ -183,6 +184,12 @@ QPDFWriter::setPrecheckStreams(bool val)
this->precheck_streams = val;
}
void
QPDFWriter::setPreserveUnreferencedObjects(bool val)
{
this->preserve_unreferenced_objects = val;
}
void
QPDFWriter::setMinimumPDFVersion(std::string const& version)
{
@ -3074,6 +3081,17 @@ QPDFWriter::writeStandard()
writeHeader();
writeString(this->extra_header_text);
if (this->preserve_unreferenced_objects)
{
QTC::TC("qpdf", "QPDFWriter preserve unreferenced standard");
std::vector<QPDFObjectHandle> all = this->pdf.getAllObjects();
for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
iter != all.end(); ++iter)
{
enqueueObject(*iter);
}
}
// Put root first on queue.
QPDFObjectHandle trailer = getTrimmedTrailer();
enqueueObject(trailer.getKey("/Root"));

View File

@ -838,6 +838,27 @@ outfile.pdf</option>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--preserve-unreferenced</option></term>
<listitem>
<para>
Tells qpdf to preserve objects that are not referenced when
writing the file. Ordinarily any object that is not referenced
in a traversal of the document from the trailer dictionary
will be discarded. This may be useful in working with some
damaged files or inspecting files with known unreferenced
objects.
</para>
<para>
This flag is ignored for linearized files. For other files, it
has the side effect of causing objects in the new file to be
written in order by object ID from the original file. This does not mean that
object numbers will be the same since qpdf may create stream
lengths as direct or indirect differently from the original
file, and the original file may have gaps in its numbering.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--qdf</option></term>
<listitem>

View File

@ -203,6 +203,7 @@ familiar with the PDF file format or who are PDF developers.\n\
--object-streams=mode controls handing of object streams\n\
--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
--precheck-streams precheck ability to decode streams\n\
--preserve-unreferenced preserve unreferenced objects\n\
--qdf turns on \"QDF mode\" (below)\n\
--min-version=version sets the minimum PDF version of the output file\n\
--force-version=version forces this to be the PDF version of the output file\n\
@ -1030,6 +1031,7 @@ int main(int argc, char* argv[])
bool ignore_xref_streams = false;
bool qdf_mode = false;
bool precheck_streams = false;
bool preserve_unreferenced_objects = false;
std::string min_version;
std::string force_version;
@ -1219,6 +1221,10 @@ int main(int argc, char* argv[])
{
precheck_streams = true;
}
else if (strcmp(arg, "preserve-unreferenced") == 0)
{
preserve_unreferenced_objects = true;
}
else if (strcmp(arg, "min-version") == 0)
{
if (parameter == 0)
@ -1714,6 +1720,10 @@ int main(int argc, char* argv[])
{
w.setPrecheckStreams(true);
}
if (preserve_unreferenced_objects)
{
w.setPreserveUnreferencedObjects(true);
}
if (normalize_set)
{
w.setContentNormalization(normalize);

View File

@ -280,3 +280,4 @@ QPDFObjectHandle found fake 1
QPDFObjectHandle no val for last key 0
QPDF resolve failure to null 0
QPDFWriter precheck stream 0
QPDFWriter preserve unreferenced standard 0

View File

@ -743,6 +743,24 @@ $td->runtest("check output",
{$td->FILE => "bad-data-precheck.pdf"});
show_ntests();
# ----------
# Section: exercise qpdf with and without --preserve-unreferenced on
# the same input file and compare each result with an expected file.
$td->notify("--- Preserve unreferenced objects ---");
# Two qpdf runs, each followed by a file comparison.
$n_tests += 4;
# Run without --preserve-unreferenced; expect no output and a zero
# exit status.
$td->runtest("drop unused objects",
{$td->COMMAND => "qpdf --static-id unreferenced-objects.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
# The file just written must match unreferenced-dropped.pdf.
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "unreferenced-dropped.pdf"});
# Same input with --preserve-unreferenced; again expect no output and
# a zero exit status.
$td->runtest("keep unused objects",
{$td->COMMAND => "qpdf --static-id --preserve-unreferenced" .
" unreferenced-objects.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
# The file just written must match unreferenced-preserved.pdf.
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "unreferenced-preserved.pdf"});
show_ntests();
# ----------
$td->notify("--- Copy Foreign Objects ---");
$n_tests += 7;

Binary file not shown.

View File

@ -0,0 +1,105 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
2 0 obj
<<
/Pages 1 0 R
/Type /Catalog
>>
endobj
1 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 1 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 9 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[ 8 0 R ]
endobj
8 0 obj
/Potato
endobj
9 0 obj
[
/PDF
/Text
]
endobj
xref
0 10
0000000000 65535 f
0000000079 00000 n
0000000025 00000 n
0000000161 00000 n
0000000376 00000 n
0000000475 00000 n
0000000494 00000 n
0000000612 00000 n
0000000638 00000 n
0000000662 00000 n
trailer <<
/Root 2 0 R
/Size 10
/ID [<5c2381b459937c988290150df782f1fd><5c2381b459937c988290150df782f1fd>]
>>
startxref
697
%%EOF

Binary file not shown.