2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-03 19:00:51 +00:00

Precheck streams by default

There is no need for a --precheck-streams option. We can do the
precheck without imposing any penalty, only re-encoding the stream if
it fails the first time.
This commit is contained in:
Jay Berkenbilt 2017-08-19 14:40:33 -04:00
parent 9744414c66
commit ddc6cf0cf6
8 changed files with 33 additions and 82 deletions

View File

@ -200,17 +200,6 @@ class QPDFWriter
QPDF_DLL
void setQDFMode(bool);
// Enable stream precheck mode. In this mode, all filterable
// streams are checked by actually attempting to decode them
// before filtering. This may add significant time to the process
// of writing the data because all streams from the input must be
// read twice, but it enables the raw stream data to be preserved
// even in cases where qpdf would run into errors decoding the
// stream after it determines that it should be able to do it.
// Examples would include compressed data with errors in it.
QPDF_DLL
void setPrecheckStreams(bool);
// Preserve unreferenced objects. The default behavior is to
// discard any object that is not visited during a traversal of
// the object structure from the trailer.
@ -495,7 +484,6 @@ class QPDFWriter
qpdf_stream_decode_level_e stream_decode_level;
bool stream_decode_level_set;
bool qdf_mode;
bool precheck_streams;
bool preserve_unreferenced_objects;
bool newline_before_endstream;
bool static_id;

View File

@ -59,7 +59,6 @@ QPDFWriter::init()
stream_decode_level = qpdf_dl_none;
stream_decode_level_set = false;
qdf_mode = false;
precheck_streams = false;
preserve_unreferenced_objects = false;
newline_before_endstream = false;
static_id = false;
@ -215,12 +214,6 @@ QPDFWriter::setQDFMode(bool val)
this->qdf_mode = val;
}
void
QPDFWriter::setPrecheckStreams(bool val)
{
this->precheck_streams = val;
}
void
QPDFWriter::setPreserveUnreferencedObjects(bool val)
{
@ -1590,34 +1583,32 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
flags |= f_stream;
if (filter && this->precheck_streams)
PointerHolder<Buffer> stream_data;
bool filtered = false;
for (int attempt = 1; attempt <= 2; ++attempt)
{
try
{
QTC::TC("qpdf", "QPDFWriter precheck stream");
Pl_Discard discard;
filter = object.pipeStreamData(
&discard, 0, qpdf_dl_all, true);
}
catch (std::exception&)
pushPipeline(new Pl_Buffer("stream data"));
activatePipelineStack();
filtered =
object.pipeStreamData(
this->pipeline,
(((filter && normalize) ? qpdf_ef_normalize : 0) |
((filter && compress) ? qpdf_ef_compress : 0)),
(filter
? (uncompress ? qpdf_dl_all : this->stream_decode_level)
: qpdf_dl_none));
popPipelineStack(&stream_data);
if (filter && (! filtered))
{
// Try again
filter = false;
}
else
{
break;
}
}
pushPipeline(new Pl_Buffer("stream data"));
activatePipelineStack();
bool filtered =
object.pipeStreamData(
this->pipeline,
(((filter && normalize) ? qpdf_ef_normalize : 0) |
((filter && compress) ? qpdf_ef_compress : 0)),
(filter
? (uncompress ? qpdf_dl_all : this->stream_decode_level)
: qpdf_dl_none));
PointerHolder<Buffer> stream_data;
popPipelineStack(&stream_data);
if (filtered)
{
flags |= f_filtered;

View File

@ -996,23 +996,6 @@ outfile.pdf</option>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--precheck-streams</option></term>
<listitem>
<para>
Tells qpdf to precheck each stream for the ability to decode
it. Ordinarily qpdf tries to decode streams that it thinks it
can decode based on the filters, and if there ends up being an
error when actually trying to do the decode, the stream data
is truncated. This flag causes qpdf to actually read the
stream fully before deciding whether to filter the stream.
This option will slow qpdf down since it will have to read the
stream twice, but it allows raw stream data to be preserved in
cases where the decoding of the stream would fail for some
reason. This may be useful in working with some damaged files.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--preserve-unreferenced</option></term>
<listitem>

View File

@ -87,7 +87,6 @@ struct Options
object_stream_mode(qpdf_o_preserve),
ignore_xref_streams(false),
qdf_mode(false),
precheck_streams(false),
preserve_unreferenced_objects(false),
newline_before_endstream(false),
show_npages(false),
@ -149,7 +148,6 @@ struct Options
qpdf_object_stream_e object_stream_mode;
bool ignore_xref_streams;
bool qdf_mode;
bool precheck_streams;
bool preserve_unreferenced_objects;
bool newline_before_endstream;
std::string min_version;
@ -371,7 +369,6 @@ familiar with the PDF file format or who are PDF developers.\n\
--suppress-recovery prevents qpdf from attempting to recover damaged files\n\
--object-streams=mode controls handing of object streams\n\
--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
--precheck-streams precheck ability to decode streams\n\
--preserve-unreferenced preserve unreferenced objects\n\
--newline-before-endstream always put a newline before endstream\n\
--qdf turns on \"QDF mode\" (below)\n\
@ -1467,10 +1464,6 @@ static void parse_options(int argc, char* argv[], Options& o)
{
o.qdf_mode = true;
}
else if (strcmp(arg, "precheck-streams") == 0)
{
o.precheck_streams = true;
}
else if (strcmp(arg, "preserve-unreferenced") == 0)
{
o.preserve_unreferenced_objects = true;
@ -2094,10 +2087,6 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
w.setQDFMode(true);
}
if (o.precheck_streams)
{
w.setPrecheckStreams(true);
}
if (o.preserve_unreferenced_objects)
{
w.setPreserveUnreferencedObjects(true);

View File

@ -277,7 +277,6 @@ QPDFObjectHandle treat word as string 0
QPDFObjectHandle found fake 1
QPDFObjectHandle no val for last key 0
QPDF resolve failure to null 0
QPDFWriter precheck stream 0
QPDFWriter preserve unreferenced standard 0
QPDFObjectHandle non-stream in parsecontent 0
QPDFObjectHandle errors in parsecontent 0

View File

@ -918,27 +918,20 @@ $td->runtest("check output",
show_ntests();
# ----------
$td->notify("--- Precheck streams ---");
$n_tests += 4;
$n_tests += 2;
$td->runtest("bad stream without precheck",
$td->runtest("bad stream",
{$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"},
{$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "bad-data-out.pdf"});
$td->runtest("bad stream with precheck",
{$td->COMMAND =>
"qpdf --static-id --precheck-streams bad-data.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "bad-data-precheck.pdf"});
show_ntests();
# ----------
$td->notify("--- Decode levels ---");
$n_tests += 10;
$n_tests += 12;
# image-streams.pdf is the output of examples/pdf-create.
# examples/pdf-create validates the actual image data.
@ -962,6 +955,14 @@ $td->runtest("check finds bad jpeg data",
{$td->FILE => "bad-jpeg-check.out",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("precheck detects bad jpeg data",
{$td->COMMAND => "qpdf --static-id --decode-level=all" .
" bad-jpeg.pdf a.pdf"},
{$td->FILE => "bad-jpeg.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "bad-jpeg-out.pdf"});
$td->runtest("get data",
{$td->COMMAND => "qpdf --show-object=6" .
" --filtered-stream-data bad-jpeg.pdf"},

Binary file not shown.