mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-31 02:48:31 +00:00
Precheck streams by default
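There is no need for a separate --precheck-streams option. We can do the precheck without imposing any penalty: the writer always attempts to filter each stream, and only if that first attempt fails does it process the stream a second time with filtering disabled so that the raw stream data is preserved.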
This commit is contained in:
parent
9744414c66
commit
ddc6cf0cf6
@ -200,17 +200,6 @@ class QPDFWriter
|
||||
QPDF_DLL
|
||||
void setQDFMode(bool);
|
||||
|
||||
// Enable stream precheck mode. In this mode, all filterable
|
||||
// streams are checked by actually attempting to decode them
|
||||
// before filtering. This may add significant time to the process
|
||||
// of writing the data because all streams from the input must be
|
||||
// read twice, but it enables the raw stream data to be preserved
|
||||
// even in cases where qpdf would run into errors decoding the
|
||||
// stream after it determines that it should be able to do it.
|
||||
// Examples would include compressed data with errors in it.
|
||||
QPDF_DLL
|
||||
void setPrecheckStreams(bool);
|
||||
|
||||
// Preserve unreferenced objects. The default behavior is to
|
||||
// discard any object that is not visited during a traversal of
|
||||
// the object structure from the trailer.
|
||||
@ -495,7 +484,6 @@ class QPDFWriter
|
||||
qpdf_stream_decode_level_e stream_decode_level;
|
||||
bool stream_decode_level_set;
|
||||
bool qdf_mode;
|
||||
bool precheck_streams;
|
||||
bool preserve_unreferenced_objects;
|
||||
bool newline_before_endstream;
|
||||
bool static_id;
|
||||
|
@ -59,7 +59,6 @@ QPDFWriter::init()
|
||||
stream_decode_level = qpdf_dl_none;
|
||||
stream_decode_level_set = false;
|
||||
qdf_mode = false;
|
||||
precheck_streams = false;
|
||||
preserve_unreferenced_objects = false;
|
||||
newline_before_endstream = false;
|
||||
static_id = false;
|
||||
@ -215,12 +214,6 @@ QPDFWriter::setQDFMode(bool val)
|
||||
this->qdf_mode = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::setPrecheckStreams(bool val)
|
||||
{
|
||||
this->precheck_streams = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::setPreserveUnreferencedObjects(bool val)
|
||||
{
|
||||
@ -1590,34 +1583,32 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
|
||||
|
||||
flags |= f_stream;
|
||||
|
||||
if (filter && this->precheck_streams)
|
||||
PointerHolder<Buffer> stream_data;
|
||||
bool filtered = false;
|
||||
for (int attempt = 1; attempt <= 2; ++attempt)
|
||||
{
|
||||
try
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter precheck stream");
|
||||
Pl_Discard discard;
|
||||
filter = object.pipeStreamData(
|
||||
&discard, 0, qpdf_dl_all, true);
|
||||
}
|
||||
catch (std::exception&)
|
||||
pushPipeline(new Pl_Buffer("stream data"));
|
||||
activatePipelineStack();
|
||||
|
||||
filtered =
|
||||
object.pipeStreamData(
|
||||
this->pipeline,
|
||||
(((filter && normalize) ? qpdf_ef_normalize : 0) |
|
||||
((filter && compress) ? qpdf_ef_compress : 0)),
|
||||
(filter
|
||||
? (uncompress ? qpdf_dl_all : this->stream_decode_level)
|
||||
: qpdf_dl_none));
|
||||
popPipelineStack(&stream_data);
|
||||
if (filter && (! filtered))
|
||||
{
|
||||
// Try again
|
||||
filter = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pushPipeline(new Pl_Buffer("stream data"));
|
||||
activatePipelineStack();
|
||||
|
||||
bool filtered =
|
||||
object.pipeStreamData(
|
||||
this->pipeline,
|
||||
(((filter && normalize) ? qpdf_ef_normalize : 0) |
|
||||
((filter && compress) ? qpdf_ef_compress : 0)),
|
||||
(filter
|
||||
? (uncompress ? qpdf_dl_all : this->stream_decode_level)
|
||||
: qpdf_dl_none));
|
||||
PointerHolder<Buffer> stream_data;
|
||||
popPipelineStack(&stream_data);
|
||||
if (filtered)
|
||||
{
|
||||
flags |= f_filtered;
|
||||
|
@ -996,23 +996,6 @@ outfile.pdf</option>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--precheck-streams</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Tells qpdf to precheck each stream for the ability to decode
|
||||
it. Ordinarily qpdf tries to decode streams that it thinks it
|
||||
can decode based on the filters, and if there ends up being an
|
||||
error when actually trying to do the decode, the stream data
|
||||
is truncated. This flag causes qpdf to actually read the
|
||||
stream fully before deciding whether to filter the stream.
|
||||
This option will slow qpdf down since it will have to read the
|
||||
stream twice, but it allows raw stream data to be preserved in
|
||||
cases where the decoding of the stream would fail for some
|
||||
reason. This may be useful in working with some damaged files.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--preserve-unreferenced</option></term>
|
||||
<listitem>
|
||||
|
11
qpdf/qpdf.cc
11
qpdf/qpdf.cc
@ -87,7 +87,6 @@ struct Options
|
||||
object_stream_mode(qpdf_o_preserve),
|
||||
ignore_xref_streams(false),
|
||||
qdf_mode(false),
|
||||
precheck_streams(false),
|
||||
preserve_unreferenced_objects(false),
|
||||
newline_before_endstream(false),
|
||||
show_npages(false),
|
||||
@ -149,7 +148,6 @@ struct Options
|
||||
qpdf_object_stream_e object_stream_mode;
|
||||
bool ignore_xref_streams;
|
||||
bool qdf_mode;
|
||||
bool precheck_streams;
|
||||
bool preserve_unreferenced_objects;
|
||||
bool newline_before_endstream;
|
||||
std::string min_version;
|
||||
@ -371,7 +369,6 @@ familiar with the PDF file format or who are PDF developers.\n\
|
||||
--suppress-recovery prevents qpdf from attempting to recover damaged files\n\
|
||||
--object-streams=mode controls handing of object streams\n\
|
||||
--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
|
||||
--precheck-streams precheck ability to decode streams\n\
|
||||
--preserve-unreferenced preserve unreferenced objects\n\
|
||||
--newline-before-endstream always put a newline before endstream\n\
|
||||
--qdf turns on \"QDF mode\" (below)\n\
|
||||
@ -1467,10 +1464,6 @@ static void parse_options(int argc, char* argv[], Options& o)
|
||||
{
|
||||
o.qdf_mode = true;
|
||||
}
|
||||
else if (strcmp(arg, "precheck-streams") == 0)
|
||||
{
|
||||
o.precheck_streams = true;
|
||||
}
|
||||
else if (strcmp(arg, "preserve-unreferenced") == 0)
|
||||
{
|
||||
o.preserve_unreferenced_objects = true;
|
||||
@ -2094,10 +2087,6 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
|
||||
{
|
||||
w.setQDFMode(true);
|
||||
}
|
||||
if (o.precheck_streams)
|
||||
{
|
||||
w.setPrecheckStreams(true);
|
||||
}
|
||||
if (o.preserve_unreferenced_objects)
|
||||
{
|
||||
w.setPreserveUnreferencedObjects(true);
|
||||
|
@ -277,7 +277,6 @@ QPDFObjectHandle treat word as string 0
|
||||
QPDFObjectHandle found fake 1
|
||||
QPDFObjectHandle no val for last key 0
|
||||
QPDF resolve failure to null 0
|
||||
QPDFWriter precheck stream 0
|
||||
QPDFWriter preserve unreferenced standard 0
|
||||
QPDFObjectHandle non-stream in parsecontent 0
|
||||
QPDFObjectHandle errors in parsecontent 0
|
||||
|
@ -918,27 +918,20 @@ $td->runtest("check output",
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Precheck streams ---");
|
||||
$n_tests += 4;
|
||||
$n_tests += 2;
|
||||
|
||||
$td->runtest("bad stream without precheck",
|
||||
$td->runtest("bad stream",
|
||||
{$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"},
|
||||
{$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "bad-data-out.pdf"});
|
||||
$td->runtest("bad stream with precheck",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --static-id --precheck-streams bad-data.pdf a.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "bad-data-precheck.pdf"});
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Decode levels ---");
|
||||
$n_tests += 10;
|
||||
$n_tests += 12;
|
||||
|
||||
# image-streams.pdf is the output of examples/pdf-create.
|
||||
# examples/pdf-create validates the actual image data.
|
||||
@ -962,6 +955,14 @@ $td->runtest("check finds bad jpeg data",
|
||||
{$td->FILE => "bad-jpeg-check.out",
|
||||
$td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("precheck detects bad jpeg data",
|
||||
{$td->COMMAND => "qpdf --static-id --decode-level=all" .
|
||||
" bad-jpeg.pdf a.pdf"},
|
||||
{$td->FILE => "bad-jpeg.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check file",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "bad-jpeg-out.pdf"});
|
||||
$td->runtest("get data",
|
||||
{$td->COMMAND => "qpdf --show-object=6" .
|
||||
" --filtered-stream-data bad-jpeg.pdf"},
|
||||
|
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user