Enable finer grained control of stream decoding

This commit adds several API methods that enable control over which
types of filters QPDF will attempt to decode. It also adds support for
/RunLengthDecode and /DCTDecode filters for both encoding and
decoding.
This commit is contained in:
Jay Berkenbilt 2017-08-19 09:18:14 -04:00
parent ae90d2c485
commit 9744414c66
29 changed files with 658 additions and 118 deletions

View File

@ -1,3 +1,20 @@
2017-08-19 Jay Berkenbilt <ejb@ql.org>
* Remove --precheck-streams. This is enabled by default now
without any efficiency cost. This feature was never released.
* Update pdf-create example to illustrate use of additional image
compression filters.
* Add support for /RunLengthDecode and /DCTDecode:
- New pipeline types Pl_RunLength and Pl_DCT
- New command-line flags --compress-streams and --decode-level
to replace/enhance --stream-data
- New QPDFWriter::setCompressStreams and
QPDFWriter::setDecodeLevel methods
Please see documentation, header files, and help messages for
details on these new features.
2017-08-12 Jay Berkenbilt <ejb@ql.org>
* Add QPDFObjectHandle::rotatePage to apply rotation to a page

View File

@ -121,7 +121,8 @@ int main(int argc, char* argv[])
// pipeStreamData with a null pipeline to determine
// whether the image is filterable. Directly inspect
// keys to determine the image type.
if (image.pipeStreamData(0, true, false, false) &&
if (image.pipeStreamData(0, qpdf_ef_compress,
qpdf_dl_generalized) &&
color_space.isName() &&
bits_per_component.isInteger() &&
(color_space.getName() == "/DeviceGray") &&

View File

@ -26,7 +26,7 @@ enum qpdf_error_code_e
qpdf_e_pages, /* erroneous or unsupported pages structure */
};
/* Write Parameters */
/* Write Parameters. See QPDFWriter.hh for details. */
enum qpdf_object_stream_e
{
@ -41,6 +41,23 @@ enum qpdf_stream_data_e
qpdf_s_compress /* compress stream data */
};
/* Stream data flags */
/* See pipeStreamData in QPDFObjectHandle.hh for details on these flags. */
enum qpdf_stream_encode_flags_e
{
/* Bit flags: each value occupies its own bit so that several may
 * be OR'ed together into a single encode_flags value. A value of
 * 0 requests no encoding. */
qpdf_ef_compress = 1 << 0, /* compress uncompressed streams */
qpdf_ef_normalize = 1 << 1, /* normalize content stream */
};
enum qpdf_stream_decode_level_e
{
/* These must be in order from less to more decoding. Callers
 * compare levels numerically (e.g. with < or max), so do not
 * reorder these values. */
qpdf_dl_none = 0, /* preserve all stream filters */
qpdf_dl_generalized, /* decode general-purpose filters */
qpdf_dl_specialized, /* also decode other non-lossy filters */
qpdf_dl_all /* also decode lossy filters */
};
/* R3 Encryption Parameters */
enum qpdf_r3_print_e

View File

@ -10,6 +10,7 @@
#include <qpdf/DLL.h>
#include <qpdf/Types.h>
#include <qpdf/Constants.h>
#include <string>
#include <vector>
@ -44,19 +45,19 @@ class QPDFObjectHandle
virtual ~StreamDataProvider()
{
}
// The implementation of this function must write the
// unencrypted, raw stream data to the given pipeline. Every
// call to provideStreamData for a given stream must write the
// same data. The number of bytes written must agree with the
// length provided at the time the StreamDataProvider object
// was associated with the stream. The object ID and
// generation passed to this method are those that belong to
// the stream on behalf of which the provider is called. They
// may be ignored or used by the implementation for indexing
// or other purposes. This information is made available just
// to make it more convenient to use a single
// StreamDataProvider object to provide data for multiple
// streams.
// The implementation of this function must write stream data
// to the given pipeline. The stream data must conform to
// whatever filters are explicitly associated with the stream.
// QPDFWriter may, in some cases, add compression, but if it
// does, it will update the filters as needed. Every call to
// provideStreamData for a given stream must write the same
// data. The object ID and generation passed to this method are
// those that belong to the stream on behalf of which the
// provider is called. They may be ignored or used by the
// implementation for indexing or other purposes. This
// information is made available just to make it more
// convenient to use a single StreamDataProvider object to
// provide data for multiple streams.
virtual void provideStreamData(int objid, int generation,
Pipeline* pipeline) = 0;
};
@ -370,32 +371,71 @@ class QPDFObjectHandle
// Returns filtered (uncompressed) stream data. Throws an
// exception if the stream is filtered and we can't decode it.
QPDF_DLL
PointerHolder<Buffer> getStreamData();
PointerHolder<Buffer> getStreamData(
qpdf_stream_decode_level_e level = qpdf_dl_generalized);
// Returns unfiltered (raw) stream data.
QPDF_DLL
PointerHolder<Buffer> getRawStreamData();
// Write stream data through the given pipeline. A null pipeline
// Write stream data through the given pipeline. A null pipeline
// value may be used if all you want to do is determine whether a
// stream is filterable. If filter is false, write raw stream
// data and return false. If filter is true, then attempt to
// apply all the decoding filters to the stream data. If we are
// successful, return true. Otherwise, return false and write raw
// data. If filtering is requested and successfully performed,
// then the normalize and compress flags are used to determine
// whether stream data should be normalized and compressed. In
// all cases, if this function returns false, raw data has been
// written. If it returns true, then any requested filtering has
// been performed. Note that if the original stream data has no
// filters applied to it, the return value will be equal to the
// value of the filter parameter. Callers may use the return
// value of this function to determine whether or not the /Filter
// and /DecodeParms keys in the stream dictionary should be
// replaced if writing a new stream object.
// stream is filterable and would be filtered based on the
// provided flags. If flags is 0, write raw stream data and return
// false. Otherwise, the flags alter the behavior in the following
// way:
//
// encode_flags:
//
// qpdf_ef_compress -- compress data with /FlateDecode if no other
// compression filters are applied.
//
// qpdf_ef_normalize -- tokenize as content stream and normalize tokens
//
// decode_level:
//
// qpdf_dl_none -- do not decode any streams.
//
// qpdf_dl_generalized -- decode supported general-purpose
// filters. This includes /ASCIIHexDecode, /ASCII85Decode,
// /LZWDecode, and /FlateDecode.
//
// qpdf_dl_specialized -- in addition to generalized filters, also
// decode supported non-lossy specialized filters. This includes
// /RunLengthDecode.
//
// qpdf_dl_all -- in addition to generalized and non-lossy
// specialized filters, decode supported lossy filters. This
// includes /DCTDecode.
//
// If, based on the flags and the filters and decode parameters,
// we determine that we know how to apply all requested filters,
// do so and return true if we are successful.
//
// In all cases, a return value of true means that filtered data
// has been written successfully. If filtering is requested but
// this method returns false, it means there was some error in the
// filtering, in which case the resulting data is likely partially
// filtered and/or incomplete and may not be consistent with the
// configured filters. QPDFWriter handles this by attempting to
// get the stream data without filtering, but callers should
// consider a false return value when decode_level is not
// qpdf_dl_none to be a potential loss of data.
QPDF_DLL
bool pipeStreamData(Pipeline*,
unsigned long encode_flags,
qpdf_stream_decode_level_e decode_level,
bool suppress_warnings = false);
// Legacy pipeStreamData. This maps to the flags-based
// pipeStreamData as follows:
// filter = false -> encode_flags = 0, decode_level = qpdf_dl_none
// (normalize and compress are ignored)
// filter = true -> decode_level = qpdf_dl_generalized
// normalize = true -> encode_flags |= qpdf_ef_normalize
// compress = true -> encode_flags |= qpdf_ef_compress
QPDF_DLL
bool pipeStreamData(Pipeline*, bool filter,
bool normalize, bool compress,
bool suppress_warnings = false);
bool normalize, bool compress);
// Replace a stream's dictionary. The new dictionary must be
// consistent with the stream's data. This is most appropriately

View File

@ -118,14 +118,70 @@ class QPDFWriter
QPDF_DLL
void setObjectStreamMode(qpdf_object_stream_e);
// Set value of stream data mode. In uncompress mode, we attempt
// to uncompress any stream that we can. In preserve mode, we
// preserve any filtering applied to streams. In compress mode,
// if we can apply all filters and the stream is not already
// optimally compressed, recompress the stream.
// Set value of stream data mode. This is an older interface.
// Instead of using this, prefer setCompressStreams() and
// setDecodeLevel(). This method is retained for compatibility,
// but it does not cover the full range of available
// configurations. The mapping between this and the new methods is
// as follows:
//
// qpdf_s_uncompress:
// setCompressStreams(false)
// setDecodeLevel(qpdf_dl_generalized)
// qpdf_s_preserve:
// setCompressStreams(false)
// setDecodeLevel(qpdf_dl_none)
// qpdf_s_compress:
// setCompressStreams(true)
// setDecodeLevel(qpdf_dl_generalized)
//
// The default is qpdf_s_compress.
QPDF_DLL
void setStreamDataMode(qpdf_stream_data_e);
// If true, compress any uncompressed streams when writing them.
// Metadata streams are a special case and are not compressed even
// if this is true. This is true by default for QPDFWriter. If you
// want QPDFWriter to leave uncompressed streams uncompressed,
// pass false to this method.
QPDF_DLL
void setCompressStreams(bool);
// When QPDFWriter encounters streams, this parameter controls the
// behavior with respect to attempting to apply any filters to the
// streams when copying to the output. The decode levels are as
// follows:
//
// qpdf_dl_none: Do not attempt to apply any filters. Streams
// remain as they appear in the original file. Note that
// uncompressed streams may still be compressed on output. You can
// disable that by calling setCompressStreams(false).
//
// qpdf_dl_generalized: This is the default. QPDFWriter will apply
// LZWDecode, ASCII85Decode, ASCIIHexDecode, and FlateDecode
// filters on the input. When combined with
// setCompressStreams(true), which is the default, the effect of this
// is that streams filtered with these older and less efficient
// filters will be recompressed with the Flate filter. As a
// special case, if a stream is already compressed with
// FlateDecode and setCompressStreams is enabled, the original
// compressed data will be preserved.
//
// qpdf_dl_specialized: In addition to uncompressing the
// generalized compression formats, supported non-lossy
// compression will also be decoded. At present, this includes
// the RunLengthDecode filter.
//
// qpdf_dl_all: In addition to generalized and non-lossy
// specialized filters, supported lossy compression filters will
// be applied. At present, this includes DCTDecode (JPEG)
// compression. Note that compressing the resulting data with
// DCTDecode again will accumulate loss, so avoid multiple
// compression and decompression cycles. This is mostly useful for
// retrieving image data.
QPDF_DLL
void setDecodeLevel(qpdf_stream_decode_level_e);
// Set value of content stream normalization. The default is
// "false". If true, we attempt to normalize newlines inside of
// content streams. Some constructs such as inline images may
@ -434,8 +490,10 @@ class QPDFWriter
Buffer* output_buffer;
bool normalize_content_set;
bool normalize_content;
bool stream_data_mode_set;
qpdf_stream_data_e stream_data_mode;
bool compress_streams;
bool compress_streams_set;
qpdf_stream_decode_level_e stream_decode_level;
bool stream_decode_level_set;
bool qdf_mode;
bool precheck_streams;
bool preserve_unreferenced_objects;

View File

@ -45,7 +45,7 @@ QPDF::CopiedStreamDataProvider::provideStreamData(
{
QPDFObjectHandle foreign_stream =
this->foreign_streams[QPDFObjGen(objid, generation)];
foreign_stream.pipeStreamData(pipeline, false, false, false);
foreign_stream.pipeStreamData(pipeline, 0, qpdf_dl_none);
}
void
@ -2377,6 +2377,7 @@ QPDF::pipeStreamData(int objid, int generation,
length -= len;
pipeline->write(QUtil::unsigned_char_pointer(buf), len);
}
pipeline->finish();
success = true;
}
catch (QPDFExc& e)
@ -2398,13 +2399,16 @@ QPDF::pipeStreamData(int objid, int generation,
QUtil::int_to_string(generation) + ": " + e.what()));
}
}
try
if (! success)
{
pipeline->finish();
}
catch (std::exception&)
{
// ignore
try
{
pipeline->finish();
}
catch (std::exception&)
{
// ignore
}
}
return success;
}

View File

@ -482,10 +482,10 @@ QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
}
PointerHolder<Buffer>
QPDFObjectHandle::getStreamData()
QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
{
assertStream();
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getStreamData();
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getStreamData(level);
}
PointerHolder<Buffer>
@ -496,13 +496,35 @@ QPDFObjectHandle::getRawStreamData()
}
bool
QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
bool normalize, bool compress,
QPDFObjectHandle::pipeStreamData(Pipeline* p,
unsigned long encode_flags,
qpdf_stream_decode_level_e decode_level,
bool suppress_warnings)
{
assertStream();
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->pipeStreamData(
p, filter, normalize, compress, suppress_warnings);
p, encode_flags, decode_level, suppress_warnings);
}
bool
QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter,
bool normalize, bool compress)
{
// Legacy boolean interface. Translate the three booleans into the
// encode_flags/decode_level form and delegate to the flags-based
// overload, never suppressing warnings.
if (! filter)
{
// Without filtering, normalize and compress have no effect:
// request no encoding and no decoding.
return pipeStreamData(p, 0, qpdf_dl_none, false);
}
// Filtering was requested: decode general-purpose filters and
// add whichever encoding steps the caller asked for.
unsigned long requested_encoding =
(normalize ? qpdf_ef_normalize : 0) |
(compress ? qpdf_ef_compress : 0);
return pipeStreamData(p, requested_encoding, qpdf_dl_generalized, false);
}
void
@ -825,7 +847,7 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
all_description += ",";
}
all_description += " " + og;
if (! stream.pipeStreamData(&buf, true, false, false, false))
if (! stream.pipeStreamData(&buf, 0, qpdf_dl_specialized))
{
QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent");
warn(stream.getOwningQPDF(),

View File

@ -54,8 +54,10 @@ QPDFWriter::init()
output_buffer = 0;
normalize_content_set = false;
normalize_content = false;
stream_data_mode_set = false;
stream_data_mode = qpdf_s_compress;
compress_streams = true;
compress_streams_set = false;
stream_decode_level = qpdf_dl_none;
stream_decode_level_set = false;
qdf_mode = false;
precheck_streams = false;
preserve_unreferenced_objects = false;
@ -162,8 +164,42 @@ QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode)
void
QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode)
{
this->stream_data_mode_set = true;
this->stream_data_mode = mode;
switch (mode)
{
case qpdf_s_uncompress:
this->stream_decode_level =
std::max(qpdf_dl_generalized, this->stream_decode_level);
this->compress_streams = false;
break;
case qpdf_s_preserve:
this->stream_decode_level = qpdf_dl_none;
this->compress_streams = false;
break;
case qpdf_s_compress:
this->stream_decode_level =
std::max(qpdf_dl_generalized, this->stream_decode_level);
this->compress_streams = true;
break;
}
this->stream_decode_level_set = true;
this->compress_streams_set = true;
}
void
QPDFWriter::setCompressStreams(bool val)
{
// Record that the caller made an explicit choice (other code in
// QPDFWriter consults this flag before applying its own default),
// then store the requested setting.
this->compress_streams_set = true;
this->compress_streams = val;
}
void
QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
{
// Record that the caller made an explicit choice (other code in
// QPDFWriter consults this flag before applying its own default),
// then store the requested decode level.
this->stream_decode_level_set = true;
this->stream_decode_level = val;
}
void
@ -1512,8 +1548,8 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
{
is_metadata = true;
}
bool filter = (this->stream_data_mode != qpdf_s_preserve);
if (this->stream_data_mode == qpdf_s_compress)
bool filter = (this->compress_streams || this->stream_decode_level);
if (this->compress_streams)
{
// Don't filter if the stream is already compressed with
// FlateDecode. We don't want to make it worse by getting
@ -1532,19 +1568,21 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
}
bool normalize = false;
bool compress = false;
bool uncompress = false;
if (is_metadata &&
((! this->encrypted) || (this->encrypt_metadata == false)))
{
QTC::TC("qpdf", "QPDFWriter not compressing metadata");
filter = true;
compress = false;
uncompress = true;
}
else if (this->normalize_content && normalized_streams.count(old_og))
{
normalize = true;
filter = true;
}
else if (filter && (this->stream_data_mode == qpdf_s_compress))
else if (filter && this->compress_streams)
{
compress = true;
QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream");
@ -1559,7 +1597,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
QTC::TC("qpdf", "QPDFWriter precheck stream");
Pl_Discard discard;
filter = object.pipeStreamData(
&discard, true, false, false, true);
&discard, 0, qpdf_dl_all, true);
}
catch (std::exception&)
{
@ -1569,8 +1607,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
pushPipeline(new Pl_Buffer("stream data"));
activatePipelineStack();
bool filtered =
object.pipeStreamData(this->pipeline, filter, normalize, compress);
object.pipeStreamData(
this->pipeline,
(((filter && normalize) ? qpdf_ef_normalize : 0) |
((filter && compress) ? qpdf_ef_compress : 0)),
(filter
? (uncompress ? qpdf_dl_all : this->stream_decode_level)
: qpdf_dl_none));
PointerHolder<Buffer> stream_data;
popPipelineStack(&stream_data);
if (filtered)
@ -1717,8 +1762,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
// Set up a stream to write the stream data into a buffer.
Pipeline* next = pushPipeline(new Pl_Buffer("object stream"));
if (! ((this->stream_data_mode == qpdf_s_uncompress) ||
this->qdf_mode))
if (! (this->stream_decode_level || this->qdf_mode))
{
compressed = true;
next = pushPipeline(
@ -2180,7 +2224,8 @@ QPDFWriter::prepareFileForWrite()
is_stream = true;
dict = node.getDict();
// See whether we are able to filter this stream.
filterable = node.pipeStreamData(0, true, false, false);
filterable = node.pipeStreamData(
0, 0, this->stream_decode_level, true);
}
else if (pdf.getRoot().getObjectID() == node.getObjectID())
{
@ -2260,10 +2305,14 @@ QPDFWriter::write()
{
this->normalize_content = true;
}
if (! this->stream_data_mode_set)
if (! this->compress_streams_set)
{
this->stream_data_mode = qpdf_s_uncompress;
this->compress_streams = false;
}
if (! this->stream_decode_level_set)
{
this->stream_decode_level = qpdf_dl_generalized;
}
}
if (this->encrypted)
@ -2272,7 +2321,7 @@ QPDFWriter::write()
this->preserve_encryption = false;
}
else if (this->normalize_content ||
(this->stream_data_mode == qpdf_s_uncompress) ||
this->stream_decode_level ||
this->qdf_mode)
{
// Encryption makes looking at contents pretty useless. If
@ -2300,7 +2349,7 @@ QPDFWriter::write()
}
if (this->qdf_mode || this->normalize_content ||
(this->stream_data_mode == qpdf_s_uncompress))
this->stream_decode_level)
{
initializeSpecialStreams();
}
@ -2586,7 +2635,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
bool compressed = false;
if (! ((this->stream_data_mode == qpdf_s_uncompress) || this->qdf_mode))
if (! (this->stream_decode_level || this->qdf_mode))
{
compressed = true;
if (! skip_compression)

View File

@ -9,6 +9,8 @@
#include <qpdf/Pl_ASCII85Decoder.hh>
#include <qpdf/Pl_ASCIIHexDecoder.hh>
#include <qpdf/Pl_LZWDecoder.hh>
#include <qpdf/Pl_RunLength.hh>
#include <qpdf/Pl_DCT.hh>
#include <qpdf/Pl_Count.hh>
#include <qpdf/QTC.hh>
@ -82,10 +84,10 @@ QPDF_Stream::getDict() const
}
PointerHolder<Buffer>
QPDF_Stream::getStreamData()
QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
{
Pl_Buffer buf("stream data buffer");
if (! pipeStreamData(&buf, true, false, false, false))
if (! pipeStreamData(&buf, 0, decode_level, false))
{
throw std::logic_error("getStreamData called on unfilterable stream");
}
@ -97,7 +99,7 @@ PointerHolder<Buffer>
QPDF_Stream::getRawStreamData()
{
Pl_Buffer buf("stream data buffer");
pipeStreamData(&buf, false, false, false, false);
pipeStreamData(&buf, 0, qpdf_dl_none, false);
QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
return buf.getBuffer();
}
@ -178,6 +180,8 @@ QPDF_Stream::understandDecodeParams(
bool
QPDF_Stream::filterable(std::vector<std::string>& filters,
bool& specialized_compression,
bool& lossy_compression,
int& predictor, int& columns,
bool& early_code_change)
{
@ -254,11 +258,20 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
filter = filter_abbreviations[filter];
}
if (! ((filter == "/Crypt") ||
(filter == "/FlateDecode") ||
(filter == "/LZWDecode") ||
(filter == "/ASCII85Decode") ||
(filter == "/ASCIIHexDecode")))
if (filter == "/RunLengthDecode")
{
specialized_compression = true;
}
else if (filter == "/DCTDecode")
{
specialized_compression = true;
lossy_compression = true;
}
else if (! ((filter == "/Crypt") ||
(filter == "/FlateDecode") ||
(filter == "/LZWDecode") ||
(filter == "/ASCII85Decode") ||
(filter == "/ASCIIHexDecode")))
{
filterable = false;
}
@ -350,17 +363,35 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
}
bool
QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
bool normalize, bool compress,
QPDF_Stream::pipeStreamData(Pipeline* pipeline,
unsigned long encode_flags,
qpdf_stream_decode_level_e decode_level,
bool suppress_warnings)
{
std::vector<std::string> filters;
int predictor = 1;
int columns = 0;
bool early_code_change = true;
bool specialized_compression = false;
bool lossy_compression = false;
bool filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none)));
if (filter)
{
filter = filterable(filters, predictor, columns, early_code_change);
filter = filterable(filters, specialized_compression, lossy_compression,
predictor, columns, early_code_change);
if ((decode_level < qpdf_dl_all) && lossy_compression)
{
filter = false;
}
if ((decode_level < qpdf_dl_specialized) && specialized_compression)
{
filter = false;
}
QTC::TC("qpdf", "QPDF_Stream special filters",
(! filter) ? 0 :
lossy_compression ? 1 :
specialized_compression ? 2 :
3);
}
if (pipeline == 0)
@ -375,14 +406,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
if (filter)
{
if (compress)
if (encode_flags & qpdf_ef_compress)
{
pipeline = new Pl_Flate("compress object stream", pipeline,
Pl_Flate::a_deflate);
to_delete.push_back(pipeline);
}
if (normalize)
if (encode_flags & qpdf_ef_normalize)
{
pipeline = new Pl_QPDFTokenizer("normalizer", pipeline);
to_delete.push_back(pipeline);
@ -427,6 +458,17 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter,
early_code_change);
to_delete.push_back(pipeline);
}
else if (filter == "/RunLengthDecode")
{
pipeline = new Pl_RunLength("runlength decode", pipeline,
Pl_RunLength::a_decode);
to_delete.push_back(pipeline);
}
else if (filter == "/DCTDecode")
{
pipeline = new Pl_DCT("DCT decode", pipeline);
to_delete.push_back(pipeline);
}
else
{
throw std::logic_error(

View File

@ -393,7 +393,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
this->file->getLastOffset(),
"hint table length mismatch");
}
H.pipeStreamData(&pl, true, false, false);
H.pipeStreamData(&pl, 0, qpdf_dl_specialized);
return Hdict;
}

View File

@ -22,10 +22,11 @@ class QPDF_Stream: public QPDFObject
QPDFObjectHandle getDict() const;
// See comments in QPDFObjectHandle.hh for these methods.
bool pipeStreamData(Pipeline*, bool filter,
bool normalize, bool compress,
bool pipeStreamData(Pipeline*,
unsigned long encode_flags,
qpdf_stream_decode_level_e decode_level,
bool suppress_warnings);
PointerHolder<Buffer> getStreamData();
PointerHolder<Buffer> getStreamData(qpdf_stream_decode_level_e);
PointerHolder<Buffer> getRawStreamData();
void replaceStreamData(PointerHolder<Buffer> data,
QPDFObjectHandle const& filter,
@ -52,6 +53,7 @@ class QPDF_Stream: public QPDFObject
std::string const& filter, QPDFObjectHandle decode_params,
int& predictor, int& columns, bool& early_code_change);
bool filterable(std::vector<std::string>& filters,
bool& specialized_compression, bool& lossy_compression,
int& predictor, int& columns, bool& early_code_change);
void warn(QPDFExc const& e);

View File

@ -853,28 +853,90 @@ outfile.pdf</option>
developers. The following options are available:
<variablelist>
<varlistentry>
<term><option>--stream-data=<replaceable>option</replaceable></option></term>
<term><option>--compress-streams=<replaceable>[yn]</replaceable></option></term>
<listitem>
<para>
Controls transformation of stream data. The value of
<option><replaceable>option</replaceable></option> may be one
of the following:
By default, or with <option>--compress-streams=y</option>,
qpdf will compress any stream with no other filters applied to
it with the <literal>/FlateDecode</literal> filter when it
writes it. To suppress this behavior and preserve uncompressed
streams as uncompressed, use
<option>--compress-streams=n</option>.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--decode-level=<replaceable>option</replaceable></option></term>
<listitem>
<para>
Controls which streams qpdf tries to decode. The default is
<option>generalized</option>. The following options are
available:
<itemizedlist>
<listitem>
<para>
<option>compress</option>: recompress stream data when
possible (default)
<option>none</option>: do not attempt to decode any streams
</para>
</listitem>
<listitem>
<para>
<option>preserve</option>: leave all stream data as is
<option>generalized</option>: decode streams filtered with
supported generalized filters: <option>/LZWDecode</option>,
<option>/FlateDecode</option>,
<option>/ASCII85Decode</option>, and
<option>/ASCIIHexDecode</option>
</para>
</listitem>
<listitem>
<para>
<option>specialized</option>: in addition to generalized,
decode streams with supported non-lossy specialized
filters; currently this is just <option>/RunLengthDecode</option>
</para>
</listitem>
<listitem>
<para>
<option>all</option>: in addition to generalized and
specialized, decode streams with supported lossy filters;
currently this is just <option>/DCTDecode</option> (JPEG)
</para>
</listitem>
</itemizedlist>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--stream-data=<replaceable>option</replaceable></option></term>
<listitem>
<para>
Controls transformation of stream data. This option predates
the <option>--compress-streams</option> and
<option>--decode-level</option> options. Those options can be
used to achieve the same effect with more control. The value
of <option><replaceable>option</replaceable></option> may be
one of the following:
<itemizedlist>
<listitem>
<para>
<option>compress</option>: recompress stream data when
possible (default); equivalent to
<option>--compress-streams=y</option>
<option>--decode-level=generalized</option>
</para>
</listitem>
<listitem>
<para>
<option>preserve</option>: leave all stream data as is;
equivalent to <option>--compress-streams=n</option>
<option>--decode-level=none</option>
</para>
</listitem>
<listitem>
<para>
<option>uncompress</option>: uncompress stream data when
possible
possible; equivalent to
<option>--compress-streams=n</option>
<option>--decode-level=generalized</option>
</para>
</listitem>
</itemizedlist>

View File

@ -76,6 +76,10 @@ struct Options
use_aes(false),
stream_data_set(false),
stream_data_mode(qpdf_s_compress),
compress_streams(true),
compress_streams_set(false),
decode_level(qpdf_dl_generalized),
decode_level_set(false),
normalize_set(false),
normalize(false),
suppress_recovery(false),
@ -134,6 +138,10 @@ struct Options
bool use_aes;
bool stream_data_set;
qpdf_stream_data_e stream_data_mode;
bool compress_streams;
bool compress_streams_set;
qpdf_stream_decode_level_e decode_level;
bool decode_level_set;
bool normalize_set;
bool normalize;
bool suppress_recovery;
@ -357,6 +365,8 @@ the output file. Mostly these are of use only to people who are very\n\
familiar with the PDF file format or who are PDF developers.\n\
\n\
--stream-data=option controls transformation of stream data (below)\n\
--compress-streams=[yn] controls whether to compress streams on output\n\
--decode-level=option controls how to filter streams from the input\n\
--normalize-content=[yn] enables or disables normalization of content streams\n\
--suppress-recovery prevents qpdf from attempting to recover damaged files\n\
--object-streams=mode controls handing of object streams\n\
@ -383,6 +393,19 @@ Values for object stream mode:\n\
disable don't write any object streams\n\
generate use object streams wherever possible\n\
\n\
When --compress-streams=n is specified, this overrides the default behavior\n\
of qpdf, which is to attempt compress uncompressed streams. Setting\n\
stream data mode to uncompress or preserve has the same effect.\n\
\n\
The --decode-level parameter may be set to one of the following values:\n\
none do not decode streams\n\
generalized decode streams compressed with generalized filters\n\
including LZW, Flate, and the ASCII encoding filters.\n\
specialized additionally decode streams with non-lossy specialized\n\
filters including RunLength\n\
all additionally decode streams with lossy filters\n\
including DCT (JPEG)\n\
\n\
In qdf mode, by default, content normalization is turned on, and the\n\
stream data mode is set to uncompress.\n\
\n\
@ -1344,15 +1367,68 @@ static void parse_options(int argc, char* argv[], Options& o)
usage("invalid stream-data option");
}
}
else if (strcmp(arg, "compress-streams") == 0)
{
o.compress_streams_set = true;
if (parameter && (strcmp(parameter, "y") == 0))
{
o.compress_streams = true;
}
else if (parameter && (strcmp(parameter, "n") == 0))
{
o.compress_streams = false;
}
else
{
usage("--compress-streams must be given as"
" --compress-streams=[yn]");
}
}
else if (strcmp(arg, "decode-level") == 0)
{
// --decode-level=option selects how aggressively input stream
// filters are decoded: none, generalized, specialized, or all.
if (parameter == 0)
{
// The second literal starts with a space so that the
// concatenated message reads "given as --decode-level=option".
// NOTE(review): the strcmp calls below assume usage() does
// not return -- confirm.
usage("--decode-level must be given as"
" --decode-level=option");
}
o.decode_level_set = true;
if (strcmp(parameter, "none") == 0)
{
o.decode_level = qpdf_dl_none;
}
else if (strcmp(parameter, "generalized") == 0)
{
o.decode_level = qpdf_dl_generalized;
}
else if (strcmp(parameter, "specialized") == 0)
{
o.decode_level = qpdf_dl_specialized;
}
else if (strcmp(parameter, "all") == 0)
{
o.decode_level = qpdf_dl_all;
}
else
{
// Name the option actually being parsed rather than
// --stream-data.
usage("invalid decode-level option");
}
}
else if (strcmp(arg, "normalize-content") == 0)
{
if ((parameter == 0) || (*parameter == '\0'))
o.normalize_set = true;
if (parameter && (strcmp(parameter, "y") == 0))
{
o.normalize = true;
}
else if (parameter && (strcmp(parameter, "n") == 0))
{
o.normalize = false;
}
else
{
usage("--normalize-content must be given as"
" --normalize-content=[yn]");
}
o.normalize_set = true;
o.normalize = (parameter[0] == 'y');
}
else if (strcmp(arg, "suppress-recovery") == 0)
{
@ -1606,7 +1682,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code)
QPDFWriter w(pdf);
Pl_Discard discard;
w.setOutputPipeline(&discard);
w.setStreamDataMode(qpdf_s_uncompress);
w.setDecodeLevel(qpdf_dl_all);
w.write();
// Parse all content streams
@ -1667,7 +1743,7 @@ static void do_show_obj(QPDF& pdf, Options& o, int& exit_code)
{
bool filter = o.show_filtered_stream_data;
if (filter &&
(! obj.pipeStreamData(0, true, false, false)))
(! obj.pipeStreamData(0, 0, qpdf_dl_all)))
{
QTC::TC("qpdf", "qpdf unable to filter");
std::cerr << "Unable to filter stream data."
@ -1678,7 +1754,10 @@ static void do_show_obj(QPDF& pdf, Options& o, int& exit_code)
{
QUtil::binary_stdout();
Pl_StdioFile out("stdout", stdout);
obj.pipeStreamData(&out, filter, o.normalize, false);
obj.pipeStreamData(
&out,
(filter && o.normalize) ? qpdf_ef_normalize : 0,
filter ? qpdf_dl_all : qpdf_dl_none);
}
}
else
@ -2035,6 +2114,14 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
w.setStreamDataMode(o.stream_data_mode);
}
if (o.compress_streams_set)
{
w.setCompressStreams(o.compress_streams);
}
if (o.decode_level_set)
{
w.setDecodeLevel(o.decode_level);
}
if (o.decrypt)
{
w.setPreserveEncryption(false);

View File

@ -296,3 +296,4 @@ QPDF ignore length error xref entry 0
QPDF_encryption pad short parameter 0
QPDFWriter ignore self-referential object stream 0
QPDFObjectHandle found old angle 1
QPDF_Stream special filters 3

View File

@ -937,6 +937,39 @@ $td->runtest("check output",
{$td->FILE => "bad-data-precheck.pdf"});
show_ntests();
# ----------
$td->notify("--- Decode levels ---");
# 4 decode levels x 2 tests each in the loop, plus the 2 bad JPEG
# tests that follow.
$n_tests += 10;
# image-streams.pdf is the output of examples/pdf-create.
# examples/pdf-create validates the actual image data.
foreach my $l (qw(none generalized specialized all))
{
# Rewrite the file with stream compression turned off at decode
# level $l; the command must succeed with no output.
$td->runtest("image-streams: $l",
{$td->COMMAND =>
"qpdf image-streams.pdf --compress-streams=n" .
" --decode-level=$l a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Compare what test_driver test 39 reports for the rewritten file
# against the expected output for this decode level.
$td->runtest("check image-streams: $l",
{$td->COMMAND => "test_driver 39 a.pdf"},
{$td->FILE => "image-streams-$l.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
}
# Bad JPEG data
# Both commands below are expected to exit with status 3 and to
# produce the output stored in the corresponding .out file.
$td->runtest("check finds bad jpeg data",
{$td->COMMAND => "qpdf --check bad-jpeg.pdf"},
{$td->FILE => "bad-jpeg-check.out",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("get data",
{$td->COMMAND => "qpdf --show-object=6" .
" --filtered-stream-data bad-jpeg.pdf"},
{$td->FILE => "bad-jpeg-show.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Preserve unreferenced objects ---");
$n_tests += 4;
@ -1429,8 +1462,8 @@ $td->runtest("show-page-1-image",
$td->EXIT_STATUS => 0});
$td->runtest("unfilterable stream data",
{$td->COMMAND => "qpdf encrypted-with-images.pdf" .
" --show-object=8 --filtered-stream-data"},
{$td->COMMAND => "qpdf unfilterable.pdf" .
" --show-object=4 --filtered-stream-data"},
{$td->FILE => "show-unfilterable.out",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
@ -1461,7 +1494,7 @@ foreach my $f (qw(compressed-metadata.pdf enc-base.pdf))
{
foreach my $w (qw(compress preserve))
{
$td->runtest("$w streams",
$td->runtest("$w streams ($f)",
{$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
check_metadata("a.pdf", 0, 1);

View File

@ -0,0 +1,5 @@
checking bad-jpeg.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77

Binary file not shown.

View File

@ -0,0 +1,2 @@
WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -0,0 +1,2 @@
WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77
qpdf: operation succeeded with warnings; resulting file may have some problems

Binary file not shown.

View File

@ -18,8 +18,3 @@ warning: bad33.pdf (file position 629): stream filter type is not name or array
file: bad33.pdf
pos : 629
text: stream filter type is not name or array
warning: bad33.pdf (file position 629): stream filter type is not name or array
code: 5
file: bad33.pdf
pos : 629
text: stream filter type is not name or array

View File

@ -0,0 +1,19 @@
page 1
filter: null, color space: /DeviceCMYK
page 2
filter: null, color space: /DeviceCMYK
page 3
filter: null, color space: /DeviceCMYK
page 4
filter: null, color space: /DeviceRGB
page 5
filter: null, color space: /DeviceRGB
page 6
filter: null, color space: /DeviceRGB
page 7
filter: null, color space: /DeviceGray
page 8
filter: null, color space: /DeviceGray
page 9
filter: null, color space: /DeviceGray
test 39 done

View File

@ -0,0 +1,19 @@
page 1
filter: null, color space: /DeviceCMYK
page 2
filter: /DCTDecode, color space: /DeviceCMYK
page 3
filter: /RunLengthDecode, color space: /DeviceCMYK
page 4
filter: null, color space: /DeviceRGB
page 5
filter: /DCTDecode, color space: /DeviceRGB
page 6
filter: /RunLengthDecode, color space: /DeviceRGB
page 7
filter: null, color space: /DeviceGray
page 8
filter: /DCTDecode, color space: /DeviceGray
page 9
filter: /RunLengthDecode, color space: /DeviceGray
test 39 done

View File

@ -0,0 +1,19 @@
page 1
filter: /FlateDecode, color space: /DeviceCMYK
page 2
filter: /DCTDecode, color space: /DeviceCMYK
page 3
filter: /RunLengthDecode, color space: /DeviceCMYK
page 4
filter: /FlateDecode, color space: /DeviceRGB
page 5
filter: /DCTDecode, color space: /DeviceRGB
page 6
filter: /RunLengthDecode, color space: /DeviceRGB
page 7
filter: /FlateDecode, color space: /DeviceGray
page 8
filter: /DCTDecode, color space: /DeviceGray
page 9
filter: /RunLengthDecode, color space: /DeviceGray
test 39 done

View File

@ -0,0 +1,19 @@
page 1
filter: null, color space: /DeviceCMYK
page 2
filter: /DCTDecode, color space: /DeviceCMYK
page 3
filter: null, color space: /DeviceCMYK
page 4
filter: null, color space: /DeviceRGB
page 5
filter: /DCTDecode, color space: /DeviceRGB
page 6
filter: null, color space: /DeviceRGB
page 7
filter: null, color space: /DeviceGray
page 8
filter: /DCTDecode, color space: /DeviceGray
page 9
filter: null, color space: /DeviceGray
test 39 done

Binary file not shown.

Binary file not shown.

View File

@ -314,15 +314,15 @@ void runtest(int n, char const* filename1, char const* arg2)
std::cout.flush();
QUtil::binary_stdout();
PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("raw", stdout);
qtest.pipeStreamData(out.getPointer(), false, false, false);
qtest.pipeStreamData(out.getPointer(), 0, qpdf_dl_none);
std::cout << std::endl << "Uncompressed stream data:" << std::endl;
if (qtest.pipeStreamData(0, true, false, false))
if (qtest.pipeStreamData(0, 0, qpdf_dl_all))
{
std::cout.flush();
QUtil::binary_stdout();
out = new Pl_StdioFile("filtered", stdout);
qtest.pipeStreamData(out.getPointer(), true, false, false);
qtest.pipeStreamData(out.getPointer(), 0, qpdf_dl_all);
std::cout << std::endl << "End of stream data" << std::endl;
}
else
@ -362,7 +362,7 @@ void runtest(int n, char const* filename1, char const* arg2)
QPDFObjectHandle contents = page.getKey("/Contents");
QUtil::binary_stdout();
PointerHolder<Pl_StdioFile> out = new Pl_StdioFile("filtered", stdout);
contents.pipeStreamData(out.getPointer(), true, false, false);
contents.pipeStreamData(out.getPointer(), 0, qpdf_dl_generalized);
}
else if (n == 3)
{
@ -375,7 +375,8 @@ void runtest(int n, char const* filename1, char const* arg2)
QUtil::binary_stdout();
PointerHolder<Pl_StdioFile> out =
new Pl_StdioFile("tokenized stream", stdout);
stream.pipeStreamData(out.getPointer(), true, true, false);
stream.pipeStreamData(out.getPointer(),
qpdf_ef_normalize, qpdf_dl_generalized);
}
}
else if (n == 4)
@ -497,7 +498,7 @@ void runtest(int n, char const* filename1, char const* arg2)
throw std::logic_error("test 6 run on file with no metadata");
}
Pl_Buffer bufpl("buffer");
metadata.pipeStreamData(&bufpl, false, false, false);
metadata.pipeStreamData(&bufpl, 0, qpdf_dl_none);
Buffer* buf = bufpl.getBuffer();
unsigned char const* data = buf->getBuffer();
bool cleartext = false;
@ -1277,7 +1278,7 @@ void runtest(int n, char const* filename1, char const* arg2)
QPDFObjectHandle stream = item.getKey("/EF").getKey("/F");
Pl_Buffer p1("buffer");
Pl_Flate p2("compress", &p1, Pl_Flate::a_inflate);
stream.pipeStreamData(&p2, false, false, false);
stream.pipeStreamData(&p2, 0, qpdf_dl_none);
PointerHolder<Buffer> buf = p1.getBuffer();
std::string data = std::string(
reinterpret_cast<char const*>(buf->getBuffer()),
@ -1309,6 +1310,30 @@ void runtest(int n, char const* filename1, char const* arg2)
std::cout << qtest.getArrayItem(i).unparseResolved() << std::endl;
}
}
else if (n == 39)
{
    // For every page, print the /Filter and /ColorSpace entries of
    // the dictionary of each image found on that page.
    typedef std::map<std::string, QPDFObjectHandle> image_map;
    std::vector<QPDFObjectHandle> all_pages = pdf.getAllPages();
    for (size_t idx = 0; idx < all_pages.size(); ++idx)
    {
        // Pages are reported with 1-based numbers.
        std::cout << "page " << (idx + 1) << std::endl;
        image_map page_images = all_pages[idx].getPageImages();
        image_map::iterator img = page_images.begin();
        while (img != page_images.end())
        {
            QPDFObjectHandle dict = img->second.getDict();
            std::cout << "filter: "
                      << dict.getKey("/Filter").unparseResolved()
                      << ", color space: "
                      << dict.getKey("/ColorSpace").unparseResolved()
                      << std::endl;
            ++img;
        }
    }
}
else
{
throw std::runtime_error(std::string("invalid test ") +

View File

@ -273,7 +273,7 @@ static void check_image(int pageno, QPDFObjectHandle page)
QPDFObjectHandle image =
page.getKey("/Resources").getKey("/XObject").getKey("/Im1");
ImageChecker ic(pageno);
image.pipeStreamData(&ic, true, false, false);
image.pipeStreamData(&ic, 0, qpdf_dl_specialized);
}
static void check_pdf(char const* filename)