From 9744414c66e3f85700ebc8b32d90f45ff97221bd Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 19 Aug 2017 09:18:14 -0400 Subject: [PATCH] Enable finer grained control of stream decoding This commit adds several API methods that enable control over which types of filters QPDF will attempt to decode. It also adds support for /RunLengthDecode and /DCTDecode filters for both encoding and decoding. --- ChangeLog | 17 +++ examples/pdf-invert-images.cc | 3 +- include/qpdf/Constants.h | 19 +++- include/qpdf/QPDFObjectHandle.hh | 104 ++++++++++++------ include/qpdf/QPDFWriter.hh | 72 ++++++++++-- libqpdf/QPDF.cc | 18 +-- libqpdf/QPDFObjectHandle.cc | 34 +++++- libqpdf/QPDFWriter.cc | 83 +++++++++++--- libqpdf/QPDF_Stream.cc | 68 +++++++++--- libqpdf/QPDF_linearization.cc | 2 +- libqpdf/qpdf/QPDF_Stream.hh | 8 +- manual/qpdf-manual.xml | 78 +++++++++++-- qpdf/qpdf.cc | 99 ++++++++++++++++- qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf.test | 39 ++++++- qpdf/qtest/qpdf/bad-jpeg-check.out | 5 + qpdf/qtest/qpdf/bad-jpeg-out.pdf | Bin 0 -> 4681 bytes qpdf/qtest/qpdf/bad-jpeg-show.out | 2 + qpdf/qtest/qpdf/bad-jpeg.out | 2 + qpdf/qtest/qpdf/bad-jpeg.pdf | Bin 0 -> 4681 bytes qpdf/qtest/qpdf/c-write-warnings.out | 5 - qpdf/qtest/qpdf/image-streams-all.out | 19 ++++ qpdf/qtest/qpdf/image-streams-generalized.out | 19 ++++ qpdf/qtest/qpdf/image-streams-none.out | 19 ++++ qpdf/qtest/qpdf/image-streams-specialized.out | 19 ++++ qpdf/qtest/qpdf/image-streams.pdf | Bin 0 -> 1436659 bytes qpdf/qtest/qpdf/unfilterable.pdf | Bin 0 -> 799 bytes qpdf/test_driver.cc | 39 +++++-- qpdf/test_large_file.cc | 2 +- 29 files changed, 658 insertions(+), 118 deletions(-) create mode 100644 qpdf/qtest/qpdf/bad-jpeg-check.out create mode 100644 qpdf/qtest/qpdf/bad-jpeg-out.pdf create mode 100644 qpdf/qtest/qpdf/bad-jpeg-show.out create mode 100644 qpdf/qtest/qpdf/bad-jpeg.out create mode 100644 qpdf/qtest/qpdf/bad-jpeg.pdf create mode 100644 qpdf/qtest/qpdf/image-streams-all.out create mode 100644 
qpdf/qtest/qpdf/image-streams-generalized.out create mode 100644 qpdf/qtest/qpdf/image-streams-none.out create mode 100644 qpdf/qtest/qpdf/image-streams-specialized.out create mode 100644 qpdf/qtest/qpdf/image-streams.pdf create mode 100644 qpdf/qtest/qpdf/unfilterable.pdf diff --git a/ChangeLog b/ChangeLog index 87f5a2e2..5b9553d9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2017-08-19 Jay Berkenbilt + + * Remove --precheck-streams. This is enabled by default now + without any efficiency cost. This feature was never released. + + * Update pdf-create example to illustrate use of additional image + compression filters. + + * Add support for /RunLengthDecode and /DCTDecode: + - New pipeline types Pl_RunLength and Pl_DCT + - New command-line flags --compress-streams and --decode-level + to replace/enhance --stream-data + - New QPDFWriter::setCompressStreams and + QPDFWriter::setDecodeLevel methods + Please see documentation, header files, and help messages for + details on these new features. + 2017-08-12 Jay Berkenbilt * Add QPDFObjectHandle::rotatePage to apply rotation to a page diff --git a/examples/pdf-invert-images.cc b/examples/pdf-invert-images.cc index 00362091..537fd35e 100644 --- a/examples/pdf-invert-images.cc +++ b/examples/pdf-invert-images.cc @@ -121,7 +121,8 @@ int main(int argc, char* argv[]) // pipeStreamData with a null pipeline to determine // whether the image is filterable. Directly inspect // keys to determine the image type. 
- if (image.pipeStreamData(0, true, false, false) && + if (image.pipeStreamData(0, qpdf_ef_compress, + qpdf_dl_generalized) && color_space.isName() && bits_per_component.isInteger() && (color_space.getName() == "/DeviceGray") && diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index 38f1e71e..c2763956 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -26,7 +26,7 @@ enum qpdf_error_code_e qpdf_e_pages, /* erroneous or unsupported pages structure */ }; -/* Write Parameters */ +/* Write Parameters. See QPDFWriter.hh for details. */ enum qpdf_object_stream_e { @@ -41,6 +41,23 @@ enum qpdf_stream_data_e qpdf_s_compress /* compress stream data */ }; +/* Stream data flags */ + +/* See pipeStreamData in QPDFObjectHandle.hh for details on these flags. */ +enum qpdf_stream_encode_flags_e +{ + qpdf_ef_compress = 1 << 0, /* compress uncompressed streams */ + qpdf_ef_normalize = 1 << 1, /* normalize content stream */ +}; +enum qpdf_stream_decode_level_e +{ + /* These must be in order from less to more decoding. */ + qpdf_dl_none = 0, /* preserve all stream filters */ + qpdf_dl_generalized, /* decode general-purpose filters */ + qpdf_dl_specialized, /* also decode other non-lossy filters */ + qpdf_dl_all /* also decode loss filters */ +}; + /* R3 Encryption Parameters */ enum qpdf_r3_print_e diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index fbe02ba8..588768fc 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -44,19 +45,19 @@ class QPDFObjectHandle virtual ~StreamDataProvider() { } - // The implementation of this function must write the - // unencrypted, raw stream data to the given pipeline. Every - // call to provideStreamData for a given stream must write the - // same data. 
The number of bytes written must agree with the - // length provided at the time the StreamDataProvider object - // was associated with the stream. The object ID and - // generation passed to this method are those that belong to - // the stream on behalf of which the provider is called. They - // may be ignored or used by the implementation for indexing - // or other purposes. This information is made available just - // to make it more convenient to use a single - // StreamDataProvider object to provide data for multiple - // streams. + // The implementation of this function must write stream data + // to the given pipeline. The stream data must conform to + // whatever filters are explicitly associated with the stream. + // QPDFWriter may, in some cases, add compression, but if it + // does, it will update the filters as needed. Every call to + // provideStreamData for a given stream must write the same + // data. The object ID and generation passed to this method are + // those that belong to the stream on behalf of which the + // provider is called. They may be ignored or used by the + // implementation for indexing or other purposes. This + // information is made available just to make it more + // convenient to use a single StreamDataProvider object to + // provide data for multiple streams. virtual void provideStreamData(int objid, int generation, Pipeline* pipeline) = 0; }; @@ -370,32 +371,71 @@ class QPDFObjectHandle // Returns filtered (uncompressed) stream data. Throws an // exception if the stream is filtered and we can't decode it. QPDF_DLL - PointerHolder getStreamData(); + PointerHolder getStreamData( + qpdf_stream_decode_level_e level = qpdf_dl_generalized); + // Returns unfiltered (raw) stream data. QPDF_DLL PointerHolder getRawStreamData(); - // Write stream data through the given pipeline. A null pipeline + // Write stream data through the given pipeline. 
A null pipeline // value may be used if all you want to do is determine whether a - // stream is filterable. If filter is false, write raw stream - // data and return false. If filter is true, then attempt to - // apply all the decoding filters to the stream data. If we are - // successful, return true. Otherwise, return false and write raw - // data. If filtering is requested and successfully performed, - // then the normalize and compress flags are used to determine - // whether stream data should be normalized and compressed. In - // all cases, if this function returns false, raw data has been - // written. If it returns true, then any requested filtering has - // been performed. Note that if the original stream data has no - // filters applied to it, the return value will be equal to the - // value of the filter parameter. Callers may use the return - // value of this function to determine whether or not the /Filter - // and /DecodeParms keys in the stream dictionary should be - // replaced if writing a new stream object. + // stream is filterable and would be filtered based on the + // provided flags. If flags is 0, write raw stream data and return + // false. Otherwise, the flags alter the behavior in the following + // way: + // + // encode_flags: + // + // qpdf_ef_compress -- compress data with /FlateDecode if no other + // compression filters are applied. + // + // qpdf_ef_normalize -- tokenize as content stream and normalize tokens + // + // decode_level: + // + // qpdf_dl_none -- do not decode any streams. + // + // qpdf_dl_generalized -- decode supported general-purpose + // filters. This includes /ASCIIHexDecode, /ASCII85Decode, + // /LZWDecode, and /FlateDecode. + // + // qpdf_dl_specialized -- in addition to generalized filters, also + // decode supported non-lossy specialized filters. This includes + // /RunLengthDecode. + // + // qpdf_dl_all -- in addition to generalized and non-lossy + // specialized filters, decode supported lossy filters. 
This + // includes /DCTDecode. + // + // If, based on the flags and the filters and decode parameters, + // we determine that we know how to apply all requested filters, + // do so and return true if we are successful. + // + // In all cases, a return value of true means that filtered data + // has been written successfully. If filtering is requested but + // this method returns false, it means there was some error in the + // filtering, in which case the resulting data is likely partially + // filtered and/or incomplete and may not be consistent with the + // configured filters. QPDFWriter handles this by attempting to + // get the stream data without filtering, but callers should + // consider a false return value when decode_level is not + // qpdf_dl_none to be a potential loss of data. + QPDF_DLL + bool pipeStreamData(Pipeline*, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, + bool suppress_warnings = false); + + // Legacy pipeStreamData. This maps to the flags-based + // pipeStreamData as follows: + // filter = false -> encode_flags = 0 + // filter = true -> decode_level = qpdf_dl_generalized + // normalize = true -> encode_flags |= qpdf_ef_normalize + // compress = true -> encode_flags |= qpdf_ef_compress QPDF_DLL bool pipeStreamData(Pipeline*, bool filter, - bool normalize, bool compress, - bool suppress_warnings = false); + bool normalize, bool compress); // Replace a stream's dictionary. The new dictionary must be // consistent with the stream's data. This is most appropriately diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index 2519ed12..c4bc7846 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -118,14 +118,70 @@ class QPDFWriter QPDF_DLL void setObjectStreamMode(qpdf_object_stream_e); - // Set value of stream data mode. In uncompress mode, we attempt - // to uncompress any stream that we can. In preserve mode, we - // preserve any filtering applied to streams. 
In compress mode, - // if we can apply all filters and the stream is not already - // optimally compressed, recompress the stream. + // Set value of stream data mode. This is an older interface. + // Instead of using this, prefer setCompressStreams() and + // setDecodeLevel(). This method is retained for compatibility, + // but it does not cover the full range of available + // configurations. The mapping between this and the new methods is + // as follows: + // + // qpdf_s_uncompress: + // setCompressStreams(false) + // setDecodeLevel(qpdf_dl_generalized) + // qpdf_s_preserve: + // setCompressStreams(false) + // setDecodeLevel(qpdf_dl_none) + // qpdf_s_compress: + // setCompressStreams(true) + // setDecodeLevel(qpdf_dl_generalized) + // + // The default is qpdf_s_compress. QPDF_DLL void setStreamDataMode(qpdf_stream_data_e); + // If true, compress any uncompressed streams when writing them. + // Metadata streams are a special case and are not compressed even + // if this is true. This is true by default for QPDFWriter. If you + // want QPDFWriter to leave uncompressed streams uncompressed, + // pass false to this method. + QPDF_DLL + void setCompressStreams(bool); + + // When QPDFWriter encounters streams, this parameter controls the + // behavior with respect to attempting to apply any filters to the + // streams when copying to the output. The decode levels are as + // follows: + // + // qpdf_dl_none: Do not attempt to apply any filters. Streams + // remain as they appear in the original file. Note that + // uncompressed streams may still be compressed on output. You can + // disable that by calling setCompressStreams(false). + // + // qpdf_dl_generalized: This is the default. QPDFWriter will apply + // LZWDecode, ASCII85Decode, ASCIIHexDecode, and FlateDecode + // filters on the input. 
When combined with + // setCompressStreams(true), which is the default, the effect of this + // is that streams filtered with these older and less efficient + // filters will be recompressed with the Flate filter. As a + // special case, if a stream is already compressed with + // FlateDecode and setCompressStreams is enabled, the original + // compressed data will be preserved. + // + // qpdf_dl_specialized: In addition to uncompressing the + // generalized compression formats, supported non-lossy + // compression will also be decoded. At present, this includes + // the RunLengthDecode filter. + // + // qpdf_dl_all: In addition to generalized and non-lossy + // specialized filters, supported lossy compression filters will + // be applied. At present, this includes DCTDecode (JPEG) + // compression. Note that compressing the resulting data with + // DCTDecode again will accumulate loss, so avoid multiple + // compression and decompression cycles. This is mostly useful for + // retrieving image data. + QPDF_DLL + void setDecodeLevel(qpdf_stream_decode_level_e); + // Set value of content stream normalization. The default is // "false". If true, we attempt to normalize newlines inside of // content streams. 
Some constructs such as inline images may @@ -434,8 +490,10 @@ class QPDFWriter Buffer* output_buffer; bool normalize_content_set; bool normalize_content; - bool stream_data_mode_set; - qpdf_stream_data_e stream_data_mode; + bool compress_streams; + bool compress_streams_set; + qpdf_stream_decode_level_e stream_decode_level; + bool stream_decode_level_set; bool qdf_mode; bool precheck_streams; bool preserve_unreferenced_objects; diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index d1360b14..9c79fc3a 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -45,7 +45,7 @@ QPDF::CopiedStreamDataProvider::provideStreamData( { QPDFObjectHandle foreign_stream = this->foreign_streams[QPDFObjGen(objid, generation)]; - foreign_stream.pipeStreamData(pipeline, false, false, false); + foreign_stream.pipeStreamData(pipeline, 0, qpdf_dl_none); } void @@ -2377,6 +2377,7 @@ QPDF::pipeStreamData(int objid, int generation, length -= len; pipeline->write(QUtil::unsigned_char_pointer(buf), len); } + pipeline->finish(); success = true; } catch (QPDFExc& e) @@ -2398,13 +2399,16 @@ QPDF::pipeStreamData(int objid, int generation, QUtil::int_to_string(generation) + ": " + e.what())); } } - try + if (! 
success) { - pipeline->finish(); - } - catch (std::exception&) - { - // ignore + try + { + pipeline->finish(); + } + catch (std::exception&) + { + // ignore + } } return success; } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index a8a7e5a7..105ecad9 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -482,10 +482,10 @@ QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict) } PointerHolder -QPDFObjectHandle::getStreamData() +QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) { assertStream(); - return dynamic_cast(obj.getPointer())->getStreamData(); + return dynamic_cast(obj.getPointer())->getStreamData(level); } PointerHolder @@ -496,13 +496,35 @@ QPDFObjectHandle::getRawStreamData() } bool -QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, - bool normalize, bool compress, +QPDFObjectHandle::pipeStreamData(Pipeline* p, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, bool suppress_warnings) { assertStream(); return dynamic_cast(obj.getPointer())->pipeStreamData( - p, filter, normalize, compress, suppress_warnings); + p, encode_flags, decode_level, suppress_warnings); +} + +bool +QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, + bool normalize, bool compress) +{ + unsigned long encode_flags = 0; + qpdf_stream_decode_level_e decode_level = qpdf_dl_none; + if (filter) + { + decode_level = qpdf_dl_generalized; + if (normalize) + { + encode_flags |= qpdf_ef_normalize; + } + if (compress) + { + encode_flags |= qpdf_ef_compress; + } + } + return pipeStreamData(p, encode_flags, decode_level, false); } void @@ -825,7 +847,7 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, all_description += ","; } all_description += " " + og; - if (! stream.pipeStreamData(&buf, true, false, false, false)) + if (! 
stream.pipeStreamData(&buf, 0, qpdf_dl_specialized)) { QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent"); warn(stream.getOwningQPDF(), diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 345613ad..fe25853a 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -54,8 +54,10 @@ QPDFWriter::init() output_buffer = 0; normalize_content_set = false; normalize_content = false; - stream_data_mode_set = false; - stream_data_mode = qpdf_s_compress; + compress_streams = true; + compress_streams_set = false; + stream_decode_level = qpdf_dl_none; + stream_decode_level_set = false; qdf_mode = false; precheck_streams = false; preserve_unreferenced_objects = false; @@ -162,8 +164,42 @@ QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode) void QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode) { - this->stream_data_mode_set = true; - this->stream_data_mode = mode; + switch (mode) + { + case qpdf_s_uncompress: + this->stream_decode_level = + std::max(qpdf_dl_generalized, this->stream_decode_level); + this->compress_streams = false; + break; + + case qpdf_s_preserve: + this->stream_decode_level = qpdf_dl_none; + this->compress_streams = false; + break; + + case qpdf_s_compress: + this->stream_decode_level = + std::max(qpdf_dl_generalized, this->stream_decode_level); + this->compress_streams = true; + break; + } + this->stream_decode_level_set = true; + this->compress_streams_set = true; +} + + +void +QPDFWriter::setCompressStreams(bool val) +{ + this->compress_streams = val; + this->compress_streams_set = true; +} + +void +QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val) +{ + this->stream_decode_level = val; + this->stream_decode_level_set = true; } void @@ -1512,8 +1548,8 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, { is_metadata = true; } - bool filter = (this->stream_data_mode != qpdf_s_preserve); - if (this->stream_data_mode == qpdf_s_compress) + bool filter = (this->compress_streams || 
this->stream_decode_level); + if (this->compress_streams) { // Don't filter if the stream is already compressed with // FlateDecode. We don't want to make it worse by getting @@ -1532,19 +1568,21 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, } bool normalize = false; bool compress = false; + bool uncompress = false; if (is_metadata && ((! this->encrypted) || (this->encrypt_metadata == false))) { QTC::TC("qpdf", "QPDFWriter not compressing metadata"); filter = true; compress = false; + uncompress = true; } else if (this->normalize_content && normalized_streams.count(old_og)) { normalize = true; filter = true; } - else if (filter && (this->stream_data_mode == qpdf_s_compress)) + else if (filter && this->compress_streams) { compress = true; QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); @@ -1559,7 +1597,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, QTC::TC("qpdf", "QPDFWriter precheck stream"); Pl_Discard discard; filter = object.pipeStreamData( - &discard, true, false, false, true); + &discard, 0, qpdf_dl_all, true); } catch (std::exception&) { @@ -1569,8 +1607,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, pushPipeline(new Pl_Buffer("stream data")); activatePipelineStack(); + bool filtered = - object.pipeStreamData(this->pipeline, filter, normalize, compress); + object.pipeStreamData( + this->pipeline, + (((filter && normalize) ? qpdf_ef_normalize : 0) | + ((filter && compress) ? qpdf_ef_compress : 0)), + (filter + ? (uncompress ? qpdf_dl_all : this->stream_decode_level) + : qpdf_dl_none)); PointerHolder stream_data; popPipelineStack(&stream_data); if (filtered) @@ -1717,8 +1762,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) // Set up a stream to write the stream data into a buffer. Pipeline* next = pushPipeline(new Pl_Buffer("object stream")); - if (! ((this->stream_data_mode == qpdf_s_uncompress) || - this->qdf_mode)) + if (! 
(this->stream_decode_level || this->qdf_mode)) { compressed = true; next = pushPipeline( @@ -2180,7 +2224,8 @@ QPDFWriter::prepareFileForWrite() is_stream = true; dict = node.getDict(); // See whether we are able to filter this stream. - filterable = node.pipeStreamData(0, true, false, false); + filterable = node.pipeStreamData( + 0, 0, this->stream_decode_level, true); } else if (pdf.getRoot().getObjectID() == node.getObjectID()) { @@ -2260,10 +2305,14 @@ QPDFWriter::write() { this->normalize_content = true; } - if (! this->stream_data_mode_set) + if (! this->compress_streams_set) { - this->stream_data_mode = qpdf_s_uncompress; + this->compress_streams = false; } + if (! this->stream_decode_level_set) + { + this->stream_decode_level = qpdf_dl_generalized; + } } if (this->encrypted) @@ -2272,7 +2321,7 @@ QPDFWriter::write() this->preserve_encryption = false; } else if (this->normalize_content || - (this->stream_data_mode == qpdf_s_uncompress) || + this->stream_decode_level || this->qdf_mode) { // Encryption makes looking at contents pretty useless. If @@ -2300,7 +2349,7 @@ QPDFWriter::write() } if (this->qdf_mode || this->normalize_content || - (this->stream_data_mode == qpdf_s_uncompress)) + this->stream_decode_level) { initializeSpecialStreams(); } @@ -2586,7 +2635,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); bool compressed = false; - if (! ((this->stream_data_mode == qpdf_s_uncompress) || this->qdf_mode)) + if (! (this->stream_decode_level || this->qdf_mode)) { compressed = true; if (! 
skip_compression) diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 31d583b8..bcf9be92 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include @@ -82,10 +84,10 @@ QPDF_Stream::getDict() const } PointerHolder -QPDF_Stream::getStreamData() +QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level) { Pl_Buffer buf("stream data buffer"); - if (! pipeStreamData(&buf, true, false, false, false)) + if (! pipeStreamData(&buf, 0, decode_level, false)) { throw std::logic_error("getStreamData called on unfilterable stream"); } @@ -97,7 +99,7 @@ PointerHolder QPDF_Stream::getRawStreamData() { Pl_Buffer buf("stream data buffer"); - pipeStreamData(&buf, false, false, false, false); + pipeStreamData(&buf, 0, qpdf_dl_none, false); QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); return buf.getBuffer(); } @@ -178,6 +180,8 @@ QPDF_Stream::understandDecodeParams( bool QPDF_Stream::filterable(std::vector& filters, + bool& specialized_compression, + bool& lossy_compression, int& predictor, int& columns, bool& early_code_change) { @@ -254,11 +258,20 @@ QPDF_Stream::filterable(std::vector& filters, filter = filter_abbreviations[filter]; } - if (! ((filter == "/Crypt") || - (filter == "/FlateDecode") || - (filter == "/LZWDecode") || - (filter == "/ASCII85Decode") || - (filter == "/ASCIIHexDecode"))) + if (filter == "/RunLengthDecode") + { + specialized_compression = true; + } + else if (filter == "/DCTDecode") + { + specialized_compression = true; + lossy_compression = true; + } + else if (! 
((filter == "/Crypt") || + (filter == "/FlateDecode") || + (filter == "/LZWDecode") || + (filter == "/ASCII85Decode") || + (filter == "/ASCIIHexDecode"))) { filterable = false; } @@ -350,17 +363,35 @@ QPDF_Stream::filterable(std::vector& filters, } bool -QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, - bool normalize, bool compress, +QPDF_Stream::pipeStreamData(Pipeline* pipeline, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, bool suppress_warnings) { std::vector filters; int predictor = 1; int columns = 0; bool early_code_change = true; + bool specialized_compression = false; + bool lossy_compression = false; + bool filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none))); if (filter) { - filter = filterable(filters, predictor, columns, early_code_change); + filter = filterable(filters, specialized_compression, lossy_compression, + predictor, columns, early_code_change); + if ((decode_level < qpdf_dl_all) && lossy_compression) + { + filter = false; + } + if ((decode_level < qpdf_dl_specialized) && specialized_compression) + { + filter = false; + } + QTC::TC("qpdf", "QPDF_Stream special filters", + (! filter) ? 0 : + lossy_compression ? 1 : + specialized_compression ? 
2 : + 3); } if (pipeline == 0) @@ -375,14 +406,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, if (filter) { - if (compress) + if (encode_flags & qpdf_ef_compress) { pipeline = new Pl_Flate("compress object stream", pipeline, Pl_Flate::a_deflate); to_delete.push_back(pipeline); } - if (normalize) + if (encode_flags & qpdf_ef_normalize) { pipeline = new Pl_QPDFTokenizer("normalizer", pipeline); to_delete.push_back(pipeline); @@ -427,6 +458,17 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, early_code_change); to_delete.push_back(pipeline); } + else if (filter == "/RunLengthDecode") + { + pipeline = new Pl_RunLength("runlength decode", pipeline, + Pl_RunLength::a_decode); + to_delete.push_back(pipeline); + } + else if (filter == "/DCTDecode") + { + pipeline = new Pl_DCT("DCT decode", pipeline); + to_delete.push_back(pipeline); + } else { throw std::logic_error( diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 424d6d6f..b05b1d4c 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -393,7 +393,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) this->file->getLastOffset(), "hint table length mismatch"); } - H.pipeStreamData(&pl, true, false, false); + H.pipeStreamData(&pl, 0, qpdf_dl_specialized); return Hdict; } diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index d053fd0f..8b960f00 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -22,10 +22,11 @@ class QPDF_Stream: public QPDFObject QPDFObjectHandle getDict() const; // See comments in QPDFObjectHandle.hh for these methods. 
- bool pipeStreamData(Pipeline*, bool filter, - bool normalize, bool compress, + bool pipeStreamData(Pipeline*, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, bool suppress_warnings); - PointerHolder getStreamData(); + PointerHolder getStreamData(qpdf_stream_decode_level_e); PointerHolder getRawStreamData(); void replaceStreamData(PointerHolder data, QPDFObjectHandle const& filter, @@ -52,6 +53,7 @@ class QPDF_Stream: public QPDFObject std::string const& filter, QPDFObjectHandle decode_params, int& predictor, int& columns, bool& early_code_change); bool filterable(std::vector& filters, + bool& specialized_compression, bool& lossy_compression, int& predictor, int& columns, bool& early_code_change); void warn(QPDFExc const& e); diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index ac1de4c6..14e8b8bc 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -853,28 +853,90 @@ outfile.pdf developers. The following options are available: - + - Controls transformation of stream data. The value of - may be one - of the following: + By default, or with , + qpdf will compress any stream with no other filters applied to + it with the /FlateDecode filter when it + writes it. To suppress this behavior and preserve uncompressed + streams as uncompressed, use + . + + + + + + + + Controls which streams qpdf tries to decode. The default is + . The following options are + available: - : recompress stream data when - possible (default) + : do not attempt to decode any streams - : leave all stream data as is + : decode streams filtered with + supported generalized filters: , + , + , and + + + + + + : in addition to generalized, + decode streams with supported non-lossy specialized + filters; currently this is just + + + + + : in addition to generalized and + specialized, decode streams with supported lossy filters; + currently this is just (JPEG) + + + + + + + + + + + Controls transformation of stream data. 
This option predates + the and + options. Those options can be + used to achieve the same affect with more control. The value + of may be + one of the following: + + + + : recompress stream data when + possible (default); equivalent to + + + + + + + : leave all stream data as is; + equivalent to + : uncompress stream data when - possible + possible; equivalent to + + diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index f6b1de28..df2ba225 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -76,6 +76,10 @@ struct Options use_aes(false), stream_data_set(false), stream_data_mode(qpdf_s_compress), + compress_streams(true), + compress_streams_set(false), + decode_level(qpdf_dl_generalized), + decode_level_set(false), normalize_set(false), normalize(false), suppress_recovery(false), @@ -134,6 +138,10 @@ struct Options bool use_aes; bool stream_data_set; qpdf_stream_data_e stream_data_mode; + bool compress_streams; + bool compress_streams_set; + qpdf_stream_decode_level_e decode_level; + bool decode_level_set; bool normalize_set; bool normalize; bool suppress_recovery; @@ -357,6 +365,8 @@ the output file. Mostly these are of use only to people who are very\n\ familiar with the PDF file format or who are PDF developers.\n\ \n\ --stream-data=option controls transformation of stream data (below)\n\ +--compress-streams=[yn] controls whether to compress streams on output\n\ +--decode-level=option controls how to filter streams from the input\n\ --normalize-content=[yn] enables or disables normalization of content streams\n\ --suppress-recovery prevents qpdf from attempting to recover damaged files\n\ --object-streams=mode controls handing of object streams\n\ @@ -383,6 +393,19 @@ Values for object stream mode:\n\ disable don't write any object streams\n\ generate use object streams wherever possible\n\ \n\ +When --compress-streams=n is specified, this overrides the default behavior\n\ +of qpdf, which is to attempt compress uncompressed streams. 
Setting\n\ +stream data mode to uncompress or preserve has the same effect.\n\ +\n\ +The --decode-level parameter may be set to one of the following values:\n\ + none do not decode streams\n\ + generalized decode streams compressed with generalized filters\n\ + including LZW, Flate, and the ASCII encoding filters.\n\ + specialized additionally decode streams with non-lossy specialized\n\ + filters including RunLength\n\ + all additionally decode streams with lossy filters\n\ + including DCT (JPEG)\n\ +\n\ In qdf mode, by default, content normalization is turned on, and the\n\ stream data mode is set to uncompress.\n\ \n\ @@ -1344,15 +1367,68 @@ static void parse_options(int argc, char* argv[], Options& o) usage("invalid stream-data option"); } } + else if (strcmp(arg, "compress-streams") == 0) + { + o.compress_streams_set = true; + if (parameter && (strcmp(parameter, "y") == 0)) + { + o.compress_streams = true; + } + else if (parameter && (strcmp(parameter, "n") == 0)) + { + o.compress_streams = false; + } + else + { + usage("--compress-streams must be given as" + " --compress-streams=[yn]"); + } + } + else if (strcmp(arg, "decode-level") == 0) + { + if (parameter == 0) + { + usage("--decode-level must be given as" + "--decode-level=option"); + } + o.decode_level_set = true; + if (strcmp(parameter, "none") == 0) + { + o.decode_level = qpdf_dl_none; + } + else if (strcmp(parameter, "generalized") == 0) + { + o.decode_level = qpdf_dl_generalized; + } + else if (strcmp(parameter, "specialized") == 0) + { + o.decode_level = qpdf_dl_specialized; + } + else if (strcmp(parameter, "all") == 0) + { + o.decode_level = qpdf_dl_all; + } + else + { + usage("invalid stream-data option"); + } + } else if (strcmp(arg, "normalize-content") == 0) { - if ((parameter == 0) || (*parameter == '\0')) + o.normalize_set = true; + if (parameter && (strcmp(parameter, "y") == 0)) + { + o.normalize = true; + } + else if (parameter && (strcmp(parameter, "n") == 0)) + { + o.normalize = false; + 
} + else { usage("--normalize-content must be given as" " --normalize-content=[yn]"); } - o.normalize_set = true; - o.normalize = (parameter[0] == 'y'); } else if (strcmp(arg, "suppress-recovery") == 0) { @@ -1606,7 +1682,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code) QPDFWriter w(pdf); Pl_Discard discard; w.setOutputPipeline(&discard); - w.setStreamDataMode(qpdf_s_uncompress); + w.setDecodeLevel(qpdf_dl_all); w.write(); // Parse all content streams @@ -1667,7 +1743,7 @@ static void do_show_obj(QPDF& pdf, Options& o, int& exit_code) { bool filter = o.show_filtered_stream_data; if (filter && - (! obj.pipeStreamData(0, true, false, false))) + (! obj.pipeStreamData(0, 0, qpdf_dl_all))) { QTC::TC("qpdf", "qpdf unable to filter"); std::cerr << "Unable to filter stream data." @@ -1678,7 +1754,10 @@ static void do_show_obj(QPDF& pdf, Options& o, int& exit_code) { QUtil::binary_stdout(); Pl_StdioFile out("stdout", stdout); - obj.pipeStreamData(&out, filter, o.normalize, false); + obj.pipeStreamData( + &out, + (filter && o.normalize) ? qpdf_ef_normalize : 0, + filter ? 
qpdf_dl_all : qpdf_dl_none); } } else @@ -2035,6 +2114,14 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) { w.setStreamDataMode(o.stream_data_mode); } + if (o.compress_streams_set) + { + w.setCompressStreams(o.compress_streams); + } + if (o.decode_level_set) + { + w.setDecodeLevel(o.decode_level); + } if (o.decrypt) { w.setPreserveEncryption(false); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 72f5331e..2a157c91 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -296,3 +296,4 @@ QPDF ignore length error xref entry 0 QPDF_encryption pad short parameter 0 QPDFWriter ignore self-referential object stream 0 QPDFObjectHandle found old angle 1 +QPDF_Stream special filters 3 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 9242a8a7..97d73277 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -937,6 +937,39 @@ $td->runtest("check output", {$td->FILE => "bad-data-precheck.pdf"}); show_ntests(); # ---------- +$td->notify("--- Decode levels ---"); +$n_tests += 10; + +# image-streams.pdf is the output of examples/pdf-create. +# examples/pdf-create validates the actual image data. +foreach my $l (qw(none generalized specialized all)) +{ + $td->runtest("image-streams: $l", + {$td->COMMAND => + "qpdf image-streams.pdf --compress-streams=n" . + " --decode-level=$l a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + $td->runtest("check image-streams: $l", + {$td->COMMAND => "test_driver 39 a.pdf"}, + {$td->FILE => "image-streams-$l.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +} + +# Bad JPEG data +$td->runtest("check finds bad jpeg data", + {$td->COMMAND => "qpdf --check bad-jpeg.pdf"}, + {$td->FILE => "bad-jpeg-check.out", + $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); +$td->runtest("get data", + {$td->COMMAND => "qpdf --show-object=6" . 
+ " --filtered-stream-data bad-jpeg.pdf"}, + {$td->FILE => "bad-jpeg-show.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); + +show_ntests(); +# ---------- $td->notify("--- Preserve unreferenced objects ---"); $n_tests += 4; @@ -1429,8 +1462,8 @@ $td->runtest("show-page-1-image", $td->EXIT_STATUS => 0}); $td->runtest("unfilterable stream data", - {$td->COMMAND => "qpdf encrypted-with-images.pdf" . - " --show-object=8 --filtered-stream-data"}, + {$td->COMMAND => "qpdf unfilterable.pdf" . + " --show-object=4 --filtered-stream-data"}, {$td->FILE => "show-unfilterable.out", $td->EXIT_STATUS => 2}, $td->NORMALIZE_NEWLINES); @@ -1461,7 +1494,7 @@ foreach my $f (qw(compressed-metadata.pdf enc-base.pdf)) { foreach my $w (qw(compress preserve)) { - $td->runtest("$w streams", + $td->runtest("$w streams ($f)", {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); check_metadata("a.pdf", 0, 1); diff --git a/qpdf/qtest/qpdf/bad-jpeg-check.out b/qpdf/qtest/qpdf/bad-jpeg-check.out new file mode 100644 index 00000000..ad7f8ecc --- /dev/null +++ b/qpdf/qtest/qpdf/bad-jpeg-check.out @@ -0,0 +1,5 @@ +checking bad-jpeg.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 diff --git a/qpdf/qtest/qpdf/bad-jpeg-out.pdf b/qpdf/qtest/qpdf/bad-jpeg-out.pdf new file mode 100644 index 0000000000000000000000000000000000000000..70ccd02ff91ca78b8ed736826b9f79ae62e2cfb7 GIT binary patch literal 4681 zcmeHLTTmNS7~ai=Bnu1;&fQ)<1W z4~n%?9mkQh)#^BQs#a~MV?}Abj5AI_Yg_Ts+E(ln^-{ob#Ip$j242)x_;=1u@}Kix zzVF-Fnf-HW?2ZDekRftTP6huaC_no{4E=yco?2k6?`?;`XyYZ`BQ}HL zVgh>I@H!ihx09j%G(Q}UI}_7KVZYWs*6*?p)PEm)Ht$XQ2dIdxx#>gmNvYDYG_TN|5|D&}S3d+C{IdmK8lZqDS=Awev2N z)DF7@7pyHWMfvCGv)k$;?=;R#bjM+pkBE5Uw zFdWA)99`J#x0nsnMn!3&)LM;3qtj`Ln8eta=;)Z__=LE`w3PH)(o#}WL3Uvt$lQ>X 
znrg7*Zy+g_Wz+MxJFIk3A;Z#g5?rU##YD#>#l|Ml8L1icf412VF})W55r0~VXJQIH zuGHhRBUn1>Q;mlVn=@RYRH>shS{)IM4jf&BDR8Ayp;D^VDiu1t9qnT(y*fUFDv3(i z$ZIk?66qZSL)xs;Bg1QJ&rD`BPG7%{Sa(xWa!O8ao+1BM)?~I=x!Y{^GDmqu<=u7l z_dn3E>A}q|H*9Kd5nBCHXIFPmZ{N;GcRlv_6Hf-7+4Jn)=LYvZ|3Yy8frGESdg!&+ z-+KFGpMNel8qX^BO#OCp-n6u7&Pvp*yyRuAr@I1%!{bkptGcf&-Dl4)`=;*k!6t$P7J{5NU^bZ;6KDeS zJ_>z#%Lcp)J~J8{Eb&n^8iFkGF(kLl$6Bn*d?xFn8j_C}JZKCd4?k)}QJ%tt#x`|A z9|RUO?XZK!A`{~@b0#a~oDepVB;;w{MOrLwQxnZmw9{N%#850{;%GBMW92LyLo-&^ hOq*GTu`;xUGZk+Me_QiDDMSOyk~EQ%Q&#OD{sBI}^)CPb literal 0 HcmV?d00001 diff --git a/qpdf/qtest/qpdf/bad-jpeg-show.out b/qpdf/qtest/qpdf/bad-jpeg-show.out new file mode 100644 index 00000000..915060a4 --- /dev/null +++ b/qpdf/qtest/qpdf/bad-jpeg-show.out @@ -0,0 +1,2 @@ +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/bad-jpeg.out b/qpdf/qtest/qpdf/bad-jpeg.out new file mode 100644 index 00000000..915060a4 --- /dev/null +++ b/qpdf/qtest/qpdf/bad-jpeg.out @@ -0,0 +1,2 @@ +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/bad-jpeg.pdf b/qpdf/qtest/qpdf/bad-jpeg.pdf new file mode 100644 index 0000000000000000000000000000000000000000..81511115eb96de8cd25501d2339d325c01e201c2 GIT binary patch literal 4681 zcmeHLTTmNS7~ai=Bnu1;&wh|>JKwrx>!vLU>rw-^FdfOq;+c=T)3eBLj zlz=|BbWqxk2H1p7p9mtU?dU zLZZt^(SWhg0OhLX{ODk`tvVPGI{hxxr_`}SKs(_d6flGnm6oCizu>BeB1)x4U{N=) zM+Ew+HU!TmW!zNbY=y2!U{xChjL}FbID%VheGzD>$!O;e-YY^ss_o@OXooJrjd-et zzGkrnSS(UdK=ebdjp%tXe)sLf&byMaZ8a2p<3>{1*6(r;G<+X_w%|?s2y*YI+(qyjKoH`N-(MGLY0EP+DS3++W6nPg1^yNO(Fz;&y`bOSY<_qu( zj{CSagv5d5tnYM+VfiI8(EkW0;am%9O03P7;>AEM^xK5CcEKm9Wk!#V;1&G!?VJlG 
zwZksn1?wuTDE}ONc3VS~yh_M7w}^l>lZ#4+<%*V$Du$YNBbr$fot&ebF&fB4q<7C7 zhT|B9qYIn;7PDdMm>5-zQms;{G#WJ#mmD7#8ylCJn3Ry5k(PN&Mp}A0$Sp1a*&A}w z({-l84J5^~Y-WMw4l`X+%&@eS1lMRZaj|hJ@$o5iR(clwpKbO-OsmF!#GjVq*_cd= z%eDCI2$qTZRN`U7<_wp~70MWuT0_L514q|jGF&c~Ddb9}LV-?iNBfvUt4z$I%3_i> za;of(WO~QIkUGbDWO!}enaNzn>F?JN>uyR(P0PzK&=uaw8jL2hqAMd!F6<+~B_FUkL3#aPXB^554yK zTW`N}^xgN~KX&}1k4H|O{^ZloJ|7+X>g%)LoI8Kv;-#N1U%C48#4p#b|N7hSf83aw zp7~S4g=6vvEOd_mTY^iAaLE)3xk4r3!e!lP$+Ze)78R3Nwo%1(BxTb()XCO?p(DeZ z9Hwq^tgJa^3g*`_x;l+bfF-&x>gPcldZbY$-@a_vY_te)wOasNT?14*I%s}?_bq@MLqWk zy-4>N^qm8SpLmjA(4I#7EI+2(rY-kd|dW)?{AhGng0E5d9qQMPmqQ_)#Ya(iA2l@DUZcNQ6`q5 hI2Xl0jx`!Qtf_QM0ZLFfb*t@lui9&`whD^X+>-<}l1J#~rJuYrnMpXaod5Zq z_neu-%d-IA9BJpS}+FUm~GG%CdB=4{ z$8}}Lbv)Mjt9|x0(UZ2#u5$z*&8+(GpRFvM zo~%r?KiQPpmhhh2mZT=t+Pt8pK3uf+g`C(N-qP_$JR0vD@Yu28Lt2{ar=`;2a}|ey z!ttaQr=!KybHgN6?N52^(RH&^^_`Ebo*O>2b1B1zwm-TvR{Q9?lxn|6_rZ5|Dc-e% z?_4cA8hraOyAQr|MegFkcRJe9;M*6k``|m}?b^X7I^FMR@a>Je4L;Giq3q(pcW!t) z8hrb@=|1?*&1@GBe&-u(JCN>!|7X}>6Fc8vJNBsENB_^T#U^&X#m2k74poJ_=#D*o z;-6uYP40Y??RZ7n)_!uTX=Zv>w6Z*Wt!bQ-PPK%;=hUWC6H@if4IQr!t?8Cj?cB)X z%UUB-H!Mm{k0qw&RZokR*Tl*aQ}U{(ZrHG>`oM=KAGKi9ci;L-(Zi$GO_}u2`jOQK z)~r3?m+Ll8&s$eBug`P)XU$GdjRoVYqH||utjWoVtXmn`a{0<-kk`hQ*Uoa~7u3jrFzB;zOIlK{htcj20i$*fg%Gweh0~ zN7c>^-w;fUM~kN|s7rUeJ7|B|3;*5u>eumHAH8&Uyg+_9uf}xilvGPq^W1sOP3j8GIrhEgRkHH z-?_VR_NTu-tor=_Pk$~i-G$+TD}UBw7tVh2=gzyf=A`aBnXYdgw{Dj4SYqdU!#?N! 
zJaT5Nf3WCB&rdw~rEA9@eDbKX(?#Dn;&<REN$FCk!c-OcYO_%@c z9cw4dJ!7Advp0X^dzUqS?UF}V{l0I%gL)3Uqq=zQx4(Kr^QC*wS$)x}Q#T%8cYp7v z^2@&ZtHDS8VZa}{kGAW3#qoc|Zfe`UeL~grt$X&rZ_ibkEGub0c-XqfZhNxr55ecg zg@;_JprKz9`wSXbSQs5P zdPH={sG)_0Mdc$$#o{F;C4)y)ez77kW^}S7(e{g=ATQ(DjPGYv zYl6W+kdd9yaoR>Iqeo^|c1~_y&q%NE4!7+Q^vKA}?2(n3ot>2x-utxh_8==id(VC2 z<8t~utB+ap%fa_uRYYzWX0oyY9h99)0Xrk3aF`Q%^tl z{0lF>^z!dsdG)n7|M;i3-rltNop-miJso^5t137$I4(FM2)-0--tz7TcYX8bAHA|- zd66Fl6i`5c&rpE_o{hZsfqlCI3Min!XRg4_ryua&TfB8wKmi35_>2{>Z~u&~P>Z90 z0t$2%c<+O|g3r+ZHCe3+D4>7>9R=nsqSB%>iS*v z55MXaez$Ymm!m%Ei?4mY^Yh3(;a9yjkG=1^3m^Pm_Wv!7)D60@dHzv%KV9(a6%$T7 z?cVv%EIfF1;h?+5olI7v9i)%w4~t{bPd4z)wqVzu4L|_}6i`3`1r$&~0Ri!W=9Ox>_3 zIX#w`npZt7R$db;OH9eDp1NVfqUr-5ntW8BmPIv1506?mWzs|IM^+zLv-W^ruG=_0 zZ(Ys2KF{r+^;%+TEEr!EojWUIO-@c^-O9+8%U3RojD9z=^u*x2g8n_a4KoqzGR*Of z>DDQ!ma68t^O~DdP3dSww79ByPIJq&dA0SaXz_&9!p8bk&0*uC#gkHvGiRlvB^9w~ z@w5eX=_T{R+pFi+&J2G|@A${DN7v0x)u*Gy$22yC%U%)-@42-@SxM>6L%jYU@%$jx zJM)$|M!de~td|#W_)+kU89jne@$^RPH$TN^4}!;5ed$wu{=>6B&8PqRr(Ea4^M_Uc zv;16jV{YEawe6eo$2aOkyvx?IZKJN-x#d3R9vnIIkb>aUqvNR`Z|XPhc zJU-&432UOKoOW^TC9{uT{qKYB8o%G{haWwm;lY}ea%PZO%9 zZ{0cleVcl+tTfhT|JL?o+q1zQhfl1Y7-VDwK}L8E+I}5W1$j9+xjET+xw*MLd*(%Y z7xd}ft5@#7hqVkcWV)2ralEEV?zgUqNGdfw4 zXn&H7o;`c^?$x`0pFaH)`xfq-_^VHCzX%&erCpyphtd2W`0K7nqY7^&g_hi z)3)s^qeo^|c1~_y&q%NE4!7+Q^vKA}?2(n3ot>2x-utxh_8==id(VC2<8t~utB+ap%fa_uRYYzWX0oyY9h99)0Xrk3aF`Q%^tl{0lF>^z!ds zdG)n7|M;i3-rltNop-miFIPs8*|}KZb?1_8w_N$*a`ni{%FN1bU#^TEi^5B0epdFr z@ti%!P0g*H-)~4_X+_w1Blm;{JRz- zzYWaV``(|ee)+yRYjf|t;fX~9mc4pS{`y}&cXGqC|5dr<+_wrRZ)jfr^lg3azVNVR zB{P2$|IN(PUfJ?s{f0Z=K4rpwH`V<7p)2m+>%{ASR&e9+(U-jUeBRyb9T{koXGeM5#s>ooFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| 
z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000W_c4>;g}0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_26kv*Y2d7Y0tzVbSt}3(r>B}4 zTGK75+PRVNujabhkwj@U7H$7y%$R8L`oM=KAGM(P@E=bvdU({j zDU%*rKeGD3nzaY~a^1%1dFyKC^?7dptl7z_v0!{vbndK-H90wvbt@xVE?>DUGWy-f z(i4OGZtK6Q`#8(GjB|Wrx^+sbrK)-Eyym7}c!lf^X zh4Q33pWZ+E_SA;%tD(HhYS53kS_UXZ@`;bC%q6%Yci2zt3?epVRA_6AEjuE*?E=2W@MD!Qqoto-af_l=Ls={L1DcgXyL#M1Mw$QwHTmerrH+4#n=WL?V{JtKR4q5pt^ z!}l9eG;;rv(z5c3%7dyV96Ir^N!5o>oBrjm9CPe($JaNcX3U(`IJ-5yVBw<0OHMiS zTW2l%_SxqwKmURYFZ#j7mt6Y86<1z$^)=UCcl`}N`RT2<-G0ZND_7lf@0$DWe_-vp z2OoL#v0pv@#FI}w{oL~}y!g_~zkB7?*WUc&pWb?V)8=>H-O~1Skk_8=OTnhsidHQ5 z^S@qT<rUU`>xr|>aP2)*xq;5i_P6_Pj!C}TiNA3?1yi-!{eQ8 z?@>SL0Ikn=4vyRtzTtlFvbw`>+*E(a$;b7}+WV!G4k;_1F??|Ee|^6F9rvor$Tjod z8Tjf`r!M^MkU@L*d+UM~xBveA2g+YsI`+HA9kc%RFMcikFE8#p>%@b8Gvw%_YJUFU 
ziu?CE@%n=cZXABpC4YJ`uh$c=^{weWsAsn^Cu3d4{I|UCu36C3xv2k;ci+iac!2WJ zvFp-5IPZ1^)-QG5-3nZ~)OmL+Fk`9n?p7eV)OmL+;8W_|u0Z$Ct`@hXW<+AqWIPi4 z@Dwd8Elrk2XGA}o77st+YClDrKAKim(rsG2JlSnpvMkYUT4`y#+qCkM@Q7!-c~mCC zBe#!Fi^t>PClcFDOUA;FRJNN|QW<{QwcWI`itf`Y%DYWV#7ep?TOwZCZ8?(hvTox` zCQG(&5KnZUM?4uT-EO|+6^TT0yDs6hWaW0tTV9b&#J3wuI4xPxZCZIETw@<=P+n1< zEdTiP#Y+;EvF+AJ`?N&-u- zR$vn{0b};|6u~&;iWhPTVaQOVI#541ghRO!3M?=lms#3*GlMFrjt z*%qVy{8fq0+}L@HVyRw2&nL~iP*{>m!4(cMk!ek;REhkAtv^)kVlw{r#+jM+^Umn{ zr?2ndZ64gdb7S+pvoS_@&wtE1=ReC^7uT+Qxq5R$UAtsn_D|bvvL5gX>k^D(EMB9^ z6As3app>~-g7G334@H%?z*BxgGl{6-(^ahM0qM%-*Kr|(9cGbTeL%FBM~&OVESqM#huI$G-@_dLUv4j;9c out = new Pl_StdioFile("raw", stdout); - qtest.pipeStreamData(out.getPointer(), false, false, false); + qtest.pipeStreamData(out.getPointer(), 0, qpdf_dl_none); std::cout << std::endl << "Uncompressed stream data:" << std::endl; - if (qtest.pipeStreamData(0, true, false, false)) + if (qtest.pipeStreamData(0, 0, qpdf_dl_all)) { std::cout.flush(); QUtil::binary_stdout(); out = new Pl_StdioFile("filtered", stdout); - qtest.pipeStreamData(out.getPointer(), true, false, false); + qtest.pipeStreamData(out.getPointer(), 0, qpdf_dl_all); std::cout << std::endl << "End of stream data" << std::endl; } else @@ -362,7 +362,7 @@ void runtest(int n, char const* filename1, char const* arg2) QPDFObjectHandle contents = page.getKey("/Contents"); QUtil::binary_stdout(); PointerHolder out = new Pl_StdioFile("filtered", stdout); - contents.pipeStreamData(out.getPointer(), true, false, false); + contents.pipeStreamData(out.getPointer(), 0, qpdf_dl_generalized); } else if (n == 3) { @@ -375,7 +375,8 @@ void runtest(int n, char const* filename1, char const* arg2) QUtil::binary_stdout(); PointerHolder out = new Pl_StdioFile("tokenized stream", stdout); - stream.pipeStreamData(out.getPointer(), true, true, false); + stream.pipeStreamData(out.getPointer(), + qpdf_ef_normalize, qpdf_dl_generalized); } } else if (n == 4) @@ -497,7 +498,7 @@ void runtest(int n, char const* 
filename1, char const* arg2) throw std::logic_error("test 6 run on file with no metadata"); } Pl_Buffer bufpl("buffer"); - metadata.pipeStreamData(&bufpl, false, false, false); + metadata.pipeStreamData(&bufpl, 0, qpdf_dl_none); Buffer* buf = bufpl.getBuffer(); unsigned char const* data = buf->getBuffer(); bool cleartext = false; @@ -1277,7 +1278,7 @@ void runtest(int n, char const* filename1, char const* arg2) QPDFObjectHandle stream = item.getKey("/EF").getKey("/F"); Pl_Buffer p1("buffer"); Pl_Flate p2("compress", &p1, Pl_Flate::a_inflate); - stream.pipeStreamData(&p2, false, false, false); + stream.pipeStreamData(&p2, 0, qpdf_dl_none); PointerHolder buf = p1.getBuffer(); std::string data = std::string( reinterpret_cast(buf->getBuffer()), @@ -1309,6 +1310,30 @@ void runtest(int n, char const* filename1, char const* arg2) std::cout << qtest.getArrayItem(i).unparseResolved() << std::endl; } } + else if (n == 39) + { + // Display image filter and color set for each image on each page + std::vector pages = pdf.getAllPages(); + int pageno = 0; + for (std::vector::iterator p_iter = + pages.begin(); + p_iter != pages.end(); ++p_iter) + { + std::cout << "page " << ++pageno << std::endl; + std::map images = + (*p_iter).getPageImages(); + for (std::map::iterator i_iter = + images.begin(); i_iter != images.end(); ++i_iter) + { + QPDFObjectHandle image_dict = (*i_iter).second.getDict(); + std::cout << "filter: " + << image_dict.getKey("/Filter").unparseResolved() + << ", color space: " + << image_dict.getKey("/ColorSpace").unparseResolved() + << std::endl; + } + } + } else { throw std::runtime_error(std::string("invalid test ") + diff --git a/qpdf/test_large_file.cc b/qpdf/test_large_file.cc index a7ed7170..5e4557c8 100644 --- a/qpdf/test_large_file.cc +++ b/qpdf/test_large_file.cc @@ -273,7 +273,7 @@ static void check_image(int pageno, QPDFObjectHandle page) QPDFObjectHandle image = page.getKey("/Resources").getKey("/XObject").getKey("/Im1"); ImageChecker ic(pageno); - 
image.pipeStreamData(&ic, true, false, false); + image.pipeStreamData(&ic, 0, qpdf_dl_specialized); } static void check_pdf(char const* filename)