diff --git a/ChangeLog b/ChangeLog index 87f5a2e2..5b9553d9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2017-08-19 Jay Berkenbilt + + * Remove --precheck-streams. This is enabled by default now + without any efficiency cost. This feature was never released. + + * Update pdf-create example to illustrate use of additional image + compression filters. + + * Add support for /RunLengthDecode and /DCTDecode: + - New pipeline types Pl_RunLength and Pl_DCT + - New command-line flags --compress-streams and --decode-level + to replace/enhance --stream-data + - New QPDFWriter::setCompressStreams and + QPDFWriter::setDecodeLevel methods + Please see documentation, header files, and help messages for + details on these new features. + 2017-08-12 Jay Berkenbilt * Add QPDFObjectHandle::rotatePage to apply rotation to a page diff --git a/examples/pdf-invert-images.cc b/examples/pdf-invert-images.cc index 00362091..537fd35e 100644 --- a/examples/pdf-invert-images.cc +++ b/examples/pdf-invert-images.cc @@ -121,7 +121,8 @@ int main(int argc, char* argv[]) // pipeStreamData with a null pipeline to determine // whether the image is filterable. Directly inspect // keys to determine the image type. - if (image.pipeStreamData(0, true, false, false) && + if (image.pipeStreamData(0, qpdf_ef_compress, + qpdf_dl_generalized) && color_space.isName() && bits_per_component.isInteger() && (color_space.getName() == "/DeviceGray") && diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index 38f1e71e..c2763956 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -26,7 +26,7 @@ enum qpdf_error_code_e qpdf_e_pages, /* erroneous or unsupported pages structure */ }; -/* Write Parameters */ +/* Write Parameters. See QPDFWriter.hh for details. */ enum qpdf_object_stream_e { @@ -41,6 +41,23 @@ enum qpdf_stream_data_e qpdf_s_compress /* compress stream data */ }; +/* Stream data flags */ + +/* See pipeStreamData in QPDFObjectHandle.hh for details on these flags. 
*/ +enum qpdf_stream_encode_flags_e +{ + qpdf_ef_compress = 1 << 0, /* compress uncompressed streams */ + qpdf_ef_normalize = 1 << 1, /* normalize content stream */ +}; +enum qpdf_stream_decode_level_e +{ + /* These must be in order from less to more decoding. */ + qpdf_dl_none = 0, /* preserve all stream filters */ + qpdf_dl_generalized, /* decode general-purpose filters */ + qpdf_dl_specialized, /* also decode other non-lossy filters */ + qpdf_dl_all /* also decode lossy filters */ +}; + /* R3 Encryption Parameters */ enum qpdf_r3_print_e diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index fbe02ba8..588768fc 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -44,19 +45,19 @@ class QPDFObjectHandle virtual ~StreamDataProvider() { } - // The implementation of this function must write the - // unencrypted, raw stream data to the given pipeline. Every - // call to provideStreamData for a given stream must write the - // same data. The number of bytes written must agree with the - // length provided at the time the StreamDataProvider object - // was associated with the stream. The object ID and - // generation passed to this method are those that belong to - // the stream on behalf of which the provider is called. They - // may be ignored or used by the implementation for indexing - // or other purposes. This information is made available just - // to make it more convenient to use a single - // StreamDataProvider object to provide data for multiple - // streams. + // The implementation of this function must write stream data + // to the given pipeline. The stream data must conform to + // whatever filters are explicitly associated with the stream. + // QPDFWriter may, in some cases, add compression, but if it + // does, it will update the filters as needed. 
Every call to + // provideStreamData for a given stream must write the same + // data. The object ID and generation passed to this method are + // those that belong to the stream on behalf of which the + // provider is called. They may be ignored or used by the + // implementation for indexing or other purposes. This + // information is made available just to make it more + // convenient to use a single StreamDataProvider object to + // provide data for multiple streams. virtual void provideStreamData(int objid, int generation, Pipeline* pipeline) = 0; }; @@ -370,32 +371,71 @@ class QPDFObjectHandle // Returns filtered (uncompressed) stream data. Throws an // exception if the stream is filtered and we can't decode it. QPDF_DLL - PointerHolder getStreamData(); + PointerHolder getStreamData( + qpdf_stream_decode_level_e level = qpdf_dl_generalized); + // Returns unfiltered (raw) stream data. QPDF_DLL PointerHolder getRawStreamData(); - // Write stream data through the given pipeline. A null pipeline + // Write stream data through the given pipeline. A null pipeline // value may be used if all you want to do is determine whether a - // stream is filterable. If filter is false, write raw stream - // data and return false. If filter is true, then attempt to - // apply all the decoding filters to the stream data. If we are - // successful, return true. Otherwise, return false and write raw - // data. If filtering is requested and successfully performed, - // then the normalize and compress flags are used to determine - // whether stream data should be normalized and compressed. In - // all cases, if this function returns false, raw data has been - // written. If it returns true, then any requested filtering has - // been performed. Note that if the original stream data has no - // filters applied to it, the return value will be equal to the - // value of the filter parameter. 
Callers may use the return - // value of this function to determine whether or not the /Filter - // and /DecodeParms keys in the stream dictionary should be - // replaced if writing a new stream object. + // stream is filterable and would be filtered based on the + // provided flags. If flags is 0, write raw stream data and return + // false. Otherwise, the flags alter the behavior in the following + // way: + // + // encode_flags: + // + // qpdf_ef_compress -- compress data with /FlateDecode if no other + // compression filters are applied. + // + // qpdf_ef_normalize -- tokenize as content stream and normalize tokens + // + // decode_level: + // + // qpdf_dl_none -- do not decode any streams. + // + // qpdf_dl_generalized -- decode supported general-purpose + // filters. This includes /ASCIIHexDecode, /ASCII85Decode, + // /LZWDecode, and /FlateDecode. + // + // qpdf_dl_specialized -- in addition to generalized filters, also + // decode supported non-lossy specialized filters. This includes + // /RunLengthDecode. + // + // qpdf_dl_all -- in addition to generalized and non-lossy + // specialized filters, decode supported lossy filters. This + // includes /DCTDecode. + // + // If, based on the flags and the filters and decode parameters, + // we determine that we know how to apply all requested filters, + // do so and return true if we are successful. + // + // In all cases, a return value of true means that filtered data + // has been written successfully. If filtering is requested but + // this method returns false, it means there was some error in the + // filtering, in which case the resulting data is likely partially + // filtered and/or incomplete and may not be consistent with the + // configured filters. QPDFWriter handles this by attempting to + // get the stream data without filtering, but callers should + // consider a false return value when decode_level is not + // qpdf_dl_none to be a potential loss of data. 
+ QPDF_DLL + bool pipeStreamData(Pipeline*, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, + bool suppress_warnings = false); + + // Legacy pipeStreamData. This maps to the flags-based + // pipeStreamData as follows: + // filter = false -> encode_flags = 0 + // filter = true -> decode_level = qpdf_dl_generalized + // normalize = true -> encode_flags |= qpdf_ef_normalize + // compress = true -> encode_flags |= qpdf_ef_compress QPDF_DLL bool pipeStreamData(Pipeline*, bool filter, - bool normalize, bool compress, - bool suppress_warnings = false); + bool normalize, bool compress); // Replace a stream's dictionary. The new dictionary must be // consistent with the stream's data. This is most appropriately diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index 2519ed12..c4bc7846 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -118,14 +118,70 @@ class QPDFWriter QPDF_DLL void setObjectStreamMode(qpdf_object_stream_e); - // Set value of stream data mode. In uncompress mode, we attempt - // to uncompress any stream that we can. In preserve mode, we - // preserve any filtering applied to streams. In compress mode, - // if we can apply all filters and the stream is not already - // optimally compressed, recompress the stream. + // Set value of stream data mode. This is an older interface. + // Instead of using this, prefer setCompressStreams() and + // setDecodeLevel(). This method is retained for compatibility, + // but it does not cover the full range of available + // configurations. The mapping between this and the new methods is + // as follows: + // + // qpdf_s_uncompress: + // setCompressStreams(false) + // setDecodeLevel(qpdf_dl_generalized) + // qpdf_s_preserve: + // setCompressStreams(false) + // setDecodeLevel(qpdf_dl_none) + // qpdf_s_compress: + // setCompressStreams(true) + // setDecodeLevel(qpdf_dl_generalized) + // + // The default is qpdf_s_compress. 
QPDF_DLL void setStreamDataMode(qpdf_stream_data_e); + // If true, compress any uncompressed streams when writing them. + // Metadata streams are a special case and are not compressed even + // if this is true. This is true by default for QPDFWriter. If you + // want QPDFWriter to leave uncompressed streams uncompressed, + // pass false to this method. + QPDF_DLL + void setCompressStreams(bool); + + // When QPDFWriter encounters streams, this parameter controls the + // behavior with respect to attempting to apply any filters to the + // streams when copying to the output. The decode levels are as + // follows: + // + // qpdf_dl_none: Do not attempt to apply any filters. Streams + // remain as they appear in the original file. Note that + // uncompressed streams may still be compressed on output. You can + // disable that by calling setCompressStreams(false). + // + // qpdf_dl_generalized: This is the default. QPDFWriter will apply + // LZWDecode, ASCII85Decode, ASCIIHexDecode, and FlateDecode + // filters on the input. When combined with + // setCompressStreams(true), which is the default, the effect of this + // is that streams filtered with these older and less efficient + // filters will be recompressed with the Flate filter. As a + // special case, if a stream is already compressed with + // FlateDecode and setCompressStreams is enabled, the original + // compressed data will be preserved. + // + // qpdf_dl_specialized: In addition to uncompressing the + // generalized compression formats, supported non-lossy + // compression will also be decoded. At present, this includes + // the RunLengthDecode filter. + // + // qpdf_dl_all: In addition to generalized and non-lossy + // specialized filters, supported lossy compression filters will + // be applied. At present, this includes DCTDecode (JPEG) + // compression. Note that compressing the resulting data with + // DCTDecode again will accumulate loss, so avoid multiple + // compression and decompression cycles. 
This is mostly useful for + // retrieving image data. + QPDF_DLL + void setDecodeLevel(qpdf_stream_decode_level_e); + // Set value of content stream normalization. The default is // "false". If true, we attempt to normalize newlines inside of // content streams. Some constructs such as inline images may @@ -434,8 +490,10 @@ class QPDFWriter Buffer* output_buffer; bool normalize_content_set; bool normalize_content; - bool stream_data_mode_set; - qpdf_stream_data_e stream_data_mode; + bool compress_streams; + bool compress_streams_set; + qpdf_stream_decode_level_e stream_decode_level; + bool stream_decode_level_set; bool qdf_mode; bool precheck_streams; bool preserve_unreferenced_objects; diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index d1360b14..9c79fc3a 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -45,7 +45,7 @@ QPDF::CopiedStreamDataProvider::provideStreamData( { QPDFObjectHandle foreign_stream = this->foreign_streams[QPDFObjGen(objid, generation)]; - foreign_stream.pipeStreamData(pipeline, false, false, false); + foreign_stream.pipeStreamData(pipeline, 0, qpdf_dl_none); } void @@ -2377,6 +2377,7 @@ QPDF::pipeStreamData(int objid, int generation, length -= len; pipeline->write(QUtil::unsigned_char_pointer(buf), len); } + pipeline->finish(); success = true; } catch (QPDFExc& e) @@ -2398,13 +2399,16 @@ QPDF::pipeStreamData(int objid, int generation, QUtil::int_to_string(generation) + ": " + e.what())); } } - try + if (! 
success) { - pipeline->finish(); - } - catch (std::exception&) - { - // ignore + try + { + pipeline->finish(); + } + catch (std::exception&) + { + // ignore + } } return success; } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index a8a7e5a7..105ecad9 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -482,10 +482,10 @@ QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict) } PointerHolder -QPDFObjectHandle::getStreamData() +QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level) { assertStream(); - return dynamic_cast(obj.getPointer())->getStreamData(); + return dynamic_cast(obj.getPointer())->getStreamData(level); } PointerHolder @@ -496,13 +496,35 @@ QPDFObjectHandle::getRawStreamData() } bool -QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, - bool normalize, bool compress, +QPDFObjectHandle::pipeStreamData(Pipeline* p, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, bool suppress_warnings) { assertStream(); return dynamic_cast(obj.getPointer())->pipeStreamData( - p, filter, normalize, compress, suppress_warnings); + p, encode_flags, decode_level, suppress_warnings); +} + +bool +QPDFObjectHandle::pipeStreamData(Pipeline* p, bool filter, + bool normalize, bool compress) +{ + unsigned long encode_flags = 0; + qpdf_stream_decode_level_e decode_level = qpdf_dl_none; + if (filter) + { + decode_level = qpdf_dl_generalized; + if (normalize) + { + encode_flags |= qpdf_ef_normalize; + } + if (compress) + { + encode_flags |= qpdf_ef_compress; + } + } + return pipeStreamData(p, encode_flags, decode_level, false); } void @@ -825,7 +847,7 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, all_description += ","; } all_description += " " + og; - if (! stream.pipeStreamData(&buf, true, false, false, false)) + if (! 
stream.pipeStreamData(&buf, 0, qpdf_dl_specialized)) { QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent"); warn(stream.getOwningQPDF(), diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 345613ad..fe25853a 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -54,8 +54,10 @@ QPDFWriter::init() output_buffer = 0; normalize_content_set = false; normalize_content = false; - stream_data_mode_set = false; - stream_data_mode = qpdf_s_compress; + compress_streams = true; + compress_streams_set = false; + stream_decode_level = qpdf_dl_none; + stream_decode_level_set = false; qdf_mode = false; precheck_streams = false; preserve_unreferenced_objects = false; @@ -162,8 +164,42 @@ QPDFWriter::setObjectStreamMode(qpdf_object_stream_e mode) void QPDFWriter::setStreamDataMode(qpdf_stream_data_e mode) { - this->stream_data_mode_set = true; - this->stream_data_mode = mode; + switch (mode) + { + case qpdf_s_uncompress: + this->stream_decode_level = + std::max(qpdf_dl_generalized, this->stream_decode_level); + this->compress_streams = false; + break; + + case qpdf_s_preserve: + this->stream_decode_level = qpdf_dl_none; + this->compress_streams = false; + break; + + case qpdf_s_compress: + this->stream_decode_level = + std::max(qpdf_dl_generalized, this->stream_decode_level); + this->compress_streams = true; + break; + } + this->stream_decode_level_set = true; + this->compress_streams_set = true; +} + + +void +QPDFWriter::setCompressStreams(bool val) +{ + this->compress_streams = val; + this->compress_streams_set = true; +} + +void +QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val) +{ + this->stream_decode_level = val; + this->stream_decode_level_set = true; } void @@ -1512,8 +1548,8 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, { is_metadata = true; } - bool filter = (this->stream_data_mode != qpdf_s_preserve); - if (this->stream_data_mode == qpdf_s_compress) + bool filter = (this->compress_streams || 
this->stream_decode_level); + if (this->compress_streams) { // Don't filter if the stream is already compressed with // FlateDecode. We don't want to make it worse by getting @@ -1532,19 +1568,21 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, } bool normalize = false; bool compress = false; + bool uncompress = false; if (is_metadata && ((! this->encrypted) || (this->encrypt_metadata == false))) { QTC::TC("qpdf", "QPDFWriter not compressing metadata"); filter = true; compress = false; + uncompress = true; } else if (this->normalize_content && normalized_streams.count(old_og)) { normalize = true; filter = true; } - else if (filter && (this->stream_data_mode == qpdf_s_compress)) + else if (filter && this->compress_streams) { compress = true; QTC::TC("qpdf", "QPDFWriter compressing uncompressed stream"); @@ -1559,7 +1597,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, QTC::TC("qpdf", "QPDFWriter precheck stream"); Pl_Discard discard; filter = object.pipeStreamData( - &discard, true, false, false, true); + &discard, 0, qpdf_dl_all, true); } catch (std::exception&) { @@ -1569,8 +1607,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, pushPipeline(new Pl_Buffer("stream data")); activatePipelineStack(); + bool filtered = - object.pipeStreamData(this->pipeline, filter, normalize, compress); + object.pipeStreamData( + this->pipeline, + (((filter && normalize) ? qpdf_ef_normalize : 0) | + ((filter && compress) ? qpdf_ef_compress : 0)), + (filter + ? (uncompress ? qpdf_dl_all : this->stream_decode_level) + : qpdf_dl_none)); PointerHolder stream_data; popPipelineStack(&stream_data); if (filtered) @@ -1717,8 +1762,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) // Set up a stream to write the stream data into a buffer. Pipeline* next = pushPipeline(new Pl_Buffer("object stream")); - if (! ((this->stream_data_mode == qpdf_s_uncompress) || - this->qdf_mode)) + if (! 
(this->stream_decode_level || this->qdf_mode)) { compressed = true; next = pushPipeline( @@ -2180,7 +2224,8 @@ QPDFWriter::prepareFileForWrite() is_stream = true; dict = node.getDict(); // See whether we are able to filter this stream. - filterable = node.pipeStreamData(0, true, false, false); + filterable = node.pipeStreamData( + 0, 0, this->stream_decode_level, true); } else if (pdf.getRoot().getObjectID() == node.getObjectID()) { @@ -2260,10 +2305,14 @@ QPDFWriter::write() { this->normalize_content = true; } - if (! this->stream_data_mode_set) + if (! this->compress_streams_set) { - this->stream_data_mode = qpdf_s_uncompress; + this->compress_streams = false; } + if (! this->stream_decode_level_set) + { + this->stream_decode_level = qpdf_dl_generalized; + } } if (this->encrypted) @@ -2272,7 +2321,7 @@ QPDFWriter::write() this->preserve_encryption = false; } else if (this->normalize_content || - (this->stream_data_mode == qpdf_s_uncompress) || + this->stream_decode_level || this->qdf_mode) { // Encryption makes looking at contents pretty useless. If @@ -2300,7 +2349,7 @@ QPDFWriter::write() } if (this->qdf_mode || this->normalize_content || - (this->stream_data_mode == qpdf_s_uncompress)) + this->stream_decode_level) { initializeSpecialStreams(); } @@ -2586,7 +2635,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); bool compressed = false; - if (! ((this->stream_data_mode == qpdf_s_uncompress) || this->qdf_mode)) + if (! (this->stream_decode_level || this->qdf_mode)) { compressed = true; if (! 
skip_compression) diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 31d583b8..bcf9be92 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include @@ -82,10 +84,10 @@ QPDF_Stream::getDict() const } PointerHolder -QPDF_Stream::getStreamData() +QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level) { Pl_Buffer buf("stream data buffer"); - if (! pipeStreamData(&buf, true, false, false, false)) + if (! pipeStreamData(&buf, 0, decode_level, false)) { throw std::logic_error("getStreamData called on unfilterable stream"); } @@ -97,7 +99,7 @@ PointerHolder QPDF_Stream::getRawStreamData() { Pl_Buffer buf("stream data buffer"); - pipeStreamData(&buf, false, false, false, false); + pipeStreamData(&buf, 0, qpdf_dl_none, false); QTC::TC("qpdf", "QPDF_Stream getRawStreamData"); return buf.getBuffer(); } @@ -178,6 +180,8 @@ QPDF_Stream::understandDecodeParams( bool QPDF_Stream::filterable(std::vector& filters, + bool& specialized_compression, + bool& lossy_compression, int& predictor, int& columns, bool& early_code_change) { @@ -254,11 +258,20 @@ QPDF_Stream::filterable(std::vector& filters, filter = filter_abbreviations[filter]; } - if (! ((filter == "/Crypt") || - (filter == "/FlateDecode") || - (filter == "/LZWDecode") || - (filter == "/ASCII85Decode") || - (filter == "/ASCIIHexDecode"))) + if (filter == "/RunLengthDecode") + { + specialized_compression = true; + } + else if (filter == "/DCTDecode") + { + specialized_compression = true; + lossy_compression = true; + } + else if (! 
((filter == "/Crypt") || + (filter == "/FlateDecode") || + (filter == "/LZWDecode") || + (filter == "/ASCII85Decode") || + (filter == "/ASCIIHexDecode"))) { filterable = false; } @@ -350,17 +363,35 @@ QPDF_Stream::filterable(std::vector& filters, } bool -QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, - bool normalize, bool compress, +QPDF_Stream::pipeStreamData(Pipeline* pipeline, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, bool suppress_warnings) { std::vector filters; int predictor = 1; int columns = 0; bool early_code_change = true; + bool specialized_compression = false; + bool lossy_compression = false; + bool filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none))); if (filter) { - filter = filterable(filters, predictor, columns, early_code_change); + filter = filterable(filters, specialized_compression, lossy_compression, + predictor, columns, early_code_change); + if ((decode_level < qpdf_dl_all) && lossy_compression) + { + filter = false; + } + if ((decode_level < qpdf_dl_specialized) && specialized_compression) + { + filter = false; + } + QTC::TC("qpdf", "QPDF_Stream special filters", + (! filter) ? 0 : + lossy_compression ? 1 : + specialized_compression ? 
2 : + 3); } if (pipeline == 0) @@ -375,14 +406,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, if (filter) { - if (compress) + if (encode_flags & qpdf_ef_compress) { pipeline = new Pl_Flate("compress object stream", pipeline, Pl_Flate::a_deflate); to_delete.push_back(pipeline); } - if (normalize) + if (encode_flags & qpdf_ef_normalize) { pipeline = new Pl_QPDFTokenizer("normalizer", pipeline); to_delete.push_back(pipeline); @@ -427,6 +458,17 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool filter, early_code_change); to_delete.push_back(pipeline); } + else if (filter == "/RunLengthDecode") + { + pipeline = new Pl_RunLength("runlength decode", pipeline, + Pl_RunLength::a_decode); + to_delete.push_back(pipeline); + } + else if (filter == "/DCTDecode") + { + pipeline = new Pl_DCT("DCT decode", pipeline); + to_delete.push_back(pipeline); + } else { throw std::logic_error( diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 424d6d6f..b05b1d4c 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -393,7 +393,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) this->file->getLastOffset(), "hint table length mismatch"); } - H.pipeStreamData(&pl, true, false, false); + H.pipeStreamData(&pl, 0, qpdf_dl_specialized); return Hdict; } diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index d053fd0f..8b960f00 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -22,10 +22,11 @@ class QPDF_Stream: public QPDFObject QPDFObjectHandle getDict() const; // See comments in QPDFObjectHandle.hh for these methods. 
- bool pipeStreamData(Pipeline*, bool filter, - bool normalize, bool compress, + bool pipeStreamData(Pipeline*, + unsigned long encode_flags, + qpdf_stream_decode_level_e decode_level, bool suppress_warnings); - PointerHolder getStreamData(); + PointerHolder getStreamData(qpdf_stream_decode_level_e); PointerHolder getRawStreamData(); void replaceStreamData(PointerHolder data, QPDFObjectHandle const& filter, @@ -52,6 +53,7 @@ class QPDF_Stream: public QPDFObject std::string const& filter, QPDFObjectHandle decode_params, int& predictor, int& columns, bool& early_code_change); bool filterable(std::vector& filters, + bool& specialized_compression, bool& lossy_compression, int& predictor, int& columns, bool& early_code_change); void warn(QPDFExc const& e); diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index ac1de4c6..14e8b8bc 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -853,28 +853,90 @@ outfile.pdf developers. The following options are available: - + - Controls transformation of stream data. The value of - may be one - of the following: + By default, or with , + qpdf will compress any stream with no other filters applied to + it with the /FlateDecode filter when it + writes it. To suppress this behavior and preserve uncompressed + streams as uncompressed, use + . + + + + + + + + Controls which streams qpdf tries to decode. The default is + . The following options are + available: - : recompress stream data when - possible (default) + : do not attempt to decode any streams - : leave all stream data as is + : decode streams filtered with + supported generalized filters: , + , + , and + + + + + + : in addition to generalized, + decode streams with supported non-lossy specialized + filters; currently this is just + + + + + : in addition to generalized and + specialized, decode streams with supported lossy filters; + currently this is just (JPEG) + + + + + + + + + + + Controls transformation of stream data. 
This option predates + the and + options. Those options can be + used to achieve the same effect with more control. The value + of may be + one of the following: + + + + : recompress stream data when + possible (default); equivalent to + + + + + + + : leave all stream data as is; + equivalent to + : uncompress stream data when - possible + possible; equivalent to + + diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index f6b1de28..df2ba225 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -76,6 +76,10 @@ struct Options use_aes(false), stream_data_set(false), stream_data_mode(qpdf_s_compress), + compress_streams(true), + compress_streams_set(false), + decode_level(qpdf_dl_generalized), + decode_level_set(false), normalize_set(false), normalize(false), suppress_recovery(false), @@ -134,6 +138,10 @@ struct Options bool use_aes; bool stream_data_set; qpdf_stream_data_e stream_data_mode; + bool compress_streams; + bool compress_streams_set; + qpdf_stream_decode_level_e decode_level; + bool decode_level_set; bool normalize_set; bool normalize; bool suppress_recovery; @@ -357,6 +365,8 @@ the output file. Mostly these are of use only to people who are very\n\ familiar with the PDF file format or who are PDF developers.\n\ \n\ --stream-data=option controls transformation of stream data (below)\n\ +--compress-streams=[yn] controls whether to compress streams on output\n\ +--decode-level=option controls how to filter streams from the input\n\ --normalize-content=[yn] enables or disables normalization of content streams\n\ --suppress-recovery prevents qpdf from attempting to recover damaged files\n\ --object-streams=mode controls handing of object streams\n\ @@ -383,6 +393,19 @@ Values for object stream mode:\n\ disable don't write any object streams\n\ generate use object streams wherever possible\n\ \n\ +When --compress-streams=n is specified, this overrides the default behavior\n\ +of qpdf, which is to attempt compress uncompressed streams. 
Setting\n\ +stream data mode to uncompress or preserve has the same effect.\n\ +\n\ +The --decode-level parameter may be set to one of the following values:\n\ + none do not decode streams\n\ + generalized decode streams compressed with generalized filters\n\ + including LZW, Flate, and the ASCII encoding filters.\n\ + specialized additionally decode streams with non-lossy specialized\n\ + filters including RunLength\n\ + all additionally decode streams with lossy filters\n\ + including DCT (JPEG)\n\ +\n\ In qdf mode, by default, content normalization is turned on, and the\n\ stream data mode is set to uncompress.\n\ \n\ @@ -1344,15 +1367,68 @@ static void parse_options(int argc, char* argv[], Options& o) usage("invalid stream-data option"); } } + else if (strcmp(arg, "compress-streams") == 0) + { + o.compress_streams_set = true; + if (parameter && (strcmp(parameter, "y") == 0)) + { + o.compress_streams = true; + } + else if (parameter && (strcmp(parameter, "n") == 0)) + { + o.compress_streams = false; + } + else + { + usage("--compress-streams must be given as" + " --compress-streams=[yn]"); + } + } + else if (strcmp(arg, "decode-level") == 0) + { + if (parameter == 0) + { + usage("--decode-level must be given as" + "--decode-level=option"); + } + o.decode_level_set = true; + if (strcmp(parameter, "none") == 0) + { + o.decode_level = qpdf_dl_none; + } + else if (strcmp(parameter, "generalized") == 0) + { + o.decode_level = qpdf_dl_generalized; + } + else if (strcmp(parameter, "specialized") == 0) + { + o.decode_level = qpdf_dl_specialized; + } + else if (strcmp(parameter, "all") == 0) + { + o.decode_level = qpdf_dl_all; + } + else + { + usage("invalid stream-data option"); + } + } else if (strcmp(arg, "normalize-content") == 0) { - if ((parameter == 0) || (*parameter == '\0')) + o.normalize_set = true; + if (parameter && (strcmp(parameter, "y") == 0)) + { + o.normalize = true; + } + else if (parameter && (strcmp(parameter, "n") == 0)) + { + o.normalize = false; + 
} + else { usage("--normalize-content must be given as" " --normalize-content=[yn]"); } - o.normalize_set = true; - o.normalize = (parameter[0] == 'y'); } else if (strcmp(arg, "suppress-recovery") == 0) { @@ -1606,7 +1682,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code) QPDFWriter w(pdf); Pl_Discard discard; w.setOutputPipeline(&discard); - w.setStreamDataMode(qpdf_s_uncompress); + w.setDecodeLevel(qpdf_dl_all); w.write(); // Parse all content streams @@ -1667,7 +1743,7 @@ static void do_show_obj(QPDF& pdf, Options& o, int& exit_code) { bool filter = o.show_filtered_stream_data; if (filter && - (! obj.pipeStreamData(0, true, false, false))) + (! obj.pipeStreamData(0, 0, qpdf_dl_all))) { QTC::TC("qpdf", "qpdf unable to filter"); std::cerr << "Unable to filter stream data." @@ -1678,7 +1754,10 @@ static void do_show_obj(QPDF& pdf, Options& o, int& exit_code) { QUtil::binary_stdout(); Pl_StdioFile out("stdout", stdout); - obj.pipeStreamData(&out, filter, o.normalize, false); + obj.pipeStreamData( + &out, + (filter && o.normalize) ? qpdf_ef_normalize : 0, + filter ? 
qpdf_dl_all : qpdf_dl_none); } } else @@ -2035,6 +2114,14 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) { w.setStreamDataMode(o.stream_data_mode); } + if (o.compress_streams_set) + { + w.setCompressStreams(o.compress_streams); + } + if (o.decode_level_set) + { + w.setDecodeLevel(o.decode_level); + } if (o.decrypt) { w.setPreserveEncryption(false); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 72f5331e..2a157c91 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -296,3 +296,4 @@ QPDF ignore length error xref entry 0 QPDF_encryption pad short parameter 0 QPDFWriter ignore self-referential object stream 0 QPDFObjectHandle found old angle 1 +QPDF_Stream special filters 3 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 9242a8a7..97d73277 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -937,6 +937,39 @@ $td->runtest("check output", {$td->FILE => "bad-data-precheck.pdf"}); show_ntests(); # ---------- +$td->notify("--- Decode levels ---"); +$n_tests += 10; + +# image-streams.pdf is the output of examples/pdf-create. +# examples/pdf-create validates the actual image data. +foreach my $l (qw(none generalized specialized all)) +{ + $td->runtest("image-streams: $l", + {$td->COMMAND => + "qpdf image-streams.pdf --compress-streams=n" . + " --decode-level=$l a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + $td->runtest("check image-streams: $l", + {$td->COMMAND => "test_driver 39 a.pdf"}, + {$td->FILE => "image-streams-$l.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +} + +# Bad JPEG data +$td->runtest("check finds bad jpeg data", + {$td->COMMAND => "qpdf --check bad-jpeg.pdf"}, + {$td->FILE => "bad-jpeg-check.out", + $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); +$td->runtest("get data", + {$td->COMMAND => "qpdf --show-object=6" . 
+ " --filtered-stream-data bad-jpeg.pdf"}, + {$td->FILE => "bad-jpeg-show.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); + +show_ntests(); +# ---------- $td->notify("--- Preserve unreferenced objects ---"); $n_tests += 4; @@ -1429,8 +1462,8 @@ $td->runtest("show-page-1-image", $td->EXIT_STATUS => 0}); $td->runtest("unfilterable stream data", - {$td->COMMAND => "qpdf encrypted-with-images.pdf" . - " --show-object=8 --filtered-stream-data"}, + {$td->COMMAND => "qpdf unfilterable.pdf" . + " --show-object=4 --filtered-stream-data"}, {$td->FILE => "show-unfilterable.out", $td->EXIT_STATUS => 2}, $td->NORMALIZE_NEWLINES); @@ -1461,7 +1494,7 @@ foreach my $f (qw(compressed-metadata.pdf enc-base.pdf)) { foreach my $w (qw(compress preserve)) { - $td->runtest("$w streams", + $td->runtest("$w streams ($f)", {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); check_metadata("a.pdf", 0, 1); diff --git a/qpdf/qtest/qpdf/bad-jpeg-check.out b/qpdf/qtest/qpdf/bad-jpeg-check.out new file mode 100644 index 00000000..ad7f8ecc --- /dev/null +++ b/qpdf/qtest/qpdf/bad-jpeg-check.out @@ -0,0 +1,5 @@ +checking bad-jpeg.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 diff --git a/qpdf/qtest/qpdf/bad-jpeg-out.pdf b/qpdf/qtest/qpdf/bad-jpeg-out.pdf new file mode 100644 index 00000000..70ccd02f Binary files /dev/null and b/qpdf/qtest/qpdf/bad-jpeg-out.pdf differ diff --git a/qpdf/qtest/qpdf/bad-jpeg-show.out b/qpdf/qtest/qpdf/bad-jpeg-show.out new file mode 100644 index 00000000..915060a4 --- /dev/null +++ b/qpdf/qtest/qpdf/bad-jpeg-show.out @@ -0,0 +1,2 @@ +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git 
a/qpdf/qtest/qpdf/bad-jpeg.out b/qpdf/qtest/qpdf/bad-jpeg.out new file mode 100644 index 00000000..915060a4 --- /dev/null +++ b/qpdf/qtest/qpdf/bad-jpeg.out @@ -0,0 +1,2 @@ +WARNING: bad-jpeg.pdf (file position 735): error decoding stream data for object 6 0: Not a JPEG file: starts with 0x77 0x77 +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/bad-jpeg.pdf b/qpdf/qtest/qpdf/bad-jpeg.pdf new file mode 100644 index 00000000..81511115 Binary files /dev/null and b/qpdf/qtest/qpdf/bad-jpeg.pdf differ diff --git a/qpdf/qtest/qpdf/c-write-warnings.out b/qpdf/qtest/qpdf/c-write-warnings.out index bc412658..cc964f3b 100644 --- a/qpdf/qtest/qpdf/c-write-warnings.out +++ b/qpdf/qtest/qpdf/c-write-warnings.out @@ -18,8 +18,3 @@ warning: bad33.pdf (file position 629): stream filter type is not name or array file: bad33.pdf pos : 629 text: stream filter type is not name or array -warning: bad33.pdf (file position 629): stream filter type is not name or array - code: 5 - file: bad33.pdf - pos : 629 - text: stream filter type is not name or array diff --git a/qpdf/qtest/qpdf/image-streams-all.out b/qpdf/qtest/qpdf/image-streams-all.out new file mode 100644 index 00000000..6b6ba653 --- /dev/null +++ b/qpdf/qtest/qpdf/image-streams-all.out @@ -0,0 +1,19 @@ +page 1 +filter: null, color space: /DeviceCMYK +page 2 +filter: null, color space: /DeviceCMYK +page 3 +filter: null, color space: /DeviceCMYK +page 4 +filter: null, color space: /DeviceRGB +page 5 +filter: null, color space: /DeviceRGB +page 6 +filter: null, color space: /DeviceRGB +page 7 +filter: null, color space: /DeviceGray +page 8 +filter: null, color space: /DeviceGray +page 9 +filter: null, color space: /DeviceGray +test 39 done diff --git a/qpdf/qtest/qpdf/image-streams-generalized.out b/qpdf/qtest/qpdf/image-streams-generalized.out new file mode 100644 index 00000000..5016a4ee --- /dev/null +++ b/qpdf/qtest/qpdf/image-streams-generalized.out @@ -0,0 +1,19 @@ 
+page 1 +filter: null, color space: /DeviceCMYK +page 2 +filter: /DCTDecode, color space: /DeviceCMYK +page 3 +filter: /RunLengthDecode, color space: /DeviceCMYK +page 4 +filter: null, color space: /DeviceRGB +page 5 +filter: /DCTDecode, color space: /DeviceRGB +page 6 +filter: /RunLengthDecode, color space: /DeviceRGB +page 7 +filter: null, color space: /DeviceGray +page 8 +filter: /DCTDecode, color space: /DeviceGray +page 9 +filter: /RunLengthDecode, color space: /DeviceGray +test 39 done diff --git a/qpdf/qtest/qpdf/image-streams-none.out b/qpdf/qtest/qpdf/image-streams-none.out new file mode 100644 index 00000000..8faed282 --- /dev/null +++ b/qpdf/qtest/qpdf/image-streams-none.out @@ -0,0 +1,19 @@ +page 1 +filter: /FlateDecode, color space: /DeviceCMYK +page 2 +filter: /DCTDecode, color space: /DeviceCMYK +page 3 +filter: /RunLengthDecode, color space: /DeviceCMYK +page 4 +filter: /FlateDecode, color space: /DeviceRGB +page 5 +filter: /DCTDecode, color space: /DeviceRGB +page 6 +filter: /RunLengthDecode, color space: /DeviceRGB +page 7 +filter: /FlateDecode, color space: /DeviceGray +page 8 +filter: /DCTDecode, color space: /DeviceGray +page 9 +filter: /RunLengthDecode, color space: /DeviceGray +test 39 done diff --git a/qpdf/qtest/qpdf/image-streams-specialized.out b/qpdf/qtest/qpdf/image-streams-specialized.out new file mode 100644 index 00000000..933ff796 --- /dev/null +++ b/qpdf/qtest/qpdf/image-streams-specialized.out @@ -0,0 +1,19 @@ +page 1 +filter: null, color space: /DeviceCMYK +page 2 +filter: /DCTDecode, color space: /DeviceCMYK +page 3 +filter: null, color space: /DeviceCMYK +page 4 +filter: null, color space: /DeviceRGB +page 5 +filter: /DCTDecode, color space: /DeviceRGB +page 6 +filter: null, color space: /DeviceRGB +page 7 +filter: null, color space: /DeviceGray +page 8 +filter: /DCTDecode, color space: /DeviceGray +page 9 +filter: null, color space: /DeviceGray +test 39 done diff --git a/qpdf/qtest/qpdf/image-streams.pdf 
b/qpdf/qtest/qpdf/image-streams.pdf new file mode 100644 index 00000000..552439c2 Binary files /dev/null and b/qpdf/qtest/qpdf/image-streams.pdf differ diff --git a/qpdf/qtest/qpdf/unfilterable.pdf b/qpdf/qtest/qpdf/unfilterable.pdf new file mode 100644 index 00000000..8be44380 Binary files /dev/null and b/qpdf/qtest/qpdf/unfilterable.pdf differ diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index d1db805e..055fba60 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -314,15 +314,15 @@ void runtest(int n, char const* filename1, char const* arg2) std::cout.flush(); QUtil::binary_stdout(); PointerHolder out = new Pl_StdioFile("raw", stdout); - qtest.pipeStreamData(out.getPointer(), false, false, false); + qtest.pipeStreamData(out.getPointer(), 0, qpdf_dl_none); std::cout << std::endl << "Uncompressed stream data:" << std::endl; - if (qtest.pipeStreamData(0, true, false, false)) + if (qtest.pipeStreamData(0, 0, qpdf_dl_all)) { std::cout.flush(); QUtil::binary_stdout(); out = new Pl_StdioFile("filtered", stdout); - qtest.pipeStreamData(out.getPointer(), true, false, false); + qtest.pipeStreamData(out.getPointer(), 0, qpdf_dl_all); std::cout << std::endl << "End of stream data" << std::endl; } else @@ -362,7 +362,7 @@ void runtest(int n, char const* filename1, char const* arg2) QPDFObjectHandle contents = page.getKey("/Contents"); QUtil::binary_stdout(); PointerHolder out = new Pl_StdioFile("filtered", stdout); - contents.pipeStreamData(out.getPointer(), true, false, false); + contents.pipeStreamData(out.getPointer(), 0, qpdf_dl_generalized); } else if (n == 3) { @@ -375,7 +375,8 @@ void runtest(int n, char const* filename1, char const* arg2) QUtil::binary_stdout(); PointerHolder out = new Pl_StdioFile("tokenized stream", stdout); - stream.pipeStreamData(out.getPointer(), true, true, false); + stream.pipeStreamData(out.getPointer(), + qpdf_ef_normalize, qpdf_dl_generalized); } } else if (n == 4) @@ -497,7 +498,7 @@ void runtest(int n, char const* 
filename1, char const* arg2) throw std::logic_error("test 6 run on file with no metadata"); } Pl_Buffer bufpl("buffer"); - metadata.pipeStreamData(&bufpl, false, false, false); + metadata.pipeStreamData(&bufpl, 0, qpdf_dl_none); Buffer* buf = bufpl.getBuffer(); unsigned char const* data = buf->getBuffer(); bool cleartext = false; @@ -1277,7 +1278,7 @@ void runtest(int n, char const* filename1, char const* arg2) QPDFObjectHandle stream = item.getKey("/EF").getKey("/F"); Pl_Buffer p1("buffer"); Pl_Flate p2("compress", &p1, Pl_Flate::a_inflate); - stream.pipeStreamData(&p2, false, false, false); + stream.pipeStreamData(&p2, 0, qpdf_dl_none); PointerHolder<Buffer> buf = p1.getBuffer(); std::string data = std::string( reinterpret_cast<char const*>(buf->getBuffer()), @@ -1309,6 +1310,30 @@ void runtest(int n, char const* filename1, char const* arg2) std::cout << qtest.getArrayItem(i).unparseResolved() << std::endl; } } + else if (n == 39) + { + // Display image filter and color space for each image on each page + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); + int pageno = 0; + for (std::vector<QPDFObjectHandle>::iterator p_iter = + pages.begin(); + p_iter != pages.end(); ++p_iter) + { + std::cout << "page " << ++pageno << std::endl; + std::map<std::string, QPDFObjectHandle> images = + (*p_iter).getPageImages(); + for (std::map<std::string, QPDFObjectHandle>::iterator i_iter = + images.begin(); i_iter != images.end(); ++i_iter) + { + QPDFObjectHandle image_dict = (*i_iter).second.getDict(); + std::cout << "filter: " + << image_dict.getKey("/Filter").unparseResolved() + << ", color space: " + << image_dict.getKey("/ColorSpace").unparseResolved() + << std::endl; + } + } + } else { throw std::runtime_error(std::string("invalid test ") + diff --git a/qpdf/test_large_file.cc b/qpdf/test_large_file.cc index a7ed7170..5e4557c8 100644 --- a/qpdf/test_large_file.cc +++ b/qpdf/test_large_file.cc @@ -273,7 +273,7 @@ static void check_image(int pageno, QPDFObjectHandle page) QPDFObjectHandle image = page.getKey("/Resources").getKey("/XObject").getKey("/Im1"); ImageChecker ic(pageno); -
image.pipeStreamData(&ic, true, false, false); + image.pipeStreamData(&ic, 0, qpdf_dl_specialized); } static void check_pdf(char const* filename)