From 39bfa0130713defc9abb478a70717ca07377cdab Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 23 Dec 2020 06:12:49 -0500 Subject: [PATCH] Implement user-provided stream filters Refactor QPDF_Stream to use stream filter classes to handle supported stream filters as well. --- ChangeLog | 10 + TODO | 7 - include/qpdf/QPDF.hh | 17 ++ include/qpdf/QPDFStreamFilter.hh | 78 ++++++ libqpdf/QPDF.cc | 8 + libqpdf/QPDFStreamFilter.cc | 19 ++ libqpdf/QPDF_Stream.cc | 381 +++++++++-------------------- libqpdf/SF_FlateLzwDecode.cc | 153 ++++++++++++ libqpdf/build.mk | 2 + libqpdf/qpdf/QPDF_Stream.hh | 23 +- libqpdf/qpdf/SF_ASCII85Decode.hh | 30 +++ libqpdf/qpdf/SF_ASCIIHexDecode.hh | 30 +++ libqpdf/qpdf/SF_DCTDecode.hh | 39 +++ libqpdf/qpdf/SF_FlateLzwDecode.hh | 30 +++ libqpdf/qpdf/SF_RunLengthDecode.hh | 35 +++ qpdf/qpdf.testcov | 4 +- 16 files changed, 587 insertions(+), 279 deletions(-) create mode 100644 include/qpdf/QPDFStreamFilter.hh create mode 100644 libqpdf/QPDFStreamFilter.cc create mode 100644 libqpdf/SF_FlateLzwDecode.cc create mode 100644 libqpdf/qpdf/SF_ASCII85Decode.hh create mode 100644 libqpdf/qpdf/SF_ASCIIHexDecode.hh create mode 100644 libqpdf/qpdf/SF_DCTDecode.hh create mode 100644 libqpdf/qpdf/SF_FlateLzwDecode.hh create mode 100644 libqpdf/qpdf/SF_RunLengthDecode.hh diff --git a/ChangeLog b/ChangeLog index 1af2520e..7c1b43d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2020-12-23 Jay Berkenbilt + + * Allow library users to provide their own decoders for stream + filters by deriving classes from QPDFStreamFilter and registering + them using QPDF::registerStreamFilter. Registered stream filters + provide code to validate and interpret /DecodeParms for a specific + /Filter and also to provide a pipeline that will decode. Note that + it is possible to encode to a filter type that is not supported + even without this feature. 
+ 2020-12-22 Jay Berkenbilt * Add QPDFObjectHandle::makeDirect(bool allow_streams) -- if diff --git a/TODO b/TODO index 1479aa56..28917b66 100644 --- a/TODO +++ b/TODO @@ -317,13 +317,6 @@ I find it useful to make reference to them in this list is exercised elsewhere in qpdf's test suite, so this is not that pressing. - * Support user-pluggable stream filters. This would enable external - code to provide interpretation for filters that are missing from - qpdf. Make it possible for user-provided filters to override - built-in filters. Make sure that the pluggable filters can be - prioritized so that we can poll all registered filters to see - whether they are capable of filtering a particular stream. - * If possible, consider adding CCITT3, CCITT4, or any other easy filters. For some reference code that we probably can't use but may be handy anyway, see diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 285ba1e3..b0e9b717 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -39,6 +41,7 @@ #include #include #include +#include #include #include @@ -132,6 +135,20 @@ class QPDF QPDF_DLL void emptyPDF(); + // From 10.1: register a new filter implementation for a specific + // stream filter. You can add your own implementations for new + // filter types or override existing ones provided by the library. + // Registered stream filters are used for decoding only as you can + // override encoding with stream data providers. For example, you + // could use this method to add support for one of the other filter + // types by using additional third-party libraries that qpdf does + // not presently use. The standard filters are implemented using + // QPDFStreamFilter classes. 
+ QPDF_DLL + static void registerStreamFilter( + std::string const& filter_name, + std::function ()> factory); + // Parameter settings // By default, warning messages are issued to std::cerr and output diff --git a/include/qpdf/QPDFStreamFilter.hh b/include/qpdf/QPDFStreamFilter.hh new file mode 100644 index 00000000..5fdcf5ca --- /dev/null +++ b/include/qpdf/QPDFStreamFilter.hh @@ -0,0 +1,78 @@ +// Copyright (c) 2005-2020 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef QPDFSTREAMFILTER_HH +#define QPDFSTREAMFILTER_HH + +#include +#include +#include + +class QPDF_DLL_CLASS QPDFStreamFilter +{ + public: + QPDF_DLL + QPDFStreamFilter() = default; + + QPDF_DLL + virtual ~QPDFStreamFilter() = default; + + // A QPDFStreamFilter class must implement, at a minimum, + // setDecodeParms() and getDecodePipeline(). QPDF will always call + // setDecodeParms() before calling getDecodePipeline(). It is + // expected that you will store any needed information from + // decode_parms (or the decode_parms object itself) in your + // instance so that it can be used to construct the decode + // pipeline. 
+ + // Return a boolean indicating whether your filter can proceed + // with the given /DecodeParms. The default implementation accepts + // a null object and rejects everything else. + QPDF_DLL + virtual bool setDecodeParms(QPDFObjectHandle decode_parms); + + // Return a pipeline that will decode data encoded with your + // filter. Your implementation must ensure that the pipeline is + // deleted when the instance of your class is destroyed. + QPDF_DLL + virtual Pipeline* getDecodePipeline(Pipeline* next) = 0; + + // If your filter implements "specialized" compression or lossy + // compression, override one or both of these methods. The default + // implementations return false. See comments in QPDFWriter for + // details. QPDF defines specialized compression as non-lossy + // compression not intended for general-purpose data. qpdf, by + // default, doesn't mess with streams that are compressed with + // specialized compression, the idea being that the decision to + // use that compression scheme would fall outside of what + // QPDFWriter would know anything about, so any attempt to decode + // and re-encode would probably be undesirable. 
+ QPDF_DLL + virtual bool isSpecializedCompression(); + QPDF_DLL + virtual bool isLossyCompression(); + + private: + QPDFStreamFilter(QPDFStreamFilter const&) = delete; + QPDFStreamFilter& operator=(QPDFStreamFilter const&) = delete; +}; + +#endif // QPDFSTREAMFILTER_HH diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 793ce2fc..ff4866f0 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -269,6 +269,14 @@ QPDF::emptyPDF() processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); } +void +QPDF::registerStreamFilter( + std::string const& filter_name, + std::function ()> factory) +{ + QPDF_Stream::registerStreamFilter(filter_name, factory); +} + void QPDF::setIgnoreXRefStreams(bool val) { diff --git a/libqpdf/QPDFStreamFilter.cc b/libqpdf/QPDFStreamFilter.cc new file mode 100644 index 00000000..66a2f762 --- /dev/null +++ b/libqpdf/QPDFStreamFilter.cc @@ -0,0 +1,19 @@ +#include + +bool +QPDFStreamFilter::setDecodeParms(QPDFObjectHandle decode_parms) +{ + return decode_parms.isNull(); +} + +bool +QPDFStreamFilter::isSpecializedCompression() +{ + return false; +} + +bool +QPDFStreamFilter::isLossyCompression() +{ + return false; +} diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index e4a3c039..8f9b4b52 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -3,15 +3,7 @@ #include #include #include -#include -#include -#include #include -#include -#include -#include -#include -#include #include #include #include @@ -19,10 +11,78 @@ #include #include #include +#include +#include +#include +#include +#include #include -std::map QPDF_Stream::filter_abbreviations; +class SF_Crypt: public QPDFStreamFilter +{ + public: + SF_Crypt() = default; + virtual ~SF_Crypt() = default; + + virtual bool setDecodeParms(QPDFObjectHandle decode_parms) + { + if (decode_parms.isNull()) + { + return true; + } + bool filterable = true; + for (auto const& key: decode_parms.getKeys()) + { + if (((key == "/Type") || (key == "/Name")) && + 
(decode_parms.getKey("/Type").isNull() || + (decode_parms.getKey("/Type").isName() && + (decode_parms.getKey("/Type").getName() == + "/CryptFilterDecodeParms")))) + { + // we handle this in decryptStream + } + else + { + filterable = false; + } + } + return filterable; + } + + virtual Pipeline* getDecodePipeline(Pipeline*) + { + // Not used -- handled by pipeStreamData + return nullptr; + } +}; + +std::map QPDF_Stream::filter_abbreviations = { + // The PDF specification provides these filter abbreviations for + // use in inline images, but according to table H.1 in the pre-ISO + // versions of the PDF specification, Adobe Reader also accepts + // them for stream filters. + {"/AHx", "/ASCIIHexDecode"}, + {"/A85", "/ASCII85Decode"}, + {"/LZW", "/LZWDecode"}, + {"/Fl", "/FlateDecode"}, + {"/RL", "/RunLengthDecode"}, + {"/CCF", "/CCITTFaxDecode"}, + {"/DCT", "/DCTDecode"}, +}; + +std::map< + std::string, + std::function()>> +QPDF_Stream::filter_factories = { + {"/Crypt", []() { return std::make_shared(); }}, + {"/FlateDecode", SF_FlateLzwDecode::flate_factory}, + {"/LZWDecode", SF_FlateLzwDecode::lzw_factory}, + {"/RunLengthDecode", SF_RunLengthDecode::factory}, + {"/DCTDecode", SF_DCTDecode::factory}, + {"/ASCII85Decode", SF_ASCII85Decode::factory}, + {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory}, +}; QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation, QPDFObjectHandle stream_dict, @@ -47,6 +107,14 @@ QPDF_Stream::~QPDF_Stream() { } +void +QPDF_Stream::registerStreamFilter( + std::string const& filter_name, + std::function()> factory) +{ + filter_factories[filter_name] = factory; +} + void QPDF_Stream::releaseResolved() { @@ -190,125 +258,18 @@ QPDF_Stream::getRawStreamData() } bool -QPDF_Stream::understandDecodeParams( - std::string const& filter, QPDFObjectHandle decode_obj, - int& predictor, int& columns, - int& colors, int& bits_per_component, - bool& early_code_change) +QPDF_Stream::filterable( + std::vector>& filters, + bool& 
specialized_compression, + bool& lossy_compression) { - bool filterable = true; - std::set keys = decode_obj.getKeys(); - for (std::set::iterator iter = keys.begin(); - iter != keys.end(); ++iter) - { - std::string const& key = *iter; - if (((filter == "/FlateDecode") || (filter == "/LZWDecode")) && - (key == "/Predictor")) - { - QPDFObjectHandle predictor_obj = decode_obj.getKey(key); - if (predictor_obj.isInteger()) - { - predictor = predictor_obj.getIntValueAsInt(); - if (! ((predictor == 1) || (predictor == 2) || - ((predictor >= 10) && (predictor <= 15)))) - { - filterable = false; - } - } - else - { - filterable = false; - } - } - else if ((filter == "/LZWDecode") && (key == "/EarlyChange")) - { - QPDFObjectHandle earlychange_obj = decode_obj.getKey(key); - if (earlychange_obj.isInteger()) - { - int earlychange = earlychange_obj.getIntValueAsInt(); - early_code_change = (earlychange == 1); - if (! ((earlychange == 0) || (earlychange == 1))) - { - filterable = false; - } - } - else - { - filterable = false; - } - } - else if ((key == "/Columns") || - (key == "/Colors") || - (key == "/BitsPerComponent")) - { - QPDFObjectHandle param_obj = decode_obj.getKey(key); - if (param_obj.isInteger()) - { - int val = param_obj.getIntValueAsInt(); - if (key == "/Columns") - { - columns = val; - } - else if (key == "/Colors") - { - colors = val; - } - else if (key == "/BitsPerComponent") - { - bits_per_component = val; - } - } - else - { - filterable = false; - } - } - else if ((filter == "/Crypt") && - (((key == "/Type") || (key == "/Name")) && - (decode_obj.getKey("/Type").isNull() || - (decode_obj.getKey("/Type").isName() && - (decode_obj.getKey("/Type").getName() == - "/CryptFilterDecodeParms"))))) - { - // we handle this in decryptStream - } - else - { - filterable = false; - } - } - - return filterable; -} - -bool -QPDF_Stream::filterable(std::vector& filters, - bool& specialized_compression, - bool& lossy_compression, - int& predictor, int& columns, - int& colors, 
int& bits_per_component, - bool& early_code_change) -{ - if (filter_abbreviations.empty()) - { - // The PDF specification provides these filter abbreviations - // for use in inline images, but according to table H.1 in the - // pre-ISO versions of the PDF specification, Adobe Reader - // also accepts them for stream filters. - filter_abbreviations["/AHx"] = "/ASCIIHexDecode"; - filter_abbreviations["/A85"] = "/ASCII85Decode"; - filter_abbreviations["/LZW"] = "/LZWDecode"; - filter_abbreviations["/Fl"] = "/FlateDecode"; - filter_abbreviations["/RL"] = "/RunLengthDecode"; - filter_abbreviations["/CCF"] = "/CCITTFaxDecode"; - filter_abbreviations["/DCT"] = "/DCTDecode"; - } - // Check filters QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter"); bool filters_okay = true; + std::vector filter_names; + if (filter_obj.isNull()) { // No filters @@ -316,7 +277,7 @@ QPDF_Stream::filterable(std::vector& filters, else if (filter_obj.isName()) { // One filter - filters.push_back(filter_obj.getName()); + filter_names.push_back(filter_obj.getName()); } else if (filter_obj.isArray()) { @@ -327,7 +288,7 @@ QPDF_Stream::filterable(std::vector& filters, QPDFObjectHandle item = filter_obj.getArrayItem(i); if (item.isName()) { - filters.push_back(item.getName()); + filter_names.push_back(item.getName()); } else { @@ -351,34 +312,23 @@ QPDF_Stream::filterable(std::vector& filters, bool filterable = true; - for (std::vector::iterator iter = filters.begin(); - iter != filters.end(); ++iter) + for (auto& filter_name: filter_names) { - std::string& filter = *iter; - - if (filter_abbreviations.count(filter)) + if (filter_abbreviations.count(filter_name)) { QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation"); - filter = filter_abbreviations[filter]; + filter_name = filter_abbreviations[filter_name]; } - if (filter == "/RunLengthDecode") + auto ff = filter_factories.find(filter_name); + if (ff == filter_factories.end()) { - specialized_compression = true; + filterable = 
false; } - else if (filter == "/DCTDecode") + else { - specialized_compression = true; - lossy_compression = true; + filters.push_back((ff->second)()); } - else if (! ((filter == "/Crypt") || - (filter == "/FlateDecode") || - (filter == "/LZWDecode") || - (filter == "/ASCII85Decode") || - (filter == "/ASCIIHexDecode"))) - { - filterable = false; - } } if (! filterable) @@ -386,15 +336,8 @@ QPDF_Stream::filterable(std::vector& filters, return false; } - // `filters' now contains a list of filters to be applied in - // order. See which ones we can support. - - // Initialize values to their defaults as per the PDF spec - predictor = 1; - columns = 0; - colors = 1; - bits_per_component = 8; - early_code_change = true; + // filters now contains a list of filters to be applied in order. + // See which ones we can support. // See if we can support any decode parameters that are specified. @@ -413,7 +356,7 @@ QPDF_Stream::filterable(std::vector& filters, } else { - for (unsigned int i = 0; i < filters.size(); ++i) + for (unsigned int i = 0; i < filter_names.size(); ++i) { decode_parms.push_back(decode_obj); } @@ -436,21 +379,21 @@ QPDF_Stream::filterable(std::vector& filters, return false; } - for (unsigned int i = 0; i < filters.size(); ++i) + for (size_t i = 0; i < filters.size(); ++i) { - QPDFObjectHandle decode_item = decode_parms.at(i); - if (decode_item.isNull()) + auto filter = filters.at(i); + auto decode_item = decode_parms.at(i); + + if (filter->setDecodeParms(decode_item)) { - // okay - } - else if (decode_item.isDictionary()) - { - if (! 
understandDecodeParams( - filters.at(i), decode_item, - predictor, columns, colors, bits_per_component, - early_code_change)) + if (filter->isSpecializedCompression()) { - filterable = false; + specialized_compression = true; + } + if (filter->isLossyCompression()) + { + specialized_compression = true; + lossy_compression = true; } } else @@ -459,17 +402,6 @@ QPDF_Stream::filterable(std::vector& filters, } } - if ((predictor > 1) && (columns == 0)) - { - // invalid - filterable = false; - } - - if (! filterable) - { - return false; - } - return filterable; } @@ -479,12 +411,7 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, qpdf_stream_decode_level_e decode_level, bool suppress_warnings, bool will_retry) { - std::vector filters; - int predictor = 1; - int columns = 0; - int colors = 1; - int bits_per_component = 8; - bool early_code_change = true; + std::vector> filters; bool specialized_compression = false; bool lossy_compression = false; bool ignored; @@ -497,10 +424,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, bool success = true; if (filter) { - filter = filterable(filters, specialized_compression, lossy_compression, - predictor, columns, - colors, bits_per_component, - early_code_change); + filter = filterable( + filters, specialized_compression, lossy_compression); if ((decode_level < qpdf_dl_all) && lossy_compression) { filter = false; @@ -523,9 +448,11 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, return filter; } - // Construct the pipeline in reverse order. Force pipelines we - // create to be deleted when this function finishes. - std::vector > to_delete; + // Construct the pipeline in reverse order. Force pipelines we + // create to be deleted when this function finishes. Pipelines + // created by QPDFStreamFilter objects will be deleted by those + // objects. 
+ std::vector> to_delete; PointerHolder normalizer; if (filter) @@ -555,80 +482,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp, to_delete.push_back(pipeline); } - for (std::vector::reverse_iterator f_iter = - filters.rbegin(); - f_iter != filters.rend(); ++f_iter) + for (auto f_iter = filters.rbegin(); + f_iter != filters.rend(); ++f_iter) { - std::string const& filter_name = *f_iter; - - if ((filter_name == "/FlateDecode") || - (filter_name == "/LZWDecode")) + auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline); + if (decode_pipeline) { - if ((predictor >= 10) && (predictor <= 15)) - { - QTC::TC("qpdf", "QPDF_Stream PNG filter"); - pipeline = new Pl_PNGFilter( - "png decode", pipeline, Pl_PNGFilter::a_decode, - QIntC::to_uint(columns), - QIntC::to_uint(colors), - QIntC::to_uint(bits_per_component)); - to_delete.push_back(pipeline); - } - else if (predictor == 2) - { - QTC::TC("qpdf", "QPDF_Stream TIFF predictor"); - pipeline = new Pl_TIFFPredictor( - "tiff decode", pipeline, Pl_TIFFPredictor::a_decode, - QIntC::to_uint(columns), - QIntC::to_uint(colors), - QIntC::to_uint(bits_per_component)); - to_delete.push_back(pipeline); - } + pipeline = decode_pipeline; } - - if (filter_name == "/Crypt") - { - // Ignore -- handled by pipeStreamData - } - else if (filter_name == "/FlateDecode") - { - pipeline = new Pl_Flate("stream inflate", - pipeline, Pl_Flate::a_inflate); - to_delete.push_back(pipeline); - } - else if (filter_name == "/ASCII85Decode") - { - pipeline = new Pl_ASCII85Decoder("ascii85 decode", pipeline); - to_delete.push_back(pipeline); - } - else if (filter_name == "/ASCIIHexDecode") - { - pipeline = new Pl_ASCIIHexDecoder("asciiHex decode", pipeline); - to_delete.push_back(pipeline); - } - else if (filter_name == "/LZWDecode") - { - pipeline = new Pl_LZWDecoder("lzw decode", pipeline, - early_code_change); - to_delete.push_back(pipeline); - } - else if (filter_name == "/RunLengthDecode") - { - pipeline = new 
Pl_RunLength("runlength decode", pipeline, - Pl_RunLength::a_decode); - to_delete.push_back(pipeline); - } - else if (filter_name == "/DCTDecode") - { - pipeline = new Pl_DCT("DCT decode", pipeline); - to_delete.push_back(pipeline); - } - else - { - throw std::logic_error( - "INTERNAL ERROR: QPDFStream: unknown filter " - "encountered after check"); - } } } diff --git a/libqpdf/SF_FlateLzwDecode.cc b/libqpdf/SF_FlateLzwDecode.cc new file mode 100644 index 00000000..29064cc0 --- /dev/null +++ b/libqpdf/SF_FlateLzwDecode.cc @@ -0,0 +1,153 @@ +#include +#include +#include +#include +#include +#include +#include + +SF_FlateLzwDecode::SF_FlateLzwDecode(bool lzw) : + lzw(lzw), + // Initialize values to their defaults as per the PDF spec + predictor(1), + columns(0), + colors(1), + bits_per_component(8), + early_code_change(true) +{ +} + +bool +SF_FlateLzwDecode::setDecodeParms(QPDFObjectHandle decode_parms) +{ + if (decode_parms.isNull()) + { + return true; + } + + bool filterable = true; + std::set keys = decode_parms.getKeys(); + for (auto const& key: keys) + { + QPDFObjectHandle value = decode_parms.getKey(key); + if (key == "/Predictor") + { + if (value.isInteger()) + { + this->predictor = value.getIntValueAsInt(); + if (! 
((this->predictor == 1) || (this->predictor == 2) || + ((this->predictor >= 10) && (this->predictor <= 15)))) + { + filterable = false; + } + } + else + { + filterable = false; + } + } + else if ((key == "/Columns") || + (key == "/Colors") || + (key == "/BitsPerComponent")) + { + if (value.isInteger()) + { + int val = value.getIntValueAsInt(); + if (key == "/Columns") + { + this->columns = val; + } + else if (key == "/Colors") + { + this->colors = val; + } + else if (key == "/BitsPerComponent") + { + this->bits_per_component = val; + } + } + else + { + filterable = false; + } + } + else if (lzw && (key == "/EarlyChange")) + { + if (value.isInteger()) + { + int earlychange = value.getIntValueAsInt(); + this->early_code_change = (earlychange == 1); + if (! ((earlychange == 0) || (earlychange == 1))) + { + filterable = false; + } + } + else + { + filterable = false; + } + } + } + + if ((this->predictor > 1) && (this->columns == 0)) + { + filterable = false; + } + + return filterable; +} + + + +Pipeline* +SF_FlateLzwDecode::getDecodePipeline(Pipeline* next) +{ + std::shared_ptr pipeline; + if ((this->predictor >= 10) && (this->predictor <= 15)) + { + QTC::TC("qpdf", "SF_FlateLzwDecode PNG filter"); + pipeline = std::make_shared( + "png decode", next, Pl_PNGFilter::a_decode, + QIntC::to_uint(this->columns), + QIntC::to_uint(this->colors), + QIntC::to_uint(this->bits_per_component)); + this->pipelines.push_back(pipeline); + next = pipeline.get(); + } + else if (this->predictor == 2) + { + QTC::TC("qpdf", "SF_FlateLzwDecode TIFF predictor"); + pipeline = std::make_shared( + "tiff decode", next, Pl_TIFFPredictor::a_decode, + QIntC::to_uint(this->columns), + QIntC::to_uint(this->colors), + QIntC::to_uint(this->bits_per_component)); + this->pipelines.push_back(pipeline); + next = pipeline.get(); + } + + if (lzw) + { + pipeline = std::make_shared( + "lzw decode", next, early_code_change); + } + else + { + pipeline = std::make_shared( + "stream inflate", next, 
Pl_Flate::a_inflate); + } + this->pipelines.push_back(pipeline); + return pipeline.get(); +} + +std::shared_ptr +SF_FlateLzwDecode::flate_factory() +{ + return std::make_shared(false); +} + +std::shared_ptr +SF_FlateLzwDecode::lzw_factory() +{ + return std::make_shared(true); +} diff --git a/libqpdf/build.mk b/libqpdf/build.mk index ec13b46b..40b022d6 100644 --- a/libqpdf/build.mk +++ b/libqpdf/build.mk @@ -70,6 +70,7 @@ SRCS_libqpdf = \ libqpdf/QPDFPageDocumentHelper.cc \ libqpdf/QPDFPageLabelDocumentHelper.cc \ libqpdf/QPDFPageObjectHelper.cc \ + libqpdf/QPDFStreamFilter.cc \ libqpdf/QPDFSystemError.cc \ libqpdf/QPDFTokenizer.cc \ libqpdf/QPDFWriter.cc \ @@ -94,6 +95,7 @@ SRCS_libqpdf = \ libqpdf/QUtil.cc \ libqpdf/RC4.cc \ libqpdf/SecureRandomDataProvider.cc \ + libqpdf/SF_FlateLzwDecode.cc \ libqpdf/SparseOHArray.cc \ libqpdf/qpdf-c.cc diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index da9f91a0..b11de6a2 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -5,6 +5,10 @@ #include #include +#include + +#include +#include class Pipeline; class QPDF; @@ -49,6 +53,10 @@ class QPDF_Stream: public QPDFObject void replaceDict(QPDFObjectHandle new_dict); + static void registerStreamFilter( + std::string const& filter_name, + std::function()> factory); + // Replace object ID and generation. This may only be called if // object ID and generation are 0. It is used by QPDFObjectHandle // when adding streams to files. 
@@ -59,20 +67,15 @@ class QPDF_Stream: public QPDFObject private: static std::map filter_abbreviations; + static std::map< + std::string, + std::function()>> filter_factories; void replaceFilterData(QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length); - bool understandDecodeParams( - std::string const& filter, QPDFObjectHandle decode_params, - int& predictor, int& columns, - int& colors, int& bits_per_component, - bool& early_code_change); - bool filterable(std::vector& filters, - bool& specialized_compression, bool& lossy_compression, - int& predictor, int& columns, - int& colors, int& bits_per_component, - bool& early_code_change); + bool filterable(std::vector>& filters, + bool& specialized_compression, bool& lossy_compression); void warn(QPDFExc const& e); void setDictDescription(); void setStreamDescription(); diff --git a/libqpdf/qpdf/SF_ASCII85Decode.hh b/libqpdf/qpdf/SF_ASCII85Decode.hh new file mode 100644 index 00000000..b0b2f2e3 --- /dev/null +++ b/libqpdf/qpdf/SF_ASCII85Decode.hh @@ -0,0 +1,30 @@ +#include +#include +#include + +#ifndef SF_ASCII85DECODE_HH +#define SF_ASCII85DECODE_HH + +class SF_ASCII85Decode: public QPDFStreamFilter +{ + public: + SF_ASCII85Decode() = default; + virtual ~SF_ASCII85Decode() = default; + + virtual Pipeline* getDecodePipeline(Pipeline* next) override + { + this->pipeline = std::make_shared( + "ascii85 decode", next); + return this->pipeline.get(); + } + + static std::shared_ptr factory() + { + return std::make_shared(); + } + + private: + std::shared_ptr pipeline; +}; + +#endif // SF_ASCII85DECODE_HH diff --git a/libqpdf/qpdf/SF_ASCIIHexDecode.hh b/libqpdf/qpdf/SF_ASCIIHexDecode.hh new file mode 100644 index 00000000..869d0722 --- /dev/null +++ b/libqpdf/qpdf/SF_ASCIIHexDecode.hh @@ -0,0 +1,30 @@ +#include +#include +#include + +#ifndef SF_ASCIIHEXDECODE_HH +#define SF_ASCIIHEXDECODE_HH + +class SF_ASCIIHexDecode: public QPDFStreamFilter +{ + public: + SF_ASCIIHexDecode() = default; + 
virtual ~SF_ASCIIHexDecode() = default; + + virtual Pipeline* getDecodePipeline(Pipeline* next) override + { + this->pipeline = std::make_shared( + "asciiHex decode", next); + return this->pipeline.get(); + } + + static std::shared_ptr factory() + { + return std::make_shared(); + } + + private: + std::shared_ptr pipeline; +}; + +#endif // SF_ASCIIHEXDECODE_HH diff --git a/libqpdf/qpdf/SF_DCTDecode.hh b/libqpdf/qpdf/SF_DCTDecode.hh new file mode 100644 index 00000000..28aa42c1 --- /dev/null +++ b/libqpdf/qpdf/SF_DCTDecode.hh @@ -0,0 +1,39 @@ +#include +#include +#include + +#ifndef SF_DCTDECODE_HH +#define SF_DCTDECODE_HH + +class SF_DCTDecode: public QPDFStreamFilter +{ + public: + SF_DCTDecode() = default; + virtual ~SF_DCTDecode() = default; + + virtual Pipeline* getDecodePipeline(Pipeline* next) override + { + this->pipeline = std::make_shared("DCT decode", next); + return this->pipeline.get(); + } + + static std::shared_ptr factory() + { + return std::make_shared(); + } + + virtual bool isSpecializedCompression() override + { + return true; + } + + virtual bool isLossyCompression() override + { + return true; + } + + private: + std::shared_ptr pipeline; +}; + +#endif // SF_DCTDECODE_HH diff --git a/libqpdf/qpdf/SF_FlateLzwDecode.hh b/libqpdf/qpdf/SF_FlateLzwDecode.hh new file mode 100644 index 00000000..9c72eff9 --- /dev/null +++ b/libqpdf/qpdf/SF_FlateLzwDecode.hh @@ -0,0 +1,30 @@ +#include +#include +#include + +#ifndef SF_FLATELZWDECODE_HH +#define SF_FLATELZWDECODE_HH + +class SF_FlateLzwDecode: public QPDFStreamFilter +{ + public: + SF_FlateLzwDecode(bool lzw); + virtual ~SF_FlateLzwDecode() = default; + + virtual bool setDecodeParms(QPDFObjectHandle decode_parms); + virtual Pipeline* getDecodePipeline(Pipeline* next); + + static std::shared_ptr flate_factory(); + static std::shared_ptr lzw_factory(); + + private: + bool lzw; + int predictor; + int columns; + int colors; + int bits_per_component; + bool early_code_change; + std::vector> pipelines; +}; + 
+#endif // SF_FLATELZWDECODE_HH diff --git a/libqpdf/qpdf/SF_RunLengthDecode.hh b/libqpdf/qpdf/SF_RunLengthDecode.hh new file mode 100644 index 00000000..1bdfb4c0 --- /dev/null +++ b/libqpdf/qpdf/SF_RunLengthDecode.hh @@ -0,0 +1,35 @@ +#include +#include +#include + +#ifndef SF_RUNLENGTHDECODE_HH +#define SF_RUNLENGTHDECODE_HH + +class SF_RunLengthDecode: public QPDFStreamFilter +{ + public: + SF_RunLengthDecode() = default; + virtual ~SF_RunLengthDecode() = default; + + virtual Pipeline* getDecodePipeline(Pipeline* next) override + { + this->pipeline = std::make_shared( + "runlength decode", next, Pl_RunLength::a_decode); + return this->pipeline.get(); + } + + static std::shared_ptr factory() + { + return std::make_shared(); + } + + virtual bool isSpecializedCompression() override + { + return true; + } + + private: + std::shared_ptr pipeline; +}; + +#endif // SF_RUNLENGTHDECODE_HH diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 15f6cf1e..f0f96242 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -86,7 +86,7 @@ QPDF prev key in trailer dictionary 0 QPDF found xref stream 0 QPDF ignoring XRefStm in trailer 0 QPDF xref deleted object 0 -QPDF_Stream PNG filter 0 +SF_FlateLzwDecode PNG filter 0 QPDF xref /Index is null 0 QPDF xref /Index is array 1 QPDFWriter copy Extends 0 @@ -294,7 +294,7 @@ qpdf-c called qpdf_set_decode_level 0 qpdf-c called qpdf_set_compress_streams 0 qpdf-c called qpdf_set_preserve_unreferenced_objects 0 qpdf-c called qpdf_set_newline_before_endstream 0 -QPDF_Stream TIFF predictor 0 +SF_FlateLzwDecode TIFF predictor 0 QPDFTokenizer inline image at EOF 0 Pl_QPDFTokenizer found ID 0 QPDFObjectHandle non-stream in stream array 0