2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-31 14:01:59 +00:00

Implement user-provided stream filters

Refactor QPDF_Stream to use stream filter classes to handle supported
stream filters as well.
This commit is contained in:
Jay Berkenbilt 2020-12-23 06:12:49 -05:00
parent 1fb26f08ad
commit 39bfa01307
16 changed files with 587 additions and 279 deletions

View File

@ -1,3 +1,13 @@
2020-12-23 Jay Berkenbilt <ejb@ql.org>
* Allow library users to provide their own decoders for stream
filters by deriving classes from QPDFStreamFilter and registering
them using QPDF::registerStreamFilter. Registered stream filters
provide code to validate and interpret /DecodeParms for a specific
/Filter and also to provide a pipeline that will decode. Note that
it is possible to encode to a filter type that is not supported
even without this feature.
2020-12-22 Jay Berkenbilt <ejb@ql.org> 2020-12-22 Jay Berkenbilt <ejb@ql.org>
* Add QPDFObjectHandle::makeDirect(bool allow_streams) -- if * Add QPDFObjectHandle::makeDirect(bool allow_streams) -- if

7
TODO
View File

@ -317,13 +317,6 @@ I find it useful to make reference to them in this list
is exercised elsewhere in qpdf's test suite, so this is not that is exercised elsewhere in qpdf's test suite, so this is not that
pressing. pressing.
* Support user-pluggable stream filters. This would enable external
code to provide interpretation for filters that are missing from
qpdf. Make it possible for user-provided filters to override
built-in filters. Make sure that the pluggable filters can be
prioritized so that we can poll all registered filters to see
whether they are capable of filtering a particular stream.
* If possible, consider adding CCITT3, CCITT4, or any other easy * If possible, consider adding CCITT3, CCITT4, or any other easy
filters. For some reference code that we probably can't use but may filters. For some reference code that we probably can't use but may
be handy anyway, see be handy anyway, see

View File

@ -31,6 +31,8 @@
#include <list> #include <list>
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <functional>
#include <memory>
#include <qpdf/QIntC.hh> #include <qpdf/QIntC.hh>
#include <qpdf/QPDFExc.hh> #include <qpdf/QPDFExc.hh>
@ -39,6 +41,7 @@
#include <qpdf/QPDFXRefEntry.hh> #include <qpdf/QPDFXRefEntry.hh>
#include <qpdf/QPDFObjectHandle.hh> #include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFTokenizer.hh> #include <qpdf/QPDFTokenizer.hh>
#include <qpdf/QPDFStreamFilter.hh>
#include <qpdf/Buffer.hh> #include <qpdf/Buffer.hh>
#include <qpdf/InputSource.hh> #include <qpdf/InputSource.hh>
@ -132,6 +135,20 @@ class QPDF
QPDF_DLL QPDF_DLL
void emptyPDF(); void emptyPDF();
// From 10.1: register a new filter implementation for a specific
// stream filter. You can add your own implementations for new
// filter types or override existing ones provided by the library.
// Registered stream filters are used for decoding only as you can
// override encoding with stream data providers. For example, you
// could use this method to add support for one of the other filter
// types by using additional third-party libraries that qpdf does
// not presently use. The standard filters are implemented using
// QPDFStreamFilter classes.
QPDF_DLL
static void registerStreamFilter(
std::string const& filter_name,
std::function<std::shared_ptr<QPDFStreamFilter> ()> factory);
// Parameter settings // Parameter settings
// By default, warning messages are issued to std::cerr and output // By default, warning messages are issued to std::cerr and output

View File

@ -0,0 +1,78 @@
// Copyright (c) 2005-2020 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef QPDFSTREAMFILTER_HH
#define QPDFSTREAMFILTER_HH
#include <qpdf/DLL.h>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/Pipeline.hh>
// Base class for stream filter decoders. A factory registered for a
// filter name returns a std::shared_ptr to an instance of a class
// derived from this one.
class QPDF_DLL_CLASS QPDFStreamFilter
{
public:
QPDF_DLL
QPDFStreamFilter() = default;
QPDF_DLL
virtual ~QPDFStreamFilter() = default;
// A QPDFStreamFilter class must implement getDecodePipeline(),
// which is pure virtual, and should override setDecodeParms()
// unless the default behavior of accepting only a null
// /DecodeParms is sufficient. QPDF will always call
// setDecodeParms() before calling getDecodePipeline(). It is
// expected that you will store any needed information from
// decode_parms (or the decode_parms object itself) in your
// instance so that it can be used to construct the decode
// pipeline.
// Return a boolean indicating whether your filter can proceed
// with the given /DecodeParms. The default implementation accepts
// a null object and rejects everything else.
QPDF_DLL
virtual bool setDecodeParms(QPDFObjectHandle decode_parms);
// Return a pipeline that will decode data encoded with your
// filter. Your implementation must ensure that the pipeline is
// deleted when the instance of your class is destroyed.
QPDF_DLL
virtual Pipeline* getDecodePipeline(Pipeline* next) = 0;
// If your filter implements "specialized" compression or lossy
// compression, override one or both of these methods. The default
// implementations return false. See comments in QPDFWriter for
// details. QPDF defines specialized compression as non-lossy
// compression not intended for general-purpose data. qpdf, by
// default, doesn't mess with streams that are compressed with
// specialized compression, the idea being that the decision to
// use that compression scheme would fall outside of what
// QPDFWriter would know anything about, so any attempt to decode
// and re-encode would probably be undesirable.
QPDF_DLL
virtual bool isSpecializedCompression();
QPDF_DLL
virtual bool isLossyCompression();
private:
// Not copyable: copy construction and copy assignment are deleted.
QPDFStreamFilter(QPDFStreamFilter const&) = delete;
QPDFStreamFilter& operator=(QPDFStreamFilter const&) = delete;
};
#endif // QPDFSTREAMFILTER_HH

View File

@ -269,6 +269,14 @@ QPDF::emptyPDF()
processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF)); processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
} }
// Register factory as the source of decoder objects for the stream
// filter named filter_name. This forwards directly to
// QPDF_Stream::registerStreamFilter.
void
QPDF::registerStreamFilter(
std::string const& filter_name,
std::function<std::shared_ptr<QPDFStreamFilter> ()> factory)
{
QPDF_Stream::registerStreamFilter(filter_name, factory);
}
void void
QPDF::setIgnoreXRefStreams(bool val) QPDF::setIgnoreXRefStreams(bool val)
{ {

View File

@ -0,0 +1,19 @@
#include <qpdf/QPDFStreamFilter.hh>
// Default /DecodeParms handling: accept only a null object (no
// parameters supplied) and reject everything else.
bool
QPDFStreamFilter::setDecodeParms(QPDFObjectHandle decode_parms)
{
return decode_parms.isNull();
}
// Default: the filter does not report itself as specialized
// compression. Derived classes override this to return true.
bool
QPDFStreamFilter::isSpecializedCompression()
{
return false;
}
// Default: the filter does not report itself as lossy compression.
// Derived classes override this to return true.
bool
QPDFStreamFilter::isLossyCompression()
{
return false;
}

View File

@ -3,15 +3,7 @@
#include <qpdf/QUtil.hh> #include <qpdf/QUtil.hh>
#include <qpdf/Pipeline.hh> #include <qpdf/Pipeline.hh>
#include <qpdf/Pl_Flate.hh> #include <qpdf/Pl_Flate.hh>
#include <qpdf/Pl_PNGFilter.hh>
#include <qpdf/Pl_TIFFPredictor.hh>
#include <qpdf/Pl_RC4.hh>
#include <qpdf/Pl_Buffer.hh> #include <qpdf/Pl_Buffer.hh>
#include <qpdf/Pl_ASCII85Decoder.hh>
#include <qpdf/Pl_ASCIIHexDecoder.hh>
#include <qpdf/Pl_LZWDecoder.hh>
#include <qpdf/Pl_RunLength.hh>
#include <qpdf/Pl_DCT.hh>
#include <qpdf/Pl_Count.hh> #include <qpdf/Pl_Count.hh>
#include <qpdf/ContentNormalizer.hh> #include <qpdf/ContentNormalizer.hh>
#include <qpdf/QTC.hh> #include <qpdf/QTC.hh>
@ -19,10 +11,78 @@
#include <qpdf/QPDFExc.hh> #include <qpdf/QPDFExc.hh>
#include <qpdf/Pl_QPDFTokenizer.hh> #include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QIntC.hh> #include <qpdf/QIntC.hh>
#include <qpdf/SF_FlateLzwDecode.hh>
#include <qpdf/SF_DCTDecode.hh>
#include <qpdf/SF_RunLengthDecode.hh>
#include <qpdf/SF_ASCII85Decode.hh>
#include <qpdf/SF_ASCIIHexDecode.hh>
#include <stdexcept> #include <stdexcept>
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations; class SF_Crypt: public QPDFStreamFilter
{
public:
SF_Crypt() = default;
virtual ~SF_Crypt() = default;
// Decide whether a stream using the /Crypt filter can be filtered
// given its /DecodeParms.
//
// A null decode_parms is accepted. Any other non-dictionary value is
// rejected. A dictionary is accepted only if every key is /Type or
// /Name and /Type is either absent (null) or the name
// /CryptFilterDecodeParms. Nothing is stored here: the parameters
// themselves are handled in decryptStream.
virtual bool setDecodeParms(QPDFObjectHandle decode_parms) override
{
    if (decode_parms.isNull())
    {
        return true;
    }
    if (! decode_parms.isDictionary())
    {
        // /DecodeParms entries are dictionaries or null; anything
        // else cannot be interpreted, so the stream is not
        // filterable.
        return false;
    }

    // The /Type check does not depend on which key is being
    // examined, so evaluate it once rather than once per key.
    QPDFObjectHandle type = decode_parms.getKey("/Type");
    bool const type_ok =
        type.isNull() ||
        (type.isName() && (type.getName() == "/CryptFilterDecodeParms"));

    for (auto const& key: decode_parms.getKeys())
    {
        bool const known_key = (key == "/Type") || (key == "/Name");
        if (! (known_key && type_ok))
        {
            return false;
        }
        // otherwise: we handle this in decryptStream
    }
    return true;
}
// The /Crypt filter contributes no decode pipeline of its own, so
// this always returns a null pointer and the argument is ignored.
virtual Pipeline* getDecodePipeline(Pipeline*) override
{
    // Not used -- handled by pipeStreamData
    return nullptr;
}
};
// Maps abbreviated filter names to the full filter names they stand
// for.
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
// The PDF specification provides these filter abbreviations for
// use in inline images, but according to table H.1 in the pre-ISO
// versions of the PDF specification, Adobe Reader also accepts
// them for stream filters.
{"/AHx", "/ASCIIHexDecode"},
{"/A85", "/ASCII85Decode"},
{"/LZW", "/LZWDecode"},
{"/Fl", "/FlateDecode"},
{"/RL", "/RunLengthDecode"},
{"/CCF", "/CCITTFaxDecode"},
{"/DCT", "/DCTDecode"},
};
// Factories that create a QPDFStreamFilter for each supported filter,
// keyed by full filter name. The entries below are the built-in
// filters; registerStreamFilter() adds entries or replaces existing
// ones.
std::map<
std::string,
std::function<std::shared_ptr<QPDFStreamFilter>()>>
QPDF_Stream::filter_factories = {
{"/Crypt", []() { return std::make_shared<SF_Crypt>(); }},
{"/FlateDecode", SF_FlateLzwDecode::flate_factory},
{"/LZWDecode", SF_FlateLzwDecode::lzw_factory},
{"/RunLengthDecode", SF_RunLengthDecode::factory},
{"/DCTDecode", SF_DCTDecode::factory},
{"/ASCII85Decode", SF_ASCII85Decode::factory},
{"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
};
QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation, QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation,
QPDFObjectHandle stream_dict, QPDFObjectHandle stream_dict,
@ -47,6 +107,14 @@ QPDF_Stream::~QPDF_Stream()
{ {
} }
// Store factory in filter_factories under filter_name. Assignment
// through operator[] means an existing entry for the same name,
// including a built-in one, is replaced.
void
QPDF_Stream::registerStreamFilter(
std::string const& filter_name,
std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
{
filter_factories[filter_name] = factory;
}
void void
QPDF_Stream::releaseResolved() QPDF_Stream::releaseResolved()
{ {
@ -190,125 +258,18 @@ QPDF_Stream::getRawStreamData()
} }
bool bool
QPDF_Stream::understandDecodeParams( QPDF_Stream::filterable(
std::string const& filter, QPDFObjectHandle decode_obj, std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
int& predictor, int& columns, bool& specialized_compression,
int& colors, int& bits_per_component, bool& lossy_compression)
bool& early_code_change)
{ {
bool filterable = true;
std::set<std::string> keys = decode_obj.getKeys();
for (std::set<std::string>::iterator iter = keys.begin();
iter != keys.end(); ++iter)
{
std::string const& key = *iter;
if (((filter == "/FlateDecode") || (filter == "/LZWDecode")) &&
(key == "/Predictor"))
{
QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
if (predictor_obj.isInteger())
{
predictor = predictor_obj.getIntValueAsInt();
if (! ((predictor == 1) || (predictor == 2) ||
((predictor >= 10) && (predictor <= 15))))
{
filterable = false;
}
}
else
{
filterable = false;
}
}
else if ((filter == "/LZWDecode") && (key == "/EarlyChange"))
{
QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
if (earlychange_obj.isInteger())
{
int earlychange = earlychange_obj.getIntValueAsInt();
early_code_change = (earlychange == 1);
if (! ((earlychange == 0) || (earlychange == 1)))
{
filterable = false;
}
}
else
{
filterable = false;
}
}
else if ((key == "/Columns") ||
(key == "/Colors") ||
(key == "/BitsPerComponent"))
{
QPDFObjectHandle param_obj = decode_obj.getKey(key);
if (param_obj.isInteger())
{
int val = param_obj.getIntValueAsInt();
if (key == "/Columns")
{
columns = val;
}
else if (key == "/Colors")
{
colors = val;
}
else if (key == "/BitsPerComponent")
{
bits_per_component = val;
}
}
else
{
filterable = false;
}
}
else if ((filter == "/Crypt") &&
(((key == "/Type") || (key == "/Name")) &&
(decode_obj.getKey("/Type").isNull() ||
(decode_obj.getKey("/Type").isName() &&
(decode_obj.getKey("/Type").getName() ==
"/CryptFilterDecodeParms")))))
{
// we handle this in decryptStream
}
else
{
filterable = false;
}
}
return filterable;
}
bool
QPDF_Stream::filterable(std::vector<std::string>& filters,
bool& specialized_compression,
bool& lossy_compression,
int& predictor, int& columns,
int& colors, int& bits_per_component,
bool& early_code_change)
{
if (filter_abbreviations.empty())
{
// The PDF specification provides these filter abbreviations
// for use in inline images, but according to table H.1 in the
// pre-ISO versions of the PDF specification, Adobe Reader
// also accepts them for stream filters.
filter_abbreviations["/AHx"] = "/ASCIIHexDecode";
filter_abbreviations["/A85"] = "/ASCII85Decode";
filter_abbreviations["/LZW"] = "/LZWDecode";
filter_abbreviations["/Fl"] = "/FlateDecode";
filter_abbreviations["/RL"] = "/RunLengthDecode";
filter_abbreviations["/CCF"] = "/CCITTFaxDecode";
filter_abbreviations["/DCT"] = "/DCTDecode";
}
// Check filters // Check filters
QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter"); QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
bool filters_okay = true; bool filters_okay = true;
std::vector<std::string> filter_names;
if (filter_obj.isNull()) if (filter_obj.isNull())
{ {
// No filters // No filters
@ -316,7 +277,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
else if (filter_obj.isName()) else if (filter_obj.isName())
{ {
// One filter // One filter
filters.push_back(filter_obj.getName()); filter_names.push_back(filter_obj.getName());
} }
else if (filter_obj.isArray()) else if (filter_obj.isArray())
{ {
@ -327,7 +288,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
QPDFObjectHandle item = filter_obj.getArrayItem(i); QPDFObjectHandle item = filter_obj.getArrayItem(i);
if (item.isName()) if (item.isName())
{ {
filters.push_back(item.getName()); filter_names.push_back(item.getName());
} }
else else
{ {
@ -351,34 +312,23 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
bool filterable = true; bool filterable = true;
for (std::vector<std::string>::iterator iter = filters.begin(); for (auto& filter_name: filter_names)
iter != filters.end(); ++iter)
{ {
std::string& filter = *iter; if (filter_abbreviations.count(filter_name))
if (filter_abbreviations.count(filter))
{ {
QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation"); QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation");
filter = filter_abbreviations[filter]; filter_name = filter_abbreviations[filter_name];
} }
if (filter == "/RunLengthDecode") auto ff = filter_factories.find(filter_name);
if (ff == filter_factories.end())
{ {
specialized_compression = true; filterable = false;
} }
else if (filter == "/DCTDecode") else
{ {
specialized_compression = true; filters.push_back((ff->second)());
lossy_compression = true;
} }
else if (! ((filter == "/Crypt") ||
(filter == "/FlateDecode") ||
(filter == "/LZWDecode") ||
(filter == "/ASCII85Decode") ||
(filter == "/ASCIIHexDecode")))
{
filterable = false;
}
} }
if (! filterable) if (! filterable)
@ -386,15 +336,8 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
return false; return false;
} }
// `filters' now contains a list of filters to be applied in // filters now contains a list of filters to be applied in order.
// order. See which ones we can support. // See which ones we can support.
// Initialize values to their defaults as per the PDF spec
predictor = 1;
columns = 0;
colors = 1;
bits_per_component = 8;
early_code_change = true;
// See if we can support any decode parameters that are specified. // See if we can support any decode parameters that are specified.
@ -413,7 +356,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
} }
else else
{ {
for (unsigned int i = 0; i < filters.size(); ++i) for (unsigned int i = 0; i < filter_names.size(); ++i)
{ {
decode_parms.push_back(decode_obj); decode_parms.push_back(decode_obj);
} }
@ -436,21 +379,21 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
return false; return false;
} }
for (unsigned int i = 0; i < filters.size(); ++i) for (size_t i = 0; i < filters.size(); ++i)
{ {
QPDFObjectHandle decode_item = decode_parms.at(i); auto filter = filters.at(i);
if (decode_item.isNull()) auto decode_item = decode_parms.at(i);
if (filter->setDecodeParms(decode_item))
{ {
// okay if (filter->isSpecializedCompression())
}
else if (decode_item.isDictionary())
{
if (! understandDecodeParams(
filters.at(i), decode_item,
predictor, columns, colors, bits_per_component,
early_code_change))
{ {
filterable = false; specialized_compression = true;
}
if (filter->isLossyCompression())
{
specialized_compression = true;
lossy_compression = true;
} }
} }
else else
@ -459,17 +402,6 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
} }
} }
if ((predictor > 1) && (columns == 0))
{
// invalid
filterable = false;
}
if (! filterable)
{
return false;
}
return filterable; return filterable;
} }
@ -479,12 +411,7 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
qpdf_stream_decode_level_e decode_level, qpdf_stream_decode_level_e decode_level,
bool suppress_warnings, bool will_retry) bool suppress_warnings, bool will_retry)
{ {
std::vector<std::string> filters; std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
int predictor = 1;
int columns = 0;
int colors = 1;
int bits_per_component = 8;
bool early_code_change = true;
bool specialized_compression = false; bool specialized_compression = false;
bool lossy_compression = false; bool lossy_compression = false;
bool ignored; bool ignored;
@ -497,10 +424,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
bool success = true; bool success = true;
if (filter) if (filter)
{ {
filter = filterable(filters, specialized_compression, lossy_compression, filter = filterable(
predictor, columns, filters, specialized_compression, lossy_compression);
colors, bits_per_component,
early_code_change);
if ((decode_level < qpdf_dl_all) && lossy_compression) if ((decode_level < qpdf_dl_all) && lossy_compression)
{ {
filter = false; filter = false;
@ -523,9 +448,11 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
return filter; return filter;
} }
// Construct the pipeline in reverse order. Force pipelines we // Construct the pipeline in reverse order. Force pipelines we
// create to be deleted when this function finishes. // create to be deleted when this function finishes. Pipelines
std::vector<PointerHolder<Pipeline> > to_delete; // created by QPDFStreamFilter objects will be deleted by those
// objects.
std::vector<PointerHolder<Pipeline>> to_delete;
PointerHolder<ContentNormalizer> normalizer; PointerHolder<ContentNormalizer> normalizer;
if (filter) if (filter)
@ -555,80 +482,14 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
to_delete.push_back(pipeline); to_delete.push_back(pipeline);
} }
for (std::vector<std::string>::reverse_iterator f_iter = for (auto f_iter = filters.rbegin();
filters.rbegin(); f_iter != filters.rend(); ++f_iter)
f_iter != filters.rend(); ++f_iter)
{ {
std::string const& filter_name = *f_iter; auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline);
if (decode_pipeline)
if ((filter_name == "/FlateDecode") ||
(filter_name == "/LZWDecode"))
{ {
if ((predictor >= 10) && (predictor <= 15)) pipeline = decode_pipeline;
{
QTC::TC("qpdf", "QPDF_Stream PNG filter");
pipeline = new Pl_PNGFilter(
"png decode", pipeline, Pl_PNGFilter::a_decode,
QIntC::to_uint(columns),
QIntC::to_uint(colors),
QIntC::to_uint(bits_per_component));
to_delete.push_back(pipeline);
}
else if (predictor == 2)
{
QTC::TC("qpdf", "QPDF_Stream TIFF predictor");
pipeline = new Pl_TIFFPredictor(
"tiff decode", pipeline, Pl_TIFFPredictor::a_decode,
QIntC::to_uint(columns),
QIntC::to_uint(colors),
QIntC::to_uint(bits_per_component));
to_delete.push_back(pipeline);
}
} }
if (filter_name == "/Crypt")
{
// Ignore -- handled by pipeStreamData
}
else if (filter_name == "/FlateDecode")
{
pipeline = new Pl_Flate("stream inflate",
pipeline, Pl_Flate::a_inflate);
to_delete.push_back(pipeline);
}
else if (filter_name == "/ASCII85Decode")
{
pipeline = new Pl_ASCII85Decoder("ascii85 decode", pipeline);
to_delete.push_back(pipeline);
}
else if (filter_name == "/ASCIIHexDecode")
{
pipeline = new Pl_ASCIIHexDecoder("asciiHex decode", pipeline);
to_delete.push_back(pipeline);
}
else if (filter_name == "/LZWDecode")
{
pipeline = new Pl_LZWDecoder("lzw decode", pipeline,
early_code_change);
to_delete.push_back(pipeline);
}
else if (filter_name == "/RunLengthDecode")
{
pipeline = new Pl_RunLength("runlength decode", pipeline,
Pl_RunLength::a_decode);
to_delete.push_back(pipeline);
}
else if (filter_name == "/DCTDecode")
{
pipeline = new Pl_DCT("DCT decode", pipeline);
to_delete.push_back(pipeline);
}
else
{
throw std::logic_error(
"INTERNAL ERROR: QPDFStream: unknown filter "
"encountered after check");
}
} }
} }

View File

@ -0,0 +1,153 @@
#include <qpdf/SF_FlateLzwDecode.hh>
#include <qpdf/Pl_PNGFilter.hh>
#include <qpdf/Pl_TIFFPredictor.hh>
#include <qpdf/Pl_Flate.hh>
#include <qpdf/Pl_LZWDecoder.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QIntC.hh>
// Construct a decoder for /LZWDecode (lzw == true) or /FlateDecode
// (lzw == false). The remaining members start at their default
// values and may be changed later by setDecodeParms().
SF_FlateLzwDecode::SF_FlateLzwDecode(bool lzw) :
lzw(lzw),
// Initialize values to their defaults as per the PDF spec
predictor(1),
columns(0),
colors(1),
bits_per_component(8),
early_code_change(true)
{
}
// Validate /DecodeParms for /FlateDecode or /LZWDecode and record the
// values needed to build the decode pipeline.
//
// Returns true if the stream can be decoded with these parameters:
//  - a null decode_parms is accepted and leaves the values set by the
//    constructor in place;
//  - any other value that is not a dictionary is rejected;
//  - /Predictor must be an integer equal to 1, 2, or 10 through 15;
//  - /Columns, /Colors, and /BitsPerComponent must be integers;
//  - for LZW only, /EarlyChange must be the integer 0 or 1;
//  - a predictor greater than 1 requires a non-zero /Columns.
// Keys other than those listed above are ignored.
bool
SF_FlateLzwDecode::setDecodeParms(QPDFObjectHandle decode_parms)
{
    if (decode_parms.isNull())
    {
        return true;
    }
    if (! decode_parms.isDictionary())
    {
        // /DecodeParms entries are dictionaries or null; anything
        // else cannot be interpreted, so the stream is not
        // filterable.
        return false;
    }

    bool filterable = true;
    std::set<std::string> keys = decode_parms.getKeys();
    for (auto const& key: keys)
    {
        QPDFObjectHandle value = decode_parms.getKey(key);
        if (key == "/Predictor")
        {
            if (value.isInteger())
            {
                this->predictor = value.getIntValueAsInt();
                if (! ((this->predictor == 1) || (this->predictor == 2) ||
                       ((this->predictor >= 10) && (this->predictor <= 15))))
                {
                    filterable = false;
                }
            }
            else
            {
                filterable = false;
            }
        }
        else if ((key == "/Columns") ||
                 (key == "/Colors") ||
                 (key == "/BitsPerComponent"))
        {
            if (value.isInteger())
            {
                int val = value.getIntValueAsInt();
                if (key == "/Columns")
                {
                    this->columns = val;
                }
                else if (key == "/Colors")
                {
                    this->colors = val;
                }
                else if (key == "/BitsPerComponent")
                {
                    this->bits_per_component = val;
                }
            }
            else
            {
                filterable = false;
            }
        }
        else if (lzw && (key == "/EarlyChange"))
        {
            if (value.isInteger())
            {
                int earlychange = value.getIntValueAsInt();
                this->early_code_change = (earlychange == 1);
                if (! ((earlychange == 0) || (earlychange == 1)))
                {
                    filterable = false;
                }
            }
            else
            {
                filterable = false;
            }
        }
    }

    // A predictor other than 1 needs /Columns; this is checked after
    // the loop because the keys can appear in any order.
    if ((this->predictor > 1) && (this->columns == 0))
    {
        filterable = false;
    }

    return filterable;
}
// Build the decode chain that writes to next and return its entry
// point.
//
// If a predictor was configured (10 through 15 selects the PNG
// filter, 2 selects the TIFF predictor), a predictor stage is created
// first and the decompressor is constructed with that stage as its
// next pipeline. Otherwise the decompressor is constructed directly
// on top of next. Every stage created here is kept in this->pipelines
// so that it lives as long as this object does.
Pipeline*
SF_FlateLzwDecode::getDecodePipeline(Pipeline* next)
{
    std::shared_ptr<Pipeline> stage;

    bool const use_png = (predictor >= 10) && (predictor <= 15);
    bool const use_tiff = (predictor == 2);
    if (use_png)
    {
        QTC::TC("qpdf", "SF_FlateLzwDecode PNG filter");
        stage = std::make_shared<Pl_PNGFilter>(
            "png decode", next, Pl_PNGFilter::a_decode,
            QIntC::to_uint(columns),
            QIntC::to_uint(colors),
            QIntC::to_uint(bits_per_component));
    }
    else if (use_tiff)
    {
        QTC::TC("qpdf", "SF_FlateLzwDecode TIFF predictor");
        stage = std::make_shared<Pl_TIFFPredictor>(
            "tiff decode", next, Pl_TIFFPredictor::a_decode,
            QIntC::to_uint(columns),
            QIntC::to_uint(colors),
            QIntC::to_uint(bits_per_component));
    }
    if (stage)
    {
        // The decompressor created below must feed the predictor.
        pipelines.push_back(stage);
        next = stage.get();
    }

    if (lzw)
    {
        stage = std::make_shared<Pl_LZWDecoder>(
            "lzw decode", next, early_code_change);
    }
    else
    {
        stage = std::make_shared<Pl_Flate>(
            "stream inflate", next, Pl_Flate::a_inflate);
    }
    pipelines.push_back(stage);
    return stage.get();
}
// Factory for /FlateDecode: returns a new SF_FlateLzwDecode
// constructed with lzw == false.
std::shared_ptr<QPDFStreamFilter>
SF_FlateLzwDecode::flate_factory()
{
return std::make_shared<SF_FlateLzwDecode>(false);
}
// Factory for /LZWDecode: returns a new SF_FlateLzwDecode constructed
// with lzw == true.
std::shared_ptr<QPDFStreamFilter>
SF_FlateLzwDecode::lzw_factory()
{
return std::make_shared<SF_FlateLzwDecode>(true);
}

View File

@ -70,6 +70,7 @@ SRCS_libqpdf = \
libqpdf/QPDFPageDocumentHelper.cc \ libqpdf/QPDFPageDocumentHelper.cc \
libqpdf/QPDFPageLabelDocumentHelper.cc \ libqpdf/QPDFPageLabelDocumentHelper.cc \
libqpdf/QPDFPageObjectHelper.cc \ libqpdf/QPDFPageObjectHelper.cc \
libqpdf/QPDFStreamFilter.cc \
libqpdf/QPDFSystemError.cc \ libqpdf/QPDFSystemError.cc \
libqpdf/QPDFTokenizer.cc \ libqpdf/QPDFTokenizer.cc \
libqpdf/QPDFWriter.cc \ libqpdf/QPDFWriter.cc \
@ -94,6 +95,7 @@ SRCS_libqpdf = \
libqpdf/QUtil.cc \ libqpdf/QUtil.cc \
libqpdf/RC4.cc \ libqpdf/RC4.cc \
libqpdf/SecureRandomDataProvider.cc \ libqpdf/SecureRandomDataProvider.cc \
libqpdf/SF_FlateLzwDecode.cc \
libqpdf/SparseOHArray.cc \ libqpdf/SparseOHArray.cc \
libqpdf/qpdf-c.cc libqpdf/qpdf-c.cc

View File

@ -5,6 +5,10 @@
#include <qpdf/QPDFObject.hh> #include <qpdf/QPDFObject.hh>
#include <qpdf/QPDFObjectHandle.hh> #include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFStreamFilter.hh>
#include <functional>
#include <memory>
class Pipeline; class Pipeline;
class QPDF; class QPDF;
@ -49,6 +53,10 @@ class QPDF_Stream: public QPDFObject
void replaceDict(QPDFObjectHandle new_dict); void replaceDict(QPDFObjectHandle new_dict);
static void registerStreamFilter(
std::string const& filter_name,
std::function<std::shared_ptr<QPDFStreamFilter>()> factory);
// Replace object ID and generation. This may only be called if // Replace object ID and generation. This may only be called if
// object ID and generation are 0. It is used by QPDFObjectHandle // object ID and generation are 0. It is used by QPDFObjectHandle
// when adding streams to files. // when adding streams to files.
@ -59,20 +67,15 @@ class QPDF_Stream: public QPDFObject
private: private:
static std::map<std::string, std::string> filter_abbreviations; static std::map<std::string, std::string> filter_abbreviations;
static std::map<
std::string,
std::function<std::shared_ptr<QPDFStreamFilter>()>> filter_factories;
void replaceFilterData(QPDFObjectHandle const& filter, void replaceFilterData(QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms, QPDFObjectHandle const& decode_parms,
size_t length); size_t length);
bool understandDecodeParams( bool filterable(std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
std::string const& filter, QPDFObjectHandle decode_params, bool& specialized_compression, bool& lossy_compression);
int& predictor, int& columns,
int& colors, int& bits_per_component,
bool& early_code_change);
bool filterable(std::vector<std::string>& filters,
bool& specialized_compression, bool& lossy_compression,
int& predictor, int& columns,
int& colors, int& bits_per_component,
bool& early_code_change);
void warn(QPDFExc const& e); void warn(QPDFExc const& e);
void setDictDescription(); void setDictDescription();
void setStreamDescription(); void setStreamDescription();

View File

@ -0,0 +1,30 @@
#include <qpdf/QPDFStreamFilter.hh>
#include <qpdf/Pl_ASCII85Decoder.hh>
#include <memory>
#ifndef SF_ASCII85DECODE_HH
#define SF_ASCII85DECODE_HH
// Stream filter for /ASCII85Decode. It relies on the base class's
// handling of /DecodeParms and decodes with a Pl_ASCII85Decoder.
class SF_ASCII85Decode: public QPDFStreamFilter
{
  public:
    SF_ASCII85Decode() = default;
    virtual ~SF_ASCII85Decode() = default;

    // Create a new instance of this filter.
    static std::shared_ptr<QPDFStreamFilter> factory()
    {
        return std::make_shared<SF_ASCII85Decode>();
    }

    // Create the decoder that writes to next. The decoder is stored
    // in this object, which keeps it alive; the returned pointer is
    // not owned by the caller.
    virtual Pipeline* getDecodePipeline(Pipeline* next) override
    {
        std::shared_ptr<Pipeline> decoder =
            std::make_shared<Pl_ASCII85Decoder>("ascii85 decode", next);
        pipeline = decoder;
        return decoder.get();
    }

  private:
    // Most recently created decoder.
    std::shared_ptr<Pipeline> pipeline;
};
#endif // SF_ASCII85DECODE_HH

View File

@ -0,0 +1,30 @@
#include <qpdf/QPDFStreamFilter.hh>
#include <qpdf/Pl_ASCIIHexDecoder.hh>
#include <memory>
#ifndef SF_ASCIIHEXDECODE_HH
#define SF_ASCIIHEXDECODE_HH
// Stream filter for /ASCIIHexDecode. It relies on the base class's
// handling of /DecodeParms and decodes with a Pl_ASCIIHexDecoder.
class SF_ASCIIHexDecode: public QPDFStreamFilter
{
  public:
    SF_ASCIIHexDecode() = default;
    virtual ~SF_ASCIIHexDecode() = default;

    // Create a new instance of this filter.
    static std::shared_ptr<QPDFStreamFilter> factory()
    {
        return std::make_shared<SF_ASCIIHexDecode>();
    }

    // Create the decoder that writes to next. The decoder is stored
    // in this object, which keeps it alive; the returned pointer is
    // not owned by the caller.
    virtual Pipeline* getDecodePipeline(Pipeline* next) override
    {
        std::shared_ptr<Pipeline> decoder =
            std::make_shared<Pl_ASCIIHexDecoder>("asciiHex decode", next);
        pipeline = decoder;
        return decoder.get();
    }

  private:
    // Most recently created decoder.
    std::shared_ptr<Pipeline> pipeline;
};
#endif // SF_ASCIIHEXDECODE_HH

View File

@ -0,0 +1,39 @@
#include <qpdf/QPDFStreamFilter.hh>
#include <qpdf/Pl_DCT.hh>
#include <memory>
#ifndef SF_DCTDECODE_HH
#define SF_DCTDECODE_HH
// Stream filter for /DCTDecode. It reports itself as both specialized
// and lossy compression and decodes with a Pl_DCT pipeline.
class SF_DCTDecode: public QPDFStreamFilter
{
  public:
    SF_DCTDecode() = default;
    virtual ~SF_DCTDecode() = default;

    // Create a new instance of this filter.
    static std::shared_ptr<QPDFStreamFilter> factory()
    {
        return std::make_shared<SF_DCTDecode>();
    }

    virtual bool isSpecializedCompression() override
    {
        return true;
    }

    virtual bool isLossyCompression() override
    {
        return true;
    }

    // Create the decoder that writes to next. The decoder is stored
    // in this object, which keeps it alive; the returned pointer is
    // not owned by the caller.
    virtual Pipeline* getDecodePipeline(Pipeline* next) override
    {
        std::shared_ptr<Pipeline> decoder =
            std::make_shared<Pl_DCT>("DCT decode", next);
        pipeline = decoder;
        return decoder.get();
    }

  private:
    // Most recently created decoder.
    std::shared_ptr<Pipeline> pipeline;
};
#endif // SF_DCTDECODE_HH

View File

@ -0,0 +1,30 @@
#include <qpdf/QPDFStreamFilter.hh>
#include <memory>
#include <vector>
#ifndef SF_FLATELZWDECODE_HH
#define SF_FLATELZWDECODE_HH
class SF_FlateLzwDecode: public QPDFStreamFilter
{
public:
SF_FlateLzwDecode(bool lzw);
virtual ~SF_FlateLzwDecode() = default;
virtual bool setDecodeParms(QPDFObjectHandle decode_parms);
virtual Pipeline* getDecodePipeline(Pipeline* next);
static std::shared_ptr<QPDFStreamFilter> flate_factory();
static std::shared_ptr<QPDFStreamFilter> lzw_factory();
private:
bool lzw;
int predictor;
int columns;
int colors;
int bits_per_component;
bool early_code_change;
std::vector<std::shared_ptr<Pipeline>> pipelines;
};
#endif // SF_FLATELZWDECODE_HH

View File

@ -0,0 +1,35 @@
#include <qpdf/QPDFStreamFilter.hh>
#include <qpdf/Pl_RunLength.hh>
#include <memory>
#ifndef SF_RUNLENGTHDECODE_HH
#define SF_RUNLENGTHDECODE_HH
// Stream filter for /RunLengthDecode. It reports itself as
// specialized compression and decodes with a Pl_RunLength pipeline in
// decode mode.
class SF_RunLengthDecode: public QPDFStreamFilter
{
  public:
    SF_RunLengthDecode() = default;
    virtual ~SF_RunLengthDecode() = default;

    // Create a new instance of this filter.
    static std::shared_ptr<QPDFStreamFilter> factory()
    {
        return std::make_shared<SF_RunLengthDecode>();
    }

    virtual bool isSpecializedCompression() override
    {
        return true;
    }

    // Create the decoder that writes to next. The decoder is stored
    // in this object, which keeps it alive; the returned pointer is
    // not owned by the caller.
    virtual Pipeline* getDecodePipeline(Pipeline* next) override
    {
        std::shared_ptr<Pipeline> decoder =
            std::make_shared<Pl_RunLength>(
                "runlength decode", next, Pl_RunLength::a_decode);
        pipeline = decoder;
        return decoder.get();
    }

  private:
    // Most recently created decoder.
    std::shared_ptr<Pipeline> pipeline;
};
#endif // SF_RUNLENGTHDECODE_HH

View File

@ -86,7 +86,7 @@ QPDF prev key in trailer dictionary 0
QPDF found xref stream 0 QPDF found xref stream 0
QPDF ignoring XRefStm in trailer 0 QPDF ignoring XRefStm in trailer 0
QPDF xref deleted object 0 QPDF xref deleted object 0
QPDF_Stream PNG filter 0 SF_FlateLzwDecode PNG filter 0
QPDF xref /Index is null 0 QPDF xref /Index is null 0
QPDF xref /Index is array 1 QPDF xref /Index is array 1
QPDFWriter copy Extends 0 QPDFWriter copy Extends 0
@ -294,7 +294,7 @@ qpdf-c called qpdf_set_decode_level 0
qpdf-c called qpdf_set_compress_streams 0 qpdf-c called qpdf_set_compress_streams 0
qpdf-c called qpdf_set_preserve_unreferenced_objects 0 qpdf-c called qpdf_set_preserve_unreferenced_objects 0
qpdf-c called qpdf_set_newline_before_endstream 0 qpdf-c called qpdf_set_newline_before_endstream 0
QPDF_Stream TIFF predictor 0 SF_FlateLzwDecode TIFF predictor 0
QPDFTokenizer inline image at EOF 0 QPDFTokenizer inline image at EOF 0
Pl_QPDFTokenizer found ID 0 Pl_QPDFTokenizer found ID 0
QPDFObjectHandle non-stream in stream array 0 QPDFObjectHandle non-stream in stream array 0