2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/QPDF_Stream.hh>
|
|
|
|
|
|
|
|
#include <qpdf/QUtil.hh>
|
|
|
|
#include <qpdf/Pipeline.hh>
|
|
|
|
#include <qpdf/Pl_Flate.hh>
|
|
|
|
#include <qpdf/Pl_PNGFilter.hh>
|
2018-01-13 19:24:09 +00:00
|
|
|
#include <qpdf/Pl_TIFFPredictor.hh>
|
2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/Pl_RC4.hh>
|
|
|
|
#include <qpdf/Pl_Buffer.hh>
|
|
|
|
#include <qpdf/Pl_ASCII85Decoder.hh>
|
|
|
|
#include <qpdf/Pl_ASCIIHexDecoder.hh>
|
|
|
|
#include <qpdf/Pl_LZWDecoder.hh>
|
2017-08-19 13:18:14 +00:00
|
|
|
#include <qpdf/Pl_RunLength.hh>
|
|
|
|
#include <qpdf/Pl_DCT.hh>
|
2010-08-05 19:04:22 +00:00
|
|
|
#include <qpdf/Pl_Count.hh>
|
2018-02-02 23:21:34 +00:00
|
|
|
#include <qpdf/ContentNormalizer.hh>
|
2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/QTC.hh>
|
|
|
|
#include <qpdf/QPDF.hh>
|
|
|
|
#include <qpdf/QPDFExc.hh>
|
|
|
|
#include <qpdf/Pl_QPDFTokenizer.hh>
|
2019-06-21 03:35:23 +00:00
|
|
|
#include <qpdf/QIntC.hh>
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2009-09-26 18:36:04 +00:00
|
|
|
#include <stdexcept>
|
|
|
|
|
2010-09-05 15:00:44 +00:00
|
|
|
// Maps abbreviated filter names (e.g. "/Fl") to their full names
// (e.g. "/FlateDecode"). Starts out empty and is populated by
// filterable() the first time it is called.
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations;
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
// Build a stream object from its owning QPDF, its object ID and
// generation, its dictionary, and the offset/length of its data.
// Throws std::logic_error if stream_dict is not a dictionary.
QPDF_Stream::QPDF_Stream(QPDF* qpdf, int objid, int generation,
                         QPDFObjectHandle stream_dict,
                         qpdf_offset_t offset, size_t length) :
    qpdf(qpdf),
    objid(objid),
    generation(generation),
    stream_dict(stream_dict),
    offset(offset),
    length(length)
{
    bool const dict_is_valid = stream_dict.isDictionary();
    if (! dict_is_valid)
    {
        throw std::logic_error(
            "stream object instantiated with non-dictionary"
            " object for dictionary");
    }
    // Label this object (and, through setDescription, its dictionary).
    setStreamDescription();
}
|
|
|
|
|
|
|
|
QPDF_Stream::~QPDF_Stream()
{
    // Intentionally empty: no explicit cleanup is performed here.
}
|
|
|
|
|
2017-08-28 21:05:34 +00:00
|
|
|
void
|
|
|
|
QPDF_Stream::releaseResolved()
|
|
|
|
{
|
|
|
|
this->stream_provider = 0;
|
|
|
|
QPDFObjectHandle::ReleaseResolver::releaseResolved(this->stream_dict);
|
|
|
|
}
|
|
|
|
|
2010-08-05 20:20:52 +00:00
|
|
|
void
|
|
|
|
QPDF_Stream::setObjGen(int objid, int generation)
|
|
|
|
{
|
|
|
|
if (! ((this->objid == 0) && (this->generation == 0)))
|
|
|
|
{
|
|
|
|
throw std::logic_error(
|
|
|
|
"attempt to set object ID and generation of a stream"
|
|
|
|
" that already has them");
|
|
|
|
}
|
|
|
|
this->objid = objid;
|
|
|
|
this->generation = generation;
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
std::string
|
|
|
|
QPDF_Stream::unparse()
|
|
|
|
{
|
|
|
|
// Unparse stream objects as indirect references
|
|
|
|
return QUtil::int_to_string(this->objid) + " " +
|
|
|
|
QUtil::int_to_string(this->generation) + " R";
|
|
|
|
}
|
|
|
|
|
2018-12-17 22:40:29 +00:00
|
|
|
// The JSON form of a stream is the JSON form of its dictionary.
JSON
QPDF_Stream::getJSON()
{
    return stream_dict.getJSON();
}
|
|
|
|
|
2013-01-22 14:57:07 +00:00
|
|
|
// Type code identifying this object as a stream.
QPDFObject::object_type_e
QPDF_Stream::getTypeCode() const
{
    QPDFObject::object_type_e const code = QPDFObject::ot_stream;
    return code;
}
|
|
|
|
|
|
|
|
char const*
|
|
|
|
QPDF_Stream::getTypeName() const
|
|
|
|
{
|
|
|
|
return "stream";
|
|
|
|
}
|
|
|
|
|
2018-02-16 22:25:27 +00:00
|
|
|
void
|
|
|
|
QPDF_Stream::setDescription(QPDF* qpdf, std::string const& description)
|
|
|
|
{
|
|
|
|
this->QPDFObject::setDescription(qpdf, description);
|
|
|
|
setDictDescription();
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDF_Stream::setStreamDescription()
|
|
|
|
{
|
|
|
|
setDescription(
|
|
|
|
this->qpdf,
|
2019-08-28 13:32:58 +00:00
|
|
|
this->qpdf->getFilename() +
|
|
|
|
", stream object " + QUtil::int_to_string(this->objid) + " " +
|
2018-02-16 22:25:27 +00:00
|
|
|
QUtil::int_to_string(this->generation));
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDF_Stream::setDictDescription()
|
|
|
|
{
|
|
|
|
QPDF* qpdf = 0;
|
|
|
|
std::string description;
|
|
|
|
if ((! this->stream_dict.hasObjectDescription()) &&
|
|
|
|
getDescription(qpdf, description))
|
|
|
|
{
|
|
|
|
this->stream_dict.setObjectDescription(
|
|
|
|
qpdf, description + " -> stream dictionary");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
// Handle to the stream dictionary.
QPDFObjectHandle
QPDF_Stream::getDict() const
{
    return stream_dict;
}
|
|
|
|
|
2018-02-02 23:21:34 +00:00
|
|
|
bool
|
|
|
|
QPDF_Stream::isDataModified() const
|
|
|
|
{
|
|
|
|
return (! this->token_filters.empty());
|
|
|
|
}
|
|
|
|
|
2019-01-07 02:18:36 +00:00
|
|
|
// Offset recorded for this stream's data.
qpdf_offset_t
QPDF_Stream::getOffset() const
{
    return offset;
}
|
|
|
|
|
|
|
|
size_t
|
|
|
|
QPDF_Stream::getLength() const
|
|
|
|
{
|
|
|
|
return this->length;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Replacement data buffer, if one was installed through
// replaceStreamData; otherwise a null holder.
PointerHolder<Buffer>
QPDF_Stream::getStreamDataBuffer() const
{
    return stream_data;
}
|
|
|
|
|
|
|
|
// Stream data provider, if one was installed through
// replaceStreamData; otherwise a null holder.
PointerHolder<QPDFObjectHandle::StreamDataProvider>
QPDF_Stream::getStreamDataProvider() const
{
    return stream_provider;
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
// Return the stream data decoded at the requested level. Throws
// QPDFExc (qpdf_e_unsupported) when the stream could not be filtered.
PointerHolder<Buffer>
QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
{
    // Collect everything written by the pipeline in memory.
    Pl_Buffer collector("stream data buffer");
    bool was_filtered = false;
    pipeStreamData(&collector, &was_filtered, 0, decode_level, false, false);
    if (was_filtered)
    {
        QTC::TC("qpdf", "QPDF_Stream getStreamData");
        return collector.getBuffer();
    }
    throw QPDFExc(qpdf_e_unsupported, qpdf->getFilename(),
                  "", this->offset,
                  "getStreamData called on unfilterable stream");
}
|
|
|
|
|
|
|
|
// Return the stream data exactly as stored, with no decoding
// (qpdf_dl_none) and no encoding. Throws QPDFExc
// (qpdf_e_unsupported) when piping the data fails.
PointerHolder<Buffer>
QPDF_Stream::getRawStreamData()
{
    Pl_Buffer collector("stream data buffer");
    bool const piped =
        pipeStreamData(&collector, nullptr, 0, qpdf_dl_none, false, false);
    if (piped)
    {
        QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
        return collector.getBuffer();
    }
    throw QPDFExc(qpdf_e_unsupported, qpdf->getFilename(),
                  "", this->offset,
                  "error getting raw stream data");
}
|
|
|
|
|
2012-12-30 00:00:05 +00:00
|
|
|
bool
|
|
|
|
QPDF_Stream::understandDecodeParams(
|
|
|
|
std::string const& filter, QPDFObjectHandle decode_obj,
|
2017-12-25 00:18:52 +00:00
|
|
|
int& predictor, int& columns,
|
|
|
|
int& colors, int& bits_per_component,
|
|
|
|
bool& early_code_change)
|
2012-12-30 00:00:05 +00:00
|
|
|
{
|
|
|
|
bool filterable = true;
|
|
|
|
std::set<std::string> keys = decode_obj.getKeys();
|
|
|
|
for (std::set<std::string>::iterator iter = keys.begin();
|
|
|
|
iter != keys.end(); ++iter)
|
|
|
|
{
|
|
|
|
std::string const& key = *iter;
|
2017-12-25 00:18:52 +00:00
|
|
|
if (((filter == "/FlateDecode") || (filter == "/LZWDecode")) &&
|
|
|
|
(key == "/Predictor"))
|
2012-12-30 00:00:05 +00:00
|
|
|
{
|
|
|
|
QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
|
|
|
|
if (predictor_obj.isInteger())
|
|
|
|
{
|
2019-06-21 03:35:23 +00:00
|
|
|
predictor = predictor_obj.getIntValueAsInt();
|
2018-01-13 19:24:09 +00:00
|
|
|
if (! ((predictor == 1) || (predictor == 2) ||
|
2017-12-25 00:18:52 +00:00
|
|
|
((predictor >= 10) && (predictor <= 15))))
|
2012-12-30 00:00:05 +00:00
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if ((filter == "/LZWDecode") && (key == "/EarlyChange"))
|
|
|
|
{
|
|
|
|
QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
|
|
|
|
if (earlychange_obj.isInteger())
|
|
|
|
{
|
2019-06-21 03:35:23 +00:00
|
|
|
int earlychange = earlychange_obj.getIntValueAsInt();
|
2012-12-30 00:00:05 +00:00
|
|
|
early_code_change = (earlychange == 1);
|
|
|
|
if (! ((earlychange == 0) || (earlychange == 1)))
|
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
2017-12-25 00:18:52 +00:00
|
|
|
else if ((key == "/Columns") ||
|
|
|
|
(key == "/Colors") ||
|
|
|
|
(key == "/BitsPerComponent"))
|
2012-12-30 00:00:05 +00:00
|
|
|
{
|
2017-12-25 00:18:52 +00:00
|
|
|
QPDFObjectHandle param_obj = decode_obj.getKey(key);
|
|
|
|
if (param_obj.isInteger())
|
2012-12-30 00:00:05 +00:00
|
|
|
{
|
2019-06-21 03:35:23 +00:00
|
|
|
int val = param_obj.getIntValueAsInt();
|
2017-12-25 00:18:52 +00:00
|
|
|
if (key == "/Columns")
|
|
|
|
{
|
|
|
|
columns = val;
|
|
|
|
}
|
|
|
|
else if (key == "/Colors")
|
|
|
|
{
|
|
|
|
colors = val;
|
|
|
|
}
|
|
|
|
else if (key == "/BitsPerComponent")
|
|
|
|
{
|
|
|
|
bits_per_component = val;
|
|
|
|
}
|
2012-12-30 00:00:05 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if ((filter == "/Crypt") &&
|
|
|
|
(((key == "/Type") || (key == "/Name")) &&
|
|
|
|
(decode_obj.getKey("/Type").isNull() ||
|
|
|
|
(decode_obj.getKey("/Type").isName() &&
|
|
|
|
(decode_obj.getKey("/Type").getName() ==
|
|
|
|
"/CryptFilterDecodeParms")))))
|
|
|
|
{
|
|
|
|
// we handle this in decryptStream
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return filterable;
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
bool
|
|
|
|
QPDF_Stream::filterable(std::vector<std::string>& filters,
|
2017-08-19 13:18:14 +00:00
|
|
|
bool& specialized_compression,
|
|
|
|
bool& lossy_compression,
|
2008-04-29 12:55:25 +00:00
|
|
|
int& predictor, int& columns,
|
2017-12-25 00:18:52 +00:00
|
|
|
int& colors, int& bits_per_component,
|
2008-04-29 12:55:25 +00:00
|
|
|
bool& early_code_change)
|
|
|
|
{
|
2010-09-05 15:00:44 +00:00
|
|
|
if (filter_abbreviations.empty())
|
|
|
|
{
|
|
|
|
// The PDF specification provides these filter abbreviations
|
|
|
|
// for use in inline images, but according to table H.1 in the
|
|
|
|
// pre-ISO versions of the PDF specification, Adobe Reader
|
|
|
|
// also accepts them for stream filters.
|
|
|
|
filter_abbreviations["/AHx"] = "/ASCIIHexDecode";
|
|
|
|
filter_abbreviations["/A85"] = "/ASCII85Decode";
|
|
|
|
filter_abbreviations["/LZW"] = "/LZWDecode";
|
|
|
|
filter_abbreviations["/Fl"] = "/FlateDecode";
|
|
|
|
filter_abbreviations["/RL"] = "/RunLengthDecode";
|
|
|
|
filter_abbreviations["/CCF"] = "/CCITTFaxDecode";
|
|
|
|
filter_abbreviations["/DCT"] = "/DCTDecode";
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
// Check filters
|
|
|
|
|
|
|
|
QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
|
|
|
|
bool filters_okay = true;
|
|
|
|
|
|
|
|
if (filter_obj.isNull())
|
|
|
|
{
|
|
|
|
// No filters
|
|
|
|
}
|
|
|
|
else if (filter_obj.isName())
|
|
|
|
{
|
|
|
|
// One filter
|
|
|
|
filters.push_back(filter_obj.getName());
|
|
|
|
}
|
|
|
|
else if (filter_obj.isArray())
|
|
|
|
{
|
|
|
|
// Potentially multiple filters
|
|
|
|
int n = filter_obj.getArrayNItems();
|
|
|
|
for (int i = 0; i < n; ++i)
|
|
|
|
{
|
|
|
|
QPDFObjectHandle item = filter_obj.getArrayItem(i);
|
|
|
|
if (item.isName())
|
|
|
|
{
|
|
|
|
filters.push_back(item.getName());
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
filters_okay = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
filters_okay = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (! filters_okay)
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream invalid filter");
|
2017-07-27 22:18:18 +00:00
|
|
|
warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
|
|
|
|
"", this->offset,
|
|
|
|
"stream filter type is not name or array"));
|
|
|
|
return false;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2012-12-30 00:00:05 +00:00
|
|
|
bool filterable = true;
|
2008-04-29 12:55:25 +00:00
|
|
|
|
|
|
|
for (std::vector<std::string>::iterator iter = filters.begin();
|
|
|
|
iter != filters.end(); ++iter)
|
|
|
|
{
|
2010-09-05 15:00:44 +00:00
|
|
|
std::string& filter = *iter;
|
|
|
|
|
|
|
|
if (filter_abbreviations.count(filter))
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation");
|
|
|
|
filter = filter_abbreviations[filter];
|
|
|
|
}
|
|
|
|
|
2017-08-19 13:18:14 +00:00
|
|
|
if (filter == "/RunLengthDecode")
|
|
|
|
{
|
|
|
|
specialized_compression = true;
|
|
|
|
}
|
|
|
|
else if (filter == "/DCTDecode")
|
|
|
|
{
|
|
|
|
specialized_compression = true;
|
|
|
|
lossy_compression = true;
|
|
|
|
}
|
|
|
|
else if (! ((filter == "/Crypt") ||
|
|
|
|
(filter == "/FlateDecode") ||
|
|
|
|
(filter == "/LZWDecode") ||
|
|
|
|
(filter == "/ASCII85Decode") ||
|
|
|
|
(filter == "/ASCIIHexDecode")))
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-30 00:00:05 +00:00
|
|
|
if (! filterable)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// `filters' now contains a list of filters to be applied in
|
|
|
|
// order. See which ones we can support.
|
|
|
|
|
|
|
|
// Initialize values to their defaults as per the PDF spec
|
|
|
|
predictor = 1;
|
|
|
|
columns = 0;
|
2017-12-25 00:18:52 +00:00
|
|
|
colors = 1;
|
|
|
|
bits_per_component = 8;
|
2012-12-30 00:00:05 +00:00
|
|
|
early_code_change = true;
|
|
|
|
|
|
|
|
// See if we can support any decode parameters that are specified.
|
|
|
|
|
|
|
|
QPDFObjectHandle decode_obj = this->stream_dict.getKey("/DecodeParms");
|
|
|
|
std::vector<QPDFObjectHandle> decode_parms;
|
2019-06-09 21:19:07 +00:00
|
|
|
if (decode_obj.isArray() && (decode_obj.getArrayNItems() == 0))
|
|
|
|
{
|
|
|
|
decode_obj = QPDFObjectHandle::newNull();
|
|
|
|
}
|
2012-12-30 00:00:05 +00:00
|
|
|
if (decode_obj.isArray())
|
|
|
|
{
|
|
|
|
for (int i = 0; i < decode_obj.getArrayNItems(); ++i)
|
|
|
|
{
|
|
|
|
decode_parms.push_back(decode_obj.getArrayItem(i));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (unsigned int i = 0; i < filters.size(); ++i)
|
|
|
|
{
|
|
|
|
decode_parms.push_back(decode_obj);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-07-07 21:26:03 +00:00
|
|
|
// Ignore /DecodeParms entirely if /Filters is empty. At least
|
|
|
|
// one case of a file whose /DecodeParms was [ << >> ] when
|
|
|
|
// /Filters was empty has been seen in the wild.
|
|
|
|
if ((filters.size() != 0) && (decode_parms.size() != filters.size()))
|
2012-12-30 00:00:05 +00:00
|
|
|
{
|
2017-07-27 22:18:18 +00:00
|
|
|
warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
|
|
|
|
"", this->offset,
|
|
|
|
"stream /DecodeParms length is"
|
|
|
|
" inconsistent with filters"));
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (! filterable)
|
|
|
|
{
|
|
|
|
return false;
|
2012-12-30 00:00:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (unsigned int i = 0; i < filters.size(); ++i)
|
|
|
|
{
|
2013-10-05 23:42:39 +00:00
|
|
|
QPDFObjectHandle decode_item = decode_parms.at(i);
|
2012-12-30 00:00:05 +00:00
|
|
|
if (decode_item.isNull())
|
|
|
|
{
|
|
|
|
// okay
|
|
|
|
}
|
|
|
|
else if (decode_item.isDictionary())
|
|
|
|
{
|
|
|
|
if (! understandDecodeParams(
|
2013-10-05 23:42:39 +00:00
|
|
|
filters.at(i), decode_item,
|
2017-12-25 00:18:52 +00:00
|
|
|
predictor, columns, colors, bits_per_component,
|
|
|
|
early_code_change))
|
2012-12-30 00:00:05 +00:00
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((predictor > 1) && (columns == 0))
|
|
|
|
{
|
|
|
|
// invalid
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (! filterable)
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
return filterable;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2020-04-05 03:35:35 +00:00
|
|
|
QPDF_Stream::pipeStreamData(Pipeline* pipeline, bool* filterp,
|
2019-06-21 03:29:02 +00:00
|
|
|
int encode_flags,
|
2017-08-19 13:18:14 +00:00
|
|
|
qpdf_stream_decode_level_e decode_level,
|
2017-09-12 19:48:08 +00:00
|
|
|
bool suppress_warnings, bool will_retry)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
std::vector<std::string> filters;
|
|
|
|
int predictor = 1;
|
|
|
|
int columns = 0;
|
2017-12-25 00:18:52 +00:00
|
|
|
int colors = 1;
|
|
|
|
int bits_per_component = 8;
|
2008-04-29 12:55:25 +00:00
|
|
|
bool early_code_change = true;
|
2017-08-19 13:18:14 +00:00
|
|
|
bool specialized_compression = false;
|
|
|
|
bool lossy_compression = false;
|
2020-04-05 03:35:35 +00:00
|
|
|
bool ignored;
|
|
|
|
if (filterp == nullptr)
|
|
|
|
{
|
|
|
|
filterp = &ignored;
|
|
|
|
}
|
|
|
|
bool& filter = *filterp;
|
|
|
|
filter = (! ((encode_flags == 0) && (decode_level == qpdf_dl_none)));
|
|
|
|
bool success = true;
|
2008-04-29 12:55:25 +00:00
|
|
|
if (filter)
|
|
|
|
{
|
2017-08-19 13:18:14 +00:00
|
|
|
filter = filterable(filters, specialized_compression, lossy_compression,
|
2017-12-25 00:18:52 +00:00
|
|
|
predictor, columns,
|
|
|
|
colors, bits_per_component,
|
|
|
|
early_code_change);
|
2017-08-19 13:18:14 +00:00
|
|
|
if ((decode_level < qpdf_dl_all) && lossy_compression)
|
|
|
|
{
|
|
|
|
filter = false;
|
|
|
|
}
|
|
|
|
if ((decode_level < qpdf_dl_specialized) && specialized_compression)
|
|
|
|
{
|
|
|
|
filter = false;
|
|
|
|
}
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream special filters",
|
|
|
|
(! filter) ? 0 :
|
|
|
|
lossy_compression ? 1 :
|
|
|
|
specialized_compression ? 2 :
|
|
|
|
3);
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (pipeline == 0)
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
|
2020-04-05 03:35:35 +00:00
|
|
|
// Return value is whether we can filter in this case.
|
2008-04-29 12:55:25 +00:00
|
|
|
return filter;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Construct the pipeline in reverse order. Force pipelines we
|
|
|
|
// create to be deleted when this function finishes.
|
|
|
|
std::vector<PointerHolder<Pipeline> > to_delete;
|
|
|
|
|
2018-02-02 23:21:34 +00:00
|
|
|
PointerHolder<ContentNormalizer> normalizer;
|
2008-04-29 12:55:25 +00:00
|
|
|
if (filter)
|
|
|
|
{
|
2017-08-19 13:18:14 +00:00
|
|
|
if (encode_flags & qpdf_ef_compress)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2018-02-02 23:21:34 +00:00
|
|
|
pipeline = new Pl_Flate("compress stream", pipeline,
|
2008-04-29 12:55:25 +00:00
|
|
|
Pl_Flate::a_deflate);
|
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
|
|
|
|
2017-08-19 13:18:14 +00:00
|
|
|
if (encode_flags & qpdf_ef_normalize)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2018-02-02 23:21:34 +00:00
|
|
|
normalizer = new ContentNormalizer();
|
|
|
|
pipeline = new Pl_QPDFTokenizer(
|
2018-02-16 01:45:19 +00:00
|
|
|
"normalizer", normalizer.getPointer(), pipeline);
|
2008-04-29 12:55:25 +00:00
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
|
|
|
|
2018-02-02 23:21:34 +00:00
|
|
|
for (std::vector<PointerHolder<
|
|
|
|
QPDFObjectHandle::TokenFilter> >::reverse_iterator iter =
|
|
|
|
this->token_filters.rbegin();
|
|
|
|
iter != this->token_filters.rend(); ++iter)
|
|
|
|
{
|
|
|
|
pipeline = new Pl_QPDFTokenizer(
|
2018-02-16 01:45:19 +00:00
|
|
|
"token filter", (*iter).getPointer(), pipeline);
|
2018-02-02 23:21:34 +00:00
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
|
|
|
|
2020-04-16 15:43:37 +00:00
|
|
|
for (std::vector<std::string>::reverse_iterator f_iter =
|
|
|
|
filters.rbegin();
|
|
|
|
f_iter != filters.rend(); ++f_iter)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2020-04-16 15:43:37 +00:00
|
|
|
std::string const& filter_name = *f_iter;
|
2017-12-25 00:18:52 +00:00
|
|
|
|
2020-04-16 15:43:37 +00:00
|
|
|
if ((filter_name == "/FlateDecode") ||
|
|
|
|
(filter_name == "/LZWDecode"))
|
2017-12-25 00:18:52 +00:00
|
|
|
{
|
2018-01-13 19:24:09 +00:00
|
|
|
if ((predictor >= 10) && (predictor <= 15))
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream PNG filter");
|
|
|
|
pipeline = new Pl_PNGFilter(
|
|
|
|
"png decode", pipeline, Pl_PNGFilter::a_decode,
|
2019-06-21 03:35:23 +00:00
|
|
|
QIntC::to_uint(columns),
|
|
|
|
QIntC::to_uint(colors),
|
|
|
|
QIntC::to_uint(bits_per_component));
|
2018-01-13 19:24:09 +00:00
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
|
|
|
else if (predictor == 2)
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream TIFF predictor");
|
|
|
|
pipeline = new Pl_TIFFPredictor(
|
|
|
|
"tiff decode", pipeline, Pl_TIFFPredictor::a_decode,
|
2019-06-21 03:35:23 +00:00
|
|
|
QIntC::to_uint(columns),
|
|
|
|
QIntC::to_uint(colors),
|
|
|
|
QIntC::to_uint(bits_per_component));
|
2018-01-13 19:24:09 +00:00
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
2017-12-25 00:18:52 +00:00
|
|
|
}
|
|
|
|
|
2020-04-16 15:43:37 +00:00
|
|
|
if (filter_name == "/Crypt")
|
2009-10-19 01:58:31 +00:00
|
|
|
{
|
|
|
|
// Ignore -- handled by pipeStreamData
|
|
|
|
}
|
2020-04-16 15:43:37 +00:00
|
|
|
else if (filter_name == "/FlateDecode")
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
pipeline = new Pl_Flate("stream inflate",
|
|
|
|
pipeline, Pl_Flate::a_inflate);
|
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
2020-04-16 15:43:37 +00:00
|
|
|
else if (filter_name == "/ASCII85Decode")
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
pipeline = new Pl_ASCII85Decoder("ascii85 decode", pipeline);
|
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
2020-04-16 15:43:37 +00:00
|
|
|
else if (filter_name == "/ASCIIHexDecode")
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
pipeline = new Pl_ASCIIHexDecoder("asciiHex decode", pipeline);
|
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
2020-04-16 15:43:37 +00:00
|
|
|
else if (filter_name == "/LZWDecode")
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
pipeline = new Pl_LZWDecoder("lzw decode", pipeline,
|
|
|
|
early_code_change);
|
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
2020-04-16 15:43:37 +00:00
|
|
|
else if (filter_name == "/RunLengthDecode")
|
2017-08-19 13:18:14 +00:00
|
|
|
{
|
|
|
|
pipeline = new Pl_RunLength("runlength decode", pipeline,
|
|
|
|
Pl_RunLength::a_decode);
|
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
2020-04-16 15:43:37 +00:00
|
|
|
else if (filter_name == "/DCTDecode")
|
2017-08-19 13:18:14 +00:00
|
|
|
{
|
|
|
|
pipeline = new Pl_DCT("DCT decode", pipeline);
|
|
|
|
to_delete.push_back(pipeline);
|
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
else
|
|
|
|
{
|
2009-09-26 18:36:04 +00:00
|
|
|
throw std::logic_error(
|
|
|
|
"INTERNAL ERROR: QPDFStream: unknown filter "
|
|
|
|
"encountered after check");
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-08-02 22:40:52 +00:00
|
|
|
if (this->stream_data.getPointer())
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data");
|
2010-09-24 20:45:18 +00:00
|
|
|
pipeline->write(this->stream_data->getBuffer(),
|
|
|
|
this->stream_data->getSize());
|
2010-08-02 22:40:52 +00:00
|
|
|
pipeline->finish();
|
|
|
|
}
|
2010-08-05 19:04:22 +00:00
|
|
|
else if (this->stream_provider.getPointer())
|
|
|
|
{
|
|
|
|
Pl_Count count("stream provider count", pipeline);
|
2020-04-05 03:35:35 +00:00
|
|
|
if (this->stream_provider->supportsRetry())
|
|
|
|
{
|
|
|
|
if (! this->stream_provider->provideStreamData(
|
|
|
|
this->objid, this->generation, &count,
|
|
|
|
suppress_warnings, will_retry))
|
|
|
|
{
|
|
|
|
filter = false;
|
|
|
|
success = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
this->stream_provider->provideStreamData(
|
|
|
|
this->objid, this->generation, &count);
|
|
|
|
}
|
2012-06-21 23:32:21 +00:00
|
|
|
qpdf_offset_t actual_length = count.getCount();
|
2012-07-07 21:33:45 +00:00
|
|
|
qpdf_offset_t desired_length = 0;
|
|
|
|
if (this->stream_dict.hasKey("/Length"))
|
|
|
|
{
|
|
|
|
desired_length = this->stream_dict.getKey("/Length").getIntValue();
|
|
|
|
if (actual_length == desired_length)
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipe use stream provider");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
|
2017-07-27 22:18:18 +00:00
|
|
|
// This would be caused by programmer error on the
|
|
|
|
// part of a library user, not by invalid input data.
|
|
|
|
throw std::runtime_error(
|
2012-07-07 21:33:45 +00:00
|
|
|
"stream data provider for " +
|
|
|
|
QUtil::int_to_string(this->objid) + " " +
|
|
|
|
QUtil::int_to_string(this->generation) +
|
|
|
|
" provided " +
|
|
|
|
QUtil::int_to_string(actual_length) +
|
|
|
|
" bytes instead of expected " +
|
|
|
|
QUtil::int_to_string(desired_length) + " bytes");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream provider length not provided");
|
|
|
|
this->stream_dict.replaceKey(
|
|
|
|
"/Length", QPDFObjectHandle::newInteger(actual_length));
|
|
|
|
}
|
2010-08-05 19:04:22 +00:00
|
|
|
}
|
2010-08-05 20:20:52 +00:00
|
|
|
else if (this->offset == 0)
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipe no stream data");
|
|
|
|
throw std::logic_error(
|
|
|
|
"pipeStreamData called for stream with no data");
|
|
|
|
}
|
2010-08-02 22:40:52 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipe original stream data");
|
2017-07-28 03:42:27 +00:00
|
|
|
if (! QPDF::Pipe::pipeStreamData(this->qpdf, this->objid, this->generation,
|
|
|
|
this->offset, this->length,
|
|
|
|
this->stream_dict, pipeline,
|
2017-09-12 19:48:08 +00:00
|
|
|
suppress_warnings,
|
|
|
|
will_retry))
|
2017-07-28 03:42:27 +00:00
|
|
|
{
|
|
|
|
filter = false;
|
2020-04-05 03:35:35 +00:00
|
|
|
success = false;
|
2017-07-28 03:42:27 +00:00
|
|
|
}
|
2010-08-02 22:40:52 +00:00
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2018-02-03 02:16:40 +00:00
|
|
|
if (filter &&
|
|
|
|
(! suppress_warnings) &&
|
|
|
|
normalizer.getPointer() &&
|
|
|
|
normalizer->anyBadTokens())
|
|
|
|
{
|
|
|
|
warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
|
|
|
|
"", this->offset,
|
|
|
|
"content normalization encountered bad tokens"));
|
|
|
|
if (normalizer->lastTokenWasBad())
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
|
|
|
|
warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
|
|
|
|
"", this->offset,
|
|
|
|
"normalized content ended with a bad token;"
|
|
|
|
" you may be able to resolve this by"
|
|
|
|
" coalescing content streams in combination"
|
|
|
|
" with normalizing content. From the command"
|
|
|
|
" line, specify --coalesce-contents"));
|
|
|
|
}
|
|
|
|
warn(QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
|
|
|
|
"", this->offset,
|
|
|
|
"Resulting stream data may be corrupted but is"
|
|
|
|
" may still useful for manual inspection."
|
|
|
|
" For more information on this warning, search"
|
|
|
|
" for content normalization in the manual."));
|
|
|
|
}
|
|
|
|
|
2020-04-05 03:35:35 +00:00
|
|
|
return success;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
2010-08-02 22:17:01 +00:00
|
|
|
|
|
|
|
void
|
|
|
|
QPDF_Stream::replaceStreamData(PointerHolder<Buffer> data,
|
2010-08-05 19:04:22 +00:00
|
|
|
QPDFObjectHandle const& filter,
|
|
|
|
QPDFObjectHandle const& decode_parms)
|
2010-08-02 22:17:01 +00:00
|
|
|
{
|
|
|
|
this->stream_data = data;
|
2010-08-05 19:04:22 +00:00
|
|
|
this->stream_provider = 0;
|
2010-09-24 20:45:18 +00:00
|
|
|
replaceFilterData(filter, decode_parms, data->getSize());
|
2010-08-05 19:04:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDF_Stream::replaceStreamData(
|
|
|
|
PointerHolder<QPDFObjectHandle::StreamDataProvider> provider,
|
|
|
|
QPDFObjectHandle const& filter,
|
2012-07-07 21:33:45 +00:00
|
|
|
QPDFObjectHandle const& decode_parms)
|
2010-08-05 19:04:22 +00:00
|
|
|
{
|
|
|
|
this->stream_provider = provider;
|
|
|
|
this->stream_data = 0;
|
2012-07-07 21:33:45 +00:00
|
|
|
replaceFilterData(filter, decode_parms, 0);
|
2010-08-05 19:04:22 +00:00
|
|
|
}
|
|
|
|
|
2018-02-02 23:21:34 +00:00
|
|
|
void
|
|
|
|
QPDF_Stream::addTokenFilter(
|
|
|
|
PointerHolder<QPDFObjectHandle::TokenFilter> token_filter)
|
|
|
|
{
|
|
|
|
this->token_filters.push_back(token_filter);
|
|
|
|
}
|
|
|
|
|
2010-08-05 19:04:22 +00:00
|
|
|
void
|
|
|
|
QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter,
|
|
|
|
QPDFObjectHandle const& decode_parms,
|
|
|
|
size_t length)
|
|
|
|
{
|
2010-08-02 22:17:01 +00:00
|
|
|
this->stream_dict.replaceOrRemoveKey("/Filter", filter);
|
|
|
|
this->stream_dict.replaceOrRemoveKey("/DecodeParms", decode_parms);
|
2012-07-07 21:33:45 +00:00
|
|
|
if (length == 0)
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF_Stream unknown stream length");
|
|
|
|
this->stream_dict.removeKey("/Length");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
this->stream_dict.replaceKey(
|
2019-06-21 03:35:23 +00:00
|
|
|
"/Length", QPDFObjectHandle::newInteger(
|
|
|
|
QIntC::to_longlong(length)));
|
2012-07-07 21:33:45 +00:00
|
|
|
}
|
2010-08-02 22:17:01 +00:00
|
|
|
}
|
2012-07-21 13:00:06 +00:00
|
|
|
|
|
|
|
void
|
|
|
|
QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
|
|
|
|
{
|
|
|
|
this->stream_dict = new_dict;
|
2018-02-16 22:25:27 +00:00
|
|
|
setDictDescription();
|
2012-07-21 13:00:06 +00:00
|
|
|
QPDFObjectHandle length_obj = new_dict.getKey("/Length");
|
|
|
|
if (length_obj.isInteger())
|
|
|
|
{
|
2019-06-21 03:35:23 +00:00
|
|
|
this->length = QIntC::to_size(length_obj.getUIntValue());
|
2012-07-21 13:00:06 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
this->length = 0;
|
|
|
|
}
|
|
|
|
}
|
2017-07-27 22:18:18 +00:00
|
|
|
|
|
|
|
void
|
|
|
|
QPDF_Stream::warn(QPDFExc const& e)
|
|
|
|
{
|
|
|
|
QPDF::Warner::warn(this->qpdf, e);
|
|
|
|
}
|