2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/QPDF_Stream.hh>
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
#include <qpdf/ContentNormalizer.hh>
|
2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/Pipeline.hh>
|
2022-05-07 11:12:15 -04:00
|
|
|
#include <qpdf/Pl_Base64.hh>
|
2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/Pl_Buffer.hh>
|
2010-08-05 19:04:22 +00:00
|
|
|
#include <qpdf/Pl_Count.hh>
|
2022-05-07 11:12:15 -04:00
|
|
|
#include <qpdf/Pl_Discard.hh>
|
2022-04-02 17:14:10 -04:00
|
|
|
#include <qpdf/Pl_Flate.hh>
|
2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/Pl_QPDFTokenizer.hh>
|
2019-06-20 23:35:23 -04:00
|
|
|
#include <qpdf/QIntC.hh>
|
2022-04-02 17:14:10 -04:00
|
|
|
#include <qpdf/QPDF.hh>
|
|
|
|
#include <qpdf/QPDFExc.hh>
|
|
|
|
#include <qpdf/QTC.hh>
|
|
|
|
#include <qpdf/QUtil.hh>
|
2020-12-23 06:12:49 -05:00
|
|
|
#include <qpdf/SF_ASCII85Decode.hh>
|
|
|
|
#include <qpdf/SF_ASCIIHexDecode.hh>
|
2022-04-02 17:14:10 -04:00
|
|
|
#include <qpdf/SF_DCTDecode.hh>
|
|
|
|
#include <qpdf/SF_FlateLzwDecode.hh>
|
|
|
|
#include <qpdf/SF_RunLengthDecode.hh>
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2009-09-26 18:36:04 +00:00
|
|
|
#include <stdexcept>
|
|
|
|
|
2022-04-16 13:21:57 -04:00
|
|
|
namespace
|
2020-12-23 06:12:49 -05:00
|
|
|
{
|
2022-04-16 13:21:57 -04:00
|
|
|
class SF_Crypt: public QPDFStreamFilter
|
2020-12-23 06:12:49 -05:00
|
|
|
{
|
2022-04-16 13:21:57 -04:00
|
|
|
public:
|
|
|
|
SF_Crypt() = default;
|
2023-05-20 14:25:46 +01:00
|
|
|
~SF_Crypt() override = default;
|
2022-04-16 13:21:57 -04:00
|
|
|
|
2023-05-20 14:25:46 +01:00
|
|
|
bool
|
|
|
|
setDecodeParms(QPDFObjectHandle decode_parms) override
|
2022-04-16 13:21:57 -04:00
|
|
|
{
|
|
|
|
if (decode_parms.isNull()) {
|
|
|
|
return true;
|
2020-12-23 06:12:49 -05:00
|
|
|
}
|
2022-04-16 13:21:57 -04:00
|
|
|
bool filterable = true;
|
2022-04-30 09:43:07 -04:00
|
|
|
for (auto const& key: decode_parms.getKeys()) {
|
2022-04-16 13:21:57 -04:00
|
|
|
if (((key == "/Type") || (key == "/Name")) &&
|
|
|
|
((!decode_parms.hasKey("/Type")) ||
|
2023-05-21 13:35:09 -04:00
|
|
|
decode_parms.isDictionaryOfType("/CryptFilterDecodeParms"))) {
|
2022-04-16 13:21:57 -04:00
|
|
|
// we handle this in decryptStream
|
|
|
|
} else {
|
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return filterable;
|
2020-12-23 06:12:49 -05:00
|
|
|
}
|
|
|
|
|
2023-05-20 14:25:46 +01:00
|
|
|
Pipeline*
|
|
|
|
getDecodePipeline(Pipeline*) override
|
2022-04-16 13:21:57 -04:00
|
|
|
{
|
|
|
|
// Not used -- handled by pipeStreamData
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
};
|
2022-05-07 11:12:15 -04:00
|
|
|
|
|
|
|
class StreamBlobProvider
|
|
|
|
{
|
|
|
|
public:
|
2023-05-21 13:35:09 -04:00
|
|
|
StreamBlobProvider(QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level);
|
2022-05-07 11:12:15 -04:00
|
|
|
void operator()(Pipeline*);
|
|
|
|
|
|
|
|
private:
|
|
|
|
QPDF_Stream* stream;
|
|
|
|
qpdf_stream_decode_level_e decode_level;
|
|
|
|
};
|
2022-04-16 13:21:57 -04:00
|
|
|
} // namespace
|
2020-12-23 06:12:49 -05:00
|
|
|
|
|
|
|
// Abbreviated filter name -> full filter name.
//
// The PDF specification provides these filter abbreviations for use in inline images, but
// according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
// accepts them for stream filters. The entries are listed alphabetically by abbreviation; the
// map keeps them sorted by key regardless of the order written here.
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
    {"/A85", "/ASCII85Decode"},
    {"/AHx", "/ASCIIHexDecode"},
    {"/CCF", "/CCITTFaxDecode"},
    {"/DCT", "/DCTDecode"},
    {"/Fl", "/FlateDecode"},
    {"/LZW", "/LZWDecode"},
    {"/RL", "/RunLengthDecode"},
};
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>>
|
|
|
|
QPDF_Stream::filter_factories = {
|
|
|
|
{"/Crypt", []() { return std::make_shared<SF_Crypt>(); }},
|
|
|
|
{"/FlateDecode", SF_FlateLzwDecode::flate_factory},
|
|
|
|
{"/LZWDecode", SF_FlateLzwDecode::lzw_factory},
|
|
|
|
{"/RunLengthDecode", SF_RunLengthDecode::factory},
|
|
|
|
{"/DCTDecode", SF_DCTDecode::factory},
|
|
|
|
{"/ASCII85Decode", SF_ASCII85Decode::factory},
|
|
|
|
{"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
|
2020-12-23 06:12:49 -05:00
|
|
|
};
|
2010-09-05 15:00:44 +00:00
|
|
|
|
2022-05-07 11:12:15 -04:00
|
|
|
StreamBlobProvider::StreamBlobProvider(
|
|
|
|
QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) :
|
|
|
|
stream(stream),
|
|
|
|
decode_level(decode_level)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StreamBlobProvider::operator()(Pipeline* p)
|
|
|
|
{
|
|
|
|
this->stream->pipeStreamData(p, nullptr, 0, decode_level, false, false);
|
|
|
|
}
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
// Builds a stream value from its dictionary plus the offset and length passed in.
//
// Throws std::logic_error if stream_dict is not a dictionary. On success the object description
// is set to "<filename>, stream object <og>" via setDescription, which also describes the
// dictionary.
QPDF_Stream::QPDF_Stream(
    QPDF* qpdf,
    QPDFObjGen const& og,
    QPDFObjectHandle stream_dict,
    qpdf_offset_t offset,
    size_t length) :
    QPDFValue(::ot_stream, "stream"),
    filter_on_write(true),
    stream_dict(stream_dict),
    length(length)
{
    bool const have_dictionary = stream_dict.isDictionary();
    if (!have_dictionary) {
        throw std::logic_error(
            "stream object instantiated with non-dictionary object for dictionary");
    }

    // setDescription takes the shared pointer by non-const reference, so it needs a named
    // variable rather than a temporary.
    auto description = std::make_shared<QPDFValue::Description>(
        qpdf->getFilename() + ", stream object " + og.unparse(' '));
    setDescription(qpdf, description, offset);
}
|
|
|
|
|
2022-09-08 11:29:23 -04:00
|
|
|
std::shared_ptr<QPDFObject>
|
2022-06-16 17:45:04 +01:00
|
|
|
QPDF_Stream::create(
|
|
|
|
QPDF* qpdf,
|
2022-07-23 19:44:11 +01:00
|
|
|
QPDFObjGen const& og,
|
2022-06-16 17:45:04 +01:00
|
|
|
QPDFObjectHandle stream_dict,
|
|
|
|
qpdf_offset_t offset,
|
|
|
|
size_t length)
|
|
|
|
{
|
2022-07-23 19:44:11 +01:00
|
|
|
return do_create(new QPDF_Stream(qpdf, og, stream_dict, offset, length));
|
2022-06-16 17:45:04 +01:00
|
|
|
}
|
|
|
|
|
2022-09-08 11:29:23 -04:00
|
|
|
std::shared_ptr<QPDFObject>
|
2022-11-14 17:54:12 +00:00
|
|
|
QPDF_Stream::copy(bool shallow)
|
2022-06-16 17:45:04 +01:00
|
|
|
{
|
2022-11-14 22:06:04 +00:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream ERR shallow copy stream");
|
2022-11-14 17:54:12 +00:00
|
|
|
throw std::runtime_error("stream objects cannot be cloned");
|
2022-06-16 17:45:04 +01:00
|
|
|
}
|
|
|
|
|
2020-12-23 06:12:49 -05:00
|
|
|
void
|
|
|
|
QPDF_Stream::registerStreamFilter(
|
2023-05-21 13:35:09 -04:00
|
|
|
std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
|
2020-12-23 06:12:49 -05:00
|
|
|
{
|
|
|
|
filter_factories[filter_name] = factory;
|
|
|
|
}
|
|
|
|
|
2020-12-26 19:45:01 -05:00
|
|
|
void
|
|
|
|
QPDF_Stream::setFilterOnWrite(bool val)
|
|
|
|
{
|
|
|
|
this->filter_on_write = val;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
QPDF_Stream::getFilterOnWrite() const
|
|
|
|
{
|
|
|
|
return this->filter_on_write;
|
|
|
|
}
|
|
|
|
|
2022-09-07 16:49:31 -04:00
|
|
|
void
|
2022-09-08 11:06:15 -04:00
|
|
|
QPDF_Stream::disconnect()
|
2022-09-07 16:49:31 -04:00
|
|
|
{
|
|
|
|
this->stream_provider = nullptr;
|
2022-09-08 11:06:15 -04:00
|
|
|
QPDFObjectHandle::DisconnectAccess::disconnect(this->stream_dict);
|
2022-09-07 16:49:31 -04:00
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
std::string
|
|
|
|
QPDF_Stream::unparse()
|
|
|
|
{
|
|
|
|
// Unparse stream objects as indirect references
|
2022-07-23 19:44:11 +01:00
|
|
|
return og.unparse(' ') + " R";
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2018-12-17 17:40:29 -05:00
|
|
|
// JSON version 1 yields just the stream dictionary's JSON. Any other version goes through
// getStreamJSON with no stream data, no decoding, no pipeline and an empty data file name.
JSON
QPDF_Stream::getJSON(int json_version)
{
    return (json_version == 1)
        ? stream_dict.getJSON(json_version)
        : getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, "");
}
|
|
|
|
|
|
|
|
// Builds the JSON representation of this stream: a dictionary with a "dict" member and,
// depending on json_data, either a "datafile" member (data written to p) or a "data" blob.
//
// json_version   passed through to the dictionary's getJSON.
// json_data      qpdf_sj_none: dictionary only; qpdf_sj_inline: add a "data" blob;
//                qpdf_sj_file: add "datafile" and write the stream data to p.
// decode_level   requested decode level; reset to qpdf_dl_none if piping at that level fails or
//                does not actually filter.
// p              must be non-null for qpdf_sj_file and null for qpdf_sj_none/qpdf_sj_inline.
// data_filename  must be non-empty for qpdf_sj_file.
//
// Throws std::logic_error when the p/data_filename requirements are violated, when file-mode
// data could not be obtained after both attempts, or when json_data has an unexpected value.
JSON
QPDF_Stream::getStreamJSON(
    int json_version,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    bool const to_file = (json_data == qpdf_sj_file);
    bool const as_inline = (json_data == qpdf_sj_inline);

    // Argument validation; values outside the three known enumerators are not checked here.
    if (to_file) {
        if (p == nullptr) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: pipeline must be supplied when json_data is file");
        }
        if (data_filename.empty()) {
            throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename must be supplied "
                                   "when json_data is file");
        }
    } else if (as_inline || (json_data == qpdf_sj_none)) {
        if (p != nullptr) {
            throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should only be supplied "
                                   "when json_data is file");
        }
    }

    auto dict = this->stream_dict;
    JSON result = JSON::makeDictionary();

    if (json_data != qpdf_sj_none) {
        Pl_Discard discard;
        Pl_Buffer buf_pl{"stream data"};

        // File mode needs the bytes captured; otherwise this is only a trial run and the output
        // is thrown away.
        Pipeline* sink = &discard;
        if (to_file) {
            sink = &buf_pl;
        }

        // True once buf_pl holds valid data that is ready to be retrieved.
        bool buf_pl_ready = false;
        bool filtered = false;
        bool filter = (decode_level != qpdf_dl_none);

        // At most two attempts: first at the requested level, then (if needed) undecoded.
        for (int attempt = 1; attempt <= 2; ++attempt) {
            bool const will_retry = (attempt == 1);
            bool const piped = pipeStreamData(sink, &filtered, 0, decode_level, false, will_retry);
            if (piped && (!filter || filtered)) {
                if (to_file) {
                    buf_pl_ready = true;
                }
                break;
            }
            // Try again without decoding.
            filter = false;
            decode_level = qpdf_dl_none;
            buf_pl.getString(); // reset buf_pl
        }

        // We can use unsafeShallowCopy because we are only touching top-level keys.
        dict = this->stream_dict.unsafeShallowCopy();
        dict.removeKey("/Length");
        if (filter && filtered) {
            dict.removeKey("/Filter");
            dict.removeKey("/DecodeParms");
        }

        if (to_file) {
            result.addDictionaryMember("datafile", JSON::makeString(data_filename));
            if (!buf_pl_ready) {
                throw std::logic_error("QPDF_Stream: failed to get stream data in json file mode");
            }
            p->writeString(buf_pl.getString());
        } else if (as_inline) {
            // decode_level here reflects any fallback made in the loop above.
            result.addDictionaryMember(
                "data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));
        } else {
            throw std::logic_error("QPDF_Stream: unexpected value of json_data");
        }
    }

    result.addDictionaryMember("dict", dict.getJSON(json_version));
    return result;
}
|
|
|
|
|
2018-02-16 17:25:27 -05:00
|
|
|
void
|
2022-12-15 09:56:46 +00:00
|
|
|
QPDF_Stream::setDescription(
|
2023-05-21 13:35:09 -04:00
|
|
|
QPDF* qpdf, std::shared_ptr<QPDFValue::Description>& description, qpdf_offset_t offset)
|
2018-02-16 17:25:27 -05:00
|
|
|
{
|
2022-12-15 09:56:46 +00:00
|
|
|
this->QPDFValue::setDescription(qpdf, description, offset);
|
2018-02-16 17:25:27 -05:00
|
|
|
setDictDescription();
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDF_Stream::setDictDescription()
|
|
|
|
{
|
2022-12-31 12:13:32 +00:00
|
|
|
if (!this->stream_dict.hasObjectDescription()) {
|
2023-05-21 13:35:09 -04:00
|
|
|
this->stream_dict.setObjectDescription(qpdf, getDescription() + " -> stream dictionary");
|
2018-02-16 17:25:27 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
// Returns the handle to this stream's dictionary.
QPDFObjectHandle
QPDF_Stream::getDict() const
{
    return stream_dict;
}
|
|
|
|
|
2018-02-02 18:21:34 -05:00
|
|
|
bool
|
|
|
|
QPDF_Stream::isDataModified() const
|
|
|
|
{
|
2022-04-02 17:14:10 -04:00
|
|
|
return (!this->token_filters.empty());
|
2018-02-02 18:21:34 -05:00
|
|
|
}
|
|
|
|
|
2019-01-06 21:18:36 -05:00
|
|
|
size_t
|
|
|
|
QPDF_Stream::getLength() const
|
|
|
|
{
|
|
|
|
return this->length;
|
|
|
|
}
|
|
|
|
|
2022-04-09 14:35:56 -04:00
|
|
|
std::shared_ptr<Buffer>
|
2019-01-06 21:18:36 -05:00
|
|
|
QPDF_Stream::getStreamDataBuffer() const
|
|
|
|
{
|
|
|
|
return this->stream_data;
|
|
|
|
}
|
|
|
|
|
2022-04-09 14:35:56 -04:00
|
|
|
std::shared_ptr<QPDFObjectHandle::StreamDataProvider>
|
2019-01-06 21:18:36 -05:00
|
|
|
QPDF_Stream::getStreamDataProvider() const
|
|
|
|
{
|
|
|
|
return this->stream_provider;
|
|
|
|
}
|
|
|
|
|
2022-04-09 14:35:56 -04:00
|
|
|
std::shared_ptr<Buffer>
|
2017-08-19 09:18:14 -04:00
|
|
|
QPDF_Stream::getStreamData(qpdf_stream_decode_level_e decode_level)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
Pl_Buffer buf("stream data buffer");
|
2020-04-08 18:47:29 -04:00
|
|
|
bool filtered;
|
|
|
|
pipeStreamData(&buf, &filtered, 0, decode_level, false, false);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (!filtered) {
|
|
|
|
throw QPDFExc(
|
|
|
|
qpdf_e_unsupported,
|
|
|
|
qpdf->getFilename(),
|
|
|
|
"",
|
2022-09-27 12:40:39 +01:00
|
|
|
this->parsed_offset,
|
2022-04-02 17:14:10 -04:00
|
|
|
"getStreamData called on unfilterable stream");
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
2010-08-09 23:33:40 +00:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream getStreamData");
|
2022-02-06 11:40:24 -05:00
|
|
|
return buf.getBufferSharedPointer();
|
2010-08-09 23:33:40 +00:00
|
|
|
}
|
|
|
|
|
2022-04-09 14:35:56 -04:00
|
|
|
std::shared_ptr<Buffer>
|
2010-08-09 23:33:40 +00:00
|
|
|
QPDF_Stream::getRawStreamData()
|
|
|
|
{
|
|
|
|
Pl_Buffer buf("stream data buffer");
|
2022-04-02 17:14:10 -04:00
|
|
|
if (!pipeStreamData(&buf, nullptr, 0, qpdf_dl_none, false, false)) {
|
|
|
|
throw QPDFExc(
|
|
|
|
qpdf_e_unsupported,
|
|
|
|
qpdf->getFilename(),
|
|
|
|
"",
|
2022-09-27 12:40:39 +01:00
|
|
|
this->parsed_offset,
|
2022-04-02 17:14:10 -04:00
|
|
|
"error getting raw stream data");
|
2020-04-04 23:35:35 -04:00
|
|
|
}
|
2010-08-09 23:33:40 +00:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream getRawStreamData");
|
2022-02-06 11:40:24 -05:00
|
|
|
return buf.getBufferSharedPointer();
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2012-12-29 19:00:05 -05:00
|
|
|
bool
|
2020-12-23 06:12:49 -05:00
|
|
|
QPDF_Stream::filterable(
|
|
|
|
std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
|
|
|
|
bool& specialized_compression,
|
|
|
|
bool& lossy_compression)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
// Check filters
|
|
|
|
|
|
|
|
QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
|
|
|
|
bool filters_okay = true;
|
|
|
|
|
2020-12-23 06:12:49 -05:00
|
|
|
std::vector<std::string> filter_names;
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
if (filter_obj.isNull()) {
|
2022-02-08 09:18:08 -05:00
|
|
|
// No filters
|
2022-04-02 17:14:10 -04:00
|
|
|
} else if (filter_obj.isName()) {
|
2022-02-08 09:18:08 -05:00
|
|
|
// One filter
|
|
|
|
filter_names.push_back(filter_obj.getName());
|
2022-04-02 17:14:10 -04:00
|
|
|
} else if (filter_obj.isArray()) {
|
2022-02-08 09:18:08 -05:00
|
|
|
// Potentially multiple filters
|
|
|
|
int n = filter_obj.getArrayNItems();
|
2022-04-02 17:14:10 -04:00
|
|
|
for (int i = 0; i < n; ++i) {
|
2022-02-08 09:18:08 -05:00
|
|
|
QPDFObjectHandle item = filter_obj.getArrayItem(i);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (item.isName()) {
|
2022-02-08 09:18:08 -05:00
|
|
|
filter_names.push_back(item.getName());
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2022-02-08 09:18:08 -05:00
|
|
|
filters_okay = false;
|
|
|
|
}
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2022-02-08 09:18:08 -05:00
|
|
|
filters_okay = false;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
if (!filters_okay) {
|
2022-02-08 09:18:08 -05:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream invalid filter");
|
2022-09-27 01:10:08 +01:00
|
|
|
warn("stream filter type is not name or array");
|
2017-07-27 18:18:18 -04:00
|
|
|
return false;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2012-12-29 19:00:05 -05:00
|
|
|
bool filterable = true;
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2022-04-30 09:43:07 -04:00
|
|
|
for (auto& filter_name: filter_names) {
|
2022-04-02 17:14:10 -04:00
|
|
|
if (filter_abbreviations.count(filter_name)) {
|
2022-02-08 09:18:08 -05:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream expand filter abbreviation");
|
|
|
|
filter_name = filter_abbreviations[filter_name];
|
|
|
|
}
|
2010-09-05 15:00:44 +00:00
|
|
|
|
2020-12-23 06:12:49 -05:00
|
|
|
auto ff = filter_factories.find(filter_name);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (ff == filter_factories.end()) {
|
2020-12-23 06:12:49 -05:00
|
|
|
filterable = false;
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2020-12-23 06:12:49 -05:00
|
|
|
filters.push_back((ff->second)());
|
2017-08-19 09:18:14 -04:00
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
if (!filterable) {
|
2012-12-29 19:00:05 -05:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-05-24 16:28:17 +01:00
|
|
|
// filters now contains a list of filters to be applied in order. See which ones we can support.
|
2012-12-29 19:00:05 -05:00
|
|
|
|
|
|
|
// See if we can support any decode parameters that are specified.
|
|
|
|
|
|
|
|
QPDFObjectHandle decode_obj = this->stream_dict.getKey("/DecodeParms");
|
|
|
|
std::vector<QPDFObjectHandle> decode_parms;
|
2022-04-02 17:14:10 -04:00
|
|
|
if (decode_obj.isArray() && (decode_obj.getArrayNItems() == 0)) {
|
2019-06-09 17:19:07 -04:00
|
|
|
decode_obj = QPDFObjectHandle::newNull();
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
if (decode_obj.isArray()) {
|
|
|
|
for (int i = 0; i < decode_obj.getArrayNItems(); ++i) {
|
2012-12-29 19:00:05 -05:00
|
|
|
decode_parms.push_back(decode_obj.getArrayItem(i));
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
|
|
|
for (unsigned int i = 0; i < filter_names.size(); ++i) {
|
2012-12-29 19:00:05 -05:00
|
|
|
decode_parms.push_back(decode_obj);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-24 16:28:17 +01:00
|
|
|
// Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose
|
|
|
|
// /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
|
2022-04-02 17:14:10 -04:00
|
|
|
if ((filters.size() != 0) && (decode_parms.size() != filters.size())) {
|
2022-09-27 01:10:08 +01:00
|
|
|
warn("stream /DecodeParms length is inconsistent with filters");
|
2017-07-27 18:18:18 -04:00
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
if (!filterable) {
|
2017-07-27 18:18:18 -04:00
|
|
|
return false;
|
2012-12-29 19:00:05 -05:00
|
|
|
}
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
for (size_t i = 0; i < filters.size(); ++i) {
|
2020-12-23 06:12:49 -05:00
|
|
|
auto filter = filters.at(i);
|
|
|
|
auto decode_item = decode_parms.at(i);
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
if (filter->setDecodeParms(decode_item)) {
|
|
|
|
if (filter->isSpecializedCompression()) {
|
2020-12-23 06:12:49 -05:00
|
|
|
specialized_compression = true;
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
if (filter->isLossyCompression()) {
|
2020-12-23 06:12:49 -05:00
|
|
|
specialized_compression = true;
|
|
|
|
lossy_compression = true;
|
2012-12-29 19:00:05 -05:00
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2012-12-29 19:00:05 -05:00
|
|
|
filterable = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
return filterable;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2022-04-02 17:14:10 -04:00
|
|
|
QPDF_Stream::pipeStreamData(
|
|
|
|
Pipeline* pipeline,
|
|
|
|
bool* filterp,
|
|
|
|
int encode_flags,
|
|
|
|
qpdf_stream_decode_level_e decode_level,
|
|
|
|
bool suppress_warnings,
|
|
|
|
bool will_retry)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2020-12-23 06:12:49 -05:00
|
|
|
std::vector<std::shared_ptr<QPDFStreamFilter>> filters;
|
2017-08-19 09:18:14 -04:00
|
|
|
bool specialized_compression = false;
|
|
|
|
bool lossy_compression = false;
|
2020-04-04 23:35:35 -04:00
|
|
|
bool ignored;
|
2022-04-02 17:14:10 -04:00
|
|
|
if (filterp == nullptr) {
|
2020-04-04 23:35:35 -04:00
|
|
|
filterp = &ignored;
|
|
|
|
}
|
|
|
|
bool& filter = *filterp;
|
2022-04-02 17:14:10 -04:00
|
|
|
filter = (!((encode_flags == 0) && (decode_level == qpdf_dl_none)));
|
2020-04-04 23:35:35 -04:00
|
|
|
bool success = true;
|
2022-04-02 17:14:10 -04:00
|
|
|
if (filter) {
|
2023-05-21 13:35:09 -04:00
|
|
|
filter = filterable(filters, specialized_compression, lossy_compression);
|
2022-04-02 17:14:10 -04:00
|
|
|
if ((decode_level < qpdf_dl_all) && lossy_compression) {
|
2017-08-19 09:18:14 -04:00
|
|
|
filter = false;
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
if ((decode_level < qpdf_dl_specialized) && specialized_compression) {
|
2017-08-19 09:18:14 -04:00
|
|
|
filter = false;
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
QTC::TC(
|
|
|
|
"qpdf",
|
|
|
|
"QPDF_Stream special filters",
|
|
|
|
(!filter) ? 0
|
|
|
|
: lossy_compression ? 1
|
|
|
|
: specialized_compression ? 2
|
|
|
|
: 3);
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2022-07-26 12:37:50 +01:00
|
|
|
if (pipeline == nullptr) {
|
2022-02-08 09:18:08 -05:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipeStreamData with null pipeline");
|
2020-04-04 23:35:35 -04:00
|
|
|
// Return value is whether we can filter in this case.
|
2022-02-08 09:18:08 -05:00
|
|
|
return filter;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2023-05-24 16:28:17 +01:00
|
|
|
// Construct the pipeline in reverse order. Force pipelines we create to be deleted when this
|
|
|
|
// function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those
|
2020-12-23 06:12:49 -05:00
|
|
|
// objects.
|
2022-02-04 11:03:52 -05:00
|
|
|
std::vector<std::shared_ptr<Pipeline>> to_delete;
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2022-04-09 14:35:56 -04:00
|
|
|
std::shared_ptr<ContentNormalizer> normalizer;
|
2022-02-04 11:03:52 -05:00
|
|
|
std::shared_ptr<Pipeline> new_pipeline;
|
2022-04-02 17:14:10 -04:00
|
|
|
if (filter) {
|
|
|
|
if (encode_flags & qpdf_ef_compress) {
|
2023-05-21 13:35:09 -04:00
|
|
|
new_pipeline =
|
|
|
|
std::make_shared<Pl_Flate>("compress stream", pipeline, Pl_Flate::a_deflate);
|
2022-02-08 09:18:08 -05:00
|
|
|
to_delete.push_back(new_pipeline);
|
2022-02-04 11:03:52 -05:00
|
|
|
pipeline = new_pipeline.get();
|
2022-02-08 09:18:08 -05:00
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
if (encode_flags & qpdf_ef_normalize) {
|
2022-04-09 14:35:56 -04:00
|
|
|
normalizer = std::make_shared<ContentNormalizer>();
|
2023-05-21 13:35:09 -04:00
|
|
|
new_pipeline =
|
|
|
|
std::make_shared<Pl_QPDFTokenizer>("normalizer", normalizer.get(), pipeline);
|
2022-02-08 09:18:08 -05:00
|
|
|
to_delete.push_back(new_pipeline);
|
2022-02-04 11:03:52 -05:00
|
|
|
pipeline = new_pipeline.get();
|
2022-02-08 09:18:08 -05:00
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2023-05-21 13:35:09 -04:00
|
|
|
for (auto iter = this->token_filters.rbegin(); iter != this->token_filters.rend(); ++iter) {
|
|
|
|
new_pipeline =
|
|
|
|
std::make_shared<Pl_QPDFTokenizer>("token filter", (*iter).get(), pipeline);
|
2022-02-04 11:03:52 -05:00
|
|
|
to_delete.push_back(new_pipeline);
|
|
|
|
pipeline = new_pipeline.get();
|
2018-02-02 18:21:34 -05:00
|
|
|
}
|
|
|
|
|
2023-05-21 13:35:09 -04:00
|
|
|
for (auto f_iter = filters.rbegin(); f_iter != filters.rend(); ++f_iter) {
|
2020-12-23 06:12:49 -05:00
|
|
|
auto decode_pipeline = (*f_iter)->getDecodePipeline(pipeline);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (decode_pipeline) {
|
2020-12-23 06:12:49 -05:00
|
|
|
pipeline = decode_pipeline;
|
2017-12-24 19:18:52 -05:00
|
|
|
}
|
2023-05-20 13:34:53 +01:00
|
|
|
auto* flate = dynamic_cast<Pl_Flate*>(pipeline);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (flate != nullptr) {
|
2023-05-21 13:35:09 -04:00
|
|
|
flate->setWarnCallback([this](char const* msg, int code) { warn(msg); });
|
2021-11-02 17:54:10 -04:00
|
|
|
}
|
2022-02-08 09:18:08 -05:00
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
if (this->stream_data.get()) {
|
2022-02-08 09:18:08 -05:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipe replaced stream data");
|
2023-05-21 13:35:09 -04:00
|
|
|
pipeline->write(this->stream_data->getBuffer(), this->stream_data->getSize());
|
2022-02-08 09:18:08 -05:00
|
|
|
pipeline->finish();
|
2022-04-02 17:14:10 -04:00
|
|
|
} else if (this->stream_provider.get()) {
|
2022-02-08 09:18:08 -05:00
|
|
|
Pl_Count count("stream provider count", pipeline);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (this->stream_provider->supportsRetry()) {
|
|
|
|
if (!this->stream_provider->provideStreamData(
|
2022-07-24 14:16:37 +01:00
|
|
|
og, &count, suppress_warnings, will_retry)) {
|
2020-04-04 23:35:35 -04:00
|
|
|
filter = false;
|
|
|
|
success = false;
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2022-07-24 14:16:37 +01:00
|
|
|
this->stream_provider->provideStreamData(og, &count);
|
2020-04-04 23:35:35 -04:00
|
|
|
}
|
2022-02-08 09:18:08 -05:00
|
|
|
qpdf_offset_t actual_length = count.getCount();
|
|
|
|
qpdf_offset_t desired_length = 0;
|
2022-04-02 17:14:10 -04:00
|
|
|
if (success && this->stream_dict.hasKey("/Length")) {
|
2022-02-08 09:18:08 -05:00
|
|
|
desired_length = this->stream_dict.getKey("/Length").getIntValue();
|
2022-04-02 17:14:10 -04:00
|
|
|
if (actual_length == desired_length) {
|
2012-07-07 17:33:45 -04:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipe use stream provider");
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2012-07-07 17:33:45 -04:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
|
2023-05-24 16:28:17 +01:00
|
|
|
// This would be caused by programmer error on the part of a library user, not by
|
|
|
|
// invalid input data.
|
2017-07-27 18:18:18 -04:00
|
|
|
throw std::runtime_error(
|
2023-05-21 13:35:09 -04:00
|
|
|
"stream data provider for " + og.unparse(' ') + " provided " +
|
|
|
|
std::to_string(actual_length) + " bytes instead of expected " +
|
2022-09-21 17:49:21 +01:00
|
|
|
std::to_string(desired_length) + " bytes");
|
2012-07-07 17:33:45 -04:00
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
} else if (success) {
|
2012-07-07 17:33:45 -04:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream provider length not provided");
|
2023-05-21 13:35:09 -04:00
|
|
|
this->stream_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(actual_length));
|
2012-07-07 17:33:45 -04:00
|
|
|
}
|
2022-09-27 12:40:39 +01:00
|
|
|
} else if (this->parsed_offset == 0) {
|
2022-02-08 09:18:08 -05:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipe no stream data");
|
2022-04-02 17:14:10 -04:00
|
|
|
throw std::logic_error("pipeStreamData called for stream with no data");
|
|
|
|
} else {
|
2022-02-08 09:18:08 -05:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream pipe original stream data");
|
2022-04-02 17:14:10 -04:00
|
|
|
if (!QPDF::Pipe::pipeStreamData(
|
|
|
|
this->qpdf,
|
2022-07-23 19:44:11 +01:00
|
|
|
og,
|
2022-09-27 12:40:39 +01:00
|
|
|
this->parsed_offset,
|
2022-04-02 17:14:10 -04:00
|
|
|
this->length,
|
|
|
|
this->stream_dict,
|
|
|
|
pipeline,
|
|
|
|
suppress_warnings,
|
|
|
|
will_retry)) {
|
2017-07-27 23:42:27 -04:00
|
|
|
filter = false;
|
2020-04-04 23:35:35 -04:00
|
|
|
success = false;
|
2017-07-27 23:42:27 -04:00
|
|
|
}
|
2010-08-02 22:40:52 +00:00
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2023-05-21 13:35:09 -04:00
|
|
|
if (filter && (!suppress_warnings) && normalizer.get() && normalizer->anyBadTokens()) {
|
2022-09-27 01:10:08 +01:00
|
|
|
warn("content normalization encountered bad tokens");
|
2022-04-02 17:14:10 -04:00
|
|
|
if (normalizer->lastTokenWasBad()) {
|
2018-02-02 21:16:40 -05:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
|
2023-05-24 16:28:17 +01:00
|
|
|
warn("normalized content ended with a bad token; you may be able to resolve this by "
|
|
|
|
"coalescing content streams in combination with normalizing content. From the "
|
|
|
|
"command line, specify --coalesce-contents");
|
2018-02-02 21:16:40 -05:00
|
|
|
}
|
2023-05-24 16:28:17 +01:00
|
|
|
warn("Resulting stream data may be corrupted but is may still useful for manual "
|
|
|
|
"inspection. For more information on this warning, search for content normalization "
|
|
|
|
"in the manual.");
|
2018-02-02 21:16:40 -05:00
|
|
|
}
|
|
|
|
|
2020-04-04 23:35:35 -04:00
|
|
|
return success;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
2010-08-02 22:17:01 +00:00
|
|
|
|
|
|
|
void
|
2022-04-02 17:14:10 -04:00
|
|
|
QPDF_Stream::replaceStreamData(
|
2022-04-09 14:35:56 -04:00
|
|
|
std::shared_ptr<Buffer> data,
|
2022-04-02 17:14:10 -04:00
|
|
|
QPDFObjectHandle const& filter,
|
|
|
|
QPDFObjectHandle const& decode_parms)
|
2010-08-02 22:17:01 +00:00
|
|
|
{
|
|
|
|
this->stream_data = data;
|
2022-07-26 12:37:50 +01:00
|
|
|
this->stream_provider = nullptr;
|
2010-09-24 20:45:18 +00:00
|
|
|
replaceFilterData(filter, decode_parms, data->getSize());
|
2010-08-05 19:04:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDF_Stream::replaceStreamData(
|
2022-04-09 14:35:56 -04:00
|
|
|
std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider,
|
2010-08-05 19:04:22 +00:00
|
|
|
QPDFObjectHandle const& filter,
|
2012-07-07 17:33:45 -04:00
|
|
|
QPDFObjectHandle const& decode_parms)
|
2010-08-05 19:04:22 +00:00
|
|
|
{
|
|
|
|
this->stream_provider = provider;
|
2022-07-26 12:37:50 +01:00
|
|
|
this->stream_data = nullptr;
|
2012-07-07 17:33:45 -04:00
|
|
|
replaceFilterData(filter, decode_parms, 0);
|
2010-08-05 19:04:22 +00:00
|
|
|
}
|
|
|
|
|
2018-02-02 18:21:34 -05:00
|
|
|
void
|
2023-05-21 13:35:09 -04:00
|
|
|
QPDF_Stream::addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter)
|
2018-02-02 18:21:34 -05:00
|
|
|
{
|
|
|
|
this->token_filters.push_back(token_filter);
|
|
|
|
}
|
|
|
|
|
2010-08-05 19:04:22 +00:00
|
|
|
void
|
2022-04-02 17:14:10 -04:00
|
|
|
QPDF_Stream::replaceFilterData(
|
2023-05-21 13:35:09 -04:00
|
|
|
QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms, size_t length)
|
2010-08-05 19:04:22 +00:00
|
|
|
{
|
2022-05-17 18:35:35 -04:00
|
|
|
if (filter.isInitialized()) {
|
|
|
|
this->stream_dict.replaceKey("/Filter", filter);
|
|
|
|
}
|
|
|
|
if (decode_parms.isInitialized()) {
|
|
|
|
this->stream_dict.replaceKey("/DecodeParms", decode_parms);
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
if (length == 0) {
|
2012-07-07 17:33:45 -04:00
|
|
|
QTC::TC("qpdf", "QPDF_Stream unknown stream length");
|
|
|
|
this->stream_dict.removeKey("/Length");
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2012-07-07 17:33:45 -04:00
|
|
|
this->stream_dict.replaceKey(
|
2023-05-21 13:35:09 -04:00
|
|
|
"/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(length)));
|
2012-07-07 17:33:45 -04:00
|
|
|
}
|
2010-08-02 22:17:01 +00:00
|
|
|
}
|
2012-07-21 09:00:06 -04:00
|
|
|
|
|
|
|
void
|
2022-04-24 09:05:50 -04:00
|
|
|
QPDF_Stream::replaceDict(QPDFObjectHandle const& new_dict)
|
2012-07-21 09:00:06 -04:00
|
|
|
{
|
|
|
|
this->stream_dict = new_dict;
|
2018-02-16 17:25:27 -05:00
|
|
|
setDictDescription();
|
2012-07-21 09:00:06 -04:00
|
|
|
}
|
2017-07-27 18:18:18 -04:00
|
|
|
|
|
|
|
void
|
2022-09-27 01:10:08 +01:00
|
|
|
QPDF_Stream::warn(std::string const& message)
|
2017-07-27 18:18:18 -04:00
|
|
|
{
|
2022-09-27 12:40:39 +01:00
|
|
|
this->qpdf->warn(qpdf_e_damaged_pdf, "", this->parsed_offset, message);
|
2017-07-27 18:18:18 -04:00
|
|
|
}
|