2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-03 07:12:28 +00:00

Implement JSON v2 for Stream

Not fully exercised in this commit
This commit is contained in:
Jay Berkenbilt 2022-05-07 11:12:15 -04:00
parent 3246923cf2
commit 1bc8abfdd3
10 changed files with 198 additions and 12 deletions

2
TODO
View File

@ -63,6 +63,8 @@ General things to remember:
* Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt. * Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt.
* Test stream with invalid data
* Consider using camelCase in multi-word key names to be consistent * Consider using camelCase in multi-word key names to be consistent
with job JSON and with how JSON is often represented in languages with job JSON and with how JSON is often represented in languages
that use it more natively. that use it more natively.

View File

@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e {
qpdf_dl_specialized, /* also decode other non-lossy filters */ qpdf_dl_specialized, /* also decode other non-lossy filters */
qpdf_dl_all /* also decode lossy filters */ qpdf_dl_all /* also decode lossy filters */
}; };
/* For JSON encoding: selects how a stream's data is represented when
 * the stream is written as JSON. */
enum qpdf_stream_data_json_e {
    /* stream data is not represented */
    qpdf_sj_none = 0,
    /* base64-encoded stream data is included in the "data" member */
    qpdf_sj_inline = 1,
    /* stream data is written to a caller-supplied pipeline and the
     * file name is recorded in the "datafile" member */
    qpdf_sj_file = 2
};
/* R3 Encryption Parameters */ /* R3 Encryption Parameters */

View File

@ -1339,8 +1339,8 @@ class QPDFObjectHandle
// unambiguous. The getStreamJSON() call can be used to add // unambiguous. The getStreamJSON() call can be used to add
// encoding of the stream's data. // encoding of the stream's data.
// * Object types that are only valid in content streams (inline // * Object types that are only valid in content streams (inline
// image, operator) as well as "reserved" objects are not // image, operator) are serialized as "null". Attempting to
// representable and will be serialized as "null". // serialize a "reserved" object is an error.
// If dereference_indirect is true and this is an indirect object, // If dereference_indirect is true and this is an indirect object,
// show the actual contents of the object. The effect of // show the actual contents of the object. The effect of
// dereference_indirect applies only to this object. It is not // dereference_indirect applies only to this object. It is not
@ -1350,9 +1350,42 @@ class QPDFObjectHandle
// Deprecated version uses v1 for backward compatibility. // Deprecated version uses v1 for backward compatibility.
// ABI: remove for qpdf 12 // ABI: remove for qpdf 12
[[deprecated("Use getJSON(int version)")]] [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON
getJSON(bool dereference_indirect = false);
// This method can be called on a stream to get a more extended
// JSON representation of the stream that includes the stream's
// data. The JSON object returned is always a dictionary whose
// "dict" key is an encoding of the stream's dictionary. The
// representation of the data is determined by the json_data
// parameter.
//
// The json_data parameter may have the value qpdf_sj_none,
// qpdf_sj_inline, or qpdf_sj_file.
//
// If json_data is qpdf_sj_none, stream data is not represented.
//
// If json_data is qpdf_sj_inline or qpdf_sj_file, then stream
// data is filtered or not based on the value of decode_level,
// which has the same meaning as with pipeStreamData.
//
// If json_data is qpdf_sj_inline, the base64-encoded stream data
// is included in the "data" field of the dictionary that is
// returned.
//
// If json_data is qpdf_sj_file, then the Pipeline ("p") and
// data_filename arguments must be supplied. The value of
// data_filename is stored in the resulting JSON in the "datafile"
// key but is not otherwise used. The stream data itself (raw or
// filtered, depending on decode_level) is written to the
// pipeline via pipeStreamData().
QPDF_DLL QPDF_DLL
JSON getJSON(bool dereference_indirect = false); JSON getStreamJSON(
int json_version,
qpdf_stream_data_json_e json_data,
qpdf_stream_decode_level_e decode_level,
Pipeline* p,
std::string const& data_filename);
// Legacy helper methods for commonly performed operations on // Legacy helper methods for commonly performed operations on
// pages. Newer code should use QPDFPageObjectHelper instead. The // pages. Newer code should use QPDFPageObjectHelper instead. The

View File

@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect)
} }
} }
// Produce the extended JSON representation of a stream by forwarding
// every argument, unchanged, to QPDF_Stream::getStreamJSON on the
// underlying object.
JSON
QPDFObjectHandle::getStreamJSON(
    int json_version,
    qpdf_stream_data_json_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    // assertStream() runs first; presumably it rejects handles that do
    // not refer to a stream, which is what makes the unchecked use of
    // the dynamic_cast result below acceptable.
    assertStream();
    auto* stream_impl = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream_impl->getStreamJSON(
        json_version, json_data, decode_level, p, data_filename);
}
QPDFObjectHandle QPDFObjectHandle
QPDFObjectHandle::wrapInArray() QPDFObjectHandle::wrapInArray()
{ {

View File

@ -2,8 +2,10 @@
#include <qpdf/ContentNormalizer.hh> #include <qpdf/ContentNormalizer.hh>
#include <qpdf/Pipeline.hh> #include <qpdf/Pipeline.hh>
#include <qpdf/Pl_Base64.hh>
#include <qpdf/Pl_Buffer.hh> #include <qpdf/Pl_Buffer.hh>
#include <qpdf/Pl_Count.hh> #include <qpdf/Pl_Count.hh>
#include <qpdf/Pl_Discard.hh>
#include <qpdf/Pl_Flate.hh> #include <qpdf/Pl_Flate.hh>
#include <qpdf/Pl_QPDFTokenizer.hh> #include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QIntC.hh> #include <qpdf/QIntC.hh>
@ -54,6 +56,18 @@ namespace
return nullptr; return nullptr;
} }
}; };
// Function object that pipes a stream's data into a pipeline. It
// records which stream to read and the decode level to request, and
// does the actual work each time it is invoked.
class StreamBlobProvider
{
  public:
    // Remember the stream to read from and the decode level to use.
    StreamBlobProvider(
        QPDF_Stream* source_stream, qpdf_stream_decode_level_e level);

    // Send the stream's data to the given pipeline.
    void operator()(Pipeline* destination);

  private:
    // Raw pointer; this class declares no destructor and never frees it.
    QPDF_Stream* stream;
    // Decode level passed to pipeStreamData on every invocation.
    qpdf_stream_decode_level_e decode_level;
};
} // namespace } // namespace
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
@ -81,6 +95,19 @@ std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>>
{"/ASCIIHexDecode", SF_ASCIIHexDecode::factory}, {"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
}; };
StreamBlobProvider::StreamBlobProvider(
QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) :
stream(stream),
decode_level(decode_level)
{
}
// Pipe the stored stream's data into p at the stored decode level.
// The value returned by pipeStreamData is intentionally not inspected.
void
StreamBlobProvider::operator()(Pipeline* p)
{
    QPDF_Stream* const source = this->stream;
    source->pipeStreamData(
        p, nullptr, 0, this->decode_level, false, false);
}
QPDF_Stream::QPDF_Stream( QPDF_Stream::QPDF_Stream(
QPDF* qpdf, QPDF* qpdf,
int objid, int objid,
@ -153,8 +180,95 @@ QPDF_Stream::unparse()
JSON JSON
QPDF_Stream::getJSON(int json_version) QPDF_Stream::getJSON(int json_version)
{ {
// QXXXQ if (json_version == 1) {
return this->stream_dict.getJSON(json_version); return this->stream_dict.getJSON(json_version);
}
return getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, "");
}
// Build the JSON representation of this stream.
//
// The result is always a JSON dictionary containing a "dict" member
// that encodes the stream dictionary. What else it contains depends
// on json_data:
//
// * qpdf_sj_none: nothing else; the stream dictionary is emitted
//   unchanged.
// * qpdf_sj_inline: a "data" member holding a blob that is produced
//   by a StreamBlobProvider for this stream.
// * qpdf_sj_file: a "datafile" member holding data_filename; the
//   stream data itself is written to p.
//
// Whenever data is represented, "/Length" is omitted from the emitted
// dictionary. "/Filter" and "/DecodeParms" are omitted as well, but
// only when decoding was requested and the data was actually decoded.
//
// Throws std::logic_error if p is supplied when json_data is not
// qpdf_sj_file, or if p or data_filename is missing when it is.
JSON
QPDF_Stream::getStreamJSON(
    int json_version,
    qpdf_stream_data_json_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    switch (json_data) {
    case qpdf_sj_none:
    case qpdf_sj_inline:
        if (p != nullptr) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: pipeline should only be "
                "supplied when json_data is file");
        }
        break;
    case qpdf_sj_file:
        if (p == nullptr) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: pipeline must be supplied "
                "when json_data is file");
        }
        if (data_filename.empty()) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: data_filename "
                "must be supplied when json_data is file");
        }
        break;
    }

    auto dict = this->stream_dict;
    JSON result = JSON::makeDictionary();
    if (json_data != qpdf_sj_none) {
        std::shared_ptr<Buffer> buf;
        bool filtered = false;
        // True while we still intend to emit decoded data.
        bool filter = (decode_level != qpdf_dl_none);
        for (int attempt = 1; attempt <= 2; ++attempt) {
            Pl_Discard discard;
            std::shared_ptr<Pl_Buffer> buf_pl;
            // In inline mode this pass only determines whether the
            // data can be decoded, so the bytes are thrown away.
            Pipeline* data_pipeline = &discard;
            if (json_data == qpdf_sj_file) {
                // We need to capture the data to write. A fresh
                // buffer per attempt keeps partial output from a
                // failed attempt out of the result.
                buf_pl = std::make_shared<Pl_Buffer>("stream data");
                data_pipeline = buf_pl.get();
            }
            // NOTE(review): the return value is treated as "the data
            // was decoded". pipeStreamData's declaration is not
            // visible here; confirm that this is what it reports
            // (rather than overall success, with the decoded status
            // available through the second argument, passed as
            // nullptr here).
            filtered = pipeStreamData(
                data_pipeline, nullptr, 0, decode_level, false, (attempt == 1));
            if (filter && (!filtered)) {
                // Try again, this time without decoding. Lowering
                // decode_level makes the retry actually differ from
                // the first attempt and keeps the inline blob provider
                // below consistent with the dictionary we emit.
                filter = false;
                decode_level = qpdf_dl_none;
            } else {
                if (buf_pl) {
                    buf = buf_pl->getBufferSharedPointer();
                }
                break;
            }
        }
        // We can use unsafeShallowCopy because we are only
        // touching top-level keys.
        dict = this->stream_dict.unsafeShallowCopy();
        dict.removeKey("/Length");
        // Only describe the data as unfiltered when decoding was
        // requested and was not abandoned by the retry above.
        if (filter && filtered) {
            dict.removeKey("/Filter");
            dict.removeKey("/DecodeParms");
        }
        if (json_data == qpdf_sj_file) {
            result.addDictionaryMember(
                "datafile", JSON::makeString(data_filename));
            if (!buf) {
                throw std::logic_error(
                    "QPDF_Stream: failed to get stream data in json file mode");
            }
            p->write(buf->getBuffer(), buf->getSize());
        } else if (json_data == qpdf_sj_inline) {
            result.addDictionaryMember(
                "data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));
        } else {
            throw std::logic_error(
                "QPDF_Stream: unexpected value of json_data");
        }
    }
    result.addDictionaryMember("dict", dict.getJSON(json_version));
    return result;
}
QPDFObject::object_type_e QPDFObject::object_type_e

View File

@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject
QPDFObjectHandle const& decode_parms); QPDFObjectHandle const& decode_parms);
void void
addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
// Return a JSON dictionary describing this stream. The result always
// has a "dict" member encoding the stream dictionary. When json_data
// is qpdf_sj_inline, a "data" member is added; when it is
// qpdf_sj_file, a "datafile" member holding data_filename is added
// and the stream data is written to p. Throws std::logic_error if p
// is non-null for a mode other than qpdf_sj_file, or if p is null or
// data_filename is empty in qpdf_sj_file mode.
JSON getStreamJSON(
int json_version,
qpdf_stream_data_json_e json_data,
qpdf_stream_decode_level_e decode_level,
Pipeline* p,
std::string const& data_filename);
void replaceDict(QPDFObjectHandle const& new_dict); void replaceDict(QPDFObjectHandle const& new_dict);

View File

@ -49,7 +49,9 @@
"/Type": "/Pages" "/Type": "/Pages"
}, },
"3 0 R": { "3 0 R": {
"/Length": "4 0 R" "dict": {
"/Length": "4 0 R"
}
}, },
"4 0 R": 44, "4 0 R": 44,
"5 0 R": { "5 0 R": {

View File

@ -39,7 +39,9 @@
"/Type": "/Pages" "/Type": "/Pages"
}, },
"3 0 R": { "3 0 R": {
"/Length": "4 0 R" "dict": {
"/Length": "4 0 R"
}
}, },
"4 0 R": 44, "4 0 R": 44,
"5 0 R": { "5 0 R": {

View File

@ -62,7 +62,9 @@
"/Type": "/Page" "/Type": "/Page"
}, },
"6 0 R": { "6 0 R": {
"/Length": "7 0 R" "dict": {
"/Length": "7 0 R"
}
}, },
"7 0 R": 47, "7 0 R": 47,
"8 0 R": { "8 0 R": {
@ -72,7 +74,9 @@
"/Type": "/Font" "/Type": "/Font"
}, },
"9 0 R": { "9 0 R": {
"/Length": "10 0 R" "dict": {
"/Length": "10 0 R"
}
}, },
"10 0 R": 47, "10 0 R": 47,
"trailer": { "trailer": {

View File

@ -94,7 +94,9 @@
"/Type": "/Page" "/Type": "/Page"
}, },
"6 0 R": { "6 0 R": {
"/Length": "7 0 R" "dict": {
"/Length": "7 0 R"
}
}, },
"7 0 R": 47, "7 0 R": 47,
"8 0 R": { "8 0 R": {
@ -104,7 +106,9 @@
"/Type": "/Font" "/Type": "/Font"
}, },
"9 0 R": { "9 0 R": {
"/Length": "10 0 R" "dict": {
"/Length": "10 0 R"
}
}, },
"10 0 R": 47, "10 0 R": 47,
"11 0 R": { "11 0 R": {