2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-02-07 14:18:24 +00:00

Implement JSON v2 for Stream

Not fully exercised in this commit
This commit is contained in:
Jay Berkenbilt 2022-05-07 11:12:15 -04:00
parent 3246923cf2
commit 1bc8abfdd3
10 changed files with 198 additions and 12 deletions

2
TODO
View File

@ -63,6 +63,8 @@ General things to remember:
* Remember typo: search for "Typo" in QPDFJob::doJSONEncrypt.
* Test stream with invalid data
* Consider using camelCase in multi-word key names to be consistent
with job JSON and with how JSON is often represented in languages
that use it more natively.

View File

@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e {
qpdf_dl_specialized, /* also decode other non-lossy filters */
qpdf_dl_all /* also decode lossy filters */
};
/* For JSON encoding: selects how a stream's data is represented when
 * the stream is serialized to JSON.
 */
enum qpdf_stream_data_json_e {
    qpdf_sj_none = 0,   /* stream data is not represented */
    qpdf_sj_inline = 1, /* data is included in the JSON itself */
    qpdf_sj_file = 2    /* data is written out separately; JSON names the file */
};
/* R3 Encryption Parameters */

View File

@ -1339,8 +1339,8 @@ class QPDFObjectHandle
// unambiguous. The getStreamJSON() call can be used to add
// encoding of the stream's data.
// * Object types that are only valid in content streams (inline
// image, operator) are serialized as "null". Attempting to
// serialize a "reserved" object is an error.
// If dereference_indirect is true and this is an indirect object,
// show the actual contents of the object. The effect of
// dereference_indirect applies only to this object. It is not
@ -1350,9 +1350,42 @@ class QPDFObjectHandle
// Deprecated version uses v1 for backward compatibility. Callers
// should switch to the overload that takes an explicit JSON version.
// ABI: remove for qpdf 12
[[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON
getJSON(bool dereference_indirect = false);
// This method can be called on a stream to get a more extended
// JSON representation of the stream that includes the stream's
// data. The JSON object returned is always a dictionary whose
// "dict" key is an encoding of the stream's dictionary. The
// representation of the data is determined by the json_data
// argument.
//
// The json_data argument may have the value qpdf_sj_none,
// qpdf_sj_inline, or qpdf_sj_file.
//
// If json_data is qpdf_sj_none, stream data is not represented.
//
// If json_data is qpdf_sj_inline or qpdf_sj_file, then stream
// data is filtered or not based on the value of decode_level,
// which has the same meaning as with pipeStreamData.
//
// If json_data is qpdf_sj_inline, the base64-encoded stream data
// is included in the "data" field of the dictionary that is
// returned.
//
// If json_data is qpdf_sj_file, then the Pipeline ("p") and
// data_filename arguments must be supplied. The value of
// data_filename is stored in the resulting JSON in the "datafile"
// key but is not otherwise used. The stream data itself (raw or
// filtered depending on decode level) is written to the
// pipeline via pipeStreamData().
QPDF_DLL
JSON getStreamJSON(
    int json_version,
    qpdf_stream_data_json_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename);
// Legacy helper methods for commonly performed operations on
// pages. Newer code should use QPDFPageObjectHelper instead. The

View File

@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect)
}
}
// Produce the extended JSON representation of a stream by forwarding
// every argument unchanged to the underlying QPDF_Stream object.
// assertStream() runs first; presumably it rejects handles that do
// not refer to a stream, which is what makes the unchecked downcast
// below safe -- verify against assertStream().
JSON
QPDFObjectHandle::getStreamJSON(
    int json_version,
    qpdf_stream_data_json_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    assertStream();
    auto* as_stream = dynamic_cast<QPDF_Stream*>(obj.get());
    return as_stream->getStreamJSON(
        json_version, json_data, decode_level, p, data_filename);
}
QPDFObjectHandle
QPDFObjectHandle::wrapInArray()
{

View File

@ -2,8 +2,10 @@
#include <qpdf/ContentNormalizer.hh>
#include <qpdf/Pipeline.hh>
#include <qpdf/Pl_Base64.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/Pl_Count.hh>
#include <qpdf/Pl_Discard.hh>
#include <qpdf/Pl_Flate.hh>
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QIntC.hh>
@ -54,6 +56,18 @@ namespace
return nullptr;
}
};
// Function object that pipes a stream's data into a caller-supplied
// pipeline each time it is invoked. It is handed to JSON::makeBlob()
// when stream data is embedded inline in JSON output.
//
// The provider keeps only a raw, non-owning pointer to the stream.
// NOTE(review): the stream presumably has to outlive every invocation
// of the provider -- confirm against how JSON::makeBlob uses it.
class StreamBlobProvider
{
  public:
    // Record the stream to read from and the decode level to request.
    StreamBlobProvider(
        QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level);

    // Pipe the stream's data into p.
    void operator()(Pipeline* p);

  private:
    QPDF_Stream* stream;
    qpdf_stream_decode_level_e decode_level;
};
} // namespace
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
@ -81,6 +95,19 @@ std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>>
{"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
};
StreamBlobProvider::StreamBlobProvider(
QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) :
stream(stream),
decode_level(decode_level)
{
}
// Pipe the remembered stream's data into p using the decode level
// captured at construction. The result of pipeStreamData() is
// deliberately not inspected here.
void
StreamBlobProvider::operator()(Pipeline* p)
{
    QPDF_Stream& source = *this->stream;
    // Arguments after p: null second argument, 0 for the third, the
    // stored decode level, then two false flags. NOTE(review):
    // presumably "no status out-param, no encode flags, do not
    // suppress warnings, will not retry" -- confirm against
    // QPDF_Stream::pipeStreamData.
    source.pipeStreamData(p, nullptr, 0, this->decode_level, false, false);
}
QPDF_Stream::QPDF_Stream(
QPDF* qpdf,
int objid,
@ -153,8 +180,95 @@ QPDF_Stream::unparse()
// Return the JSON representation of this stream.
//
// For JSON version 1 the result is just the JSON encoding of the
// stream dictionary. For any other version the result comes from
// getStreamJSON() with qpdf_sj_none, i.e. a wrapper dictionary whose
// "dict" member holds the stream dictionary and which carries no
// stream data.
JSON
QPDF_Stream::getJSON(int json_version)
{
    if (json_version == 1) {
        return this->stream_dict.getJSON(json_version);
    }
    return getStreamJSON(json_version, qpdf_sj_none, qpdf_dl_none, nullptr, "");
}
// Build the extended JSON representation of this stream.
//
// The result is always a JSON dictionary with a "dict" member holding
// the encoding of the stream dictionary. Depending on json_data it
// may also carry the stream data:
//
//   qpdf_sj_none   - no data member is added; p must be null.
//   qpdf_sj_inline - a "data" member is added as a blob that pipes
//                    the stream data on demand; p must be null.
//   qpdf_sj_file   - a "datafile" member holding data_filename is
//                    added and the stream data is written to p; both
//                    p and data_filename must be supplied.
//
// When data is included, "/Length" is dropped from the emitted
// dictionary, and "/Filter" and "/DecodeParms" are dropped as well if
// the data was actually decoded.
//
// Throws std::logic_error if p or data_filename is inconsistent with
// json_data, if json_data has an unexpected value, or if no data
// could be captured in file mode.
JSON
QPDF_Stream::getStreamJSON(
    int json_version,
    qpdf_stream_data_json_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    switch (json_data) {
    case qpdf_sj_none:
    case qpdf_sj_inline:
        if (p != nullptr) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: pipeline should "
                "only be supplied when json_data is file");
        }
        break;
    case qpdf_sj_file:
        if (p == nullptr) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: pipeline must "
                "be supplied when json_data is file");
        }
        if (data_filename.empty()) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: data_filename "
                "must be supplied when json_data is file");
        }
        break;
    }

    auto dict = this->stream_dict;
    JSON result = JSON::makeDictionary();
    if (json_data != qpdf_sj_none) {
        std::shared_ptr<Buffer> buf;
        bool filtered = false;
        bool filter = (decode_level != qpdf_dl_none);
        // Up to two passes: first with the requested decode level,
        // then, if that did not work out, with no decoding at all.
        for (int attempt = 1; attempt <= 2; ++attempt) {
            Pl_Discard discard;
            std::shared_ptr<Pl_Buffer> buf_pl;
            Pipeline* data_pipeline = nullptr;
            if (json_data == qpdf_sj_file) {
                // We need to capture the data to write
                buf_pl = std::make_shared<Pl_Buffer>("stream data");
                data_pipeline = buf_pl.get();
            } else {
                // Inline mode only needs to learn whether decoding
                // works; the blob provider pipes the data later.
                data_pipeline = &discard;
            }
            // NOTE(review): assumes pipeStreamData returns whether
            // piping succeeded and reports through its second
            // argument whether the data was actually filtered --
            // confirm against QPDF_Stream::pipeStreamData.
            bool succeeded = pipeStreamData(
                data_pipeline,
                &filtered,
                0,
                decode_level,
                false,
                (attempt == 1));
            if ((!succeeded) || (filter && (!filtered))) {
                // Try again, this time asking for the raw data so the
                // second pass differs from the first.
                filter = false;
                decode_level = qpdf_dl_none;
            } else {
                if (buf_pl.get()) {
                    buf = buf_pl->getBufferSharedPointer();
                }
                break;
            }
        }
        // We can use unsafeShallowCopy because we are only
        // touching top-level keys.
        dict = this->stream_dict.unsafeShallowCopy();
        dict.removeKey("/Length");
        if (filter && filtered) {
            // The emitted data is decoded, so the keys describing the
            // encoding no longer apply.
            dict.removeKey("/Filter");
            dict.removeKey("/DecodeParms");
        }
        if (json_data == qpdf_sj_file) {
            result.addDictionaryMember(
                "datafile", JSON::makeString(data_filename));
            if (!buf.get()) {
                throw std::logic_error(
                    "QPDF_Stream: failed to get stream data in json file mode");
            }
            p->write(buf->getBuffer(), buf->getSize());
        } else if (json_data == qpdf_sj_inline) {
            // Use the (possibly downgraded) decode level so the blob
            // matches the dictionary emitted below.
            result.addDictionaryMember(
                "data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));
        } else {
            throw std::logic_error(
                "QPDF_Stream: unexpected value of json_data");
        }
    }
    result.addDictionaryMember("dict", dict.getJSON(json_version));
    return result;
}
QPDFObject::object_type_e

View File

@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject
QPDFObjectHandle const& decode_parms);
void
addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
// Build the extended JSON representation of this stream: a JSON
// dictionary with a "dict" member holding the stream dictionary and,
// depending on json_data, either a "data" member (qpdf_sj_inline) or
// a "datafile" member set to data_filename (qpdf_sj_file, in which
// case the stream data is written to p). Throws std::logic_error if
// p or data_filename is inconsistent with json_data.
JSON getStreamJSON(
int json_version,
qpdf_stream_data_json_e json_data,
qpdf_stream_decode_level_e decode_level,
Pipeline* p,
std::string const& data_filename);
void replaceDict(QPDFObjectHandle const& new_dict);

View File

@ -49,7 +49,9 @@
"/Type": "/Pages"
},
"3 0 R": {
"/Length": "4 0 R"
"dict": {
"/Length": "4 0 R"
}
},
"4 0 R": 44,
"5 0 R": {

View File

@ -39,7 +39,9 @@
"/Type": "/Pages"
},
"3 0 R": {
"/Length": "4 0 R"
"dict": {
"/Length": "4 0 R"
}
},
"4 0 R": 44,
"5 0 R": {

View File

@ -62,7 +62,9 @@
"/Type": "/Page"
},
"6 0 R": {
"/Length": "7 0 R"
"dict": {
"/Length": "7 0 R"
}
},
"7 0 R": 47,
"8 0 R": {
@ -72,7 +74,9 @@
"/Type": "/Font"
},
"9 0 R": {
"/Length": "10 0 R"
"dict": {
"/Length": "10 0 R"
}
},
"10 0 R": 47,
"trailer": {

View File

@ -94,7 +94,9 @@
"/Type": "/Page"
},
"6 0 R": {
"/Length": "7 0 R"
"dict": {
"/Length": "7 0 R"
}
},
"7 0 R": 47,
"8 0 R": {
@ -104,7 +106,9 @@
"/Type": "/Font"
},
"9 0 R": {
"/Length": "10 0 R"
"dict": {
"/Length": "10 0 R"
}
},
"10 0 R": 47,
"11 0 R": {