mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-03 07:12:28 +00:00
Implement JSON v2 for Stream
Not fully exercised in this commit
This commit is contained in:
parent
3246923cf2
commit
1bc8abfdd3
2
TODO
2
TODO
@ -63,6 +63,8 @@ General things to remember:
|
|||||||
|
|
||||||
* Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt.
|
* Remember typo: search for "Typo" In QPDFJob::doJSONEncrypt.
|
||||||
|
|
||||||
|
* Test stream with invalid data
|
||||||
|
|
||||||
* Consider using camelCase in multi-word key names to be consistent
|
* Consider using camelCase in multi-word key names to be consistent
|
||||||
with job JSON and with how JSON is often represented in languages
|
with job JSON and with how JSON is often represented in languages
|
||||||
that use it more natively.
|
that use it more natively.
|
||||||
|
@ -99,6 +99,12 @@ enum qpdf_stream_decode_level_e {
|
|||||||
qpdf_dl_specialized, /* also decode other non-lossy filters */
|
qpdf_dl_specialized, /* also decode other non-lossy filters */
|
||||||
qpdf_dl_all /* also decode lossy filters */
|
qpdf_dl_all /* also decode lossy filters */
|
||||||
};
|
};
|
||||||
|
/* Selects how a stream's data is represented in JSON output. */
enum qpdf_stream_data_json_e {
    qpdf_sj_none = 0,   /* stream data is not represented */
    qpdf_sj_inline = 1, /* base64-encoded data is placed in the "data" key */
    qpdf_sj_file = 2    /* data is written to a pipeline; the supplied file
                           name is recorded in the "datafile" key */
};
|
||||||
|
|
||||||
/* R3 Encryption Parameters */
|
/* R3 Encryption Parameters */
|
||||||
|
|
||||||
|
@ -1339,8 +1339,8 @@ class QPDFObjectHandle
|
|||||||
// unambiguous. The getStreamJSON() call can be used to add
|
// unambiguous. The getStreamJSON() call can be used to add
|
||||||
// encoding of the stream's data.
|
// encoding of the stream's data.
|
||||||
// * Object types that are only valid in content streams (inline
|
// * Object types that are only valid in content streams (inline
|
||||||
// image, operator) as well as "reserved" objects are not
|
// image, operator) are serialized as "null". Attempting to
|
||||||
// representable and will be serialized as "null".
|
// serialize a "reserved" object is an error.
|
||||||
// If dereference_indirect is true and this is an indirect object,
|
// If dereference_indirect is true and this is an indirect object,
|
||||||
// show the actual contents of the object. The effect of
|
// show the actual contents of the object. The effect of
|
||||||
// dereference_indirect applies only to this object. It is not
|
// dereference_indirect applies only to this object. It is not
|
||||||
@ -1350,9 +1350,42 @@ class QPDFObjectHandle
|
|||||||
|
|
||||||
// Deprecated version uses v1 for backward compatibility.
|
// Deprecated version uses v1 for backward compatibility.
|
||||||
// ABI: remove for qpdf 12
|
// ABI: remove for qpdf 12
|
||||||
[[deprecated("Use getJSON(int version)")]]
|
[[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON
|
||||||
|
getJSON(bool dereference_indirect = false);
|
||||||
|
|
||||||
|
// This method can be called on a stream to get a more extended
|
||||||
|
// JSON representation of the stream that includes the stream's
|
||||||
|
// data. The JSON object returned is always a dictionary whose
|
||||||
|
// "dict" key is an encoding of the stream's dictionary. The
|
||||||
|
// representation of the data is determined by the json_data
|
||||||
|
// field.
|
||||||
|
//
|
||||||
|
// The json_data field may have the value qpdf_sj_none,
|
||||||
|
// qpdf_sj_inline, or qpdf_sj_file.
|
||||||
|
//
|
||||||
|
// If json_data is qpdf_sj_none, stream data is not represented.
|
||||||
|
//
|
||||||
|
// If json_data is qpdf_sj_inline or qpdf_sj_file, then stream
|
||||||
|
// data is filtered or not based on the value of decode_level,
|
||||||
|
// which has the same meaning as with pipeStreamData.
|
||||||
|
//
|
||||||
|
// If json_data is qpdf_sj_inline, the base64-encoded stream data
|
||||||
|
// is included in the "data" field of the dictionary that is
|
||||||
|
// returned.
|
||||||
|
//
|
||||||
|
// If json_data is qpdf_sj_file, then the Pipeline ("p") and
|
||||||
|
// data_filename arguments must be supplied. The value of
|
||||||
|
// data_filename is stored in the resulting json in the "datafile"
|
||||||
|
// key but is not otherwise used. The stream data itself (raw or
|
||||||
|
// filtered depending on decode level), is written to the
|
||||||
|
// pipeline via pipeStreamData().
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
JSON getJSON(bool dereference_indirect = false);
|
JSON getStreamJSON(
|
||||||
|
int json_version,
|
||||||
|
qpdf_stream_data_json_e json_data,
|
||||||
|
qpdf_stream_decode_level_e decode_level,
|
||||||
|
Pipeline* p,
|
||||||
|
std::string const& data_filename);
|
||||||
|
|
||||||
// Legacy helper methods for commonly performed operations on
|
// Legacy helper methods for commonly performed operations on
|
||||||
// pages. Newer code should use QPDFPageObjectHelper instead. The
|
// pages. Newer code should use QPDFPageObjectHelper instead. The
|
||||||
|
@ -1797,6 +1797,19 @@ QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calls assertStream() and then forwards every argument, unchanged,
// to QPDF_Stream::getStreamJSON() on the underlying object.
JSON
QPDFObjectHandle::getStreamJSON(
    int json_version,
    qpdf_stream_data_json_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    assertStream();
    // NOTE(review): the cast result is not checked here; presumably
    // assertStream() guarantees the object is a QPDF_Stream -- confirm.
    auto* stream_obj = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream_obj->getStreamJSON(
        json_version, json_data, decode_level, p, data_filename);
}
|
||||||
|
|
||||||
QPDFObjectHandle
|
QPDFObjectHandle
|
||||||
QPDFObjectHandle::wrapInArray()
|
QPDFObjectHandle::wrapInArray()
|
||||||
{
|
{
|
||||||
|
@ -2,8 +2,10 @@
|
|||||||
|
|
||||||
#include <qpdf/ContentNormalizer.hh>
|
#include <qpdf/ContentNormalizer.hh>
|
||||||
#include <qpdf/Pipeline.hh>
|
#include <qpdf/Pipeline.hh>
|
||||||
|
#include <qpdf/Pl_Base64.hh>
|
||||||
#include <qpdf/Pl_Buffer.hh>
|
#include <qpdf/Pl_Buffer.hh>
|
||||||
#include <qpdf/Pl_Count.hh>
|
#include <qpdf/Pl_Count.hh>
|
||||||
|
#include <qpdf/Pl_Discard.hh>
|
||||||
#include <qpdf/Pl_Flate.hh>
|
#include <qpdf/Pl_Flate.hh>
|
||||||
#include <qpdf/Pl_QPDFTokenizer.hh>
|
#include <qpdf/Pl_QPDFTokenizer.hh>
|
||||||
#include <qpdf/QIntC.hh>
|
#include <qpdf/QIntC.hh>
|
||||||
@ -54,6 +56,18 @@ namespace
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class StreamBlobProvider
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
StreamBlobProvider(
|
||||||
|
QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level);
|
||||||
|
void operator()(Pipeline*);
|
||||||
|
|
||||||
|
private:
|
||||||
|
QPDF_Stream* stream;
|
||||||
|
qpdf_stream_decode_level_e decode_level;
|
||||||
|
};
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
|
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
|
||||||
@ -81,6 +95,19 @@ std::map<std::string, std::function<std::shared_ptr<QPDFStreamFilter>()>>
|
|||||||
{"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
|
{"/ASCIIHexDecode", SF_ASCIIHexDecode::factory},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
StreamBlobProvider::StreamBlobProvider(
|
||||||
|
QPDF_Stream* stream, qpdf_stream_decode_level_e decode_level) :
|
||||||
|
stream(stream),
|
||||||
|
decode_level(decode_level)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
StreamBlobProvider::operator()(Pipeline* p)
|
||||||
|
{
|
||||||
|
this->stream->pipeStreamData(p, nullptr, 0, decode_level, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
QPDF_Stream::QPDF_Stream(
|
QPDF_Stream::QPDF_Stream(
|
||||||
QPDF* qpdf,
|
QPDF* qpdf,
|
||||||
int objid,
|
int objid,
|
||||||
@ -153,8 +180,95 @@ QPDF_Stream::unparse()
|
|||||||
// JSON for a stream object. Version 1 is just the stream dictionary;
// any other version goes through getStreamJSON() with no stream data
// requested.
JSON
QPDF_Stream::getJSON(int json_version)
{
    if (json_version != 1) {
        return getStreamJSON(
            json_version, qpdf_sj_none, qpdf_dl_none, nullptr, "");
    }
    return this->stream_dict.getJSON(json_version);
}
|
||||||
|
|
||||||
|
// Build the extended JSON representation of this stream. The result
// is always a dictionary with a "dict" key holding the stream
// dictionary. Depending on json_data it also carries:
//
//   qpdf_sj_none   - nothing else; the dictionary is emitted as is.
//   qpdf_sj_inline - a "data" key whose blob is produced by a
//                    StreamBlobProvider for this stream.
//   qpdf_sj_file   - a "datafile" key holding data_filename; the
//                    stream data is written to p.
//
// When data is included, /Length is dropped from the emitted
// dictionary, and /Filter and /DecodeParms are dropped as well if
// pipeStreamData() reported true.
//
// Throws std::logic_error if p is supplied when json_data is not
// qpdf_sj_file, if p or data_filename is missing when it is, or if no
// data could be captured in file mode.
JSON
QPDF_Stream::getStreamJSON(
    int json_version,
    qpdf_stream_data_json_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    // Validate the argument combination before doing any work.
    switch (json_data) {
    case qpdf_sj_none:
    case qpdf_sj_inline:
        if (p != nullptr) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: pipeline should "
                "only be supplied when json_data is file");
        }
        break;
    case qpdf_sj_file:
        if (p == nullptr) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: pipeline must "
                "be supplied when json_data is file");
        }
        if (data_filename.empty()) {
            throw std::logic_error(
                "QPDF_Stream::getStreamJSON: data_filename "
                "must be supplied when json_data is file");
        }
        break;
    }

    auto dict = this->stream_dict;
    JSON result = JSON::makeDictionary();
    if (json_data != qpdf_sj_none) {
        std::shared_ptr<Buffer> buf;
        bool filtered = false;
        bool filter = (decode_level != qpdf_dl_none);
        // At most two passes: the second happens only when filtering
        // was requested and the first pass reported false.
        // NOTE(review): the retry passes the same decode_level as the
        // first pass; only the final argument differs -- confirm that
        // this is the intended fallback.
        for (int attempt = 1; attempt <= 2; ++attempt) {
            Pl_Discard discard;
            std::shared_ptr<Pl_Buffer> buf_pl;
            Pipeline* data_pipeline = nullptr;
            if (json_data == qpdf_sj_file) {
                // We need to capture the data to write
                buf_pl = std::make_shared<Pl_Buffer>("stream data");
                data_pipeline = buf_pl.get();
            } else {
                // Inline mode only needs the return value here; the
                // blob provider below pipes the data again later.
                data_pipeline = &discard;
            }
            filtered = pipeStreamData(
                data_pipeline, nullptr, 0, decode_level, false, (attempt == 1));
            if (filter && (!filtered)) {
                // Try again
                filter = false;
            } else {
                if (buf_pl.get()) {
                    buf = buf_pl->getBufferSharedPointer();
                }
                break;
            }
        }
        // We can use unsafeShallowCopy because we are only
        // touching top-level keys.
        dict = this->stream_dict.unsafeShallowCopy();
        dict.removeKey("/Length");
        if (filtered) {
            dict.removeKey("/Filter");
            dict.removeKey("/DecodeParms");
        }
        if (json_data == qpdf_sj_file) {
            result.addDictionaryMember(
                "datafile", JSON::makeString(data_filename));
            if (!buf.get()) {
                throw std::logic_error(
                    "QPDF_Stream: failed to get stream data in json file mode");
            }
            p->write(buf->getBuffer(), buf->getSize());
        } else if (json_data == qpdf_sj_inline) {
            result.addDictionaryMember(
                "data", JSON::makeBlob(StreamBlobProvider(this, decode_level)));
        } else {
            throw std::logic_error(
                "QPDF_Stream: unexpected value of json_data");
        }
    }
    result.addDictionaryMember("dict", dict.getJSON(json_version));
    return result;
}
|
||||||
|
|
||||||
QPDFObject::object_type_e
|
QPDFObject::object_type_e
|
||||||
|
@ -61,6 +61,12 @@ class QPDF_Stream: public QPDFObject
|
|||||||
QPDFObjectHandle const& decode_parms);
|
QPDFObjectHandle const& decode_parms);
|
||||||
void
|
void
|
||||||
addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
|
addTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
|
||||||
|
JSON getStreamJSON(
|
||||||
|
int json_version,
|
||||||
|
qpdf_stream_data_json_e json_data,
|
||||||
|
qpdf_stream_decode_level_e decode_level,
|
||||||
|
Pipeline* p,
|
||||||
|
std::string const& data_filename);
|
||||||
|
|
||||||
void replaceDict(QPDFObjectHandle const& new_dict);
|
void replaceDict(QPDFObjectHandle const& new_dict);
|
||||||
|
|
||||||
|
@ -49,7 +49,9 @@
|
|||||||
"/Type": "/Pages"
|
"/Type": "/Pages"
|
||||||
},
|
},
|
||||||
"3 0 R": {
|
"3 0 R": {
|
||||||
"/Length": "4 0 R"
|
"dict": {
|
||||||
|
"/Length": "4 0 R"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"4 0 R": 44,
|
"4 0 R": 44,
|
||||||
"5 0 R": {
|
"5 0 R": {
|
||||||
|
@ -39,7 +39,9 @@
|
|||||||
"/Type": "/Pages"
|
"/Type": "/Pages"
|
||||||
},
|
},
|
||||||
"3 0 R": {
|
"3 0 R": {
|
||||||
"/Length": "4 0 R"
|
"dict": {
|
||||||
|
"/Length": "4 0 R"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"4 0 R": 44,
|
"4 0 R": 44,
|
||||||
"5 0 R": {
|
"5 0 R": {
|
||||||
|
@ -62,7 +62,9 @@
|
|||||||
"/Type": "/Page"
|
"/Type": "/Page"
|
||||||
},
|
},
|
||||||
"6 0 R": {
|
"6 0 R": {
|
||||||
"/Length": "7 0 R"
|
"dict": {
|
||||||
|
"/Length": "7 0 R"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"7 0 R": 47,
|
"7 0 R": 47,
|
||||||
"8 0 R": {
|
"8 0 R": {
|
||||||
@ -72,7 +74,9 @@
|
|||||||
"/Type": "/Font"
|
"/Type": "/Font"
|
||||||
},
|
},
|
||||||
"9 0 R": {
|
"9 0 R": {
|
||||||
"/Length": "10 0 R"
|
"dict": {
|
||||||
|
"/Length": "10 0 R"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"10 0 R": 47,
|
"10 0 R": 47,
|
||||||
"trailer": {
|
"trailer": {
|
||||||
|
@ -94,7 +94,9 @@
|
|||||||
"/Type": "/Page"
|
"/Type": "/Page"
|
||||||
},
|
},
|
||||||
"6 0 R": {
|
"6 0 R": {
|
||||||
"/Length": "7 0 R"
|
"dict": {
|
||||||
|
"/Length": "7 0 R"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"7 0 R": 47,
|
"7 0 R": 47,
|
||||||
"8 0 R": {
|
"8 0 R": {
|
||||||
@ -104,7 +106,9 @@
|
|||||||
"/Type": "/Font"
|
"/Type": "/Font"
|
||||||
},
|
},
|
||||||
"9 0 R": {
|
"9 0 R": {
|
||||||
"/Length": "10 0 R"
|
"dict": {
|
||||||
|
"/Length": "10 0 R"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"10 0 R": 47,
|
"10 0 R": 47,
|
||||||
"11 0 R": {
|
"11 0 R": {
|
||||||
|
Loading…
Reference in New Issue
Block a user