2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-02 10:20:52 +00:00
qpdf/libqpdf/QPDF_json.cc
m-holger 542cb91b7d Refactor the creation of unresolved objects
Create unresolved objects only for objects in the xref table (except during
parsing of the xref table). Do not add indirect nulls into the object
cache as the result of a cache miss during a call to getObject except
during parsing or creation/updating from JSON. To support this behaviour,
add new private methods getObjectForParser and getObjectForJSON.

As a result of this change, dangling references are treated as direct nulls
rather than indirect nulls.
2024-05-04 12:29:03 +01:00

935 lines
30 KiB
C++

#include <qpdf/QPDF.hh>
#include <qpdf/FileInputSource.hh>
#include <qpdf/JSON_writer.hh>
#include <qpdf/Pl_Base64.hh>
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QPDFObject_private.hh>
#include <qpdf/QPDFValue.hh>
#include <qpdf/QPDF_Null.hh>
#include <qpdf/QPDF_Stream.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <algorithm>
#include <cstring>
#include <memory>
// This chart shows an example of the state transitions that would occur in parsing a minimal file.
// |
// { | -> st_top
// "qpdf": [ | -> st_qpdf
// { | -> st_qpdf_meta
// ... | ...
// }, | ...
// { | -> st_objects
// "obj:1 0 R": { | -> st_object_top
// "value": { | -> st_object
// "/Pages": "2 0 R", | ...
// "/Type": "/Catalog" | ...
// } | <- st_object_top
// }, | <- st_objects
// "obj:2 0 R": { | -> st_object_top
// "value": 12 | -> st_object
// } | <- st_object_top
// }, | <- st_objects
// "obj:4 0 R": { | -> st_object_top
// "stream": { | -> st_stream
// "data": "cG90YXRv", | ...
// "dict": { | -> st_object
// "/K": true | ...
// } | <- st_stream
// } | <- st_object_top
// }, | <- st_objects
// "trailer": { | -> st_trailer
// "value": { | -> st_object
// "/Root": "1 0 R", | ...
// "/Size": 7 | ...
// } | <- st_trailer
// } | <- st_objects
// } | <- st_qpdf
// ] | <- st_top
// } |
// Text of a minimal PDF: a header, a one-entry xref table, a trailer with /Size 1, and the
// startxref/%%EOF footer. createFromJSON() feeds this to processMemoryFile() before importing
// the JSON, so the QPDF object starts from this empty document.
static char const* JSON_PDF = (
// force line break
"%PDF-1.3\n"
"xref\n"
"0 1\n"
"0000000000 65535 f \n"
"trailer << /Size 1 >>\n"
"startxref\n"
"9\n"
"%%EOF\n");
// Validator methods -- these are much more performant than std::regex.
// Returns true if v has the exact form "<digits><spaces><digits><spaces>R" (at least one space
// in each gap, nothing after the R) and the object number is greater than zero. On a syntactic
// match, obj and gen receive the two numbers; otherwise they are left untouched.
static bool
is_indirect_object(std::string const& v, int& obj, int& gen)
{
    char const* cursor = v.c_str();

    // Consume a non-empty run of digits into `digits`, then a non-empty run of spaces.
    auto take_number = [&cursor](std::string& digits) -> bool {
        if (!QUtil::is_digit(*cursor)) {
            return false;
        }
        for (; QUtil::is_digit(*cursor); ++cursor) {
            digits.append(1, *cursor);
        }
        if (*cursor != ' ') {
            return false;
        }
        for (; *cursor == ' '; ++cursor) {
        }
        return true;
    };

    std::string obj_digits;
    std::string gen_digits;
    if (!take_number(obj_digits) || !take_number(gen_digits)) {
        return false;
    }
    // The reference must end with exactly "R"; cursor[1] is only read when cursor[0] is 'R',
    // so the read never goes past the terminating NUL.
    if (cursor[0] != 'R' || cursor[1] != '\0') {
        return false;
    }
    obj = QUtil::string_to_int(obj_digits.c_str());
    gen = QUtil::string_to_int(gen_digits.c_str());
    return obj > 0;
}
// Returns true if v is "obj:" followed by a valid indirect object reference (as accepted by
// is_indirect_object), in which case obj and gen receive the parsed numbers.
static bool
is_obj_key(std::string const& v, int& obj, int& gen)
{
    // compare() clamps the range to the string's length, so short strings simply mismatch.
    bool const has_prefix = (v.compare(0, 4, "obj:") == 0);
    if (!has_prefix) {
        return false;
    }
    return is_indirect_object(v.substr(4), obj, gen);
}
// Returns true if v starts with "u:"; in that case str is set to everything after the prefix.
// When the prefix is absent, str is not modified.
static bool
is_unicode_string(std::string const& v, std::string& str)
{
    bool const tagged = (v.compare(0, 2, "u:") == 0);
    if (tagged) {
        str = v.substr(2);
    }
    return tagged;
}
// Returns true if v is "b:" followed by an even number of hexadecimal digits (possibly none).
// Whenever the "b:" prefix is present, str is set to the remainder of v, even if that remainder
// then fails validation.
static bool
is_binary_string(std::string const& v, std::string& str)
{
    if (v.compare(0, 2, "b:") != 0) {
        return false;
    }
    str = v.substr(2);
    bool const all_hex =
        std::all_of(str.begin(), str.end(), [](char c) { return QUtil::is_hex_digit(c); });
    if (!all_hex) {
        return false;
    }
    return (str.size() % 2) == 0;
}
// Returns true if v is at least two characters long and begins with '/'.
static bool
is_name(std::string const& v)
{
    if (v.size() < 2) {
        return false;
    }
    return v.front() == '/';
}
// Returns true if v starts with "n:/" and has at least one character after that prefix.
static bool
is_pdf_name(std::string const& v)
{
    if (v.size() <= 3) {
        return false;
    }
    return v.compare(0, 3, "n:/") == 0;
}
bool
QPDF::test_json_validators()
{
bool passed = true;
auto check_fn = [&passed](char const* msg, bool expr) {
if (!expr) {
passed = false;
std::cerr << msg << std::endl;
}
};
#define check(expr) check_fn(#expr, expr)
int obj = 0;
int gen = 0;
check(!is_indirect_object("", obj, gen));
check(!is_indirect_object("12", obj, gen));
check(!is_indirect_object("x12 0 R", obj, gen));
check(!is_indirect_object("12 0 Rx", obj, gen));
check(!is_indirect_object("12 0R", obj, gen));
check(is_indirect_object("52 1 R", obj, gen));
check(obj == 52);
check(gen == 1);
check(is_indirect_object("53 20 R", obj, gen));
check(obj == 53);
check(gen == 20);
check(!is_obj_key("", obj, gen));
check(!is_obj_key("obj:x", obj, gen));
check(!is_obj_key("obj:x", obj, gen));
check(is_obj_key("obj:12 13 R", obj, gen));
check(obj == 12);
check(gen == 13);
std::string str;
check(!is_unicode_string("", str));
check(!is_unicode_string("xyz", str));
check(!is_unicode_string("x:", str));
check(is_unicode_string("u:potato", str));
check(str == "potato");
check(is_unicode_string("u:", str));
check(str == "");
check(!is_binary_string("", str));
check(!is_binary_string("x:", str));
check(!is_binary_string("b:1", str));
check(!is_binary_string("b:123", str));
check(!is_binary_string("b:gh", str));
check(is_binary_string("b:", str));
check(is_binary_string("b:12", str));
check(is_binary_string("b:123aBC", str));
check(!is_name(""));
check(!is_name("/"));
check(!is_name("xyz"));
check(is_name("/Potato"));
check(is_name("/Potato Salad"));
return passed;
#undef check_arg
}
static std::function<void(Pipeline*)>
provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
{
return [is, start, end](Pipeline* p) {
Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);
p = &decode;
size_t bytes = QIntC::to_size(end - start);
char buf[8192];
is->seek(start, SEEK_SET);
size_t len = 0;
while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {
p->write(buf, len);
bytes -= len;
if (bytes == 0) {
break;
}
}
decode.finish();
};
}
// JSON::Reactor implementation that builds or updates the objects of a QPDF from qpdf JSON.
// It keeps a stack of parser states (one frame per open JSON container) plus a set of "saw_*"
// flags that are checked when containers close to report missing or conflicting keys.
class QPDF::JSONReactor: public JSON::Reactor
{
  public:
    // pdf: the QPDF being populated. is: the JSON input (its name is used in descriptions and
    // messages). must_be_complete: when true, a PDF version and a trailer are required.
    JSONReactor(QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
        pdf(pdf),
        is(is),
        must_be_complete(must_be_complete),
        descr(std::make_shared<QPDFValue::Description>(
            QPDFValue::JSON_Descr(std::make_shared<std::string>(is->getName()), "")))
    {
    }
    ~JSONReactor() override = default;
    void dictionaryStart() override;
    void arrayStart() override;
    void containerEnd(JSON const& value) override;
    void topLevelScalar() override;
    bool dictionaryItem(std::string const& key, JSON const& value) override;
    bool arrayItem(JSON const& value) override;

    // True once error() has been called at least once.
    bool anyErrors() const;

  private:
    // Parser states; see the state transition chart near the top of this file.
    enum state_e {
        st_top,
        st_qpdf,
        st_qpdf_meta,
        st_objects,
        st_trailer,
        st_object_top,
        st_stream,
        st_object,
        st_ignore,
    };

    // One entry per open JSON container: the state it was opened in and, optionally, the PDF
    // object being populated from it.
    struct StackFrame
    {
        StackFrame(state_e state) :
            state(state)
        {
        }
        StackFrame(state_e state, QPDFObjectHandle&& object) :
            state(state),
            // A named rvalue reference is an lvalue; move explicitly so the handle is not copied.
            object(std::move(object))
        {
        }
        state_e state;
        QPDFObjectHandle object;
    };

    void containerStart();
    bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e);
    void setObjectDescription(QPDFObjectHandle& oh, JSON const& value);
    QPDFObjectHandle makeObject(JSON const& value);
    void error(qpdf_offset_t offset, std::string const& message);
    void replaceObject(QPDFObjectHandle&& replacement, JSON const& value);

    QPDF& pdf;
    std::shared_ptr<InputSource> is;
    bool must_be_complete{true};
    // Description shared by objects created for the current top-level key (cur_object).
    std::shared_ptr<QPDFValue::Description> descr;
    bool errors{false};

    // Document-level keys seen so far.
    bool saw_qpdf{false};
    bool saw_qpdf_meta{false};
    bool saw_objects{false};
    bool saw_json_version{false};
    bool saw_pdf_version{false};
    bool saw_trailer{false};

    // Per-object bookkeeping; reset when control returns to st_objects.
    std::string cur_object;
    bool saw_value{false};
    bool saw_stream{false};
    bool saw_dict{false};
    bool saw_data{false};
    bool saw_datafile{false};
    bool this_stream_needs_data{false};

    std::vector<StackFrame> stack;
    // Object and state handed to the next container that is opened.
    QPDFObjectHandle next_obj;
    state_e next_state{st_top};
};
// Records that an error occurred and issues a qpdf_e_json warning on the QPDF. The warning's
// object field is the current object key, suffixed with " from <input name>" when the JSON input
// name differs from the PDF's file name.
void
QPDF::JSONReactor::error(qpdf_offset_t offset, std::string const& msg)
{
    errors = true;
    std::string where = cur_object;
    auto const& source_name = is->getName();
    if (source_name != pdf.getFilename()) {
        where += " from " + source_name;
    }
    pdf.warn(qpdf_e_json, where, offset, msg);
}
bool
QPDF::JSONReactor::anyErrors() const
{
return this->errors;
}
// Pushes a frame for a newly opened container using the pending next_state. If an object is
// pending in next_obj, it is moved into the frame and next_obj is reset to an uninitialized
// handle.
void
QPDF::JSONReactor::containerStart()
{
    if (!next_obj.isInitialized()) {
        stack.emplace_back(next_state);
        return;
    }
    stack.emplace_back(next_state, std::move(next_obj));
    next_obj = QPDFObjectHandle();
}
void
QPDF::JSONReactor::dictionaryStart()
{
containerStart();
}
// Reactor callback for the start of a JSON array. An array at the outermost level (empty stack)
// is rejected with std::runtime_error; otherwise a new stack frame is opened.
void
QPDF::JSONReactor::arrayStart()
{
    if (!stack.empty()) {
        containerStart();
        return;
    }
    QTC::TC("qpdf", "QPDF_json top-level array");
    throw std::runtime_error("QPDF JSON must be a dictionary");
}
// Reactor callback for the end of a JSON container. Pops the frame for the container and
// validates what was seen inside it:
//  - when the outermost container closes, reports missing "qpdf", version, objects, or trailer;
//  - when a trailer closes, requires "value";
//  - when an object closes, requires exactly one of "value"/"stream" and checks stream keys.
// When control returns to the objects dictionary, the per-object flags are reset.
void
QPDF::JSONReactor::containerEnd(JSON const& value)
{
    state_e const closed = stack.back().state;
    stack.pop_back();

    if (stack.empty()) {
        // The outermost dictionary just closed: check document-level requirements.
        if (!saw_qpdf) {
            QTC::TC("qpdf", "QPDF_json missing qpdf");
            error(0, "\"qpdf\" object was not seen");
            return;
        }
        if (!saw_json_version) {
            QTC::TC("qpdf", "QPDF_json missing json version");
            error(0, "\"qpdf[0].jsonversion\" was not seen");
        }
        if (must_be_complete && !saw_pdf_version) {
            QTC::TC("qpdf", "QPDF_json missing pdf version");
            error(0, "\"qpdf[0].pdfversion\" was not seen");
        }
        if (!saw_objects) {
            QTC::TC("qpdf", "QPDF_json missing objects");
            error(0, "\"qpdf[1]\" was not seen");
        } else if (must_be_complete && !saw_trailer) {
            QTC::TC("qpdf", "QPDF_json missing trailer");
            error(0, "\"qpdf[1].trailer\" was not seen");
        }
        return;
    }

    switch (closed) {
    case st_trailer:
        if (!saw_value) {
            QTC::TC("qpdf", "QPDF_json trailer no value");
            error(value.getStart(), "\"trailer\" is missing \"value\"");
        }
        break;

    case st_object_top:
        if (saw_value == saw_stream) {
            QTC::TC("qpdf", "QPDF_json value stream both or neither");
            error(value.getStart(), "object must have exactly one of \"value\" or \"stream\"");
        }
        if (saw_stream) {
            if (!saw_dict) {
                QTC::TC("qpdf", "QPDF_json stream no dict");
                error(value.getStart(), "\"stream\" is missing \"dict\"");
            }
            // Equal flags mean either both data sources were given or neither was.
            if (saw_data == saw_datafile) {
                if (this_stream_needs_data) {
                    QTC::TC("qpdf", "QPDF_json data datafile both or neither");
                    error(
                        value.getStart(),
                        "new \"stream\" must have exactly one of \"data\" or "
                        "\"datafile\"");
                } else if (saw_datafile) {
                    QTC::TC("qpdf", "QPDF_json data and datafile");
                    error(
                        value.getStart(),
                        "existing \"stream\" may at most one of \"data\" or "
                        "\"datafile\"");
                } else {
                    QTC::TC("qpdf", "QPDF_json no stream data in update mode");
                }
            }
        }
        break;

    default:
        // No validation is performed when containers in other states close.
        break;
    }

    if (stack.back().state == st_objects) {
        // Back at the objects dictionary: forget everything about the object just finished.
        cur_object = "";
        saw_dict = false;
        saw_data = false;
        saw_datafile = false;
        saw_value = false;
        saw_stream = false;
    }
}
// Replaces the object held by the top stack frame with `replacement`, makes the replaced object
// the pending next_obj, and attaches the JSON description. An indirect replacement is reported
// as an error and nothing is replaced.
void
QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value)
{
    if (!replacement.isIndirect()) {
        auto& frame = stack.back();
        auto target = frame.object.getObjGen();
        pdf.replaceObject(target, replacement);
        next_obj = pdf.getObject(target);
        setObjectDescription(frame.object, value);
    } else {
        error(
            replacement.getParsedOffset(),
            "the value of an object may not be an indirect object reference");
    }
}
// Reactor callback for a scalar at the outermost level; always rejected with
// std::runtime_error because the outermost JSON value must be a dictionary.
void
QPDF::JSONReactor::topLevelScalar()
{
    QTC::TC("qpdf", "QPDF_json top-level scalar");
    throw std::runtime_error{"QPDF JSON must be a dictionary"};
}
bool
QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next)
{
// Use this method when the next state is for processing a nested dictionary.
if (value.isDictionary()) {
this->next_state = next;
return true;
}
error(value.getStart(), "\"" + key + "\" must be a dictionary");
return false;
}
// Reactor callback for each key/value pair of a JSON dictionary. What happens depends on the
// state of the enclosing container (top of the stack):
//  - st_ignore:     nothing;
//  - st_top:        recognizes "qpdf";
//  - st_qpdf_meta:  validates/records versions and the page-related flags;
//  - st_objects:    recognizes "trailer" and "obj:n n R" keys;
//  - st_object_top: handles "value" and "stream" for an object;
//  - st_trailer:    handles the trailer's "value";
//  - st_stream:     handles "dict", "data" and "datafile";
//  - st_object:     stores the key/value into the dictionary being built.
// next_state defaults to st_ignore and is overridden by the branches that expect a nested
// container. Always returns true. Throws std::logic_error on internal inconsistencies.
bool
QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
{
    // This is the only emptiness check needed: nothing below pops the stack.
    if (stack.empty()) {
        throw std::logic_error("stack is empty in dictionaryItem");
    }
    next_state = st_ignore;
    auto state = stack.back().state;
    if (state == st_ignore) {
        QTC::TC("qpdf", "QPDF_json ignoring in st_ignore");
        // ignore
    } else if (state == st_top) {
        if (key == "qpdf") {
            this->saw_qpdf = true;
            if (!value.isArray()) {
                QTC::TC("qpdf", "QPDF_json qpdf not array");
                error(value.getStart(), "\"qpdf\" must be an array");
            } else {
                next_state = st_qpdf;
            }
        } else {
            // Ignore all other fields.
            QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key");
        }
    } else if (state == st_qpdf_meta) {
        if (key == "pdfversion") {
            this->saw_pdf_version = true;
            std::string v;
            bool okay = false;
            if (value.getString(v)) {
                std::string version;
                char const* p = v.c_str();
                // The whole string must be consumed by the version validator.
                if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) {
                    this->pdf.m->pdf_version = version;
                    okay = true;
                }
            }
            if (!okay) {
                QTC::TC("qpdf", "QPDF_json bad pdf version");
                error(value.getStart(), "invalid PDF version (must be \"x.y\")");
            }
        } else if (key == "jsonversion") {
            this->saw_json_version = true;
            std::string v;
            bool okay = false;
            if (value.getNumber(v)) {
                if (QUtil::string_to_int(v.c_str()) == 2) {
                    okay = true;
                }
            }
            if (!okay) {
                QTC::TC("qpdf", "QPDF_json bad json version");
                error(value.getStart(), "invalid JSON version (must be numeric value 2)");
            }
        } else if (key == "pushedinheritedpageresources") {
            bool v = false;
            if (value.getBool(v)) {
                if (!this->must_be_complete && v) {
                    this->pdf.pushInheritedAttributesToPage();
                }
            } else {
                QTC::TC("qpdf", "QPDF_json bad pushedinheritedpageresources");
                error(value.getStart(), "pushedinheritedpageresources must be a boolean");
            }
        } else if (key == "calledgetallpages") {
            bool v = false;
            if (value.getBool(v)) {
                if (!this->must_be_complete && v) {
                    this->pdf.getAllPages();
                }
            } else {
                QTC::TC("qpdf", "QPDF_json bad calledgetallpages");
                error(value.getStart(), "calledgetallpages must be a boolean");
            }
        } else {
            // ignore unknown keys for forward compatibility and to skip keys we don't care about
            // like "maxobjectid".
            QTC::TC("qpdf", "QPDF_json ignore second-level key");
        }
    } else if (state == st_objects) {
        int obj = 0;
        int gen = 0;
        if (key == "trailer") {
            this->saw_trailer = true;
            this->cur_object = "trailer";
            setNextStateIfDictionary(key, value, st_trailer);
        } else if (is_obj_key(key, obj, gen)) {
            this->cur_object = key;
            if (setNextStateIfDictionary(key, value, st_object_top)) {
                next_obj = pdf.getObjectForJSON(obj, gen);
            }
        } else {
            QTC::TC("qpdf", "QPDF_json bad object key");
            error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\"");
        }
    } else if (state == st_object_top) {
        auto& tos = stack.back();
        if (!tos.object.isInitialized()) {
            throw std::logic_error("current object uninitialized in st_object_top");
        }
        if (key == "value") {
            // Don't use setNextStateIfDictionary since this can have any type.
            this->saw_value = true;
            replaceObject(makeObject(value), value);
            next_state = st_object;
        } else if (key == "stream") {
            this->saw_stream = true;
            if (setNextStateIfDictionary(key, value, st_stream)) {
                this->this_stream_needs_data = false;
                if (tos.object.isStream()) {
                    QTC::TC("qpdf", "QPDF_json updating existing stream");
                } else {
                    // The object is not yet a stream: replace it with a reserved stream, which
                    // must then be given data.
                    this->this_stream_needs_data = true;
                    replaceObject(pdf.reserveStream(tos.object.getObjGen()), value);
                }
                next_obj = tos.object;
            } else {
                // Error message already given above
                QTC::TC("qpdf", "QPDF_json stream not a dictionary");
            }
        } else {
            // Ignore unknown keys for forward compatibility
            QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top");
        }
    } else if (state == st_trailer) {
        if (key == "value") {
            this->saw_value = true;
            // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
            if (setNextStateIfDictionary("trailer.value", value, st_object)) {
                this->pdf.m->trailer = makeObject(value);
                setObjectDescription(this->pdf.m->trailer, value);
            }
        } else if (key == "stream") {
            // Don't need to set saw_stream here since there's already an error.
            QTC::TC("qpdf", "QPDF_json trailer stream");
            error(value.getStart(), "the trailer may not be a stream");
        } else {
            // Ignore unknown keys for forward compatibility
            QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer");
        }
    } else if (state == st_stream) {
        auto& tos = stack.back();
        if (!tos.object.isStream()) {
            throw std::logic_error("current object is not stream in st_stream");
        }
        // Passed as the filter and decode-parms arguments of replaceStreamData.
        auto uninitialized = QPDFObjectHandle();
        if (key == "dict") {
            this->saw_dict = true;
            if (setNextStateIfDictionary("stream.dict", value, st_object)) {
                tos.object.replaceDict(makeObject(value));
            } else {
                // An error had already been given by setNextStateIfDictionary
                QTC::TC("qpdf", "QPDF_json stream dict not dict");
            }
        } else if (key == "data") {
            this->saw_data = true;
            std::string v;
            if (!value.getString(v)) {
                QTC::TC("qpdf", "QPDF_json stream data not string");
                error(value.getStart(), "\"stream.data\" must be a string");
                tos.object.replaceStreamData("", uninitialized, uninitialized);
            } else {
                // The range includes the quotes.
                auto start = value.getStart() + 1;
                auto end = value.getEnd() - 1;
                if (end < start) {
                    throw std::logic_error("QPDF_json: JSON string length < 0");
                }
                tos.object.replaceStreamData(
                    provide_data(is, start, end), uninitialized, uninitialized);
            }
        } else if (key == "datafile") {
            this->saw_datafile = true;
            std::string filename;
            if (!value.getString(filename)) {
                QTC::TC("qpdf", "QPDF_json stream datafile not string");
                error(
                    value.getStart(),
                    "\"stream.datafile\" must be a string containing a file name");
                tos.object.replaceStreamData("", uninitialized, uninitialized);
            } else {
                tos.object.replaceStreamData(
                    QUtil::file_provider(filename), uninitialized, uninitialized);
            }
        } else {
            // Ignore unknown keys for forward compatibility.
            QTC::TC("qpdf", "QPDF_json ignore unknown key in stream");
        }
    } else if (state == st_object) {
        auto& tos = stack.back();
        auto dict = tos.object;
        if (dict.isStream()) {
            dict = dict.getDict();
        }
        if (!dict.isDictionary()) {
            throw std::logic_error(
                "current object is not stream or dictionary in st_object dictionary item");
        }
        // Keys of the form "n:/..." are parsed as PDF names; all other keys are used verbatim.
        dict.replaceKey(
            is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key,
            makeObject(value));
    } else {
        throw std::logic_error("QPDF_json: unknown state " + std::to_string(state));
    }
    return true;
}
// Reactor callback for each element of a JSON array. Inside the "qpdf" array, the first element
// is expected to be the metadata dictionary and the second the objects dictionary; any further
// element is an error. Inside a PDF array being built, the element is converted and appended.
// Elements in any other state are ignored. Always returns true.
bool
QPDF::JSONReactor::arrayItem(JSON const& value)
{
    if (stack.empty()) {
        throw std::logic_error("stack is empty in arrayItem");
    }
    next_state = st_ignore;
    auto& frame = stack.back();
    switch (frame.state) {
    case st_qpdf:
        if (!saw_qpdf_meta) {
            saw_qpdf_meta = true;
            setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta);
        } else if (!saw_objects) {
            saw_objects = true;
            setNextStateIfDictionary("qpdf[1]", value, st_objects);
        } else {
            QTC::TC("qpdf", "QPDF_json more than two qpdf elements");
            error(value.getStart(), "\"qpdf\" must have two elements");
        }
        break;

    case st_object:
        frame.object.appendItem(makeObject(value));
        break;

    default:
        break;
    }
    return true;
}
// Attaches a JSON description to `oh`, positioned at the start offset of `value`. The shared
// description is rebuilt first if it refers to a different object key than cur_object.
void
QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value)
{
    auto current = std::get<QPDFValue::JSON_Descr>(*descr);
    bool const stale = (current.object != cur_object);
    if (stale) {
        QPDFValue::JSON_Descr fresh(current.input, cur_object);
        descr = std::make_shared<QPDFValue::Description>(fresh);
    }
    oh.getObjectPtr()->setDescription(&pdf, descr, value.getStart());
}
// Converts a JSON value into a QPDFObjectHandle.
//  - Dictionaries and arrays become empty PDF containers; next_obj/next_state are set so the
//    nested JSON container populates them.
//  - null, booleans and numbers map to the corresponding PDF scalars; numbers that are not
//    integers become reals, with exponent notation converted to a plain decimal string.
//  - Strings are interpreted as indirect references, "u:" unicode strings, "b:" hex strings,
//    "/Name" names, or "n:/..." parsed names; anything else is reported as an error and
//    becomes null.
// A description is attached unless the object already has one.
QPDFObjectHandle
QPDF::JSONReactor::makeObject(JSON const& value)
{
    QPDFObjectHandle made;
    std::string text;
    bool flag = false;

    if (value.isDictionary() || value.isArray()) {
        made = value.isDictionary() ? QPDFObjectHandle::newDictionary()
                                    : QPDFObjectHandle::newArray();
        next_obj = made;
        next_state = st_object;
    } else if (value.isNull()) {
        made = QPDFObjectHandle::newNull();
    } else if (value.getBool(flag)) {
        made = QPDFObjectHandle::newBool(flag);
    } else if (value.getNumber(text)) {
        if (QUtil::is_long_long(text.c_str())) {
            made = QPDFObjectHandle::newInteger(QUtil::string_to_ll(text.c_str()));
        } else {
            // JSON allows scientific notation, but PDF does not.
            if (text.find_first_of("eE") != std::string::npos) {
                try {
                    text = QUtil::double_to_string(std::stod(text));
                } catch (std::exception&) {
                    // Conversion failed: fall through with the original text.
                }
            }
            made = QPDFObjectHandle::newReal(text);
        }
    } else if (value.getString(text)) {
        int obj = 0;
        int gen = 0;
        std::string payload;
        if (is_indirect_object(text, obj, gen)) {
            made = pdf.getObjectForJSON(obj, gen);
        } else if (is_unicode_string(text, payload)) {
            made = QPDFObjectHandle::newUnicodeString(payload);
        } else if (is_binary_string(text, payload)) {
            made = QPDFObjectHandle::newString(QUtil::hex_decode(payload));
        } else if (is_name(text)) {
            made = QPDFObjectHandle::newName(text);
        } else if (is_pdf_name(text)) {
            made = QPDFObjectHandle::parse(text.substr(2));
        } else {
            QTC::TC("qpdf", "QPDF_json unrecognized string value");
            error(value.getStart(), "unrecognized string value");
            made = QPDFObjectHandle::newNull();
        }
    }

    if (!made.isInitialized()) {
        throw std::logic_error("JSONReactor::makeObject didn't initialize the object");
    }
    if (!made.hasObjectDescription()) {
        setObjectDescription(made, value);
    }
    return made;
}
void
QPDF::createFromJSON(std::string const& json_file)
{
createFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
}
// Creates this QPDF from qpdf JSON read from `is`: first loads the built-in empty PDF under the
// input's name, then imports the JSON, requiring it to be complete.
void
QPDF::createFromJSON(std::shared_ptr<InputSource> is)
{
    auto const skeleton_size = strlen(JSON_PDF);
    processMemoryFile(is->getName().c_str(), JSON_PDF, skeleton_size);
    importJSON(is, true);
}
void
QPDF::updateFromJSON(std::string const& json_file)
{
updateFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
}
// Updates this QPDF from qpdf JSON read from `is`; the JSON need not be complete.
void
QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
{
    bool const must_be_complete = false;
    importJSON(is, must_be_complete);
}
// Parses qpdf JSON from `is` into this QPDF using a JSONReactor. A std::runtime_error raised
// during parsing is rethrown with the input name prefixed; if the reactor recorded any errors,
// a std::runtime_error is thrown after parsing finishes.
void
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
{
    JSONReactor reactor(*this, is, must_be_complete);
    try {
        JSON::parse(*is, &reactor);
    } catch (std::runtime_error& e) {
        throw std::runtime_error(is->getName() + ": " + e.what());
    }
    if (!reactor.anyErrors()) {
        return;
    }
    throw std::runtime_error(is->getName() + ": errors found in JSON");
}
void
writeJSONStreamFile(
int version,
JSON::Writer& jw,
QPDF_Stream& stream,
int id,
qpdf_stream_decode_level_e decode_level,
std::string const& file_prefix)
{
auto filename = file_prefix + "-" + std::to_string(id);
auto* f = QUtil::safe_fopen(filename.c_str(), "wb");
Pl_StdioFile f_pl{"stream data", f};
stream.writeStreamJSON(version, jw, qpdf_sj_file, decode_level, &f_pl, filename);
f_pl.finish();
fclose(f);
}
// Writes this QPDF as a complete qpdf JSON document to pipeline `p`. Delegates to the worker
// overload with complete = true and a fresh "first key" flag.
void
QPDF::writeJSON(
    int version,
    Pipeline* p,
    qpdf_stream_decode_level_e decode_level,
    qpdf_json_stream_data_e json_stream_data,
    std::string const& file_prefix,
    std::set<std::string> wanted_objects)
{
    bool const complete = true;
    bool first_key = true;
    writeJSON(
        version,
        p,
        complete,
        first_key,
        decode_level,
        json_stream_data,
        file_prefix,
        wanted_objects);
}
// Worker that writes the "qpdf" key of the JSON output to pipeline p.
//
// version:          must be 2; any other value throws std::runtime_error.
// complete:         when true, the output is wrapped in "{" ... "}" and p->finish() is called
//                   at the end; when false, only the "qpdf" key is written, preceded by a comma
//                   unless first_key is true.
// first_key:        in/out; always set to false once the key has been started.
// decode_level:     passed through to the stream writers.
// json_stream_data: selects how stream data is written; qpdf_sj_file writes each stream's data
//                   through writeJSONStreamFile using file_prefix.
// wanted_objects:   keys ("obj:n n R" or "trailer") to include; an empty set means everything.
void
QPDF::writeJSON(
int version,
Pipeline* p,
bool complete,
bool& first_key,
qpdf_stream_decode_level_e decode_level,
qpdf_json_stream_data_e json_stream_data,
std::string const& file_prefix,
std::set<std::string> wanted_objects)
{
if (version != 2) {
throw std::runtime_error("QPDF::writeJSON: only version 2 is supported");
}
JSON::Writer jw{p, 4};
if (complete) {
jw << "{";
} else if (!first_key) {
jw << ",";
}
first_key = false;
// Emit the start of the "qpdf" array: the metadata dictionary, then the opening brace of the
// objects dictionary.
/* clang-format off */
jw << "\n"
" \"qpdf\": [\n"
" {\n"
" \"jsonversion\": " << std::to_string(version) << ",\n"
" \"pdfversion\": \"" << getPDFVersion() << "\",\n"
" \"pushedinheritedpageresources\": " << (everPushedInheritedAttributesToPages() ? "true" : "false") << ",\n"
" \"calledgetallpages\": " << (everCalledGetAllPages() ? "true" : "false") << ",\n"
" \"maxobjectid\": " << std::to_string(getObjectCount()) << "\n"
" },\n"
" {";
/* clang-format on */
bool all_objects = wanted_objects.empty();
// `first` stays true until an entry has been written; every later entry first closes the
// previous entry's dictionary.
bool first = true;
for (auto& obj: getAllObjects()) {
auto const og = obj.getObjGen();
std::string key = "obj:" + og.unparse(' ') + " R";
if (all_objects || wanted_objects.count(key)) {
if (first) {
jw << "\n \"" << key;
first = false;
} else {
jw << "\n },\n \"" << key;
}
// Streams are written under "stream"; every other object under "value".
if (auto* stream = obj.getObjectPtr()->as<QPDF_Stream>()) {
jw << "\": {\n \"stream\": ";
if (json_stream_data == qpdf_sj_file) {
writeJSONStreamFile(
version, jw, *stream, og.getObj(), decode_level, file_prefix);
} else {
stream->writeStreamJSON(
version, jw, json_stream_data, decode_level, nullptr, "");
}
} else {
jw << "\": {\n \"value\": ";
obj.writeJSON(version, jw, true);
}
}
}
if (all_objects || wanted_objects.count("trailer")) {
if (!first) {
jw << "\n },";
}
jw << "\n \"trailer\": {\n \"value\": ";
getTrailer().writeJSON(version, jw, true);
first = false;
}
// Close the last entry's dictionary, if any entry was written.
if (!first) {
jw << "\n }";
}
/* clang-format off */
jw << "\n"
" }\n"
" ]";
/* clang-format on */
if (complete) {
jw << "\n}\n";
p->finish();
}
}