2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-23 11:28:56 +00:00
qpdf/libqpdf/QPDF_json.cc

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

947 lines
31 KiB
C++
Raw Normal View History

#include <qpdf/QPDF.hh>

#include <qpdf/FileInputSource.hh>
#include <qpdf/JSON_writer.hh>
#include <qpdf/Pl_Base64.hh>
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QPDFObject_private.hh>
#include <qpdf/QPDFValue.hh>
#include <qpdf/QPDF_Null.hh>
#include <qpdf/QPDF_Stream.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <algorithm>
#include <cstring>
#include <memory>
2022-05-15 16:34:27 +00:00
// This chart shows an example of the state transitions that would occur in parsing a minimal file.
//
//                               |
// {                             |   -> st_top
//   "qpdf": [                   |   -> st_qpdf
//     {                         |   -> st_qpdf_meta
//       ...                     |   ...
//     },                        |   ...
//     {                         |   -> st_objects
//       "obj:1 0 R": {          |   -> st_object_top
//         "value": {            |   -> st_object
//           "/Pages": "2 0 R",  |   ...
//           "/Type": "/Catalog" |   ...
//         }                     |   <- st_object_top
//       },                      |   <- st_objects
//       "obj:2 0 R": {          |   -> st_object_top
//         "value": 12           |   -> st_object
//                               |   <- st_object_top
//       },                      |   <- st_objects
//       "obj:4 0 R": {          |   -> st_object_top
//         "stream": {           |   -> st_stream
//           "data": "cG90YXRv", |   ...
//           "dict": {           |   -> st_object
//             "/K": true        |   ...
//           }                   |   <- st_stream
//         }                     |   <- st_object_top
//       },                      |   <- st_objects
//       "trailer": {            |   -> st_trailer
//         "value": {            |   -> st_object
//           "/Root": "1 0 R",   |   ...
//           "/Size": 7          |   ...
//         }                     |   <- st_trailer
//       }                       |   <- st_objects
//     }                         |   <- st_qpdf
//   ]                           |   <- st_top
// }                             |
// Minimal PDF with no objects, handed to processMemoryFile() by createFromJSON() before the JSON
// content is imported. The "9" after "startxref" is the byte offset of "xref": the header line
// "%PDF-1.3\n" is exactly nine bytes long.
static char const* JSON_PDF =
    // header
    "%PDF-1.3\n"
    // cross-reference table with a single entry
    "xref\n"
    "0 1\n"
    "0000000000 65535 f \n"
    // trailer dictionary
    "trailer << /Size 1 >>\n"
    // location of the cross-reference table, then the end-of-file marker
    "startxref\n"
    "9\n"
    "%%EOF\n";
2022-05-15 16:34:27 +00:00
// Validator methods -- these are much more performant than std::regex.
// Recognize a string of the form "<digits> <digits> R", where each gap is one or more spaces and
// nothing may precede the first digit or follow the 'R'. On a syntactic match, obj and gen
// receive the two numbers; the function then returns true only when obj is positive. When the
// syntax does not match, obj and gen are left untouched.
static bool
is_indirect_object(std::string const& v, int& obj, int& gen)
{
    char const* cursor = v.c_str();

    // Consume a run of digits into `digits`; succeeds only if at least one digit was found.
    auto take_number = [&cursor](std::string& digits) {
        for (; QUtil::is_digit(*cursor); ++cursor) {
            digits.push_back(*cursor);
        }
        return !digits.empty();
    };
    // Consume a run of spaces; succeeds only if at least one space was found.
    auto take_spaces = [&cursor]() {
        char const* const begin = cursor;
        while (*cursor == ' ') {
            ++cursor;
        }
        return cursor != begin;
    };

    std::string obj_digits;
    std::string gen_digits;
    // cursor[1] is only inspected once *cursor is known to be 'R', so it is at worst the
    // terminating NUL of the string.
    bool const well_formed = take_number(obj_digits) && take_spaces() && take_number(gen_digits) &&
        take_spaces() && (*cursor == 'R') && (cursor[1] == '\0');
    if (!well_formed) {
        return false;
    }

    obj = QUtil::string_to_int(obj_digits.c_str());
    gen = QUtil::string_to_int(gen_digits.c_str());
    return obj > 0;
}
// Recognize an object key of the form "obj:" followed by an indirect object reference. The part
// after the prefix is validated by is_indirect_object(), which also fills in obj and gen.
static bool
is_obj_key(std::string const& v, int& obj, int& gen)
{
    constexpr size_t prefix_len = 4;
    if (v.compare(0, prefix_len, "obj:") != 0) {
        return false;
    }
    return is_indirect_object(v.substr(prefix_len), obj, gen);
}
// Recognize a string carrying the "u:" prefix. On a match, str receives everything after the
// prefix (possibly empty) and the function returns true; otherwise str is left unchanged.
static bool
is_unicode_string(std::string const& v, std::string& str)
{
    bool const has_prefix = (v.compare(0, 2, "u:") == 0);
    if (has_prefix) {
        str = v.substr(2);
    }
    return has_prefix;
}
// Recognize a string carrying the "b:" prefix followed by an even number (possibly zero) of
// hexadecimal digits. Whenever the prefix is present, str receives everything after the prefix,
// even if the function then returns false because the remainder is not valid hex.
static bool
is_binary_string(std::string const& v, std::string& str)
{
    if (v.compare(0, 2, "b:") != 0) {
        return false;
    }
    str = v.substr(2);
    bool const all_hex =
        std::all_of(str.begin(), str.end(), [](char c) { return QUtil::is_hex_digit(c); });
    // Hex digits come in pairs, one pair per byte.
    return all_hex && (str.size() % 2 == 0);
}
// A name is a '/' followed by at least one more character.
static bool
is_name(std::string const& v)
{
    return v.size() >= 2 && v.front() == '/';
}
// A PDF-syntax name is the prefix "n:/" followed by at least one more character.
static bool
is_pdf_name(std::string const& v)
{
    return v.size() >= 4 && v.compare(0, 3, "n:/") == 0;
}
bool
QPDF::test_json_validators()
{
bool passed = true;
auto check_fn = [&passed](char const* msg, bool expr) {
if (!expr) {
passed = false;
std::cerr << msg << std::endl;
}
};
#define check(expr) check_fn(#expr, expr)
int obj = 0;
int gen = 0;
check(!is_indirect_object("", obj, gen));
check(!is_indirect_object("12", obj, gen));
check(!is_indirect_object("x12 0 R", obj, gen));
check(!is_indirect_object("12 0 Rx", obj, gen));
check(!is_indirect_object("12 0R", obj, gen));
check(is_indirect_object("52 1 R", obj, gen));
check(obj == 52);
check(gen == 1);
check(is_indirect_object("53 20 R", obj, gen));
check(obj == 53);
check(gen == 20);
check(!is_obj_key("", obj, gen));
check(!is_obj_key("obj:x", obj, gen));
check(!is_obj_key("obj:x", obj, gen));
check(is_obj_key("obj:12 13 R", obj, gen));
check(obj == 12);
check(gen == 13);
std::string str;
check(!is_unicode_string("", str));
check(!is_unicode_string("xyz", str));
check(!is_unicode_string("x:", str));
check(is_unicode_string("u:potato", str));
check(str == "potato");
check(is_unicode_string("u:", str));
check(str == "");
check(!is_binary_string("", str));
check(!is_binary_string("x:", str));
check(!is_binary_string("b:1", str));
check(!is_binary_string("b:123", str));
check(!is_binary_string("b:gh", str));
2022-05-21 21:36:32 +00:00
check(is_binary_string("b:", str));
check(is_binary_string("b:12", str));
check(is_binary_string("b:123aBC", str));
check(!is_name(""));
check(!is_name("/"));
check(!is_name("xyz"));
check(is_name("/Potato"));
check(is_name("/Potato Salad"));
return passed;
#undef check_arg
}
static std::function<void(Pipeline*)>
2022-05-21 20:11:42 +00:00
provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
{
return [is, start, end](Pipeline* p) {
Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);
p = &decode;
2022-05-21 13:05:54 +00:00
size_t bytes = QIntC::to_size(end - start);
char buf[8192];
2022-05-21 13:05:54 +00:00
is->seek(start, SEEK_SET);
size_t len = 0;
while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {
p->write(buf, len);
bytes -= len;
if (bytes == 0) {
break;
}
}
decode.finish();
};
}
2022-05-15 16:34:27 +00:00
// Reactor that receives callbacks from the JSON parser and applies the qpdf JSON document being
// parsed to a QPDF object. It keeps one StackFrame per open JSON container; the state stored in
// the frame at the top of the stack decides how each dictionary key or array item is interpreted.
// Problems are reported through error(), and anyErrors() tells the caller whether any occurred.
class QPDF::JSONReactor: public JSON::Reactor
{
  public:
    // pdf:              the QPDF object to populate or update
    // is:               the JSON input; also re-read later to fetch inline stream data
    // must_be_complete: when true, "pdfversion" and "trailer" are required
    //
    // The mem-initializer for descr reads the constructor parameter `is`, so that parameter must
    // not be moved from when initializing the member of the same name.
    JSONReactor(QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
        pdf(pdf),
        is(is),
        must_be_complete(must_be_complete),
        descr(std::make_shared<QPDFValue::Description>(
            QPDFValue::JSON_Descr(std::make_shared<std::string>(is->getName()), "")))
    {
        // Remember which objects were already reserved before parsing started so that
        // containerEnd() only turns objects reserved during this import into nulls.
        for (auto& oc: pdf.m->obj_cache) {
            if (oc.second.object->getTypeCode() == ::ot_reserved) {
                reserved.insert(oc.first);
            }
        }
    }
    ~JSONReactor() override = default;

    // JSON::Reactor callbacks
    void dictionaryStart() override;
    void arrayStart() override;
    void containerEnd(JSON const& value) override;
    void topLevelScalar() override;
    bool dictionaryItem(std::string const& key, JSON const& value) override;
    bool arrayItem(JSON const& value) override;

    // True if error() has been called at least once.
    bool anyErrors() const;

  private:
    // Parser states; see the chart near the top of this file for a sample walk-through.
    enum state_e {
        st_top,
        st_qpdf,
        st_qpdf_meta,
        st_objects,
        st_trailer,
        st_object_top,
        st_stream,
        st_object,
        st_ignore,
    };

    // One entry per open JSON container: the state that applies inside the container and, when
    // there is one, the PDF object being built from it.
    struct StackFrame
    {
        StackFrame(state_e state) :
            state(state)
        {
        }
        StackFrame(state_e state, QPDFObjectHandle&& object) :
            state(state),
            // The parameter is a named rvalue reference; without std::move it would be copied.
            object(std::move(object))
        {
        }
        state_e state;
        QPDFObjectHandle object;
    };

    void containerStart();
    bool setNextStateIfDictionary(std::string const& key, JSON const& value, state_e);
    void setObjectDescription(QPDFObjectHandle& oh, JSON const& value);
    QPDFObjectHandle makeObject(JSON const& value);
    void error(qpdf_offset_t offset, std::string const& message);
    void replaceObject(QPDFObjectHandle&& replacement, JSON const& value);

    QPDF& pdf;
    std::shared_ptr<InputSource> is;
    bool must_be_complete{true};
    // Description attached to created objects; replaced when cur_object changes.
    std::shared_ptr<QPDFValue::Description> descr;
    bool errors{false};

    // Which parts of the document have been encountered so far.
    bool saw_qpdf{false};
    bool saw_qpdf_meta{false};
    bool saw_objects{false};
    bool saw_json_version{false};
    bool saw_pdf_version{false};
    bool saw_trailer{false};

    // Per-object bookkeeping; reset by containerEnd() on return to st_objects.
    std::string cur_object;
    bool saw_value{false};
    bool saw_stream{false};
    bool saw_dict{false};
    bool saw_data{false};
    bool saw_datafile{false};
    bool this_stream_needs_data{false};

    // Objects that were already reserved when the reactor was constructed.
    std::set<QPDFObjGen> reserved;
    std::vector<StackFrame> stack;
    // Object and state to install in the frame pushed by the next containerStart().
    QPDFObjectHandle next_obj;
    state_e next_state{st_top};
};
void
2022-05-21 13:05:54 +00:00
QPDF::JSONReactor::error(qpdf_offset_t offset, std::string const& msg)
{
this->errors = true;
2022-05-20 15:10:12 +00:00
std::string object = this->cur_object;
if (is->getName() != pdf.getFilename()) {
object += " from " + is->getName();
}
2022-05-21 13:05:54 +00:00
this->pdf.warn(qpdf_e_json, object, offset, msg);
}
bool
QPDF::JSONReactor::anyErrors() const
{
return this->errors;
}
2022-05-15 16:34:27 +00:00
// Push a frame for a container that is being opened. The frame takes the pending next_state and,
// if one was prepared, the pending next_obj, which is then cleared so that it cannot be picked
// up a second time.
void
QPDF::JSONReactor::containerStart()
{
    if (next_obj.isInitialized()) {
        stack.emplace_back(next_state, std::move(next_obj));
        next_obj = QPDFObjectHandle();
    } else {
        stack.emplace_back(next_state);
    }
}
2022-05-14 12:49:42 +00:00
void
QPDF::JSONReactor::dictionaryStart()
{
2022-05-15 16:34:27 +00:00
containerStart();
2022-05-14 12:49:42 +00:00
}
void
QPDF::JSONReactor::arrayStart()
{
if (stack.empty()) {
2022-05-15 16:34:27 +00:00
QTC::TC("qpdf", "QPDF_json top-level array");
throw std::runtime_error("QPDF JSON must be a dictionary");
}
containerStart();
2022-05-14 12:49:42 +00:00
}
// Parser callback for the end of a JSON container. Pops the frame for the container and runs the
// validation that can only be done once the whole container has been seen:
//  - top-level dictionary closed: required sections and keys must have been seen
//  - trailer closed: it must have had a "value"
//  - object closed: exactly one of "value"/"stream"; stream-specific requirements
//  - "qpdf" array closed: objects reserved during this import that are still reserved become null
// Finally, when the parser is back at the objects dictionary, per-object bookkeeping is reset.
void
QPDF::JSONReactor::containerEnd(JSON const& value)
{
    auto from_state = stack.back().state;
    stack.pop_back();
    if (stack.empty()) {
        if (!this->saw_qpdf) {
            QTC::TC("qpdf", "QPDF_json missing qpdf");
            error(0, "\"qpdf\" object was not seen");
        } else {
            if (!this->saw_json_version) {
                QTC::TC("qpdf", "QPDF_json missing json version");
                error(0, "\"qpdf[0].jsonversion\" was not seen");
            }
            if (must_be_complete && !this->saw_pdf_version) {
                QTC::TC("qpdf", "QPDF_json missing pdf version");
                error(0, "\"qpdf[0].pdfversion\" was not seen");
            }
            if (!this->saw_objects) {
                QTC::TC("qpdf", "QPDF_json missing objects");
                error(0, "\"qpdf[1]\" was not seen");
            } else if (must_be_complete && !this->saw_trailer) {
                QTC::TC("qpdf", "QPDF_json missing trailer");
                error(0, "\"qpdf[1].trailer\" was not seen");
            }
        }
    } else if (from_state == st_trailer) {
        if (!saw_value) {
            QTC::TC("qpdf", "QPDF_json trailer no value");
            error(value.getStart(), "\"trailer\" is missing \"value\"");
        }
    } else if (from_state == st_object_top) {
        // Equal flags mean that either both keys or neither key was present.
        if (saw_value == saw_stream) {
            QTC::TC("qpdf", "QPDF_json value stream both or neither");
            error(value.getStart(), "object must have exactly one of \"value\" or \"stream\"");
        }
        if (saw_stream) {
            if (!saw_dict) {
                QTC::TC("qpdf", "QPDF_json stream no dict");
                error(value.getStart(), "\"stream\" is missing \"dict\"");
            }
            if (saw_data == saw_datafile) {
                if (this_stream_needs_data) {
                    // A newly created stream needs data from exactly one source.
                    QTC::TC("qpdf", "QPDF_json data datafile both or neither");
                    error(
                        value.getStart(),
                        "new \"stream\" must have exactly one of \"data\" or "
                        "\"datafile\"");
                } else if (saw_datafile) {
                    // Both sources were given for an existing stream.
                    QTC::TC("qpdf", "QPDF_json data and datafile");
                    error(
                        value.getStart(),
                        "existing \"stream\" may at most one of \"data\" or "
                        "\"datafile\"");
                } else {
                    // Neither source was given for an existing stream; this is not an error.
                    QTC::TC("qpdf", "QPDF_json no stream data in update mode");
                }
            }
        }
    } else if (from_state == st_qpdf) {
        // Handle dangling indirect object references which the PDF spec says to treat as nulls.
        // It's tempting to make this an error, but that would be wrong since valid input files may
        // have these.
        for (auto& oc: pdf.m->obj_cache) {
            if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) {
                QTC::TC("qpdf", "QPDF_json non-trivial null reserved");
                pdf.updateCache(oc.first, QPDF_Null::create(), -1, -1);
            }
        }
    }
    if (!stack.empty()) {
        auto state = stack.back().state;
        if (state == st_objects) {
            // Back between objects: forget everything recorded about the one just finished.
            this->cur_object = "";
            this->saw_dict = false;
            this->saw_data = false;
            this->saw_datafile = false;
            this->saw_value = false;
            this->saw_stream = false;
        }
    }
}
// Replace the object held by the frame at the top of the stack with `replacement`. An indirect
// replacement is rejected with an error and nothing is changed. Otherwise the object is replaced
// in the QPDF, the handle for its object/generation is queued as next_obj for the container that
// may follow, and the frame's object is given a description based on `value`.
void
QPDF::JSONReactor::replaceObject(QPDFObjectHandle&& replacement, JSON const& value)
{
    if (replacement.isIndirect()) {
        error(
            replacement.getParsedOffset(),
            "the value of an object may not be an indirect object reference");
        return;
    }
    auto& tos = stack.back();
    auto og = tos.object.getObjGen();
    this->pdf.replaceObject(og, replacement);
    next_obj = pdf.getObject(og);
    setObjectDescription(tos.object, value);
}
void
QPDF::JSONReactor::topLevelScalar()
{
2022-05-15 16:34:27 +00:00
QTC::TC("qpdf", "QPDF_json top-level scalar");
throw std::runtime_error("QPDF JSON must be a dictionary");
}
bool
QPDF::JSONReactor::setNextStateIfDictionary(std::string const& key, JSON const& value, state_e next)
2022-05-15 16:34:27 +00:00
{
// Use this method when the next state is for processing a nested dictionary.
if (value.isDictionary()) {
this->next_state = next;
return true;
2022-05-15 16:34:27 +00:00
}
error(value.getStart(), "\"" + key + "\" must be a dictionary");
return false;
2022-05-14 12:49:42 +00:00
}
bool
QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
{
if (stack.empty()) {
throw std::logic_error("stack is empty in dictionaryItem");
}
next_state = st_ignore;
auto state = stack.back().state;
2022-05-15 16:34:27 +00:00
if (state == st_ignore) {
QTC::TC("qpdf", "QPDF_json ignoring in st_ignore");
2022-05-15 16:34:27 +00:00
// ignore
} else if (state == st_top) {
if (key == "qpdf") {
2022-05-15 16:34:27 +00:00
this->saw_qpdf = true;
if (!value.isArray()) {
QTC::TC("qpdf", "QPDF_json qpdf not array");
error(value.getStart(), "\"qpdf\" must be an array");
} else {
next_state = st_qpdf;
}
2022-05-15 16:34:27 +00:00
} else {
// Ignore all other fields.
QTC::TC("qpdf", "QPDF_json ignoring unknown top-level key");
2022-05-15 16:34:27 +00:00
}
} else if (state == st_qpdf_meta) {
if (key == "pdfversion") {
2022-05-15 16:34:27 +00:00
this->saw_pdf_version = true;
std::string v;
bool okay = false;
2022-05-15 16:34:27 +00:00
if (value.getString(v)) {
std::string version;
char const* p = v.c_str();
if (QPDF::validatePDFVersion(p, version) && (*p == '\0')) {
this->pdf.m->pdf_version = version;
okay = true;
2022-05-15 16:34:27 +00:00
}
}
if (!okay) {
2022-05-15 16:34:27 +00:00
QTC::TC("qpdf", "QPDF_json bad pdf version");
error(value.getStart(), "invalid PDF version (must be \"x.y\")");
2022-05-15 16:34:27 +00:00
}
} else if (key == "jsonversion") {
this->saw_json_version = true;
std::string v;
bool okay = false;
if (value.getNumber(v)) {
std::string version;
if (QUtil::string_to_int(v.c_str()) == 2) {
okay = true;
}
}
if (!okay) {
QTC::TC("qpdf", "QPDF_json bad json version");
error(value.getStart(), "invalid JSON version (must be numeric value 2)");
}
} else if (key == "pushedinheritedpageresources") {
bool v;
if (value.getBool(v)) {
if (!this->must_be_complete && v) {
this->pdf.pushInheritedAttributesToPage();
}
} else {
QTC::TC("qpdf", "QPDF_json bad pushedinheritedpageresources");
error(value.getStart(), "pushedinheritedpageresources must be a boolean");
}
} else if (key == "calledgetallpages") {
bool v;
if (value.getBool(v)) {
if (!this->must_be_complete && v) {
this->pdf.getAllPages();
}
} else {
QTC::TC("qpdf", "QPDF_json bad calledgetallpages");
2022-07-31 19:26:24 +00:00
error(value.getStart(), "calledgetallpages must be a boolean");
}
2022-05-15 16:34:27 +00:00
} else {
// ignore unknown keys for forward compatibility and to skip keys we don't care about
// like "maxobjectid".
QTC::TC("qpdf", "QPDF_json ignore second-level key");
2022-05-15 16:34:27 +00:00
}
} else if (state == st_objects) {
int obj = 0;
int gen = 0;
2022-05-15 16:34:27 +00:00
if (key == "trailer") {
this->saw_trailer = true;
this->cur_object = "trailer";
setNextStateIfDictionary(key, value, st_trailer);
} else if (is_obj_key(key, obj, gen)) {
this->cur_object = key;
if (setNextStateIfDictionary(key, value, st_object_top)) {
next_obj = pdf.reserveObjectIfNotExists(QPDFObjGen(obj, gen));
}
2022-05-15 16:34:27 +00:00
} else {
QTC::TC("qpdf", "QPDF_json bad object key");
error(value.getStart(), "object key should be \"trailer\" or \"obj:n n R\"");
2022-05-15 16:34:27 +00:00
}
} else if (state == st_object_top) {
if (stack.empty()) {
throw std::logic_error("stack empty in st_object_top");
}
auto& tos = stack.back();
if (!tos.object.isInitialized()) {
throw std::logic_error("current object uninitialized in st_object_top");
}
2022-05-15 16:34:27 +00:00
if (key == "value") {
// Don't use setNextStateIfDictionary since this can have any type.
this->saw_value = true;
replaceObject(makeObject(value), value);
2022-05-15 16:34:27 +00:00
next_state = st_object;
} else if (key == "stream") {
this->saw_stream = true;
if (setNextStateIfDictionary(key, value, st_stream)) {
this->this_stream_needs_data = false;
if (tos.object.isStream()) {
QTC::TC("qpdf", "QPDF_json updating existing stream");
} else {
this->this_stream_needs_data = true;
replaceObject(pdf.reserveStream(tos.object.getObjGen()), value);
}
next_obj = tos.object;
} else {
// Error message already given above
QTC::TC("qpdf", "QPDF_json stream not a dictionary");
}
2022-05-15 16:34:27 +00:00
} else {
// Ignore unknown keys for forward compatibility
QTC::TC("qpdf", "QPDF_json ignore unknown key in object_top");
}
} else if (state == st_trailer) {
2022-05-15 16:34:27 +00:00
if (key == "value") {
this->saw_value = true;
// The trailer must be a dictionary, so we can use setNextStateIfDictionary.
if (setNextStateIfDictionary("trailer.value", value, st_object)) {
this->pdf.m->trailer = makeObject(value);
setObjectDescription(this->pdf.m->trailer, value);
}
2022-05-15 16:34:27 +00:00
} else if (key == "stream") {
// Don't need to set saw_stream here since there's already an error.
2022-05-15 16:34:27 +00:00
QTC::TC("qpdf", "QPDF_json trailer stream");
error(value.getStart(), "the trailer may not be a stream");
2022-05-15 16:34:27 +00:00
} else {
// Ignore unknown keys for forward compatibility
QTC::TC("qpdf", "QPDF_json ignore unknown key in trailer");
2022-05-15 16:34:27 +00:00
}
} else if (state == st_stream) {
if (stack.empty()) {
throw std::logic_error("stack empty in st_stream");
}
auto& tos = stack.back();
if (!tos.object.isStream()) {
throw std::logic_error("current object is not stream in st_stream");
2022-05-20 15:10:12 +00:00
}
auto uninitialized = QPDFObjectHandle();
if (key == "dict") {
this->saw_dict = true;
if (setNextStateIfDictionary("stream.dict", value, st_object)) {
tos.object.replaceDict(makeObject(value));
} else {
// An error had already been given by setNextStateIfDictionary
QTC::TC("qpdf", "QPDF_json stream dict not dict");
}
2022-05-15 16:34:27 +00:00
} else if (key == "data") {
this->saw_data = true;
std::string v;
if (!value.getString(v)) {
QTC::TC("qpdf", "QPDF_json stream data not string");
error(value.getStart(), "\"stream.data\" must be a string");
tos.object.replaceStreamData("", uninitialized, uninitialized);
} else {
// The range includes the quotes.
auto start = value.getStart() + 1;
auto end = value.getEnd() - 1;
if (end < start) {
throw std::logic_error("QPDF_json: JSON string length < 0");
}
tos.object.replaceStreamData(
provide_data(is, start, end), uninitialized, uninitialized);
}
2022-05-15 16:34:27 +00:00
} else if (key == "datafile") {
this->saw_datafile = true;
std::string filename;
if (!value.getString(filename)) {
QTC::TC("qpdf", "QPDF_json stream datafile not string");
error(
value.getStart(),
"\"stream.datafile\" must be a string containing a file name");
tos.object.replaceStreamData("", uninitialized, uninitialized);
} else {
tos.object.replaceStreamData(
QUtil::file_provider(filename), uninitialized, uninitialized);
}
2022-05-15 16:34:27 +00:00
} else {
// Ignore unknown keys for forward compatibility.
QTC::TC("qpdf", "QPDF_json ignore unknown key in stream");
2022-05-15 16:34:27 +00:00
}
} else if (state == st_object) {
if (stack.empty()) {
throw std::logic_error("stack empty in st_object");
}
auto& tos = stack.back();
auto dict = tos.object;
if (dict.isStream()) {
dict = dict.getDict();
}
if (!dict.isDictionary()) {
throw std::logic_error(
"current object is not stream or dictionary in st_object dictionary item");
}
dict.replaceKey(
is_pdf_name(key) ? QPDFObjectHandle::parse(key.substr(2)).getName() : key,
makeObject(value));
2022-05-15 16:34:27 +00:00
} else {
throw std::logic_error("QPDF_json: unknown state " + std::to_string(state));
2022-05-15 16:34:27 +00:00
}
2022-05-14 12:49:42 +00:00
return true;
}
// Parser callback for one item of a JSON array. Inside the "qpdf" array, the first item opens the
// metadata dictionary, the second opens the objects dictionary, and any further item is an error.
// Inside an object, the item is converted and appended to the array held by the current frame.
// In every other state the item is ignored. Always returns true; throws std::logic_error if
// called with an empty stack.
bool
QPDF::JSONReactor::arrayItem(JSON const& value)
{
    if (stack.empty()) {
        throw std::logic_error("stack is empty in arrayItem");
    }
    next_state = st_ignore;
    auto state = stack.back().state;
    if (state == st_qpdf) {
        if (!this->saw_qpdf_meta) {
            this->saw_qpdf_meta = true;
            setNextStateIfDictionary("qpdf[0]", value, st_qpdf_meta);
        } else if (!this->saw_objects) {
            this->saw_objects = true;
            setNextStateIfDictionary("qpdf[1]", value, st_objects);
        } else {
            QTC::TC("qpdf", "QPDF_json more than two qpdf elements");
            error(value.getStart(), "\"qpdf\" must have two elements");
        }
    } else if (state == st_object) {
        stack.back().object.appendItem(makeObject(value));
    }
    return true;
}
2022-05-20 18:23:32 +00:00
// Attach a description to `oh` made of the shared JSON description and the start offset of
// `value`. The shared description is replaced first if it was built for a different object than
// cur_object; otherwise the existing one is reused.
void
QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value)
{
    // Deliberately a copy: descr may be reassigned below while j_descr.input is still needed.
    auto j_descr = std::get<QPDFValue::JSON_Descr>(*descr);
    if (j_descr.object != cur_object) {
        descr = std::make_shared<QPDFValue::Description>(
            QPDFValue::JSON_Descr(j_descr.input, cur_object));
    }
    oh.getObjectPtr()->setDescription(&pdf, descr, value.getStart());
}
// Convert a JSON value into a PDF object.
//  - dictionary / array: an empty PDF dictionary / array is created and queued (next_obj,
//    next_state = st_object) so the parser callbacks that follow can fill it in
//  - null, boolean, number: the corresponding PDF scalar
//  - string: an indirect reference, unicode string, binary string, or name, depending on its
//    form; any other string is reported as an error and becomes null
// The result gets an object description unless it already has one. Throws std::logic_error if
// no branch produced an object.
QPDFObjectHandle
QPDF::JSONReactor::makeObject(JSON const& value)
{
    QPDFObjectHandle result;
    std::string str_v;
    bool bool_v = false;
    if (value.isDictionary()) {
        result = QPDFObjectHandle::newDictionary();
        next_obj = result;
        next_state = st_object;
    } else if (value.isArray()) {
        result = QPDFObjectHandle::newArray();
        next_obj = result;
        next_state = st_object;
    } else if (value.isNull()) {
        result = QPDFObjectHandle::newNull();
    } else if (value.getBool(bool_v)) {
        result = QPDFObjectHandle::newBool(bool_v);
    } else if (value.getNumber(str_v)) {
        if (QUtil::is_long_long(str_v.c_str())) {
            result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(str_v.c_str()));
        } else {
            // JSON allows scientific notation, but PDF does not.
            if (str_v.find('e') != std::string::npos || str_v.find('E') != std::string::npos) {
                try {
                    auto v = std::stod(str_v);
                    str_v = QUtil::double_to_string(v);
                } catch (std::exception const&) {
                    // Keep it as it was
                }
            }
            result = QPDFObjectHandle::newReal(str_v);
        }
    } else if (value.getString(str_v)) {
        int obj = 0;
        int gen = 0;
        std::string str;
        // The order of these tests matters only in that the first matching form wins.
        if (is_indirect_object(str_v, obj, gen)) {
            result = pdf.reserveObjectIfNotExists(QPDFObjGen(obj, gen));
        } else if (is_unicode_string(str_v, str)) {
            result = QPDFObjectHandle::newUnicodeString(str);
        } else if (is_binary_string(str_v, str)) {
            result = QPDFObjectHandle::newString(QUtil::hex_decode(str));
        } else if (is_name(str_v)) {
            result = QPDFObjectHandle::newName(str_v);
        } else if (is_pdf_name(str_v)) {
            // Strip the "n:" prefix and parse the remainder using PDF syntax.
            result = QPDFObjectHandle::parse(str_v.substr(2));
        } else {
            QTC::TC("qpdf", "QPDF_json unrecognized string value");
            error(value.getStart(), "unrecognized string value");
            result = QPDFObjectHandle::newNull();
        }
    }
    if (!result.isInitialized()) {
        throw std::logic_error("JSONReactor::makeObject didn't initialize the object");
    }
    if (!result.hasObjectDescription()) {
        setObjectDescription(result, value);
    }
    return result;
}
void
QPDF::createFromJSON(std::string const& json_file)
{
createFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
}
void
2022-05-14 12:49:42 +00:00
QPDF::createFromJSON(std::shared_ptr<InputSource> is)
{
processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
2022-05-14 12:49:42 +00:00
importJSON(is, true);
}
void
QPDF::updateFromJSON(std::string const& json_file)
{
updateFromJSON(std::make_shared<FileInputSource>(json_file.c_str()));
}
void
2022-05-14 12:49:42 +00:00
QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
{
importJSON(is, false);
}
void
2022-05-15 16:34:27 +00:00
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
{
JSONReactor reactor(*this, is, must_be_complete);
2022-05-15 16:34:27 +00:00
try {
JSON::parse(*is, &reactor);
} catch (std::runtime_error& e) {
throw std::runtime_error(is->getName() + ": " + e.what());
}
if (reactor.anyErrors()) {
throw std::runtime_error(is->getName() + ": errors found in JSON");
}
}
void
writeJSONStreamFile(
int version,
JSON::Writer& jw,
QPDF_Stream& stream,
int id,
qpdf_stream_decode_level_e decode_level,
std::string const& file_prefix)
{
auto filename = file_prefix + "-" + std::to_string(id);
auto* f = QUtil::safe_fopen(filename.c_str(), "wb");
Pl_StdioFile f_pl{"stream data", f};
stream.writeStreamJSON(version, jw, qpdf_sj_file, decode_level, &f_pl, filename);
f_pl.finish();
fclose(f);
}
2022-07-31 19:26:24 +00:00
// Write a complete, stand-alone JSON document for this PDF to `p`. This forwards to the full
// overload with complete == true and a fresh "no key written yet" flag.
void
QPDF::writeJSON(
    int version,
    Pipeline* p,
    qpdf_stream_decode_level_e decode_level,
    qpdf_json_stream_data_e json_stream_data,
    std::string const& file_prefix,
    std::set<std::string> wanted_objects)
{
    bool no_key_written_yet = true;
    writeJSON(
        version,
        p,
        true,
        no_key_written_yet,
        decode_level,
        json_stream_data,
        file_prefix,
        wanted_objects);
}
void
QPDF::writeJSON(
int version,
Pipeline* p,
bool complete,
bool& first_key,
qpdf_stream_decode_level_e decode_level,
qpdf_json_stream_data_e json_stream_data,
std::string const& file_prefix,
std::set<std::string> wanted_objects)
{
if (version != 2) {
throw std::runtime_error("QPDF::writeJSON: only version 2 is supported");
}
2024-02-15 18:02:01 +00:00
JSON::Writer jw{p, 4};
if (complete) {
2024-02-15 18:02:01 +00:00
jw << "{";
} else if (!first_key) {
jw << ",";
}
first_key = false;
/* clang-format off */
jw << "\n"
" \"qpdf\": [\n"
" {\n"
" \"jsonversion\": " << std::to_string(version) << ",\n"
" \"pdfversion\": \"" << getPDFVersion() << "\",\n"
" \"pushedinheritedpageresources\": " << (everPushedInheritedAttributesToPages() ? "true" : "false") << ",\n"
" \"calledgetallpages\": " << (everCalledGetAllPages() ? "true" : "false") << ",\n"
" \"maxobjectid\": " << std::to_string(getObjectCount()) << "\n"
" },\n"
" {";
/* clang-format on */
bool all_objects = wanted_objects.empty();
2024-02-15 18:02:01 +00:00
bool first = true;
for (auto& obj: getAllObjects()) {
auto const og = obj.getObjGen();
std::string key = "obj:" + og.unparse(' ') + " R";
if (all_objects || wanted_objects.count(key)) {
2024-02-15 18:02:01 +00:00
if (first) {
jw << "\n \"" << key;
first = false;
} else {
jw << "\n },\n \"" << key;
}
if (auto* stream = obj.getObjectPtr()->as<QPDF_Stream>()) {
2024-02-15 18:02:01 +00:00
jw << "\": {\n \"stream\": ";
if (json_stream_data == qpdf_sj_file) {
writeJSONStreamFile(
version, jw, *stream, og.getObj(), decode_level, file_prefix);
} else {
stream->writeStreamJSON(
version, jw, json_stream_data, decode_level, nullptr, "");
}
} else {
2024-02-15 18:02:01 +00:00
jw << "\": {\n \"value\": ";
obj.writeJSON(version, jw, true);
}
}
}
if (all_objects || wanted_objects.count("trailer")) {
2024-02-15 18:02:01 +00:00
if (!first) {
jw << "\n },";
}
jw << "\n \"trailer\": {\n \"value\": ";
getTrailer().writeJSON(version, jw, true);
first = false;
}
2024-02-15 18:02:01 +00:00
if (!first) {
jw << "\n }";
}
/* clang-format off */
jw << "\n"
" }\n"
" ]";
/* clang-format on */
if (complete) {
2024-02-15 18:02:01 +00:00
jw << "\n}\n";
p->finish();
}
}