mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-22 14:48:28 +00:00
Parse objects; stream data is not yet handled
This commit is contained in:
parent
be0ed6ab5e
commit
7e7a9c4379
9
TODO
9
TODO
@ -54,6 +54,15 @@ Soon: Break ground on "Document-level work"
|
||||
Output JSON v2
|
||||
==============
|
||||
|
||||
XXX
|
||||
|
||||
* Reread from perspective of update
|
||||
* Test all ignore cases with QTC
|
||||
* Test case of correct file with dict before data/datafile
|
||||
* Have a test case if possible that exercises the object description
|
||||
which means we need some kind of semantic error that gets caught
|
||||
after creation.
|
||||
|
||||
Try to never flatten pages tree. Make sure we do something reasonable
|
||||
with pages tree repair. The problem is that if pages tree repair is
|
||||
done as a side effect of running --json, the qpdf part of the json may
|
||||
|
@ -998,7 +998,7 @@ class QPDF
|
||||
class JSONReactor: public JSON::Reactor
|
||||
{
|
||||
public:
|
||||
JSONReactor(QPDF&, bool must_be_complete);
|
||||
JSONReactor(QPDF&, std::string const& filename, bool must_be_complete);
|
||||
virtual ~JSONReactor() = default;
|
||||
virtual void dictionaryStart() override;
|
||||
virtual void arrayStart() override;
|
||||
@ -1008,31 +1008,51 @@ class QPDF
|
||||
dictionaryItem(std::string const& key, JSON const& value) override;
|
||||
virtual bool arrayItem(JSON const& value) override;
|
||||
|
||||
bool anyErrors() const;
|
||||
|
||||
private:
|
||||
enum state_e {
|
||||
st_initial,
|
||||
st_top,
|
||||
st_ignore,
|
||||
st_qpdf,
|
||||
st_objects_top,
|
||||
st_trailer_top,
|
||||
st_objects,
|
||||
st_trailer,
|
||||
st_object_top,
|
||||
st_stream,
|
||||
st_object,
|
||||
st_ignore,
|
||||
};
|
||||
|
||||
void containerStart();
|
||||
void nestedState(std::string const& key, JSON const& value, state_e);
|
||||
QPDFObjectHandle makeObject(JSON const& value);
|
||||
void error(size_t offset, std::string const& message);
|
||||
QPDFObjectHandle
|
||||
reserveObject(std::string const& obj, std::string const& gen);
|
||||
void replaceObject(
|
||||
QPDFObjectHandle to_replace, QPDFObjectHandle replacement);
|
||||
|
||||
QPDF& pdf;
|
||||
std::string filename;
|
||||
bool must_be_complete;
|
||||
bool errors;
|
||||
bool parse_error;
|
||||
bool saw_qpdf;
|
||||
bool saw_objects;
|
||||
bool saw_json_version;
|
||||
bool saw_pdf_version;
|
||||
bool saw_trailer;
|
||||
state_e state;
|
||||
state_e next_state;
|
||||
std::string cur_object;
|
||||
bool saw_value;
|
||||
bool saw_stream;
|
||||
bool saw_dict;
|
||||
bool saw_data;
|
||||
bool saw_datafile;
|
||||
std::vector<state_e> state_stack;
|
||||
std::vector<QPDFObjectHandle> object_stack;
|
||||
std::set<QPDFObjGen> reserved;
|
||||
};
|
||||
friend class JSONReactor;
|
||||
|
||||
@ -1080,6 +1100,7 @@ class QPDF
|
||||
void resolveObjectsInStream(int obj_stream_number);
|
||||
void stopOnError(std::string const& message);
|
||||
QPDFObjectHandle reserveObjectIfNotExists(int objid, int gen);
|
||||
QPDFObjectHandle reserveStream(int objid, int gen);
|
||||
|
||||
// Calls finish() on the pipeline when done but does not delete it
|
||||
bool pipeStreamData(
|
||||
|
@ -1431,7 +1431,6 @@ class QPDFObjectHandle
|
||||
{
|
||||
return QPDFObjectHandle::newIndirect(qpdf, objid, generation);
|
||||
}
|
||||
// object must be dictionary object
|
||||
static QPDFObjectHandle
|
||||
newStream(
|
||||
QPDF* qpdf,
|
||||
|
@ -2166,6 +2166,13 @@ QPDF::reserveObjectIfNotExists(int objid, int gen)
|
||||
return getObjectByID(objid, gen);
|
||||
}
|
||||
|
||||
// Create a stream object for the given object ID and generation whose
// stream dictionary starts out as a new, empty dictionary.
// NOTE(review): the two trailing zero arguments are presumably the
// stream offset and length -- confirm against Factory::newStream.
QPDFObjectHandle
QPDF::reserveStream(int objid, int gen)
{
    QPDFObjectHandle empty_dict = QPDFObjectHandle::newDictionary();
    return QPDFObjectHandle::Factory::newStream(
        this, objid, gen, empty_dict, 0, 0);
}
|
||||
|
||||
QPDFObjectHandle
|
||||
QPDF::getObjectByObjGen(QPDFObjGen const& og)
|
||||
{
|
||||
|
@ -1,40 +1,101 @@
|
||||
#include <qpdf/QPDF.hh>
|
||||
|
||||
#include <qpdf/FileInputSource.hh>
|
||||
#include <qpdf/QIntC.hh>
|
||||
#include <qpdf/QTC.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <regex>
|
||||
|
||||
namespace
|
||||
{
|
||||
class JSONExc: public std::runtime_error
|
||||
{
|
||||
public:
|
||||
JSONExc(JSON const& value, std::string const& msg) :
|
||||
std::runtime_error(
|
||||
"offset " + QUtil::uint_to_string(value.getStart()) + ": " +
|
||||
msg)
|
||||
{
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
// This chart shows an example of the state transitions that would
|
||||
// occur in parsing a minimal file.
|
||||
|
||||
// | st_initial
|
||||
// { | -> st_top
|
||||
// "qpdf": { | -> st_qpdf
|
||||
// "objects": { | -> st_objects
|
||||
// "obj:1 0 R": { | -> st_object_top
|
||||
// "value": { | -> st_object
|
||||
// "/Pages": "2 0 R", | ...
|
||||
// "/Type": "/Catalog" | ...
|
||||
// } | <- st_object_top
|
||||
// }, | <- st_objects
|
||||
// "obj:2 0 R": { | -> st_object_top
|
||||
// "value": 12 | -> st_object
|
||||
// } | <- st_object_top
|
||||
// }, | <- st_objects
|
||||
// "obj:4 0 R": { | -> st_object_top
|
||||
// "stream": { | -> st_stream
|
||||
// "data": "cG90YXRv", | ...
|
||||
// "dict": { | -> st_object
|
||||
// "/K": true | ...
|
||||
// } | <- st_stream
|
||||
// } | <- st_object_top
|
||||
// }, | <- st_objects
|
||||
// "trailer": { | -> st_trailer
|
||||
// "value": { | -> st_object
|
||||
// "/Root": "1 0 R", | ...
|
||||
// "/Size": 7 | ...
|
||||
// } | <- st_trailer
|
||||
// } | <- st_objects
|
||||
// } | <- st_qpdf
|
||||
// } | <- st_top
|
||||
// } | <- st_initial
|
||||
|
||||
// Minimal empty PDF that createFromJSON loads before importing the
// JSON. "%PDF-1.3\n" is nine bytes long, which is why startxref
// refers to offset 9 (the start of the xref table).
static char const* JSON_PDF = (
    // force line break
    "%PDF-1.3\n"
    "xref\n"
    "0 1\n"
    "0000000000 65535 f \n"
    "trailer << /Size 1 >>\n"
    "startxref\n"
    "9\n"
    "%%EOF\n");

// Patterns used to classify JSON keys and JSON string values.

// PDF version of the form x.y
static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$");
// Key of an entry in "objects": captures object number and generation
static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$");
// Indirect object reference appearing as a string value
static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$");
// Unicode string: everything after "u:" is the string value. Use
// [\s\S] rather than '.', since '.' in the default ECMAScript grammar
// does not match line terminators and would cause strings containing
// newlines to be rejected as unrecognized.
static std::regex UNICODE_RE("^u:([\\s\\S]*)$");
// Binary string: an even number of hexadecimal digits after "b:"
static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$");
// Name: any string starting with a slash.
// NOTE(review): '.' also excludes line terminators here -- confirm
// whether names containing raw newlines can appear in the JSON.
static std::regex NAME_RE("^/.*$");
|
||||
|
||||
QPDF::JSONReactor::JSONReactor(QPDF& pdf, bool must_be_complete) :
|
||||
QPDF::JSONReactor::JSONReactor(
|
||||
QPDF& pdf, std::string const& filename, bool must_be_complete) :
|
||||
pdf(pdf),
|
||||
filename(filename),
|
||||
must_be_complete(must_be_complete),
|
||||
errors(false),
|
||||
parse_error(false),
|
||||
saw_qpdf(false),
|
||||
saw_objects(false),
|
||||
saw_json_version(false),
|
||||
saw_pdf_version(false),
|
||||
saw_trailer(false),
|
||||
state(st_initial),
|
||||
next_state(st_top)
|
||||
next_state(st_top),
|
||||
saw_value(false),
|
||||
saw_stream(false),
|
||||
saw_dict(false),
|
||||
saw_data(false),
|
||||
saw_datafile(false)
|
||||
{
|
||||
state_stack.push_back(st_initial);
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::JSONReactor::error(size_t offset, std::string const& msg)
|
||||
{
|
||||
this->errors = true;
|
||||
this->pdf.warn(
|
||||
qpdf_e_json, this->cur_object, QIntC::to_offset(offset), msg);
|
||||
}
|
||||
|
||||
bool
|
||||
QPDF::JSONReactor::anyErrors() const
|
||||
{
|
||||
return this->errors;
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::JSONReactor::containerStart()
|
||||
{
|
||||
@ -46,7 +107,6 @@ void
|
||||
QPDF::JSONReactor::dictionaryStart()
|
||||
{
|
||||
containerStart();
|
||||
// QXXXQ
|
||||
}
|
||||
|
||||
void
|
||||
@ -57,7 +117,6 @@ QPDF::JSONReactor::arrayStart()
|
||||
QTC::TC("qpdf", "QPDF_json top-level array");
|
||||
throw std::runtime_error("QPDF JSON must be a dictionary");
|
||||
}
|
||||
// QXXXQ
|
||||
}
|
||||
|
||||
void
|
||||
@ -68,23 +127,102 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
|
||||
if (state == st_initial) {
|
||||
if (!this->saw_qpdf) {
|
||||
QTC::TC("qpdf", "QPDF_json missing qpdf");
|
||||
throw std::runtime_error("\"qpdf\" object was not seen");
|
||||
error(0, "\"qpdf\" object was not seen");
|
||||
} else {
|
||||
if (!this->saw_json_version) {
|
||||
QTC::TC("qpdf", "QPDF_json missing json version");
|
||||
error(0, "\"qpdf.jsonversion\" was not seen");
|
||||
}
|
||||
if (must_be_complete && !this->saw_pdf_version) {
|
||||
QTC::TC("qpdf", "QPDF_json missing pdf version");
|
||||
error(0, "\"qpdf.pdfversion\" was not seen");
|
||||
}
|
||||
if (!this->saw_objects) {
|
||||
QTC::TC("qpdf", "QPDF_json missing objects");
|
||||
error(0, "\"qpdf.objects\" was not seen");
|
||||
} else {
|
||||
if (must_be_complete && !this->saw_trailer) {
|
||||
QTC::TC("qpdf", "QPDF_json missing trailer");
|
||||
error(0, "\"qpdf.objects.trailer\" was not seen");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!this->saw_json_version) {
|
||||
QTC::TC("qpdf", "QPDF_json missing json version");
|
||||
throw std::runtime_error("\"qpdf.jsonversion\" was not seen");
|
||||
} else if (state == st_objects) {
|
||||
if (parse_error) {
|
||||
// ignore
|
||||
} else if (cur_object == "trailer") {
|
||||
if (!saw_value) {
|
||||
QTC::TC("qpdf", "QPDF_json trailer no value");
|
||||
error(value.getStart(), "\"trailer\" is missing \"value\"");
|
||||
}
|
||||
} else if (saw_value == saw_stream) {
|
||||
QTC::TC("qpdf", "QPDF_json value stream both or neither");
|
||||
error(
|
||||
value.getStart(),
|
||||
"object must have exactly one of \"value\" or \"stream\"");
|
||||
}
|
||||
if (must_be_complete && !this->saw_pdf_version) {
|
||||
QTC::TC("qpdf", "QPDF_json missing pdf version");
|
||||
throw std::runtime_error("\"qpdf.pdfversion\" was not seen");
|
||||
object_stack.clear();
|
||||
this->cur_object = "";
|
||||
this->saw_dict = false;
|
||||
this->saw_data = false;
|
||||
this->saw_datafile = false;
|
||||
this->saw_value = false;
|
||||
this->saw_stream = false;
|
||||
} else if (state == st_object_top) {
|
||||
if (saw_stream) {
|
||||
if (!saw_dict) {
|
||||
QTC::TC("qpdf", "QPDF_json stream no dict");
|
||||
error(value.getStart(), "\"stream\" is missing \"dict\"");
|
||||
}
|
||||
if (must_be_complete) {
|
||||
if (saw_data == saw_datafile) {
|
||||
QTC::TC("qpdf", "QPDF_json data datafile both or neither");
|
||||
error(
|
||||
value.getStart(),
|
||||
"\"stream\" must have exactly one of \"data\" or "
|
||||
"\"datafile\"");
|
||||
}
|
||||
} else if (saw_data && saw_datafile) {
|
||||
// QXXXQ
|
||||
/// QTC::TC("qpdf", "QPDF_json data and datafile");
|
||||
error(
|
||||
value.getStart(),
|
||||
"\"stream\" may at most one of \"data\" or \"datafile\"");
|
||||
}
|
||||
}
|
||||
if (must_be_complete && !this->saw_trailer) {
|
||||
/// QTC::TC("qpdf", "QPDF_json missing trailer");
|
||||
throw std::runtime_error("\"qpdf.objects.trailer\" was not seen");
|
||||
} else if ((state == st_stream) || (state == st_object)) {
|
||||
if (!parse_error) {
|
||||
object_stack.pop_back();
|
||||
}
|
||||
} else if (state == st_qpdf) {
|
||||
for (auto const& og: this->reserved) {
|
||||
// QXXXQ
|
||||
// QTC::TC("qpdf", "QPDF_json non-trivial null reserved");
|
||||
this->pdf.replaceObject(og, QPDFObjectHandle::newNull());
|
||||
}
|
||||
this->reserved.clear();
|
||||
}
|
||||
}
|
||||
|
||||
// QXXXQ
|
||||
// Return a handle for the object identified by the given decimal
// object number and generation strings, reserving it in the QPDF if
// it does not exist yet. Objects that are still reserved are tracked
// in `reserved`; containerEnd replaces any that remain with null.
QPDFObjectHandle
QPDF::JSONReactor::reserveObject(std::string const& obj, std::string const& gen)
{
    int const objid = QUtil::string_to_int(obj.c_str());
    int const generation = QUtil::string_to_int(gen.c_str());
    QPDFObjectHandle handle = pdf.reserveObjectIfNotExists(objid, generation);
    if (!handle.isReserved()) {
        return handle;
    }
    this->reserved.insert(QPDFObjGen(objid, generation));
    return handle;
}
|
||||
|
||||
void
|
||||
QPDF::JSONReactor::replaceObject(
|
||||
QPDFObjectHandle to_replace, QPDFObjectHandle replacement)
|
||||
{
|
||||
auto og = to_replace.getObjGen();
|
||||
this->reserved.erase(og);
|
||||
this->pdf.replaceObject(og, replacement);
|
||||
}
|
||||
|
||||
void
|
||||
@ -100,16 +238,20 @@ QPDF::JSONReactor::nestedState(
|
||||
{
|
||||
// Use this method when the next state is for processing a nested
|
||||
// dictionary.
|
||||
if (!value.isDictionary()) {
|
||||
throw JSONExc(value, "\"" + key + "\" must be a dictionary");
|
||||
if (value.isDictionary()) {
|
||||
this->next_state = next;
|
||||
} else {
|
||||
error(value.getStart(), "\"" + key + "\" must be a dictionary");
|
||||
this->next_state = st_ignore;
|
||||
this->parse_error = true;
|
||||
}
|
||||
this->next_state = next;
|
||||
}
|
||||
|
||||
bool
|
||||
QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
|
||||
{
|
||||
if (state == st_ignore) {
|
||||
QTC::TC("qpdf", "QPDF_json ignoring in st_ignore");
|
||||
// ignore
|
||||
} else if (state == st_top) {
|
||||
if (key == "qpdf") {
|
||||
@ -118,6 +260,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
|
||||
} else {
|
||||
// Ignore all other fields for forward compatibility.
|
||||
// Don't use nestedState since this can be any type.
|
||||
// QXXXQ QTC
|
||||
next_state = st_ignore;
|
||||
}
|
||||
} else if (state == st_qpdf) {
|
||||
@ -126,7 +269,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
|
||||
std::string v;
|
||||
if (!(value.getNumber(v) && (v == "2"))) {
|
||||
QTC::TC("qpdf", "QPDF_json bad json version");
|
||||
throw JSONExc(value, "only JSON version 2 is supported");
|
||||
error(value.getStart(), "only JSON version 2 is supported");
|
||||
}
|
||||
} else if (key == "pdfversion") {
|
||||
this->saw_pdf_version = true;
|
||||
@ -141,81 +284,197 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
|
||||
}
|
||||
if (!version_okay) {
|
||||
QTC::TC("qpdf", "QPDF_json bad pdf version");
|
||||
throw JSONExc(value, "invalid PDF version (must be x.y)");
|
||||
error(value.getStart(), "invalid PDF version (must be x.y)");
|
||||
}
|
||||
} else if (key == "objects") {
|
||||
nestedState(key, value, st_objects_top);
|
||||
this->saw_objects = true;
|
||||
nestedState(key, value, st_objects);
|
||||
} else {
|
||||
// ignore unknown keys for forward compatibility
|
||||
// QXXXQ QTC
|
||||
next_state = st_ignore;
|
||||
}
|
||||
} else if (state == st_objects_top) {
|
||||
} else if (state == st_objects) {
|
||||
std::smatch m;
|
||||
if (key == "trailer") {
|
||||
this->saw_trailer = true;
|
||||
nestedState(key, value, st_trailer_top);
|
||||
// QXXXQ
|
||||
nestedState(key, value, st_trailer);
|
||||
this->cur_object = "trailer";
|
||||
} else if (std::regex_match(key, m, OBJ_KEY_RE)) {
|
||||
// QXXXQ remember to handle null for delete
|
||||
object_stack.push_back(reserveObject(m[1].str(), m[2].str()));
|
||||
nestedState(key, value, st_object_top);
|
||||
// QXXXQ
|
||||
this->cur_object = key;
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDF_json bad object key");
|
||||
throw JSONExc(
|
||||
value, "object key should be \"trailer\" or \"obj:n n R\"");
|
||||
error(
|
||||
value.getStart(),
|
||||
"object key should be \"trailer\" or \"obj:n n R\"");
|
||||
next_state = st_ignore;
|
||||
parse_error = true;
|
||||
}
|
||||
} else if (state == st_object_top) {
|
||||
if (object_stack.size() == 0) {
|
||||
throw std::logic_error("no object on stack in st_object_top");
|
||||
}
|
||||
auto tos = object_stack.back();
|
||||
QPDFObjectHandle replacement;
|
||||
if (key == "value") {
|
||||
// Don't use nestedState since this can have any type.
|
||||
this->saw_value = true;
|
||||
next_state = st_object;
|
||||
// QXXXQ
|
||||
replacement = makeObject(value);
|
||||
replaceObject(tos, replacement);
|
||||
} else if (key == "stream") {
|
||||
this->saw_stream = true;
|
||||
nestedState(key, value, st_stream);
|
||||
// QXXXQ
|
||||
if (tos.isStream()) {
|
||||
// QXXXQ reusing -- need QTC
|
||||
} else {
|
||||
replacement =
|
||||
pdf.reserveStream(tos.getObjectID(), tos.getGeneration());
|
||||
replaceObject(tos, replacement);
|
||||
replacement.replaceStreamData(
|
||||
"", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ
|
||||
}
|
||||
} else {
|
||||
// Ignore unknown keys for forward compatibility
|
||||
// QXXXQ QTC
|
||||
next_state = st_ignore;
|
||||
}
|
||||
} else if (state == st_trailer_top) {
|
||||
if (replacement.isInitialized()) {
|
||||
object_stack.pop_back();
|
||||
object_stack.push_back(replacement);
|
||||
}
|
||||
} else if (state == st_trailer) {
|
||||
if (key == "value") {
|
||||
this->saw_value = true;
|
||||
// The trailer must be a dictionary, so we can use nestedState.
|
||||
nestedState("trailer.value", value, st_object);
|
||||
// QXXXQ
|
||||
this->pdf.m->trailer = makeObject(value);
|
||||
} else if (key == "stream") {
|
||||
// Don't need to set saw_stream here since there's already
|
||||
// an error.
|
||||
QTC::TC("qpdf", "QPDF_json trailer stream");
|
||||
throw JSONExc(value, "the trailer may not be a stream");
|
||||
error(value.getStart(), "the trailer may not be a stream");
|
||||
next_state = st_ignore;
|
||||
parse_error = true;
|
||||
} else {
|
||||
// Ignore unknown keys for forward compatibility
|
||||
// QXXXQ QTC
|
||||
next_state = st_ignore;
|
||||
}
|
||||
} else if (state == st_stream) {
|
||||
if (key == "dict") {
|
||||
if (object_stack.size() == 0) {
|
||||
throw std::logic_error("no object on stack in st_stream");
|
||||
}
|
||||
auto tos = object_stack.back();
|
||||
if (!tos.isStream()) {
|
||||
// QXXXQ QTC in update mode
|
||||
error(value.getStart(), "this object is not a stream");
|
||||
parse_error = true;
|
||||
} else if (key == "dict") {
|
||||
this->saw_dict = true;
|
||||
// Since a stream dictionary must be a dictionary, we can
|
||||
// use nestedState to transition to st_object.
|
||||
nestedState("stream.dict", value, st_object);
|
||||
// QXXXQ
|
||||
auto dict = makeObject(value);
|
||||
if (dict.isDictionary()) {
|
||||
tos.replaceDict(dict);
|
||||
} else {
|
||||
// An error had already been given by nestedState
|
||||
QTC::TC("qpdf", "QPDF_json stream dict not dict");
|
||||
parse_error = true;
|
||||
}
|
||||
} else if (key == "data") {
|
||||
this->saw_data = true;
|
||||
// QXXXQ
|
||||
} else if (key == "datafile") {
|
||||
this->saw_datafile = true;
|
||||
// QXXXQ
|
||||
} else {
|
||||
// Ignore unknown keys for forward compatibility.
|
||||
// QXXXQ QTC
|
||||
next_state = st_ignore;
|
||||
}
|
||||
} else if (state == st_object) {
|
||||
// QXXXQ
|
||||
if (!parse_error) {
|
||||
auto dict = object_stack.back();
|
||||
if (dict.isStream()) {
|
||||
dict = dict.getDict();
|
||||
}
|
||||
dict.replaceKey(key, makeObject(value));
|
||||
}
|
||||
} else {
|
||||
throw std::logic_error(
|
||||
"QPDF_json: unknown state " + QUtil::int_to_string(state));
|
||||
}
|
||||
|
||||
// QXXXQ
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
QPDF::JSONReactor::arrayItem(JSON const& value)
|
||||
{
|
||||
// QXXXQ
|
||||
if (state == st_object) {
|
||||
if (!parse_error) {
|
||||
auto tos = object_stack.back();
|
||||
tos.appendItem(makeObject(value));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Convert a single JSON value into the corresponding QPDFObjectHandle.
//
//  - dictionary / array: a new empty dictionary / array, which is also
//    pushed onto the object stack
//  - null, boolean: the matching PDF null / boolean
//  - number: an integer if the text is a valid long long, otherwise a
//    real built from the number's text
//  - string: "n n R" reserves/looks up the indirect object, "u:..."
//    is a unicode string, "b:<hex>" is a binary string, "/..." is a
//    name; anything else is reported through error() and becomes null
//
// The result's object description is set to the input file name and
// the offset of the JSON value. Throws std::logic_error if none of
// the cases above produced an object.
QPDFObjectHandle
QPDF::JSONReactor::makeObject(JSON const& value)
{
    QPDFObjectHandle oh;
    std::string text;
    bool flag = false;

    if (value.isDictionary() || value.isArray()) {
        if (value.isDictionary()) {
            oh = QPDFObjectHandle::newDictionary();
        } else {
            oh = QPDFObjectHandle::newArray();
        }
        // Containers go on the stack so later items can be added.
        object_stack.push_back(oh);
    } else if (value.isNull()) {
        oh = QPDFObjectHandle::newNull();
    } else if (value.getBool(flag)) {
        oh = QPDFObjectHandle::newBool(flag);
    } else if (value.getNumber(text)) {
        char const* const digits = text.c_str();
        if (QUtil::is_long_long(digits)) {
            oh = QPDFObjectHandle::newInteger(QUtil::string_to_ll(digits));
        } else {
            oh = QPDFObjectHandle::newReal(text);
        }
    } else if (value.getString(text)) {
        std::smatch match;
        if (std::regex_match(text, match, INDIRECT_OBJ_RE)) {
            oh = reserveObject(match[1].str(), match[2].str());
        } else if (std::regex_match(text, match, UNICODE_RE)) {
            oh = QPDFObjectHandle::newUnicodeString(match[1].str());
        } else if (std::regex_match(text, match, BINARY_RE)) {
            oh = QPDFObjectHandle::newString(
                QUtil::hex_decode(match[1].str()));
        } else if (std::regex_match(text, match, NAME_RE)) {
            oh = QPDFObjectHandle::newName(text);
        } else {
            QTC::TC("qpdf", "QPDF_json unrecognized string value");
            error(value.getStart(), "unrecognized string value");
            oh = QPDFObjectHandle::newNull();
        }
    }

    if (!oh.isInitialized()) {
        throw std::logic_error(
            "JSONReactor::makeObject didn't initialize the object");
    }

    // QXXXQ include object number in description
    std::string description = this->filename;
    description += " offset ";
    description += QUtil::uint_to_string(value.getStart());
    oh.setObjectDescription(&this->pdf, description);
    return oh;
}
|
||||
|
||||
void
|
||||
QPDF::createFromJSON(std::string const& json_file)
|
||||
{
|
||||
@ -225,6 +484,7 @@ QPDF::createFromJSON(std::string const& json_file)
|
||||
void
|
||||
QPDF::createFromJSON(std::shared_ptr<InputSource> is)
|
||||
{
|
||||
processMemoryFile(is->getName().c_str(), JSON_PDF, strlen(JSON_PDF));
|
||||
importJSON(is, true);
|
||||
}
|
||||
|
||||
@ -243,10 +503,19 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
|
||||
void
|
||||
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
|
||||
{
|
||||
JSONReactor reactor(*this, must_be_complete);
|
||||
JSONReactor reactor(*this, is->getName(), must_be_complete);
|
||||
try {
|
||||
JSON::parse(*is, &reactor);
|
||||
} catch (std::runtime_error& e) {
|
||||
throw std::runtime_error(is->getName() + ": " + e.what());
|
||||
}
|
||||
if (reactor.anyErrors()) {
|
||||
throw std::runtime_error(is->getName() + ": errors found in JSON");
|
||||
}
|
||||
// QXXXQ
|
||||
// std::cout << "trailer:\n" << getTrailer().unparse() << std::endl;
|
||||
// for (auto& oh: getAllObjects()) {
|
||||
// std::cout << oh.unparse() << ":" << std::endl;
|
||||
// std::cout << oh.unparseResolved() << std::endl;
|
||||
// }
|
||||
}
|
||||
|
@ -659,3 +659,12 @@ QPDF_json bad pdf version 0
|
||||
QPDF_json top-level array 0
|
||||
QPDF_json bad object key 0
|
||||
QPDF_json trailer stream 0
|
||||
QPDF_json missing trailer 0
|
||||
QPDF_json missing objects 0
|
||||
QPDF_json ignoring in st_ignore 0
|
||||
QPDF_json stream dict not dict 0
|
||||
QPDF_json unrecognized string value 0
|
||||
QPDF_json data datafile both or neither 0
|
||||
QPDF_json stream no dict 0
|
||||
QPDF_json trailer no value 0
|
||||
QPDF_json value stream both or neither 0
|
||||
|
@ -33,6 +33,9 @@ my @badfiles = (
|
||||
'stream-dict-not-dict',
|
||||
'trailer-not-dict',
|
||||
'trailer-stream',
|
||||
'missing-trailer',
|
||||
'missing-objects',
|
||||
'obj-key-errors',
|
||||
);
|
||||
|
||||
$n_tests += scalar(@badfiles);
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-bad-json-version1.json: offset 98: only JSON version 2 is supported
|
||||
WARNING: qjson-bad-json-version1.json (offset 98): only JSON version 2 is supported
|
||||
qpdf: qjson-bad-json-version1.json: errors found in JSON
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-bad-json-version2.json: offset 98: only JSON version 2 is supported
|
||||
WARNING: qjson-bad-json-version2.json (offset 98): only JSON version 2 is supported
|
||||
qpdf: qjson-bad-json-version2.json: errors found in JSON
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-bad-object-key.json: offset 181: object key should be "trailer" or "obj:n n R"
|
||||
WARNING: qjson-bad-object-key.json (offset 181): object key should be "trailer" or "obj:n n R"
|
||||
qpdf: qjson-bad-object-key.json: errors found in JSON
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-bad-pdf-version1.json: offset 119: invalid PDF version (must be x.y)
|
||||
WARNING: qjson-bad-pdf-version1.json (offset 119): invalid PDF version (must be x.y)
|
||||
qpdf: qjson-bad-pdf-version1.json: errors found in JSON
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-bad-pdf-version2.json: offset 119: invalid PDF version (must be x.y)
|
||||
WARNING: qjson-bad-pdf-version2.json (offset 119): invalid PDF version (must be x.y)
|
||||
qpdf: qjson-bad-pdf-version2.json: errors found in JSON
|
||||
|
11
qpdf/qtest/qpdf/qjson-missing-objects.json
Normal file
11
qpdf/qtest/qpdf/qjson-missing-objects.json
Normal file
@ -0,0 +1,11 @@
|
||||
{
|
||||
"version": 2,
|
||||
"parameters": {
|
||||
"decodelevel": "none"
|
||||
},
|
||||
"qpdf": {
|
||||
"jsonversion": 2,
|
||||
"pdfversion": "1.3",
|
||||
"maxobjectid": 6
|
||||
}
|
||||
}
|
2
qpdf/qtest/qpdf/qjson-missing-objects.out
Normal file
2
qpdf/qtest/qpdf/qjson-missing-objects.out
Normal file
@ -0,0 +1,2 @@
|
||||
WARNING: qjson-missing-objects.json: "qpdf.objects" was not seen
|
||||
qpdf: qjson-missing-objects.json: errors found in JSON
|
67
qpdf/qtest/qpdf/qjson-missing-trailer.json
Normal file
67
qpdf/qtest/qpdf/qjson-missing-trailer.json
Normal file
@ -0,0 +1,67 @@
|
||||
{
|
||||
"version": 2,
|
||||
"parameters": {
|
||||
"decodelevel": "none"
|
||||
},
|
||||
"qpdf": {
|
||||
"jsonversion": 2,
|
||||
"pdfversion": "1.3",
|
||||
"maxobjectid": 6,
|
||||
"objects": {
|
||||
"obj:1 0 R": {
|
||||
"value": {
|
||||
"/Pages": "2 0 R",
|
||||
"/Type": "/Catalog"
|
||||
}
|
||||
},
|
||||
"obj:2 0 R": {
|
||||
"value": {
|
||||
"/Count": 1,
|
||||
"/Kids": [
|
||||
"3 0 R"
|
||||
],
|
||||
"/Type": "/Pages"
|
||||
}
|
||||
},
|
||||
"obj:3 0 R": {
|
||||
"value": {
|
||||
"/Contents": "4 0 R",
|
||||
"/MediaBox": [
|
||||
0,
|
||||
0,
|
||||
612,
|
||||
792
|
||||
],
|
||||
"/Parent": "2 0 R",
|
||||
"/Resources": {
|
||||
"/Font": {
|
||||
"/F1": "6 0 R"
|
||||
},
|
||||
"/ProcSet": "5 0 R"
|
||||
},
|
||||
"/Type": "/Page"
|
||||
}
|
||||
},
|
||||
"obj:4 0 R": {
|
||||
"stream": {
|
||||
"data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
|
||||
"dict": {}
|
||||
}
|
||||
},
|
||||
"obj:5 0 R": {
|
||||
"value": [
|
||||
"/PDF",
|
||||
"/Text"
|
||||
]
|
||||
},
|
||||
"obj:6 0 R": {
|
||||
"value": {
|
||||
"/BaseFont": "/Helvetica",
|
||||
"/Encoding": "/WinAnsiEncoding",
|
||||
"/Subtype": "/Type1",
|
||||
"/Type": "/Font"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
2
qpdf/qtest/qpdf/qjson-missing-trailer.out
Normal file
2
qpdf/qtest/qpdf/qjson-missing-trailer.out
Normal file
@ -0,0 +1,2 @@
|
||||
WARNING: qjson-missing-trailer.json: "qpdf.objects.trailer" was not seen
|
||||
qpdf: qjson-missing-trailer.json: errors found in JSON
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-no-json-version.json: "qpdf.jsonversion" was not seen
|
||||
WARNING: qjson-no-json-version.json: "qpdf.jsonversion" was not seen
|
||||
qpdf: qjson-no-json-version.json: errors found in JSON
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-no-pdf-version.json: "qpdf.pdfversion" was not seen
|
||||
WARNING: qjson-no-pdf-version.json: "qpdf.pdfversion" was not seen
|
||||
qpdf: qjson-no-pdf-version.json: errors found in JSON
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-no-qpdf-object.json: "qpdf" object was not seen
|
||||
WARNING: qjson-no-qpdf-object.json: "qpdf" object was not seen
|
||||
qpdf: qjson-no-qpdf-object.json: errors found in JSON
|
||||
|
64
qpdf/qtest/qpdf/qjson-obj-key-errors.json
Normal file
64
qpdf/qtest/qpdf/qjson-obj-key-errors.json
Normal file
@ -0,0 +1,64 @@
|
||||
{
|
||||
"version": 2,
|
||||
"parameters": {
|
||||
"decodelevel": "none"
|
||||
},
|
||||
"qpdf": {
|
||||
"jsonversion": 2,
|
||||
"pdfversion": "1.3",
|
||||
"maxobjectid": 6,
|
||||
"objects": {
|
||||
"obj:1 0 R": {
|
||||
"value": {
|
||||
"/Pages": "2 0 R",
|
||||
"/Type": "/Catalog"
|
||||
}
|
||||
},
|
||||
"obj:2 0 R": {
|
||||
"value": {
|
||||
"/Count": 1,
|
||||
"/Kids": [
|
||||
"3 0 R"
|
||||
],
|
||||
"/Type": "/Pages"
|
||||
},
|
||||
"stream": {
|
||||
"data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
|
||||
"dict": {}
|
||||
}
|
||||
},
|
||||
"obj:3 0 R": {
|
||||
"potato": {
|
||||
"salad": "ignored-so-no-string-error",
|
||||
"nested": [1, 2, {"x": "y"}]
|
||||
}
|
||||
},
|
||||
"obj:4 0 R": {
|
||||
"stream": {
|
||||
"potato": "u:salad"
|
||||
}
|
||||
},
|
||||
"obj:5 0 R": {
|
||||
"stream": {
|
||||
"dict": {"/A": "/B"},
|
||||
"data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
|
||||
"datafile": "abc"
|
||||
}
|
||||
},
|
||||
"obj:6 0 R": {
|
||||
"value": {
|
||||
"/BaseFont": "/Helvetica",
|
||||
"/Encoding": "/WinAnsiEncoding",
|
||||
"/Subtype": "/Type1",
|
||||
"/Type": "/Font"
|
||||
}
|
||||
},
|
||||
"trailer": {
|
||||
"potato": {
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 7
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
7
qpdf/qtest/qpdf/qjson-obj-key-errors.out
Normal file
7
qpdf/qtest/qpdf/qjson-obj-key-errors.out
Normal file
@ -0,0 +1,7 @@
|
||||
WARNING: qjson-obj-key-errors.json (obj:2 0 R, offset 302): object must have exactly one of "value" or "stream"
|
||||
WARNING: qjson-obj-key-errors.json (obj:3 0 R, offset 600): object must have exactly one of "value" or "stream"
|
||||
WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 768): "stream" is missing "dict"
|
||||
WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 768): "stream" must have exactly one of "data" or "datafile"
|
||||
WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 858): "stream" must have exactly one of "data" or "datafile"
|
||||
WARNING: qjson-obj-key-errors.json (trailer, offset 1236): "trailer" is missing "value"
|
||||
qpdf: qjson-obj-key-errors.json: errors found in JSON
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-object-not-dict.json: offset 184: "obj:1 0 R" must be a dictionary
|
||||
WARNING: qjson-object-not-dict.json (offset 184): "obj:1 0 R" must be a dictionary
|
||||
qpdf: qjson-object-not-dict.json: errors found in JSON
|
||||
|
@ -1 +1,3 @@
|
||||
qpdf: qjson-objects-not-dict.json: offset 77: "objects" must be a dictionary
|
||||
WARNING: qjson-objects-not-dict.json (offset 77): "objects" must be a dictionary
|
||||
WARNING: qjson-objects-not-dict.json: "qpdf.objects.trailer" was not seen
|
||||
qpdf: qjson-objects-not-dict.json: errors found in JSON
|
||||
|
@ -1 +1,5 @@
|
||||
qpdf: qjson-stream-dict-not-dict.json: offset 137: "stream.dict" must be a dictionary
|
||||
WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 137): "stream.dict" must be a dictionary
|
||||
WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 137): unrecognized string value
|
||||
WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 117): "stream" must have exactly one of "data" or "datafile"
|
||||
WARNING: qjson-stream-dict-not-dict.json: "qpdf.objects.trailer" was not seen
|
||||
qpdf: qjson-stream-dict-not-dict.json: errors found in JSON
|
||||
|
@ -1 +1,3 @@
|
||||
qpdf: qjson-stream-not-dict.json: offset 118: "stream" must be a dictionary
|
||||
WARNING: qjson-stream-not-dict.json (obj:1 0 R, offset 118): "stream" must be a dictionary
|
||||
WARNING: qjson-stream-not-dict.json: "qpdf.objects.trailer" was not seen
|
||||
qpdf: qjson-stream-not-dict.json: errors found in JSON
|
||||
|
@ -63,7 +63,7 @@
|
||||
}
|
||||
},
|
||||
"trailer": {
|
||||
"value": false,
|
||||
"value": false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-trailer-not-dict.json: offset 1326: "trailer.value" must be a dictionary
|
||||
WARNING: qjson-trailer-not-dict.json (trailer, offset 1327): "trailer.value" must be a dictionary
|
||||
qpdf: qjson-trailer-not-dict.json: errors found in JSON
|
||||
|
@ -63,7 +63,7 @@
|
||||
}
|
||||
},
|
||||
"trailer": {
|
||||
"stream": {},
|
||||
"stream": {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1 +1,2 @@
|
||||
qpdf: qjson-trailer-stream.json: offset 1327: the trailer may not be a stream
|
||||
WARNING: qjson-trailer-stream.json (trailer, offset 1327): the trailer may not be a stream
|
||||
qpdf: qjson-trailer-stream.json: errors found in JSON
|
||||
|
Loading…
x
Reference in New Issue
Block a user