diff --git a/ChangeLog b/ChangeLog index 9c5ec9ce..c1024773 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2018-12-18 Jay Berkenbilt + * Add a simple JSON serializer. This is not a complete or + general-purpose JSON library. It allows assembly and serialization + of JSON structures with some restrictions, which are described in + the header file. + * Add QPDFNameTreeObjectHelper class. This class provides useful methods for dealing with name trees, which are discussed in section 7.9.6 of the PDF spec (ISO-32000). diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh new file mode 100644 index 00000000..f5dd57d5 --- /dev/null +++ b/include/qpdf/JSON.hh @@ -0,0 +1,170 @@ +// Copyright (c) 2005-2018 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef JSON_HH +#define JSON_HH + +// This is a simple JSON serializer, primarily designed for +// serializing QPDF Objects as JSON. JSON objects contain their data +// as smart pointers. When one JSON object is added to another, this +// pointer is copied. This means you can create temporary JSON objects +// on the stack, add them to other objects, and let them go out of +// scope safely.
It also means that if the same JSON object is added +// in more than one place, all copies share underlying data. + +#include +#include +#include +#include +#include +#include + +class JSON +{ + public: + QPDF_DLL + std::string serialize() const; + + // The JSON spec calls dictionaries "objects", but that creates + // too much confusion when referring to instances of the JSON + // class. + QPDF_DLL + static JSON makeDictionary(); + // addDictionaryMember returns the newly added item. + QPDF_DLL + JSON addDictionaryMember(std::string const& key, JSON const&); + QPDF_DLL + static JSON makeArray(); + // addArrayElement returns the newly added item. + QPDF_DLL + JSON addArrayElement(JSON const&); + QPDF_DLL + static JSON makeString(std::string const& utf8); + QPDF_DLL + static JSON makeInt(long long int value); + QPDF_DLL + static JSON makeReal(double value); + QPDF_DLL + static JSON makeNumber(std::string const& encoded); + QPDF_DLL + static JSON makeBool(bool value); + QPDF_DLL + static JSON makeNull(); + + // Check this JSON object against a "schema". This is not a schema + // according to any standard. It's just a template of what the + // JSON is supposed to contain. The checking does the following: + // + // * The schema is a nested structure containing dictionaries, + // single-element arrays, and strings only. + // * Recursively walk the schema + // * If the current value is a dictionary, this object must have + // a dictionary in the same place with the same keys + // * If the current value is an array, this object must have an + // array in the same place. The schema's array must contain a + // single element, which is used as a schema to validate each + // element of this object's corresponding array. + // * Otherwise, the value is ignored. + // + // QPDF's JSON output conforms to certain strict compatibility + // rules as discussed in the manual.
The idea is that a JSON + // structure created manually in qpdf.cc doubles as both JSON help + // information and a schema for validating the JSON that qpdf + // generates. Any discrepancies are a bug in qpdf. + QPDF_DLL + bool checkSchema(JSON schema, std::list& errors); + + private: + static std::string encode_string(std::string const& utf8); + + struct JSON_value + { + virtual ~JSON_value(); + virtual std::string unparse(size_t depth) const = 0; + }; + struct JSON_dictionary: public JSON_value + { + virtual ~JSON_dictionary(); + virtual std::string unparse(size_t depth) const; + std::map > members; + }; + struct JSON_array: public JSON_value + { + virtual ~JSON_array(); + virtual std::string unparse(size_t depth) const; + std::vector > elements; + }; + struct JSON_string: public JSON_value + { + JSON_string(std::string const& utf8); + virtual ~JSON_string(); + virtual std::string unparse(size_t depth) const; + std::string encoded; + }; + struct JSON_number: public JSON_value + { + JSON_number(long long val); + JSON_number(double val); + JSON_number(std::string const& val); + virtual ~JSON_number(); + virtual std::string unparse(size_t depth) const; + std::string encoded; + }; + struct JSON_bool: public JSON_value + { + JSON_bool(bool val); + virtual ~JSON_bool(); + virtual std::string unparse(size_t depth) const; + bool value; + }; + struct JSON_null: public JSON_value + { + virtual ~JSON_null(); + virtual std::string unparse(size_t depth) const; + }; + + JSON(PointerHolder); + + static bool + checkSchemaInternal(JSON_value* this_v, JSON_value* sch_v, + std::list& errors, + std::string prefix); + + class Members + { + friend class JSON; + + public: + QPDF_DLL + ~Members(); + + private: + Members(PointerHolder); + Members(Members const&); + + PointerHolder value; + }; + + PointerHolder m; +}; + + +#endif // JSON_HH diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc new file mode 100644 index 00000000..def439cf --- /dev/null +++ b/libqpdf/JSON.cc @@ -0,0 +1,396 @@
+#include +#include +#include +#include + +JSON::Members::~Members() +{ +} + +JSON::Members::Members(PointerHolder value) : + value(value) +{ +} + +JSON::JSON(PointerHolder value) : + m(new Members(value)) +{ +} + +JSON::JSON_value::~JSON_value() +{ +} + +JSON::JSON_dictionary::~JSON_dictionary() +{ +} + +std::string JSON::JSON_dictionary::unparse(size_t depth) const +{ /* Serialize as "{", one "key": value member per line, then "}"; depth is the current nesting level. */ + std::string result = "{"; + bool first = true; + for (std::map >::const_iterator + iter = members.begin(); + iter != members.end(); ++iter) + { + if (first) + { + first = false; + } + else + { + result.append(1, ','); + } + result.append(1, '\n'); + result.append(2 * (1 + depth), ' '); /* members are indented two spaces per nesting level */ + result += ("\"" + (*iter).first + "\": " + + (*iter).second->unparse(1 + depth)); /* the key is written exactly as stored */ + } + if (! first) /* non-empty: the closing brace goes on its own line; an empty dictionary yields "{}" */ + { + result.append(1, '\n'); + result.append(2 * depth, ' '); + } + result.append(1, '}'); + return result; +} + +JSON::JSON_array::~JSON_array() +{ +} + +std::string JSON::JSON_array::unparse(size_t depth) const +{ /* Same layout with brackets: one element per line; an empty array yields "[]". */ + std::string result = "["; + bool first = true; + for (std::vector >::const_iterator iter = + elements.begin(); + iter != elements.end(); ++iter) + { + if (first) + { + first = false; + } + else + { + result.append(1, ','); + } + result.append(1, '\n'); + result.append(2 * (1 + depth), ' '); + result += (*iter)->unparse(1 + depth); + } + if (! 
first) + { + result.append(1, '\n'); + result.append(2 * depth, ' '); + } + result.append(1, ']'); + return result; +} + +JSON::JSON_string::JSON_string(std::string const& utf8) : + encoded(encode_string(utf8)) /* escaped once here; unparse() only adds the surrounding quotes */ +{ +} + +JSON::JSON_string::~JSON_string() +{ +} + +std::string JSON::JSON_string::unparse(size_t) const +{ + return "\"" + encoded + "\""; +} + +JSON::JSON_number::JSON_number(long long value) : + encoded(QUtil::int_to_string(value)) +{ +} + +JSON::JSON_number::JSON_number(double value) : + encoded(QUtil::double_to_string(value, 6)) /* libtests/json.cc expects 3.14159 to serialize as 3.141590 */ +{ +} + +JSON::JSON_number::JSON_number(std::string const& value) : + encoded(value) /* caller-supplied text is stored and later emitted unchanged */ +{ +} + +JSON::JSON_number::~JSON_number() +{ +} + +std::string JSON::JSON_number::unparse(size_t) const +{ + return encoded; +} + +JSON::JSON_bool::JSON_bool(bool val) : + value(val) +{ +} + +JSON::JSON_bool::~JSON_bool() +{ +} + +std::string JSON::JSON_bool::unparse(size_t) const +{ + return value ? "true" : "false"; +} + +JSON::JSON_null::~JSON_null() +{ +} + +std::string JSON::JSON_null::unparse(size_t) const +{ + return "null"; +} + +std::string +JSON::serialize() const +{ + if (0 == this->m->value.getPointer()) /* a JSON holding no value serializes as null */ + { + return "null"; + } + else + { + return this->m->value->unparse(0); + } +} + +std::string +JSON::encode_string(std::string const& str) +{ /* Escape str for use inside a JSON string literal; the result has no surrounding quotes. */ + std::string result; + size_t len = str.length(); + for (size_t i = 0; i < len; ++i) + { + unsigned char ch = static_cast(str.at(i)); + switch (ch) + { + case '\\': + result += "\\\\"; + break; + case '\"': + result += "\\\""; + break; + case '\b': + result += "\\b"; + break; + case '\n': + result += "\\n"; + break; + case '\r': + result += "\\r"; + break; + case '\t': + result += "\\t"; + break; + default: + if (ch < 32) /* any other control character becomes a unicode escape built with int_to_string_base(ch, 16, 4) */ + { + result += "\\u" + QUtil::int_to_string_base(ch, 16, 4); + } + else + { + result.append(1, ch); /* everything else, including bytes of 0x80 and above, is copied through unchanged */ + } + } + } + return result; +} + +JSON +JSON::makeDictionary() +{ + return JSON(new JSON_dictionary()); +} + +JSON +JSON::addDictionaryMember(std::string const& key, JSON 
const& val) +{ + JSON_dictionary* obj = dynamic_cast( + this->m->value.getPointer()); + if (0 == obj) + { + throw std::runtime_error( + "JSON::addDictionaryMember called on non-dictionary"); + } + if (val.m->value.getPointer()) + { + obj->members[encode_string(key)] = val.m->value; /* the key is stored in escaped form; val's value holder is assigned, so val and the member share data */ + } + else + { + obj->members[encode_string(key)] = new JSON_null(); /* a val with no underlying value is stored as an explicit null */ + } + return obj->members[encode_string(key)]; /* returns a JSON wrapping the member just stored */ +} + +JSON +JSON::makeArray() +{ + return JSON(new JSON_array()); +} + +JSON +JSON::addArrayElement(JSON const& val) +{ + JSON_array* arr = dynamic_cast( + this->m->value.getPointer()); + if (0 == arr) + { + throw std::runtime_error("JSON::addArrayElement called on non-array"); + } + if (val.m->value.getPointer()) + { + arr->elements.push_back(val.m->value); + } + else + { + arr->elements.push_back(new JSON_null()); /* a val with no underlying value is appended as an explicit null */ + } + return arr->elements.back(); +} + +JSON +JSON::makeString(std::string const& utf8) +{ + return JSON(new JSON_string(utf8)); +} + +JSON +JSON::makeInt(long long int value) +{ + return JSON(new JSON_number(value)); +} + +JSON +JSON::makeReal(double value) +{ + return JSON(new JSON_number(value)); +} + +JSON +JSON::makeNumber(std::string const& encoded) +{ + return JSON(new JSON_number(encoded)); +} + +JSON +JSON::makeBool(bool value) +{ + return JSON(new JSON_bool(value)); +} + +JSON +JSON::makeNull() +{ + return JSON(new JSON_null()); +} + +bool +JSON::checkSchema(JSON schema, std::list& errors) +{ + return checkSchemaInternal(this->m->value.getPointer(), + schema.m->value.getPointer(), + errors, ""); /* an empty prefix identifies the top-level object */ +} + + +bool +JSON::checkSchemaInternal(JSON_value* this_v, JSON_value* sch_v, + std::list& errors, + std::string prefix) +{ /* Compare this_v against sch_v, appending one message per problem to errors; prefix is the dotted path to the current position. */ + JSON_array* this_arr = dynamic_cast(this_v); + JSON_dictionary* this_dict = dynamic_cast(this_v); + + JSON_array* sch_arr = dynamic_cast(sch_v); + JSON_dictionary* sch_dict = dynamic_cast(sch_v); + + std::string err_prefix; + if (prefix.empty()) + { + err_prefix = "top-level object"; + } + else + { + err_prefix = "json key \"" + prefix 
+ "\""; + } + + if (sch_dict) + { + if (! this_dict) + { + QTC::TC("libtests", "JSON wanted dictionary"); + errors.push_back(err_prefix + " is supposed to be a dictionary"); + return false; + } + for (std::map >::iterator iter = + sch_dict->members.begin(); + iter != sch_dict->members.end(); ++iter) + { + std::string const& key = (*iter).first; + if (this_dict->members.count(key)) + { + checkSchemaInternal( + this_dict->members[key].getPointer(), + (*iter).second.getPointer(), + errors, prefix + "." + key); /* return value unused; nested problems are appended to errors */ + } + else + { + QTC::TC("libtests", "JSON key missing in object"); + errors.push_back( + err_prefix + ": key \"" + key + + "\" is present in schema but missing in object"); + } + } + for (std::map >::iterator iter = + this_dict->members.begin(); + iter != this_dict->members.end(); ++iter) + { + std::string const& key = (*iter).first; + if (sch_dict->members.count(key) == 0) + { + QTC::TC("libtests", "JSON key extra in object"); + errors.push_back( + err_prefix + ": key \"" + key + + "\" is not present in schema but appears in object"); + } + } + } + else if (sch_arr) + { + if (! this_arr) + { + QTC::TC("libtests", "JSON wanted array"); + errors.push_back(err_prefix + " is supposed to be an array"); + return false; + } + if (sch_arr->elements.size() != 1) /* a schema array must hold exactly one template element */ + { + QTC::TC("libtests", "JSON schema array error"); + errors.push_back(err_prefix + + " schema array contains other than one item"); + return false; + } + int i = 0; + for (std::vector >::iterator iter = + this_arr->elements.begin(); + iter != this_arr->elements.end(); ++iter, ++i) + { + checkSchemaInternal( + (*iter).getPointer(), + sch_arr->elements.at(0).getPointer(), + errors, prefix + "." 
+ QUtil::int_to_string(i)); + } + } + + return errors.empty(); /* judges the whole list, so messages already in errors on entry also produce false */ +} diff --git a/libqpdf/build.mk b/libqpdf/build.mk index 6ad23a02..87252b5a 100644 --- a/libqpdf/build.mk +++ b/libqpdf/build.mk @@ -14,6 +14,7 @@ SRCS_libqpdf = \ libqpdf/FileInputSource.cc \ libqpdf/InputSource.cc \ libqpdf/InsecureRandomDataProvider.cc \ + libqpdf/JSON.cc \ libqpdf/MD5.cc \ libqpdf/OffsetInputSource.cc \ libqpdf/Pipeline.cc \ diff --git a/libtests/build.mk b/libtests/build.mk index e8f20270..7143eb56 100644 --- a/libtests/build.mk +++ b/libtests/build.mk @@ -10,6 +10,7 @@ BINS_libtests = \ flate \ hex \ input_source \ + json \ lzw \ md5 \ pointer_holder \ diff --git a/libtests/json.cc b/libtests/json.cc new file mode 100644 index 00000000..fb902c62 --- /dev/null +++ b/libtests/json.cc @@ -0,0 +1,155 @@ +#include +#include +#include +#include + +static void check(JSON& j, std::string const& exp) +{ + if (exp != j.serialize()) + { + std::cout << "Got " << j.serialize() << "; wanted " << exp << "\n"; /* a mismatch is only printed; the qtest comparison of stdout with json.out is what fails */ + } +} + +static void test_main() +{ + JSON jstr = JSON::makeString( + "<1>\xcf\x80<2>\xf0\x9f\xa5\x94\\\"<3>\x03\t\b\r\n<4>"); + check(jstr, + "\"<1>\xcf\x80<2>\xf0\x9f\xa5\x94\\\\\\\"<3>" + "\\u0003\\t\\b\\r\\n<4>\""); + JSON jnull = JSON::makeNull(); + check(jnull, "null"); + JSON jarr = JSON::makeArray(); + check(jarr, "[]"); + JSON jstr2 = JSON::makeString("a\tb"); + JSON jint = JSON::makeInt(16059); + JSON jdouble = JSON::makeReal(3.14159); + JSON jexp = JSON::makeNumber("2.1e5"); + jarr.addArrayElement(jstr2); + jarr.addArrayElement(jnull); + jarr.addArrayElement(jint); + jarr.addArrayElement(jdouble); + jarr.addArrayElement(jexp); + check(jarr, + "[\n" + " \"a\\tb\",\n" + " null,\n" + " 16059,\n" + " 3.141590,\n" + " 2.1e5\n" + "]"); + JSON jmap = JSON::makeDictionary(); + check(jmap, "{}"); + jmap.addDictionaryMember("b", jstr2); + jmap.addDictionaryMember("a", jarr); + jmap.addDictionaryMember("c\r\nd", jnull); + jmap.addDictionaryMember("yes", JSON::makeBool(false)); + 
jmap.addDictionaryMember("no", JSON::makeBool(true)); + jmap.addDictionaryMember("empty_dict", JSON::makeDictionary()); + jmap.addDictionaryMember("empty_list", JSON::makeArray()); + jmap.addDictionaryMember("single", JSON::makeArray()). + addArrayElement(JSON::makeInt(12)); /* chaining works because addDictionaryMember returns the newly added item */ + check(jmap, + "{\n" + " \"a\": [\n" + " \"a\\tb\",\n" + " null,\n" + " 16059,\n" + " 3.141590,\n" + " 2.1e5\n" + " ],\n" + " \"b\": \"a\\tb\",\n" + " \"c\\r\\nd\": null,\n" + " \"empty_dict\": {},\n" + " \"empty_list\": [],\n" + " \"no\": true,\n" + " \"single\": [\n" + " 12\n" + " ],\n" + " \"yes\": false\n" + "}"); +} + +static void check_schema(JSON& obj, JSON& schema, bool exp, + std::string const& description) +{ + std::list errors; + std::cout << "--- " << description << std::endl; + assert(exp == obj.checkSchema(schema, errors)); /* NOTE(review): the call sits inside assert, so an NDEBUG build would skip it and print no errors */ + for (std::list::iterator iter = errors.begin(); + iter != errors.end(); ++iter) + { + std::cout << *iter << std::endl; + } + std::cout << "---" << std::endl; +} + +static void test_schema() +{ + // Since we don't have a JSON parser, use the PDF parser as a + // shortcut for creating a complex JSON structure. 
+ JSON schema = QPDFObjectHandle::parse( + "<<" + " /one <<" + " /a <<" + " /q (queue)" + " /r <<" + " /x (ecks)" + " /y (why)" + " >>" + " /s [ (esses) ]" + " >>" + " >>" + " /two [" + " <<" + " /goose (gander)" + " /glarp (enspliel)" + " >>" + " ]" + ">>").getJSON(); + JSON a = QPDFObjectHandle::parse("[(not a) (dictionary)]").getJSON(); + check_schema(a, schema, false, "top-level type mismatch"); + JSON b = QPDFObjectHandle::parse( + "<<" + " /one <<" + " /a <<" + " /t (oops)" + " /r [" + " /x (ecks)" + " /y (why)" + " ]" + " /s << /z (esses) >>" + " >>" + " >>" + " /two [" + " <<" + " /goose (0 gander)" + " /glarp (0 enspliel)" + " >>" + " <<" + " /goose (1 gander)" + " /flarp (1 enspliel)" + " >>" + " 2" + " [ (three) ]" + " <<" + " /goose (4 gander)" + " /glarp (4 enspliel)" + " >>" + " ]" + ">>").getJSON(); + check_schema(b, schema, false, "top-level type mismatch"); /* NOTE(review): same label as the case above although this one exercises nested mismatches; json.out expects this exact text */ + check_schema(a, a, false, "top-level schema array error"); /* a has two elements, so it is invalid as a schema array */ + check_schema(b, b, false, "lower-level schema array error"); + check_schema(schema, schema, true, "pass"); +} + +int main() +{ + test_main(); + test_schema(); + + std::cout << "end of json tests\n"; + return 0; +} diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 8b209281..775141d7 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -34,3 +34,8 @@ Pl_PNGFilter decodeUp 0 Pl_PNGFilter decodeAverage 0 Pl_PNGFilter decodePaeth 0 Pl_TIFFPredictor processRow 1 +JSON wanted dictionary 0 +JSON key missing in object 0 +JSON wanted array 0 +JSON schema array error 0 +JSON key extra in object 0 diff --git a/libtests/qtest/json.test b/libtests/qtest/json.test new file mode 100644 index 00000000..b62994d3 --- /dev/null +++ b/libtests/qtest/json.test @@ -0,0 +1,17 @@ +#!/usr/bin/env perl +require 5.008; +use warnings; +use strict; + +chdir("json") or die "chdir testdir failed: $!\n"; + +require TestDriver; + +my $td = new TestDriver('json'); + +$td->runtest("json", + {$td->COMMAND => "json"}, + 
{$td->FILE => "json.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + +$td->report(1); diff --git a/libtests/qtest/json/json.out b/libtests/qtest/json/json.out new file mode 100644 index 00000000..f06cc1fb --- /dev/null +++ b/libtests/qtest/json/json.out @@ -0,0 +1,23 @@ +--- top-level type mismatch +top-level object is supposed to be a dictionary +--- +--- top-level type mismatch +json key "./one./a": key "/q" is present in schema but missing in object +json key "./one./a./r" is supposed to be a dictionary +json key "./one./a./s" is supposed to be an array +json key "./one./a": key "/t" is not present in schema but appears in object +json key "./two.1": key "/glarp" is present in schema but missing in object +json key "./two.1": key "/flarp" is not present in schema but appears in object +json key "./two.2" is supposed to be a dictionary +json key "./two.3" is supposed to be a dictionary +--- +--- top-level schema array error +top-level object schema array contains other than one item +--- +--- lower-level schema array error +json key "./one./a./r" schema array contains other than one item +json key "./two" schema array contains other than one item +--- +--- pass +--- +end of json tests