Add simple JSON serializer

This commit is contained in:
Jay Berkenbilt 2018-12-17 11:55:11 -05:00
parent 0776c00129
commit 651179b5da
9 changed files with 773 additions and 0 deletions

View File

@ -1,5 +1,10 @@
2018-12-18 Jay Berkenbilt <ejb@ql.org>
* Add a simple JSON serializer. This is not a complete or
general-purpose JSON library. It allows assembly and serialization
of JSON structures with some restrictions, which are described in
the header file.
* Add QPDFNameTreeObjectHelper class. This class provides useful
methods for dealing with name trees, which are discussed in
section 7.9.6 of the PDF spec (ISO-32000).

170
include/qpdf/JSON.hh Normal file
View File

@ -0,0 +1,170 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef JSON_HH
#define JSON_HH
// This is a simple JSON serializer, primarily designed for
// serializing QPDF Objects as JSON. JSON objects contain their data
// as smart pointers. When one JSON object is added to another, this
// pointer is copied. This means you can create temporary JSON objects
// on the stack, add them to other objects, and let them go out of
// scope safely. It also means that if the same JSON object is added
// in more than one place, all copies share underlying data.
#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh>
#include <string>
#include <map>
#include <vector>
#include <list>
// JSON is a handle to a shared JSON value: a dictionary, array,
// string, number, bool, or null. Handles are created with the static
// make* factories, combined with addDictionaryMember and
// addArrayElement, and rendered as text with serialize().
class JSON
{
public:
// Return this value rendered as JSON text. Dictionaries and arrays
// are written one item per line, indented two spaces per nesting
// level. A handle with no underlying value serializes as "null".
QPDF_DLL
std::string serialize() const;
// The JSON spec calls dictionaries "objects", but that creates
// too much confusion when referring to instances of the JSON
// class.
QPDF_DLL
static JSON makeDictionary();
// addDictionaryMember returns the newly added item. The key is
// escaped for JSON before being stored. A value with no underlying
// data is stored as JSON null. Throws std::runtime_error if this
// object is not a dictionary.
QPDF_DLL
JSON addDictionaryMember(std::string const& key, JSON const&);
QPDF_DLL
static JSON makeArray();
// addArrayElement returns the newly added item. A value with no
// underlying data is appended as JSON null. Throws
// std::runtime_error if this object is not an array.
QPDF_DLL
JSON addArrayElement(JSON const&);
// Create a string value; the argument is escaped for JSON when the
// value is created.
QPDF_DLL
static JSON makeString(std::string const& utf8);
// Create a number from an integer.
QPDF_DLL
static JSON makeInt(long long int value);
// Create a number from a double, formatted by
// QUtil::double_to_string with 6 decimal places.
QPDF_DLL
static JSON makeReal(double value);
// Create a number from text that is emitted verbatim; no validation
// is performed on the supplied text.
QPDF_DLL
static JSON makeNumber(std::string const& encoded);
QPDF_DLL
static JSON makeBool(bool value);
QPDF_DLL
static JSON makeNull();
// Check this JSON object against a "schema". This is not a schema
// according to any standard. It's just a template of what the
// JSON is supposed to contain. The checking does the following:
//
// * The schema is a nested structure containing dictionaries,
// single-element arrays, and strings only.
// * Recursively walk the schema
// * If the current value is a dictionary, this object must have
// a dictionary in the same place with the same keys
// * If the current value is an array, this object must have an
// array in the same place. The schema's array must contain a
// single element, which is used as a schema to validate each
// element of this object's corresponding array.
// * Otherwise, the value is ignored.
//
// QPDF's JSON output conforms to certain strict compatibility
// rules as discussed in the manual. The idea is that a JSON
// structure created manually in qpdf.cc doubles as both JSON help
// information and a schema for validating the JSON that qpdf
// generates. Any discrepancies are a bug in qpdf.
//
// A message describing each problem found is appended to errors.
// The return value is true only if errors is empty once checking
// is complete.
QPDF_DLL
bool checkSchema(JSON schema, std::list<std::string>& errors);
private:
// Escape a string for use inside a JSON string literal. The
// surrounding quotes are not added here.
static std::string encode_string(std::string const& utf8);
// Polymorphic base for every kind of JSON value. unparse returns
// the JSON text for the value; depth is the nesting level used by
// containers to compute indentation.
struct JSON_value
{
virtual ~JSON_value();
virtual std::string unparse(size_t depth) const = 0;
};
// Dictionary value. Keys are stored already escaped, and the map
// keeps them in sorted order.
struct JSON_dictionary: public JSON_value
{
virtual ~JSON_dictionary();
virtual std::string unparse(size_t depth) const;
std::map<std::string, PointerHolder<JSON_value> > members;
};
// Array value; elements are kept in insertion order.
struct JSON_array: public JSON_value
{
virtual ~JSON_array();
virtual std::string unparse(size_t depth) const;
std::vector<PointerHolder<JSON_value> > elements;
};
// String value; holds the escaped form of the text, without
// surrounding quotes.
struct JSON_string: public JSON_value
{
JSON_string(std::string const& utf8);
virtual ~JSON_string();
virtual std::string unparse(size_t depth) const;
std::string encoded;
};
// Number value; holds the textual form that is written on output.
struct JSON_number: public JSON_value
{
JSON_number(long long val);
JSON_number(double val);
JSON_number(std::string const& val);
virtual ~JSON_number();
virtual std::string unparse(size_t depth) const;
std::string encoded;
};
// Boolean value, written as true or false.
struct JSON_bool: public JSON_value
{
JSON_bool(bool val);
virtual ~JSON_bool();
virtual std::string unparse(size_t depth) const;
bool value;
};
// Null value, written as null.
struct JSON_null: public JSON_value
{
virtual ~JSON_null();
virtual std::string unparse(size_t depth) const;
};
// Wrap an existing value in a handle. This constructor is not
// explicit; the implementation relies on implicit conversion from
// PointerHolder<JSON_value> when returning newly added items.
JSON(PointerHolder<JSON_value>);
// Recursive worker for checkSchema. prefix is the dotted path to
// the value being checked and is empty at the top level.
static bool
checkSchemaInternal(JSON_value* this_v, JSON_value* sch_v,
std::list<std::string>& errors,
std::string prefix);
// Holder for the private data of a JSON handle.
class Members
{
friend class JSON;
public:
QPDF_DLL
~Members();
private:
Members(PointerHolder<JSON_value>);
// NOTE(review): declared but no definition appears in JSON.cc;
// presumably this is to prevent copying -- confirm.
Members(Members const&);
PointerHolder<JSON_value> value;
};
PointerHolder<Members> m;
};
#endif // JSON_HH

396
libqpdf/JSON.cc Normal file
View File

@ -0,0 +1,396 @@
#include <qpdf/JSON.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <stdexcept>
// Members holds only a reference-counted pointer, so there is nothing
// to release explicitly here.
JSON::Members::~Members() {}

// Store the value that the owning JSON handle refers to.
JSON::Members::Members(PointerHolder<JSON_value> value)
    : value(value)
{}
// Build a handle around an existing value by allocating the Members
// record that holds it.
JSON::JSON(PointerHolder<JSON_value> value)
    : m(new Members(value))
{}
// Out-of-line definition of the virtual destructor of the base class
// of all JSON value types; the base has no state to release.
JSON::JSON_value::~JSON_value() {}
JSON::JSON_dictionary::~JSON_dictionary()
{
}
std::string JSON::JSON_dictionary::unparse(size_t depth) const
{
std::string result = "{";
bool first = true;
for (std::map<std::string, PointerHolder<JSON_value> >::const_iterator
iter = members.begin();
iter != members.end(); ++iter)
{
if (first)
{
first = false;
}
else
{
result.append(1, ',');
}
result.append(1, '\n');
result.append(2 * (1 + depth), ' ');
result += ("\"" + (*iter).first + "\": " +
(*iter).second->unparse(1 + depth));
}
if (! first)
{
result.append(1, '\n');
result.append(2 * depth, ' ');
}
result.append(1, '}');
return result;
}
JSON::JSON_array::~JSON_array()
{
}
std::string JSON::JSON_array::unparse(size_t depth) const
{
std::string result = "[";
bool first = true;
for (std::vector<PointerHolder<JSON_value> >::const_iterator iter =
elements.begin();
iter != elements.end(); ++iter)
{
if (first)
{
first = false;
}
else
{
result.append(1, ',');
}
result.append(1, '\n');
result.append(2 * (1 + depth), ' ');
result += (*iter)->unparse(1 + depth);
}
if (! first)
{
result.append(1, '\n');
result.append(2 * depth, ' ');
}
result.append(1, ']');
return result;
}
// The text is escaped once, at construction time; unparse only has to
// add the quotes.
JSON::JSON_string::JSON_string(std::string const& utf8)
    : encoded(encode_string(utf8))
{}

JSON::JSON_string::~JSON_string() {}

// Return the escaped text wrapped in double quotes. Depth is unused
// because a string never spans multiple lines.
std::string JSON::JSON_string::unparse(size_t) const
{
    std::string quoted(1, '"');
    quoted += encoded;
    quoted += '"';
    return quoted;
}
JSON::JSON_number::JSON_number(long long value) :
encoded(QUtil::int_to_string(value))
{
}
JSON::JSON_number::JSON_number(double value) :
encoded(QUtil::double_to_string(value, 6))
{
}
JSON::JSON_number::JSON_number(std::string const& value) :
encoded(value)
{
}
JSON::JSON_number::~JSON_number()
{
}
std::string JSON::JSON_number::unparse(size_t) const
{
return encoded;
}
JSON::JSON_bool::JSON_bool(bool val)
    : value(val)
{}

JSON::JSON_bool::~JSON_bool() {}

// Write the JSON literal for the stored flag; depth is unused.
std::string JSON::JSON_bool::unparse(size_t) const
{
    if (value)
    {
        return "true";
    }
    return "false";
}
JSON::JSON_null::~JSON_null() {}

// Write the JSON null literal; depth is unused.
std::string JSON::JSON_null::unparse(size_t) const
{
    return std::string("null");
}
std::string
JSON::serialize() const
{
if (0 == this->m->value.getPointer())
{
return "null";
}
else
{
return this->m->value->unparse(0);
}
}
// Escape a string for inclusion in a JSON string literal. Backslash
// and double quote are backslash-escaped; backspace, newline,
// carriage return, and tab use their short escapes; any other byte
// below 32 becomes \u followed by four hex digits. All remaining
// bytes, including bytes of multi-byte UTF-8 sequences, are copied
// unchanged. The surrounding quotes are not added.
std::string
JSON::encode_string(std::string const& str)
{
    std::string escaped;
    for (std::string::const_iterator it = str.begin();
         it != str.end(); ++it)
    {
        // Work with the unsigned value so that bytes >= 0x80 are not
        // mistaken for control characters.
        unsigned char const byte = static_cast<unsigned char>(*it);
        if (byte == '\\')
        {
            escaped += "\\\\";
        }
        else if (byte == '\"')
        {
            escaped += "\\\"";
        }
        else if (byte == '\b')
        {
            escaped += "\\b";
        }
        else if (byte == '\n')
        {
            escaped += "\\n";
        }
        else if (byte == '\r')
        {
            escaped += "\\r";
        }
        else if (byte == '\t')
        {
            escaped += "\\t";
        }
        else if (byte < 32)
        {
            escaped += "\\u" + QUtil::int_to_string_base(byte, 16, 4);
        }
        else
        {
            escaped.append(1, byte);
        }
    }
    return escaped;
}
// Create a handle to a new, empty dictionary.
JSON
JSON::makeDictionary()
{
    PointerHolder<JSON_value> dict(new JSON_dictionary());
    return JSON(dict);
}
// Add or replace a dictionary member and return a handle to the stored
// value.
//
// key is escaped for JSON before being used as the map key. If val
// holds no underlying value, a JSON null is stored instead. Because
// the stored pointer is shared, later changes made through val or
// through the returned handle are visible in this dictionary.
//
// Throws std::runtime_error if this object is not a dictionary.
JSON
JSON::addDictionaryMember(std::string const& key, JSON const& val)
{
    JSON_dictionary* obj = dynamic_cast<JSON_dictionary*>(
        this->m->value.getPointer());
    if (0 == obj)
    {
        throw std::runtime_error(
            "JSON::addDictionaryMember called on non-dictionary");
    }
    // Escape the key once and look the slot up once, rather than
    // re-escaping and re-searching the map for the store and again
    // for the return value.
    std::string const encoded_key = encode_string(key);
    PointerHolder<JSON_value>& slot = obj->members[encoded_key];
    if (val.m->value.getPointer())
    {
        slot = val.m->value;
    }
    else
    {
        slot = new JSON_null();
    }
    return slot;
}
// Create a handle to a new, empty array.
JSON
JSON::makeArray()
{
    PointerHolder<JSON_value> arr(new JSON_array());
    return JSON(arr);
}
// Append a value to this array and return a handle to the stored
// element. If val holds no underlying value, a JSON null is appended
// instead. Throws std::runtime_error if this object is not an array.
JSON
JSON::addArrayElement(JSON const& val)
{
    JSON_array* const target = dynamic_cast<JSON_array*>(
        this->m->value.getPointer());
    if (target == 0)
    {
        throw std::runtime_error("JSON::addArrayElement called on non-array");
    }
    // Share the caller's value when there is one; otherwise store null.
    PointerHolder<JSON_value> element = val.m->value;
    if (element.getPointer() == 0)
    {
        element = new JSON_null();
    }
    target->elements.push_back(element);
    return target->elements.back();
}
// Factories for scalar values. Each allocates the matching value
// object and returns a handle to it.

// String value; the text is escaped when the value is constructed.
JSON
JSON::makeString(std::string const& utf8)
{
    PointerHolder<JSON_value> v(new JSON_string(utf8));
    return JSON(v);
}

// Number value from an integer.
JSON
JSON::makeInt(long long int value)
{
    PointerHolder<JSON_value> v(new JSON_number(value));
    return JSON(v);
}

// Number value from a double.
JSON
JSON::makeReal(double value)
{
    PointerHolder<JSON_value> v(new JSON_number(value));
    return JSON(v);
}

// Number value from text that is written out unchanged.
JSON
JSON::makeNumber(std::string const& encoded)
{
    PointerHolder<JSON_value> v(new JSON_number(encoded));
    return JSON(v);
}

// Boolean value.
JSON
JSON::makeBool(bool value)
{
    PointerHolder<JSON_value> v(new JSON_bool(value));
    return JSON(v);
}

// Null value.
JSON
JSON::makeNull()
{
    PointerHolder<JSON_value> v(new JSON_null());
    return JSON(v);
}
// Validate this value against schema, appending a message to errors
// for each discrepancy. Checking starts at the top level, which is
// represented by an empty path prefix. Returns the result of the
// recursive check.
bool
JSON::checkSchema(JSON schema, std::list<std::string>& errors)
{
    JSON_value* const actual = this->m->value.getPointer();
    JSON_value* const wanted = schema.m->value.getPointer();
    return checkSchemaInternal(actual, wanted, errors, "");
}
// Recursive worker for checkSchema.
//
// this_v is the value being checked and sch_v is the schema value for
// the same position. prefix is the dotted path to this position and
// is empty at the top level. A message is appended to errors for each
// discrepancy found.
//
// * Schema dictionary: this_v must be a dictionary. Keys present in
//   both are checked recursively; keys missing from this_v and keys
//   present only in this_v are each reported.
// * Schema array: this_v must be an array and the schema array must
//   have exactly one element, which is used to check every element of
//   this_v.
// * Any other schema value places no constraint on this_v.
//
// Returns false immediately on a type mismatch or a malformed schema
// array; otherwise returns whether errors is empty.
bool
JSON::checkSchemaInternal(JSON_value* this_v, JSON_value* sch_v,
                          std::list<std::string>& errors,
                          std::string prefix)
{
    typedef std::map<std::string, PointerHolder<JSON_value> > member_map;
    typedef std::vector<PointerHolder<JSON_value> > element_vec;

    // Human-readable description of the position being checked.
    std::string const where =
        (prefix.empty()
         ? std::string("top-level object")
         : "json key \"" + prefix + "\"");

    JSON_dictionary* const want_dict = dynamic_cast<JSON_dictionary*>(sch_v);
    JSON_array* const want_arr = dynamic_cast<JSON_array*>(sch_v);

    if (want_dict != 0)
    {
        JSON_dictionary* const have_dict =
            dynamic_cast<JSON_dictionary*>(this_v);
        if (have_dict == 0)
        {
            QTC::TC("libtests", "JSON wanted dictionary");
            errors.push_back(where + " is supposed to be a dictionary");
            return false;
        }

        // First pass: every schema key must exist in the object.
        for (member_map::iterator want = want_dict->members.begin();
             want != want_dict->members.end(); ++want)
        {
            std::string const& name = want->first;
            member_map::iterator have = have_dict->members.find(name);
            if (have == have_dict->members.end())
            {
                QTC::TC("libtests", "JSON key missing in object");
                errors.push_back(
                    where + ": key \"" + name +
                    "\" is present in schema but missing in object");
                continue;
            }
            checkSchemaInternal(
                have->second.getPointer(),
                want->second.getPointer(),
                errors, prefix + "." + name);
        }

        // Second pass: the object must not have keys the schema lacks.
        for (member_map::iterator have = have_dict->members.begin();
             have != have_dict->members.end(); ++have)
        {
            std::string const& name = have->first;
            if (want_dict->members.find(name) == want_dict->members.end())
            {
                QTC::TC("libtests", "JSON key extra in object");
                errors.push_back(
                    where + ": key \"" + name +
                    "\" is not present in schema but appears in object");
            }
        }
    }
    else if (want_arr != 0)
    {
        JSON_array* const have_arr = dynamic_cast<JSON_array*>(this_v);
        if (have_arr == 0)
        {
            QTC::TC("libtests", "JSON wanted array");
            errors.push_back(where + " is supposed to be an array");
            return false;
        }
        if (want_arr->elements.size() != 1)
        {
            QTC::TC("libtests", "JSON schema array error");
            errors.push_back(where +
                             " schema array contains other than one item");
            return false;
        }

        // The single schema element is the template for every element.
        JSON_value* const element_schema =
            want_arr->elements[0].getPointer();
        int index = 0;
        element_vec::iterator elem = have_arr->elements.begin();
        while (elem != have_arr->elements.end())
        {
            checkSchemaInternal(
                elem->getPointer(), element_schema,
                errors, prefix + "." + QUtil::int_to_string(index));
            ++elem;
            ++index;
        }
    }
    return errors.empty();
}

View File

@ -14,6 +14,7 @@ SRCS_libqpdf = \
libqpdf/FileInputSource.cc \
libqpdf/InputSource.cc \
libqpdf/InsecureRandomDataProvider.cc \
libqpdf/JSON.cc \
libqpdf/MD5.cc \
libqpdf/OffsetInputSource.cc \
libqpdf/Pipeline.cc \

View File

@ -10,6 +10,7 @@ BINS_libtests = \
flate \
hex \
input_source \
json \
lzw \
md5 \
pointer_holder \

155
libtests/json.cc Normal file
View File

@ -0,0 +1,155 @@
#include <qpdf/JSON.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <iostream>
#include <assert.h>
// Compare the serialized form of j with the expected text and report
// a mismatch on standard output. Nothing is printed on success.
static void check(JSON& j, std::string const& exp)
{
    std::string const got = j.serialize();
    if (got == exp)
    {
        return;
    }
    std::cout << "Got " << got << "; wanted " << exp << "\n";
}
// Exercise construction and serialization of every kind of JSON
// value. Each check() call prints a message only if the serialized
// text differs from the expected literal.
static void test_main()
{
// String escaping: the multi-byte UTF-8 sequences are expected to
// pass through unchanged, while backslash, double quote, and control
// characters are expected to be escaped.
JSON jstr = JSON::makeString(
"<1>\xcf\x80<2>\xf0\x9f\xa5\x94\\\"<3>\x03\t\b\r\n<4>");
check(jstr,
"\"<1>\xcf\x80<2>\xf0\x9f\xa5\x94\\\\\\\"<3>"
"\\u0003\\t\\b\\r\\n<4>\"");
JSON jnull = JSON::makeNull();
check(jnull, "null");
// An empty array serializes without any line breaks.
JSON jarr = JSON::makeArray();
check(jarr, "[]");
JSON jstr2 = JSON::makeString("a\tb");
JSON jint = JSON::makeInt(16059);
JSON jdouble = JSON::makeReal(3.14159);
JSON jexp = JSON::makeNumber("2.1e5");
jarr.addArrayElement(jstr2);
jarr.addArrayElement(jnull);
jarr.addArrayElement(jint);
jarr.addArrayElement(jdouble);
jarr.addArrayElement(jexp);
// Elements appear in insertion order. The real is expected with six
// decimal places and the pre-encoded number exactly as supplied.
check(jarr,
"[\n"
" \"a\\tb\",\n"
" null,\n"
" 16059,\n"
" 3.141590,\n"
" 2.1e5\n"
"]");
JSON jmap = JSON::makeDictionary();
check(jmap, "{}");
jmap.addDictionaryMember("b", jstr2);
jmap.addDictionaryMember("a", jarr);
jmap.addDictionaryMember("c\r\nd", jnull);
jmap.addDictionaryMember("yes", JSON::makeBool(false));
jmap.addDictionaryMember("no", JSON::makeBool(true));
jmap.addDictionaryMember("empty_dict", JSON::makeDictionary());
jmap.addDictionaryMember("empty_list", JSON::makeArray());
// The handle returned by addDictionaryMember refers to the stored
// array, so the element added through it is expected to show up in
// jmap's output.
jmap.addDictionaryMember("single", JSON::makeArray()).
addArrayElement(JSON::makeInt(12));
// Keys are expected in sorted order rather than insertion order, and
// the key containing CR LF is expected to be escaped.
check(jmap,
"{\n"
" \"a\": [\n"
" \"a\\tb\",\n"
" null,\n"
" 16059,\n"
" 3.141590,\n"
" 2.1e5\n"
" ],\n"
" \"b\": \"a\\tb\",\n"
" \"c\\r\\nd\": null,\n"
" \"empty_dict\": {},\n"
" \"empty_list\": [],\n"
" \"no\": true,\n"
" \"single\": [\n"
" 12\n"
" ],\n"
" \"yes\": false\n"
"}");
}
// Validate obj against schema and print the outcome.
//
// Output is a "--- <description>" header, one line per validation
// error, and a closing "---" line. exp is the expected return value
// of checkSchema; a mismatch trips an assertion.
static void check_schema(JSON& obj, JSON& schema, bool exp,
                         std::string const& description)
{
    std::list<std::string> errors;
    std::cout << "--- " << description << std::endl;
    // Run the validation outside of assert(): when NDEBUG is defined
    // the assert expression is not evaluated, which would otherwise
    // skip the call and leave errors empty.
    bool const matched = obj.checkSchema(schema, errors);
    assert(exp == matched);
    // Avoid unused-variable warnings when assert() compiles away.
    static_cast<void>(exp);
    static_cast<void>(matched);
    for (std::list<std::string>::iterator iter = errors.begin();
         iter != errors.end(); ++iter)
    {
        std::cout << *iter << std::endl;
    }
    std::cout << "---" << std::endl;
}
// Exercise JSON::checkSchema with a schema, a top-level mismatch, a
// structure with several lower-level mismatches, two malformed
// schemas, and a passing case. Output is printed by check_schema.
static void test_schema()
{
// Since we don't have a JSON parser, use the PDF parser as a
// shortcut for creating a complex JSON structure.
JSON schema = QPDFObjectHandle::parse(
"<<"
" /one <<"
" /a <<"
" /q (queue)"
" /r <<"
" /x (ecks)"
" /y (why)"
" >>"
" /s [ (esses) ]"
" >>"
" >>"
" /two ["
" <<"
" /goose (gander)"
" /glarp (enspliel)"
" >>"
" ]"
">>").getJSON();
// a is an array where the schema requires a dictionary.
JSON a = QPDFObjectHandle::parse("[(not a) (dictionary)]").getJSON();
check_schema(a, schema, false, "top-level type mismatch");
// b deviates from the schema below the top level: under /one /a it
// has /t instead of /q, /r is an array instead of a dictionary, and
// /s is a dictionary instead of an array. In /two, element 1 has
// /flarp instead of /glarp, and elements 2 and 3 are not
// dictionaries.
JSON b = QPDFObjectHandle::parse(
"<<"
" /one <<"
" /a <<"
" /t (oops)"
" /r ["
" /x (ecks)"
" /y (why)"
" ]"
" /s << /z (esses) >>"
" >>"
" >>"
" /two ["
" <<"
" /goose (0 gander)"
" /glarp (0 enspliel)"
" >>"
" <<"
" /goose (1 gander)"
" /flarp (1 enspliel)"
" >>"
" 2"
" [ (three) ]"
" <<"
" /goose (4 gander)"
" /glarp (4 enspliel)"
" >>"
" ]"
">>").getJSON();
// NOTE(review): this description repeats "top-level type mismatch"
// although b's mismatches are below the top level. The expected
// output file contains the same text, so both would have to change
// together.
check_schema(b, schema, false, "top-level type mismatch");
// Using a or b as the schema is invalid because each contains an
// array that does not have exactly one element.
check_schema(a, a, false, "top-level schema array error");
check_schema(b, b, false, "lower-level schema array error");
// A valid schema checked against itself produces no errors.
check_schema(schema, schema, true, "pass");
}
// Run the serialization tests followed by the schema tests, then
// print a marker so the test driver can tell the program ran to
// completion.
int main()
{
    test_main();
    test_schema();

    std::cout << "end of json tests\n";
    return 0;
}

View File

@ -34,3 +34,8 @@ Pl_PNGFilter decodeUp 0
Pl_PNGFilter decodeAverage 0
Pl_PNGFilter decodePaeth 0
Pl_TIFFPredictor processRow 1
JSON wanted dictionary 0
JSON key missing in object 0
JSON wanted array 0
JSON schema array error 0
JSON key extra in object 0

17
libtests/qtest/json.test Normal file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env perl
# Test driver script for the json test program: runs the program and
# compares its output with the expected output file.
require 5.008;
use warnings;
use strict;
# Run from the json directory, which holds the expected output file.
chdir("json") or die "chdir testdir failed: $!\n";
require TestDriver;
my $td = new TestDriver('json');
# Run the json command and require that its output matches json.out
# and that it exits with status 0. Newlines are normalized before the
# comparison.
$td->runtest("json",
{$td->COMMAND => "json"},
{$td->FILE => "json.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# NOTE(review): the argument is presumably the expected number of
# tests run by this script -- confirm against TestDriver.
$td->report(1);

View File

@ -0,0 +1,23 @@
--- top-level type mismatch
top-level object is supposed to be a dictionary
---
--- top-level type mismatch
json key "./one./a": key "/q" is present in schema but missing in object
json key "./one./a./r" is supposed to be a dictionary
json key "./one./a./s" is supposed to be an array
json key "./one./a": key "/t" is not present in schema but appears in object
json key "./two.1": key "/glarp" is present in schema but missing in object
json key "./two.1": key "/flarp" is not present in schema but appears in object
json key "./two.2" is supposed to be a dictionary
json key "./two.3" is supposed to be a dictionary
---
--- top-level schema array error
top-level object schema array contains other than one item
---
--- lower-level schema array error
json key "./one./a./r" schema array contains other than one item
json key "./two" schema array contains other than one item
---
--- pass
---
end of json tests