2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 10:58:58 +00:00

Add simple JSON serializer

This commit is contained in:
Jay Berkenbilt 2018-12-17 11:55:11 -05:00
parent 0776c00129
commit 651179b5da
9 changed files with 773 additions and 0 deletions

View File

@ -1,5 +1,10 @@
2018-12-18 Jay Berkenbilt <ejb@ql.org> 2018-12-18 Jay Berkenbilt <ejb@ql.org>
* Add a simple JSON serializer. This is not a complete or
general-purpose JSON library. It allows assembly and serialization
of JSON structures with some restrictions, which are described in
the header file.
* Add QPDFNameTreeObjectHelper class. This class provides useful * Add QPDFNameTreeObjectHelper class. This class provides useful
methods for dealing with name trees, which are discussed in methods for dealing with name trees, which are discussed in
section 7.9.6 of the PDF spec (ISO-32000). section 7.9.6 of the PDF spec (ISO-32000).

170
include/qpdf/JSON.hh Normal file
View File

@ -0,0 +1,170 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef JSON_HH
#define JSON_HH
// This is a simple JSON serializer, primarily designed for
// serializing QPDF Objects as JSON. JSON objects contain their data
// as smart pointers. One JSON object is added to another, this
// pointer is copied. This means you can create temporary JSON objects
// on the stack, add them to other objects, and let them go out of
// scope safely. It also means that if the json JSON object is added
// in more than one place, all copies share underlying data.
#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh>
#include <string>
#include <map>
#include <vector>
#include <list>
class JSON
{
public:
QPDF_DLL
std::string serialize() const;
// The JSON spec calls dictionaries "objects", but that creates
// too much confusion when referring to instances of the JSON
// class.
QPDF_DLL
static JSON makeDictionary();
// addDictionaryMember returns the newly added item.
QPDF_DLL
JSON addDictionaryMember(std::string const& key, JSON const&);
QPDF_DLL
static JSON makeArray();
// addArrayElement returns the newly added item.
QPDF_DLL
JSON addArrayElement(JSON const&);
QPDF_DLL
static JSON makeString(std::string const& utf8);
QPDF_DLL
static JSON makeInt(long long int value);
QPDF_DLL
static JSON makeReal(double value);
QPDF_DLL
static JSON makeNumber(std::string const& encoded);
QPDF_DLL
static JSON makeBool(bool value);
QPDF_DLL
static JSON makeNull();
// Check this JSON object against a "schema". This is not a schema
// according to any standard. It's just a template of what the
// JSON is supposed to contain. The checking does the following:
//
// * The schema is a nested structure containing dictionaries,
// single-element arrays, and strings only.
// * Recursively walk the schema
// * If the current value is a dictionary, this object must have
// a dictionary in the same place with the same keys
// * If the current value is an array, this object must have an
// array in the same place. The schema's array must contain a
// single element, which is used as a schema to validate each
// element of this object's corresponding array.
// * Otherwise, the value is ignored.
//
// QPDF's JSON output conforms to certain strict compatability
// rules as discussed in the manual. The idea is that a JSON
// structure created manually in qpdf.cc doubles as both JSON help
// information and a schema for validating the JSON that qpdf
// generates. Any discrepancies are a bug in qpdf.
QPDF_DLL
bool checkSchema(JSON schema, std::list<std::string>& errors);
private:
static std::string encode_string(std::string const& utf8);
struct JSON_value
{
virtual ~JSON_value();
virtual std::string unparse(size_t depth) const = 0;
};
struct JSON_dictionary: public JSON_value
{
virtual ~JSON_dictionary();
virtual std::string unparse(size_t depth) const;
std::map<std::string, PointerHolder<JSON_value> > members;
};
struct JSON_array: public JSON_value
{
virtual ~JSON_array();
virtual std::string unparse(size_t depth) const;
std::vector<PointerHolder<JSON_value> > elements;
};
struct JSON_string: public JSON_value
{
JSON_string(std::string const& utf8);
virtual ~JSON_string();
virtual std::string unparse(size_t depth) const;
std::string encoded;
};
struct JSON_number: public JSON_value
{
JSON_number(long long val);
JSON_number(double val);
JSON_number(std::string const& val);
virtual ~JSON_number();
virtual std::string unparse(size_t depth) const;
std::string encoded;
};
struct JSON_bool: public JSON_value
{
JSON_bool(bool val);
virtual ~JSON_bool();
virtual std::string unparse(size_t depth) const;
bool value;
};
struct JSON_null: public JSON_value
{
virtual ~JSON_null();
virtual std::string unparse(size_t depth) const;
};
JSON(PointerHolder<JSON_value>);
static bool
checkSchemaInternal(JSON_value* this_v, JSON_value* sch_v,
std::list<std::string>& errors,
std::string prefix);
class Members
{
friend class JSON;
public:
QPDF_DLL
~Members();
private:
Members(PointerHolder<JSON_value>);
Members(Members const&);
PointerHolder<JSON_value> value;
};
PointerHolder<Members> m;
};
#endif // JSON_HH

396
libqpdf/JSON.cc Normal file
View File

@ -0,0 +1,396 @@
#include <qpdf/JSON.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <stdexcept>
JSON::Members::~Members()
{
}
JSON::Members::Members(PointerHolder<JSON_value> value) :
value(value)
{
}
JSON::JSON(PointerHolder<JSON_value> value) :
m(new Members(value))
{
}
JSON::JSON_value::~JSON_value()
{
}
JSON::JSON_dictionary::~JSON_dictionary()
{
}
std::string JSON::JSON_dictionary::unparse(size_t depth) const
{
std::string result = "{";
bool first = true;
for (std::map<std::string, PointerHolder<JSON_value> >::const_iterator
iter = members.begin();
iter != members.end(); ++iter)
{
if (first)
{
first = false;
}
else
{
result.append(1, ',');
}
result.append(1, '\n');
result.append(2 * (1 + depth), ' ');
result += ("\"" + (*iter).first + "\": " +
(*iter).second->unparse(1 + depth));
}
if (! first)
{
result.append(1, '\n');
result.append(2 * depth, ' ');
}
result.append(1, '}');
return result;
}
JSON::JSON_array::~JSON_array()
{
}
std::string JSON::JSON_array::unparse(size_t depth) const
{
std::string result = "[";
bool first = true;
for (std::vector<PointerHolder<JSON_value> >::const_iterator iter =
elements.begin();
iter != elements.end(); ++iter)
{
if (first)
{
first = false;
}
else
{
result.append(1, ',');
}
result.append(1, '\n');
result.append(2 * (1 + depth), ' ');
result += (*iter)->unparse(1 + depth);
}
if (! first)
{
result.append(1, '\n');
result.append(2 * depth, ' ');
}
result.append(1, ']');
return result;
}
JSON::JSON_string::JSON_string(std::string const& utf8) :
encoded(encode_string(utf8))
{
}
JSON::JSON_string::~JSON_string()
{
}
std::string JSON::JSON_string::unparse(size_t) const
{
return "\"" + encoded + "\"";
}
JSON::JSON_number::JSON_number(long long value) :
encoded(QUtil::int_to_string(value))
{
}
JSON::JSON_number::JSON_number(double value) :
encoded(QUtil::double_to_string(value, 6))
{
}
JSON::JSON_number::JSON_number(std::string const& value) :
encoded(value)
{
}
JSON::JSON_number::~JSON_number()
{
}
std::string JSON::JSON_number::unparse(size_t) const
{
return encoded;
}
JSON::JSON_bool::JSON_bool(bool val) :
value(val)
{
}
JSON::JSON_bool::~JSON_bool()
{
}
std::string JSON::JSON_bool::unparse(size_t) const
{
return value ? "true" : "false";
}
JSON::JSON_null::~JSON_null()
{
}
std::string JSON::JSON_null::unparse(size_t) const
{
return "null";
}
std::string
JSON::serialize() const
{
if (0 == this->m->value.getPointer())
{
return "null";
}
else
{
return this->m->value->unparse(0);
}
}
std::string
JSON::encode_string(std::string const& str)
{
std::string result;
size_t len = str.length();
for (size_t i = 0; i < len; ++i)
{
unsigned char ch = static_cast<unsigned char>(str.at(i));
switch (ch)
{
case '\\':
result += "\\\\";
break;
case '\"':
result += "\\\"";
break;
case '\b':
result += "\\b";
break;
case '\n':
result += "\\n";
break;
case '\r':
result += "\\r";
break;
case '\t':
result += "\\t";
break;
default:
if (ch < 32)
{
result += "\\u" + QUtil::int_to_string_base(ch, 16, 4);
}
else
{
result.append(1, ch);
}
}
}
return result;
}
JSON
JSON::makeDictionary()
{
return JSON(new JSON_dictionary());
}
JSON
JSON::addDictionaryMember(std::string const& key, JSON const& val)
{
JSON_dictionary* obj = dynamic_cast<JSON_dictionary*>(
this->m->value.getPointer());
if (0 == obj)
{
throw std::runtime_error(
"JSON::addDictionaryMember called on non-dictionary");
}
if (val.m->value.getPointer())
{
obj->members[encode_string(key)] = val.m->value;
}
else
{
obj->members[encode_string(key)] = new JSON_null();
}
return obj->members[encode_string(key)];
}
JSON
JSON::makeArray()
{
return JSON(new JSON_array());
}
JSON
JSON::addArrayElement(JSON const& val)
{
JSON_array* arr = dynamic_cast<JSON_array*>(
this->m->value.getPointer());
if (0 == arr)
{
throw std::runtime_error("JSON::addArrayElement called on non-array");
}
if (val.m->value.getPointer())
{
arr->elements.push_back(val.m->value);
}
else
{
arr->elements.push_back(new JSON_null());
}
return arr->elements.back();
}
JSON
JSON::makeString(std::string const& utf8)
{
return JSON(new JSON_string(utf8));
}
JSON
JSON::makeInt(long long int value)
{
return JSON(new JSON_number(value));
}
JSON
JSON::makeReal(double value)
{
return JSON(new JSON_number(value));
}
JSON
JSON::makeNumber(std::string const& encoded)
{
return JSON(new JSON_number(encoded));
}
JSON
JSON::makeBool(bool value)
{
return JSON(new JSON_bool(value));
}
JSON
JSON::makeNull()
{
return JSON(new JSON_null());
}
bool
JSON::checkSchema(JSON schema, std::list<std::string>& errors)
{
return checkSchemaInternal(this->m->value.getPointer(),
schema.m->value.getPointer(),
errors, "");
}
bool
JSON::checkSchemaInternal(JSON_value* this_v, JSON_value* sch_v,
std::list<std::string>& errors,
std::string prefix)
{
JSON_array* this_arr = dynamic_cast<JSON_array*>(this_v);
JSON_dictionary* this_dict = dynamic_cast<JSON_dictionary*>(this_v);
JSON_array* sch_arr = dynamic_cast<JSON_array*>(sch_v);
JSON_dictionary* sch_dict = dynamic_cast<JSON_dictionary*>(sch_v);
std::string err_prefix;
if (prefix.empty())
{
err_prefix = "top-level object";
}
else
{
err_prefix = "json key \"" + prefix + "\"";
}
if (sch_dict)
{
if (! this_dict)
{
QTC::TC("libtests", "JSON wanted dictionary");
errors.push_back(err_prefix + " is supposed to be a dictionary");
return false;
}
for (std::map<std::string, PointerHolder<JSON_value> >::iterator iter =
sch_dict->members.begin();
iter != sch_dict->members.end(); ++iter)
{
std::string const& key = (*iter).first;
if (this_dict->members.count(key))
{
checkSchemaInternal(
this_dict->members[key].getPointer(),
(*iter).second.getPointer(),
errors, prefix + "." + key);
}
else
{
QTC::TC("libtests", "JSON key missing in object");
errors.push_back(
err_prefix + ": key \"" + key +
"\" is present in schema but missing in object");
}
}
for (std::map<std::string, PointerHolder<JSON_value> >::iterator iter =
this_dict->members.begin();
iter != this_dict->members.end(); ++iter)
{
std::string const& key = (*iter).first;
if (sch_dict->members.count(key) == 0)
{
QTC::TC("libtests", "JSON key extra in object");
errors.push_back(
err_prefix + ": key \"" + key +
"\" is not present in schema but appears in object");
}
}
}
else if (sch_arr)
{
if (! this_arr)
{
QTC::TC("libtests", "JSON wanted array");
errors.push_back(err_prefix + " is supposed to be an array");
return false;
}
if (sch_arr->elements.size() != 1)
{
QTC::TC("libtests", "JSON schema array error");
errors.push_back(err_prefix +
" schema array contains other than one item");
return false;
}
int i = 0;
for (std::vector<PointerHolder<JSON_value> >::iterator iter =
this_arr->elements.begin();
iter != this_arr->elements.end(); ++iter, ++i)
{
checkSchemaInternal(
(*iter).getPointer(),
sch_arr->elements.at(0).getPointer(),
errors, prefix + "." + QUtil::int_to_string(i));
}
}
return errors.empty();
}

View File

@ -14,6 +14,7 @@ SRCS_libqpdf = \
libqpdf/FileInputSource.cc \ libqpdf/FileInputSource.cc \
libqpdf/InputSource.cc \ libqpdf/InputSource.cc \
libqpdf/InsecureRandomDataProvider.cc \ libqpdf/InsecureRandomDataProvider.cc \
libqpdf/JSON.cc \
libqpdf/MD5.cc \ libqpdf/MD5.cc \
libqpdf/OffsetInputSource.cc \ libqpdf/OffsetInputSource.cc \
libqpdf/Pipeline.cc \ libqpdf/Pipeline.cc \

View File

@ -10,6 +10,7 @@ BINS_libtests = \
flate \ flate \
hex \ hex \
input_source \ input_source \
json \
lzw \ lzw \
md5 \ md5 \
pointer_holder \ pointer_holder \

155
libtests/json.cc Normal file
View File

@ -0,0 +1,155 @@
#include <qpdf/JSON.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <iostream>
#include <assert.h>
static void check(JSON& j, std::string const& exp)
{
if (exp != j.serialize())
{
std::cout << "Got " << j.serialize() << "; wanted " << exp << "\n";
}
}
static void test_main()
{
JSON jstr = JSON::makeString(
"<1>\xcf\x80<2>\xf0\x9f\xa5\x94\\\"<3>\x03\t\b\r\n<4>");
check(jstr,
"\"<1>\xcf\x80<2>\xf0\x9f\xa5\x94\\\\\\\"<3>"
"\\u0003\\t\\b\\r\\n<4>\"");
JSON jnull = JSON::makeNull();
check(jnull, "null");
JSON jarr = JSON::makeArray();
check(jarr, "[]");
JSON jstr2 = JSON::makeString("a\tb");
JSON jint = JSON::makeInt(16059);
JSON jdouble = JSON::makeReal(3.14159);
JSON jexp = JSON::makeNumber("2.1e5");
jarr.addArrayElement(jstr2);
jarr.addArrayElement(jnull);
jarr.addArrayElement(jint);
jarr.addArrayElement(jdouble);
jarr.addArrayElement(jexp);
check(jarr,
"[\n"
" \"a\\tb\",\n"
" null,\n"
" 16059,\n"
" 3.141590,\n"
" 2.1e5\n"
"]");
JSON jmap = JSON::makeDictionary();
check(jmap, "{}");
jmap.addDictionaryMember("b", jstr2);
jmap.addDictionaryMember("a", jarr);
jmap.addDictionaryMember("c\r\nd", jnull);
jmap.addDictionaryMember("yes", JSON::makeBool(false));
jmap.addDictionaryMember("no", JSON::makeBool(true));
jmap.addDictionaryMember("empty_dict", JSON::makeDictionary());
jmap.addDictionaryMember("empty_list", JSON::makeArray());
jmap.addDictionaryMember("single", JSON::makeArray()).
addArrayElement(JSON::makeInt(12));
check(jmap,
"{\n"
" \"a\": [\n"
" \"a\\tb\",\n"
" null,\n"
" 16059,\n"
" 3.141590,\n"
" 2.1e5\n"
" ],\n"
" \"b\": \"a\\tb\",\n"
" \"c\\r\\nd\": null,\n"
" \"empty_dict\": {},\n"
" \"empty_list\": [],\n"
" \"no\": true,\n"
" \"single\": [\n"
" 12\n"
" ],\n"
" \"yes\": false\n"
"}");
}
static void check_schema(JSON& obj, JSON& schema, bool exp,
std::string const& description)
{
std::list<std::string> errors;
std::cout << "--- " << description << std::endl;
assert(exp == obj.checkSchema(schema, errors));
for (std::list<std::string>::iterator iter = errors.begin();
iter != errors.end(); ++iter)
{
std::cout << *iter << std::endl;
}
std::cout << "---" << std::endl;
}
static void test_schema()
{
// Since we don't have a JSON parser, use the PDF parser as a
// shortcut for creating a complex JSON structure.
JSON schema = QPDFObjectHandle::parse(
"<<"
" /one <<"
" /a <<"
" /q (queue)"
" /r <<"
" /x (ecks)"
" /y (why)"
" >>"
" /s [ (esses) ]"
" >>"
" >>"
" /two ["
" <<"
" /goose (gander)"
" /glarp (enspliel)"
" >>"
" ]"
">>").getJSON();
JSON a = QPDFObjectHandle::parse("[(not a) (dictionary)]").getJSON();
check_schema(a, schema, false, "top-level type mismatch");
JSON b = QPDFObjectHandle::parse(
"<<"
" /one <<"
" /a <<"
" /t (oops)"
" /r ["
" /x (ecks)"
" /y (why)"
" ]"
" /s << /z (esses) >>"
" >>"
" >>"
" /two ["
" <<"
" /goose (0 gander)"
" /glarp (0 enspliel)"
" >>"
" <<"
" /goose (1 gander)"
" /flarp (1 enspliel)"
" >>"
" 2"
" [ (three) ]"
" <<"
" /goose (4 gander)"
" /glarp (4 enspliel)"
" >>"
" ]"
">>").getJSON();
check_schema(b, schema, false, "top-level type mismatch");
check_schema(a, a, false, "top-level schema array error");
check_schema(b, b, false, "lower-level schema array error");
check_schema(schema, schema, true, "pass");
}
int main()
{
test_main();
test_schema();
std::cout << "end of json tests\n";
return 0;
}

View File

@ -34,3 +34,8 @@ Pl_PNGFilter decodeUp 0
Pl_PNGFilter decodeAverage 0 Pl_PNGFilter decodeAverage 0
Pl_PNGFilter decodePaeth 0 Pl_PNGFilter decodePaeth 0
Pl_TIFFPredictor processRow 1 Pl_TIFFPredictor processRow 1
JSON wanted dictionary 0
JSON key missing in object 0
JSON wanted array 0
JSON schema array error 0
JSON key extra in object 0

17
libtests/qtest/json.test Normal file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;
chdir("json") or die "chdir testdir failed: $!\n";
require TestDriver;
my $td = new TestDriver('json');
$td->runtest("json",
{$td->COMMAND => "json"},
{$td->FILE => "json.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->report(1);

View File

@ -0,0 +1,23 @@
--- top-level type mismatch
top-level object is supposed to be a dictionary
---
--- top-level type mismatch
json key "./one./a": key "/q" is present in schema but missing in object
json key "./one./a./r" is supposed to be a dictionary
json key "./one./a./s" is supposed to be an array
json key "./one./a": key "/t" is not present in schema but appears in object
json key "./two.1": key "/glarp" is present in schema but missing in object
json key "./two.1": key "/flarp" is not present in schema but appears in object
json key "./two.2" is supposed to be a dictionary
json key "./two.3" is supposed to be a dictionary
---
--- top-level schema array error
top-level object schema array contains other than one item
---
--- lower-level schema array error
json key "./one./a./r" schema array contains other than one item
json key "./two" schema array contains other than one item
---
--- pass
---
end of json tests