Implement JSONHandler for recursively processing JSON

This commit is contained in:
Jay Berkenbilt 2022-01-19 09:31:28 -05:00
parent a6df6fdaf7
commit 37105710ee
11 changed files with 613 additions and 2 deletions

View File

@ -30,7 +30,10 @@
// create temporary JSON objects on the stack, add them to other
// objects, and let them go out of scope safely. It also means that if
// the same JSON object is added in more than one place, all copies
// share underlying data.
// share underlying data. This makes them similar in structure and
// behavior to QPDFObjectHandle and may feel natural within the QPDF
// codebase, but it is also a good reason not to use this as a
// general-purpose JSON package.
#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh>
@ -38,6 +41,7 @@
#include <map>
#include <vector>
#include <list>
#include <functional>
class JSON
{
@ -77,6 +81,24 @@ class JSON
QPDF_DLL
bool isDictionary() const;
// Accessors. Accessor behavior:
//
// - If argument is wrong type, including null, return false
// - If argument is right type, return true and initialize the value
QPDF_DLL
bool getString(std::string& utf8) const;
QPDF_DLL
bool getNumber(std::string& value) const;
QPDF_DLL
bool getBool(bool& value) const;
QPDF_DLL
bool isNull() const;
QPDF_DLL
bool forEachDictItem(
std::function<void(std::string const& key, JSON value)> fn) const;
QPDF_DLL
bool forEachArrayItem(std::function<void(JSON value)> fn) const;
// Check this JSON object against a "schema". This is not a schema
// according to any standard. It's just a template of what the
// JSON is supposed to contain. The checking does the following:
@ -129,6 +151,7 @@ class JSON
JSON_string(std::string const& utf8);
virtual ~JSON_string();
virtual std::string unparse(size_t depth) const;
std::string utf8;
std::string encoded;
};
struct JSON_number: public JSON_value

142
include/qpdf/JSONHandler.hh Normal file
View File

@ -0,0 +1,142 @@
// Copyright (c) 2005-2021 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef JSONHANDLER_HH
#define JSONHANDLER_HH
#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh>
#include <qpdf/JSON.hh>
#include <string>
#include <map>
#include <functional>
#include <stdexcept>
#include <memory>
// JSONHandler walks a JSON value and dispatches each part of it to
// user-supplied callbacks. A handler is configured with callbacks for
// scalar types and with nested JSONHandler objects for dictionary
// members and array elements; handle() then applies them recursively.
class JSONHandler
{
public:
// Error exception is thrown if there are any errors validating
// the JSON object.
class QPDF_DLL_CLASS Error: public std::runtime_error
{
public:
QPDF_DLL
Error(std::string const&);
};
QPDF_DLL
JSONHandler();
QPDF_DLL
~JSONHandler() = default;
// Based on the type of handler, expect the object to be of a
// certain type. JSONHandler::Error is thrown otherwise. Multiple
// handlers may be registered, which allows the object to be of
// various types. If an anyHandler is added, no other handler will
// be called.
//
// Every callback receives the path of the value being handled as
// its first argument. The remaining argument, if any, is the value
// itself: the JSON object, the string, the number (passed as a
// string), or the bool.
typedef std::function<void(
std::string const& path, JSON value)> json_handler_t;
typedef std::function<void(
std::string const& path)> void_handler_t;
typedef std::function<void(
std::string const& path, std::string const& value)> string_handler_t;
typedef std::function<void(
std::string const& path, bool value)> bool_handler_t;
// If an any handler is added, it will be called for any value
// including null, and no other handler will be called.
QPDF_DLL
void addAnyHandler(json_handler_t fn);
// If any of the remaining handlers are registered, each
// registered handler whose type matches the value will be called.
QPDF_DLL
void addNullHandler(void_handler_t fn);
QPDF_DLL
void addStringHandler(string_handler_t fn);
QPDF_DLL
void addNumberHandler(string_handler_t fn);
QPDF_DLL
void addBoolHandler(bool_handler_t fn);
// Returns a reference to a map: keys are expected object keys,
// and values are handlers for that object. Callers populate the
// returned map directly.
QPDF_DLL
std::map<std::string, std::shared_ptr<JSONHandler>>& addDictHandlers();
// Apply the given handler to any key not explicitly in dict
// handlers.
QPDF_DLL
void addFallbackDictHandler(std::shared_ptr<JSONHandler>);
// Apply the given handler to each element of the array.
QPDF_DLL
void addArrayHandler(std::shared_ptr<JSONHandler>);
// Apply handlers recursively to a JSON object. The path is passed
// to callbacks and used in error messages. Throws
// JSONHandler::Error if the value cannot be handled.
QPDF_DLL
void handle(std::string const& path, JSON j);
private:
// NOTE(review): copy construction is deleted but copy assignment is
// not declared here; confirm whether assignment should also be
// disallowed.
JSONHandler(JSONHandler const&) = delete;
// All registered callbacks and nested handlers for one JSONHandler.
struct Handlers
{
Handlers() :
any_handler(nullptr),
null_handler(nullptr),
string_handler(nullptr),
number_handler(nullptr),
bool_handler(nullptr)
{
}
json_handler_t any_handler;
void_handler_t null_handler;
string_handler_t string_handler;
string_handler_t number_handler;
bool_handler_t bool_handler;
// Handlers for specific dictionary keys.
std::map<std::string, std::shared_ptr<JSONHandler>> dict_handlers;
// Handler for dictionary keys that are not in dict_handlers.
std::shared_ptr<JSONHandler> fallback_dict_handler;
// Handler applied to each array element.
std::shared_ptr<JSONHandler> array_handler;
};
// Holder for the private data; only JSONHandler can construct it.
class Members
{
friend class JSONHandler;
public:
QPDF_DLL
~Members() = default;
private:
Members();
Members(Members const&) = delete;
Handlers h;
};
PointerHolder<Members> m;
};
#endif // JSONHANDLER_HH

View File

@ -90,6 +90,7 @@ std::string JSON::JSON_array::unparse(size_t depth) const
}
// Construct a string value, keeping both the original UTF-8 text and
// the result of passing it through encode_string(). Inside the
// initializer list the argument name utf8 refers to the constructor
// parameter, which has the same name as the member it initializes.
JSON::JSON_string::JSON_string(std::string const& utf8) :
utf8(utf8),
encoded(encode_string(utf8))
{
}
@ -311,6 +312,83 @@ JSON::isDictionary() const
this->m->value.getPointer());
}
// If this value is a string, copy its UTF-8 text into utf8 and return
// true. For any other type, leave utf8 untouched and return false.
bool
JSON::getString(std::string& utf8) const
{
    // A failed downcast means the held value is not a string.
    if (auto const* str_value =
            dynamic_cast<JSON_string const*>(this->m->value.getPointer()))
    {
        utf8 = str_value->utf8;
        return true;
    }
    return false;
}
bool
JSON::getNumber(std::string& value) const
{
auto v = dynamic_cast<JSON_number const*>(this->m->value.getPointer());
if (v == nullptr)
{
return false;
}
value = v->encoded;
return true;
}
bool
JSON::getBool(bool& value) const
{
auto v = dynamic_cast<JSON_bool const*>(this->m->value.getPointer());
if (v == nullptr)
{
return false;
}
value = v->value;
return true;
}
bool
JSON::isNull() const
{
if (dynamic_cast<JSON_null const*>(this->m->value.getPointer()))
{
return true;
}
return false;
}
// Call fn once for every member of this dictionary, passing the key
// and the member wrapped as a JSON object. Returns false without
// calling fn if this value is not a dictionary; returns true
// otherwise, including for an empty dictionary.
bool
JSON::forEachDictItem(
    std::function<void(std::string const& key, JSON value)> fn) const
{
    JSON_dictionary const* dict =
        dynamic_cast<JSON_dictionary const*>(this->m->value.getPointer());
    if (! dict)
    {
        return false;
    }
    for (auto iter = dict->members.begin(), last = dict->members.end();
         iter != last; ++iter)
    {
        fn(iter->first, JSON(iter->second));
    }
    return true;
}
bool
JSON::forEachArrayItem(std::function<void(JSON value)> fn) const
{
auto v = dynamic_cast<JSON_array const*>(this->m->value.getPointer());
if (v == nullptr)
{
return false;
}
for (auto const& i: v->elements)
{
fn(JSON(i));
}
return true;
}
bool
JSON::checkSchema(JSON schema, std::list<std::string>& errors)
{

160
libqpdf/JSONHandler.cc Normal file
View File

@ -0,0 +1,160 @@
#include <qpdf/JSONHandler.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
JSONHandler::Error::Error(std::string const& msg) :
std::runtime_error(msg)
{
}
// Create a handler with no callbacks registered; the private data
// lives in a freshly allocated Members object owned by m.
JSONHandler::JSONHandler()
    : m(new Members)
{
}
// Members needs no setup of its own; the contained Handlers object is
// default-constructed.
JSONHandler::Members::Members() = default;
// Register the callback that receives every value regardless of type.
// A later call replaces the callback set by an earlier one.
void
JSONHandler::addAnyHandler(json_handler_t fn)
{
    Handlers& handlers = this->m->h;
    handlers.any_handler = fn;
}
// Register the callback invoked when the handled value is null.
// A later call replaces the callback set by an earlier one.
void
JSONHandler::addNullHandler(void_handler_t fn)
{
    Handlers& handlers = this->m->h;
    handlers.null_handler = fn;
}
// Register the callback invoked when the handled value is a string.
// A later call replaces the callback set by an earlier one.
void
JSONHandler::addStringHandler(string_handler_t fn)
{
    Handlers& handlers = this->m->h;
    handlers.string_handler = fn;
}
// Register the callback invoked when the handled value is a number.
// The callback receives the number as a string. A later call replaces
// the callback set by an earlier one.
void
JSONHandler::addNumberHandler(string_handler_t fn)
{
    Handlers& handlers = this->m->h;
    handlers.number_handler = fn;
}
// Register the callback invoked when the handled value is a boolean.
// A later call replaces the callback set by an earlier one.
void
JSONHandler::addBoolHandler(bool_handler_t fn)
{
    Handlers& handlers = this->m->h;
    handlers.bool_handler = fn;
}
std::map<std::string, std::shared_ptr<JSONHandler>>&
JSONHandler::addDictHandlers()
{
return this->m->h.dict_handlers;
}
void
JSONHandler::addFallbackDictHandler(std::shared_ptr<JSONHandler> fdh)
{
this->m->h.fallback_dict_handler = fdh;
}
void
JSONHandler::addArrayHandler(std::shared_ptr<JSONHandler> ah)
{
this->m->h.array_handler = ah;
}
void
JSONHandler::handle(std::string const& path, JSON j)
{
if (this->m->h.any_handler)
{
this->m->h.any_handler(path, j);
return;
}
bool handled = false;
bool bvalue = false;
std::string svalue;
if (this->m->h.null_handler && j.isNull())
{
this->m->h.null_handler(path);
handled = true;
}
if (this->m->h.string_handler && j.getString(svalue))
{
this->m->h.string_handler(path, svalue);
handled = true;
}
if (this->m->h.number_handler && j.getNumber(svalue))
{
this->m->h.number_handler(path, svalue);
handled = true;
}
if (this->m->h.bool_handler && j.getBool(bvalue))
{
this->m->h.bool_handler(path, bvalue);
handled = true;
}
if ((this->m->h.fallback_dict_handler.get() ||
(! this->m->h.dict_handlers.empty())) && j.isDictionary())
{
std::string path_base = path;
if (path_base != ".")
{
path_base += ".";
}
j.forEachDictItem([&path, &path_base, this](
std::string const& k, JSON v) {
auto i = this->m->h.dict_handlers.find(k);
if (i == this->m->h.dict_handlers.end())
{
if (this->m->h.fallback_dict_handler.get())
{
this->m->h.fallback_dict_handler->handle(
path_base + k, v);
}
else
{
QTC::TC("libtests", "JSONHandler unexpected key");
throw Error(
"JSON handler found unexpected key " + k +
" in object at " + path);
}
}
else
{
i->second->handle(path_base + k, v);
}
});
// Set handled = true even if we didn't call any handlers.
// This dictionary could have been empty, but it's okay since
// it's a dictionary like it's supposed to be.
handled = true;
}
if (this->m->h.array_handler.get())
{
size_t i = 0;
j.forEachArrayItem([&i, &path, this](JSON v) {
this->m->h.array_handler->handle(
path + "[" + QUtil::uint_to_string(i) + "]", v);
++i;
});
// Set handled = true even if we didn't call any handlers.
// This could have been an empty array.
handled = true;
}
if (! handled)
{
// It would be nice to include information about what type the
// object was and what types were allowed, but we're relying
// on schema validation to make sure input is properly
// structured before calling the handlers. It would be
// different if this code were trying to be part of a
// general-purpose JSON package.
QTC::TC("libtests", "JSONHandler unhandled value");
throw Error("JSON handler: value at " + path +
" is not of expected type");
}
}

View File

@ -38,6 +38,7 @@ SRCS_libqpdf = \
libqpdf/InputSource.cc \
libqpdf/InsecureRandomDataProvider.cc \
libqpdf/JSON.cc \
libqpdf/JSONHandler.cc \
libqpdf/MD5.cc \
libqpdf/NNTree.cc \
libqpdf/OffsetInputSource.cc \

View File

@ -13,6 +13,7 @@ BINS_libtests = \
hex \
input_source \
json \
json_handler \
json_parse \
lzw \
main_from_wmain \

View File

@ -1,7 +1,7 @@
#include <qpdf/JSON.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <iostream>
#include <assert.h>
#include <cassert>
static void check(JSON const& j, std::string const& exp)
{
@ -20,12 +20,25 @@ static void test_main()
"\\u0003\\t\\b\\r\\n<4>\"");
JSON jnull = JSON::makeNull();
check(jnull, "null");
assert(jnull.isNull());
std::string value;
assert(! jnull.getNumber(value));
JSON jarr = JSON::makeArray();
check(jarr, "[]");
JSON jstr2 = JSON::makeString("a\tb");
assert(jstr2.getString(value));
assert(value == "a\tb");
assert(! jstr2.getNumber(value));
JSON jint = JSON::makeInt(16059);
JSON jdouble = JSON::makeReal(3.14159);
JSON jexp = JSON::makeNumber("2.1e5");
JSON jbool1 = JSON::makeBool(true);
JSON jbool2 = JSON::makeBool(false);
bool bvalue = false;
assert(jbool1.getBool(bvalue));
assert(bvalue);
assert(jbool2.getBool(bvalue));
assert(! bvalue);
jarr.addArrayElement(jstr2);
jarr.addArrayElement(jnull);
jarr.addArrayElement(jint);
@ -39,6 +52,18 @@ static void test_main()
" 3.14159,\n"
" 2.1e5\n"
"]");
std::vector<std::string> avalue;
assert(jarr.forEachArrayItem([&avalue](JSON j) {
avalue.push_back(j.unparse());
}));
std::vector<std::string> xavalue = {
"\"a\\tb\"",
"null",
"16059",
"3.14159",
"2.1e5",
};
assert(avalue == xavalue);
JSON jmap = JSON::makeDictionary();
check(jmap, "{}");
jmap.addDictionaryMember("b", jstr2);
@ -73,6 +98,18 @@ static void test_main()
check(QPDFObjectHandle::newReal(".34").getJSON(), "0.34");
check(QPDFObjectHandle::newReal("-0.56").getJSON(), "-0.56");
check(QPDFObjectHandle::newReal("-.78").getJSON(), "-0.78");
JSON jmap2 = JSON::parse(R"({"a": 1, "b": "two", "c": [true]})");
std::map<std::string, std::string> dvalue;
assert(jmap2.forEachDictItem([&dvalue]
(std::string const& k, JSON j) {
dvalue[k] = j.unparse();
}));
std::map<std::string, std::string> xdvalue = {
{"a", "1"},
{"b", "\"two\""},
{"c", "[\n true\n]"},
};
assert(dvalue == xdvalue);
}
static void check_schema(JSON& obj, JSON& schema, bool exp,

128
libtests/json_handler.cc Normal file
View File

@ -0,0 +1,128 @@
#include <qpdf/JSONHandler.hh>
#include <qpdf/QUtil.hh>
#include <iostream>
#include <cassert>
// Null handler for the tests: prints "<path>: null".
static void print_null(std::string const& path)
{
    std::string const line = path + ": null";
    std::cout << line << std::endl;
}
// String handler for the tests: prints "<path>: string: <value>".
static void print_string(std::string const& path, std::string const& value)
{
    std::string const line = path + ": string: " + value;
    std::cout << line << std::endl;
}
// Number handler for the tests: prints "<path>: number: <value>",
// where value is the number in string form.
static void print_number(std::string const& path, std::string const& value)
{
    std::string const line = path + ": number: " + value;
    std::cout << line << std::endl;
}
// Bool handler for the tests: prints "<path>: bool: true" or
// "<path>: bool: false".
static void print_bool(std::string const& path, bool value)
{
    char const* text = "false";
    if (value)
    {
        text = "true";
    }
    std::cout << path << ": bool: " << text << std::endl;
}
// "Any" handler for the tests: prints "<path>: json: " followed by the
// unparsed form of the value.
static void print_json(std::string const& path, JSON value)
{
    std::string const rendered = value.unparse();
    std::cout << path << ": json: " << rendered << std::endl;
}
static void test_scalar()
{
std::cout << "-- scalar --" << std::endl;
JSONHandler h;
h.addStringHandler(print_string);
JSON j = JSON::parse("\"potato\"");
h.handle(".", j);
}
// Build the handler tree shared by test_all and test_errors. The top
// level expects a dictionary with the keys "one" through "six" plus
// "phour"; some handlers are deliberately registered in more than one
// place.
static std::shared_ptr<JSONHandler> make_all_handler()
{
    auto root = std::make_shared<JSONHandler>();
    auto& root_keys = root->addDictHandlers();

    // "one": string only
    auto string_only = std::make_shared<JSONHandler>();
    string_only->addStringHandler(print_string);
    root_keys["one"] = string_only;

    // "two": number only
    auto number_only = std::make_shared<JSONHandler>();
    number_only->addNumberHandler(print_number);
    root_keys["two"] = number_only;

    // "three": bool only
    auto bool_only = std::make_shared<JSONHandler>();
    bool_only->addBoolHandler(print_bool);
    root_keys["three"] = bool_only;

    // "four" and "phour": anything goes; one handler serves both keys
    auto anything = std::make_shared<JSONHandler>();
    anything->addAnyHandler(print_json);
    root_keys["four"] = anything;
    root_keys["phour"] = anything; // share h4

    // "five": array whose elements may be bool, string, or null
    auto element = std::make_shared<JSONHandler>();
    // Allow to be either string or bool
    element->addBoolHandler(print_bool);
    element->addStringHandler(print_string);
    element->addNullHandler(print_null);
    auto element_array = std::make_shared<JSONHandler>();
    root_keys["five"] = element_array;
    element_array->addArrayHandler(element);

    // "six": nested dictionaries. "six.a.b" and "six.b" use the same
    // string handler, and other keys under "six.a" go to a fallback.
    auto six = std::make_shared<JSONHandler>();
    auto& six_keys = six->addDictHandlers();
    auto six_a = std::make_shared<JSONHandler>();
    six_keys["a"] = six_a;
    auto& six_a_keys = six_a->addDictHandlers();
    auto shared_string = std::make_shared<JSONHandler>();
    six_a_keys["b"] = shared_string;
    auto six_a_fallback = std::make_shared<JSONHandler>();
    six_a_fallback->addAnyHandler(print_json);
    six_a->addFallbackDictHandler(six_a_fallback);
    six_keys["b"] = shared_string; // share
    shared_string->addStringHandler(print_string);
    root_keys["six"] = six;

    return root;
}
static void test_all()
{
std::cout << "-- all --" << std::endl;
auto h = make_all_handler();
JSON j = JSON::parse(R"({
"one": "potato",
"two": 3.14,
"three": true,
"four": ["a", 1],
"five": ["x", false, "y", null, true],
"phour": null,
"six": {"a": {"b": "quack", "Q": "baaa"}, "b": "moo"}
})");
h->handle(".", j);
}
static void test_errors()
{
std::cout << "-- errors --" << std::endl;
auto h = make_all_handler();
auto t = [h](std::string const& msg, std::function<void()> fn) {
try
{
fn();
assert(false);
}
catch (JSONHandler::Error& e)
{
std::cout << msg << ": " << e.what() << std::endl;
}
};
t("bad type at top", [&h](){
h->handle(".", JSON::makeString("oops"));
});
t("unexpected key", [&h](){
JSON j = JSON::parse(R"({"x": "y"})");
h->handle(".", j);
});
}
// Entry point: run each test group in a fixed order. The command-line
// arguments are not used.
int main(int argc, char* argv[])
{
    void (*const groups[])() = {
        test_scalar,
        test_all,
        test_errors,
    };
    for (auto group: groups)
    {
        group();
    }
    return 0;
}

View File

@ -87,3 +87,5 @@ JSON parse leading zero 0
JSON parse number no digits 0
JSON parse premature end of u 0
JSON parse bad hex after u 0
JSONHandler unhandled value 0
JSONHandler unexpected key 0

View File

@ -0,0 +1,17 @@
#!/usr/bin/env perl
# Test-suite driver for the json_handler program. It runs the program
# and expects its output to match json_handler.out with exit status 0.
require 5.008;
use warnings;
use strict;

# Expected-output files are looked up relative to this directory.
chdir("json_handler") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('json_handler');

$td->runtest(
    "JSON handler",
    {$td->COMMAND => "json_handler"},
    {$td->FILE => "json_handler.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

# NOTE(review): the argument is presumably the expected number of
# tests; confirm against TestDriver.
$td->report(1);

View File

@ -0,0 +1,22 @@
-- scalar --
.: string: potato
-- all --
.five[0]: string: x
.five[1]: bool: false
.five[2]: string: y
.five[3]: null
.five[4]: bool: true
.four: json: [
"a",
1
]
.one: string: potato
.phour: json: null
.six.a.Q: json: "baaa"
.six.a.b: string: quack
.six.b: string: moo
.three: bool: true
.two: number: 3.14
-- errors --
bad type at top: JSON handler: value at . is not of expected type
unexpected key: JSON handler found unexpected key x in object at .