mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Add reactors to the JSON parser
This commit is contained in:
parent
f5dd63819d
commit
8d2a0eda5a
@ -1,3 +1,10 @@
|
||||
2022-05-01 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* JSON: add reactors to the JSON parser, making it possible to
|
||||
react to JSON parsing events as they occur and to block the
|
||||
results from being stored. This makes it possible to incrementally
|
||||
parse arbitrarily large JSON inputs.
|
||||
|
||||
2022-04-30 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* QPDFWriter: change encryption API calls
|
||||
|
@ -141,9 +141,86 @@ class JSON
|
||||
QPDF_DLL
|
||||
bool checkSchema(JSON schema, std::list<std::string>& errors);
|
||||
|
||||
// Create a JSON object from a string.
|
||||
// A pointer to a Reactor class can be passed to parse, which
|
||||
// will enable the caller to react to incremental events in the
|
||||
// construction of the JSON object. This makes it possible to
|
||||
// implement SAX-like handling of very large JSON objects.
|
||||
class QPDF_DLL_CLASS Reactor
|
||||
{
|
||||
public:
|
||||
QPDF_DLL
|
||||
virtual ~Reactor() = default;
|
||||
|
||||
// The start/end methods are called when parsing of a
|
||||
// dictionary or array is started or ended. The item methods
|
||||
// are called when an item is added to a dictionary or array.
|
||||
// See important notes in "Item methods" below.
|
||||
|
||||
// During parsing of a JSON string, the parser is operating on
|
||||
// a single object at a time. When a dictionary or array is
|
||||
// started, a new context begins, and when that dictionary or
|
||||
// array is ended, the previous context is resumed. So, for
|
||||
// example, if you have `{"a": [1]}`, you will receive the
|
||||
// following method calls
|
||||
//
|
||||
// dictionaryStart -- current object is the top-level dictionary
|
||||
// arrayStart -- current object is the array
|
||||
// arrayItem -- called with the "1" object
|
||||
// containerEnd -- now current object is the dictionary again
|
||||
// dictionaryItem -- called with "a" and the just-completed array
|
||||
// containerEnd -- current object is undefined
|
||||
//
|
||||
// If the top-level item in a JSON string is a scalar, the
|
||||
// topLevelScalar() method will be called. No argument is
|
||||
// passed since the object is the same as what is returned by
|
||||
// parse().
|
||||
|
||||
QPDF_DLL
|
||||
virtual void dictionaryStart() = 0;
|
||||
QPDF_DLL
|
||||
virtual void arrayStart() = 0;
|
||||
QPDF_DLL
|
||||
virtual void containerEnd(JSON const& value) = 0;
|
||||
QPDF_DLL
|
||||
virtual void topLevelScalar() = 0;
|
||||
|
||||
// Item methods:
|
||||
//
|
||||
// The return value of the item methods indicates whether the
|
||||
// item has been "consumed". If the item method returns true,
|
||||
// then the item will not be added to the containing JSON
|
||||
// object. This is what allows arbitrarily large JSON objects
|
||||
// to be parsed and not have to be kept in memory.
|
||||
//
|
||||
// NOTE: When a dictionary or an array is added to a
|
||||
// container, the dictionaryItem or arrayItem method is called
|
||||
// when the child item's start delimiter is encountered, so
|
||||
// the JSON object passed in at that time will always be
|
||||
// in its initial, empty state.
|
||||
|
||||
QPDF_DLL
|
||||
virtual bool
|
||||
dictionaryItem(std::string const& key, JSON const& value) = 0;
|
||||
QPDF_DLL
|
||||
virtual bool arrayItem(JSON const& value) = 0;
|
||||
};
|
||||
|
||||
// Create a JSON object from a string. See above for information
|
||||
// about how to use the Reactor.
|
||||
QPDF_DLL
|
||||
static JSON parse(std::string const&);
|
||||
static JSON parse(std::string const&, Reactor* reactor = nullptr);
|
||||
|
||||
// parse calls setStart and setEnd to set the inclusive start and
|
||||
// non-inclusive end offsets of an object relative to its input
|
||||
// string. Otherwise, both values are 0.
|
||||
QPDF_DLL
|
||||
void setStart(size_t);
|
||||
QPDF_DLL
|
||||
void setEnd(size_t);
|
||||
QPDF_DLL
|
||||
size_t getStart() const;
|
||||
QPDF_DLL
|
||||
size_t getEnd() const;
|
||||
|
||||
private:
|
||||
static std::string encode_string(std::string const& utf8);
|
||||
@ -217,6 +294,9 @@ class JSON
|
||||
Members(Members const&) = delete;
|
||||
|
||||
std::shared_ptr<JSON_value> value;
|
||||
// start and end are only populated for objects created by parse
|
||||
size_t start;
|
||||
size_t end;
|
||||
};
|
||||
|
||||
std::shared_ptr<Members> m;
|
||||
|
@ -1,12 +1,15 @@
|
||||
#include <qpdf/JSON.hh>
|
||||
|
||||
#include <qpdf/QIntC.hh>
|
||||
#include <qpdf/QTC.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <cstring>
|
||||
#include <stdexcept>
|
||||
|
||||
JSON::Members::Members(std::shared_ptr<JSON_value> value) :
|
||||
value(value)
|
||||
value(value),
|
||||
start(0),
|
||||
end(0)
|
||||
{
|
||||
}
|
||||
|
||||
@ -455,7 +458,8 @@ namespace
|
||||
class JSONParser
|
||||
{
|
||||
public:
|
||||
JSONParser() :
|
||||
JSONParser(JSON::Reactor* reactor) :
|
||||
reactor(reactor),
|
||||
lex_state(ls_top),
|
||||
number_before_point(0),
|
||||
number_after_point(0),
|
||||
@ -499,6 +503,7 @@ namespace
|
||||
ls_backslash,
|
||||
};
|
||||
|
||||
JSON::Reactor* reactor;
|
||||
lex_state_e lex_state;
|
||||
size_t number_before_point;
|
||||
size_t number_after_point;
|
||||
@ -828,10 +833,18 @@ JSONParser::handleToken()
|
||||
switch (*tok_start) {
|
||||
case '{':
|
||||
item = std::make_shared<JSON>(JSON::makeDictionary());
|
||||
item->setStart(QIntC::to_size(tok_start - cstr));
|
||||
if (reactor) {
|
||||
reactor->dictionaryStart();
|
||||
}
|
||||
break;
|
||||
|
||||
case '[':
|
||||
item = std::make_shared<JSON>(JSON::makeArray());
|
||||
item->setStart(QIntC::to_size(tok_start - cstr));
|
||||
if (reactor) {
|
||||
reactor->arrayStart();
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -997,6 +1010,11 @@ JSONParser::handleToken()
|
||||
} else if ((delimiter == '}') || (delimiter == ']')) {
|
||||
next_state = ps_stack.back();
|
||||
ps_stack.pop_back();
|
||||
auto tos = stack.back();
|
||||
tos->setEnd(QIntC::to_size(tok_end - cstr));
|
||||
if (reactor) {
|
||||
reactor->containerEnd(*tos);
|
||||
}
|
||||
if (next_state != ps_done) {
|
||||
stack.pop_back();
|
||||
}
|
||||
@ -1004,6 +1022,11 @@ JSONParser::handleToken()
|
||||
throw std::logic_error(
|
||||
"JSONParser::handleToken: unexpected delimiter in transition");
|
||||
} else if (item.get()) {
|
||||
if (!(item->isArray() || item->isDictionary())) {
|
||||
item->setStart(QIntC::to_size(tok_start - cstr));
|
||||
item->setEnd(QIntC::to_size(tok_end - cstr));
|
||||
}
|
||||
|
||||
std::shared_ptr<JSON> tos;
|
||||
if (!stack.empty()) {
|
||||
tos = stack.back();
|
||||
@ -1017,14 +1040,18 @@ JSONParser::handleToken()
|
||||
break;
|
||||
|
||||
case ps_dict_after_colon:
|
||||
tos->addDictionaryMember(dict_key, *item);
|
||||
if (!reactor || !reactor->dictionaryItem(dict_key, *item)) {
|
||||
tos->addDictionaryMember(dict_key, *item);
|
||||
}
|
||||
next_state = ps_dict_after_item;
|
||||
break;
|
||||
|
||||
case ps_array_begin:
|
||||
case ps_array_after_comma:
|
||||
if (!reactor || !reactor->arrayItem(*item)) {
|
||||
tos->addArrayElement(*item);
|
||||
}
|
||||
next_state = ps_array_after_item;
|
||||
tos->addArrayElement(*item);
|
||||
break;
|
||||
|
||||
case ps_top:
|
||||
@ -1083,12 +1110,40 @@ JSONParser::parse(std::string const& s)
|
||||
QTC::TC("libtests", "JSON parse premature EOF");
|
||||
throw std::runtime_error("JSON: premature end of input");
|
||||
}
|
||||
return stack.back();
|
||||
auto const& tos = stack.back();
|
||||
if (reactor && tos.get() && !(tos->isArray() || tos->isDictionary())) {
|
||||
reactor->topLevelScalar();
|
||||
}
|
||||
return tos;
|
||||
}
|
||||
|
||||
JSON
|
||||
JSON::parse(std::string const& s)
|
||||
JSON::parse(std::string const& s, Reactor* reactor)
|
||||
{
|
||||
JSONParser jp;
|
||||
JSONParser jp(reactor);
|
||||
return *jp.parse(s);
|
||||
}
|
||||
|
||||
void
|
||||
JSON::setStart(size_t start)
|
||||
{
|
||||
this->m->start = start;
|
||||
}
|
||||
|
||||
void
|
||||
JSON::setEnd(size_t end)
|
||||
{
|
||||
this->m->end = end;
|
||||
}
|
||||
|
||||
size_t
|
||||
JSON::getStart() const
|
||||
{
|
||||
return this->m->start;
|
||||
}
|
||||
|
||||
size_t
|
||||
JSON::getEnd() const
|
||||
{
|
||||
return this->m->end;
|
||||
}
|
||||
|
@ -1,21 +1,113 @@
|
||||
#include <qpdf/JSON.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
namespace
|
||||
{
|
||||
class Reactor: public JSON::Reactor
|
||||
{
|
||||
public:
|
||||
virtual ~Reactor() = default;
|
||||
virtual void dictionaryStart() override;
|
||||
virtual void arrayStart() override;
|
||||
virtual void containerEnd(JSON const& value) override;
|
||||
virtual void topLevelScalar() override;
|
||||
virtual bool
|
||||
dictionaryItem(std::string const& key, JSON const& value) override;
|
||||
virtual bool arrayItem(JSON const& value) override;
|
||||
|
||||
private:
|
||||
void printItem(JSON const&);
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void
|
||||
Reactor::dictionaryStart()
|
||||
{
|
||||
std::cout << "dictionary start" << std::endl;
|
||||
}
|
||||
|
||||
void
|
||||
Reactor::arrayStart()
|
||||
{
|
||||
std::cout << "array start" << std::endl;
|
||||
}
|
||||
|
||||
void
|
||||
Reactor::containerEnd(JSON const& value)
|
||||
{
|
||||
std::cout << "container end: ";
|
||||
printItem(value);
|
||||
}
|
||||
|
||||
void
|
||||
Reactor::topLevelScalar()
|
||||
{
|
||||
std::cout << "top-level scalar" << std::endl;
|
||||
}
|
||||
|
||||
bool
|
||||
Reactor::dictionaryItem(std::string const& key, JSON const& value)
|
||||
{
|
||||
std::cout << "dictionary item: " << key << " -> ";
|
||||
printItem(value);
|
||||
if (key == "keep") {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Reactor::arrayItem(JSON const& value)
|
||||
{
|
||||
std::cout << "array item: ";
|
||||
printItem(value);
|
||||
std::string n;
|
||||
if (value.getString(n) && n == "keep") {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Reactor::printItem(JSON const& j)
|
||||
{
|
||||
std::cout << "[" << j.getStart() << ", " << j.getEnd()
|
||||
<< "): " << j.unparse() << std::endl;
|
||||
}
|
||||
|
||||
static void
|
||||
usage()
|
||||
{
|
||||
std::cerr << "Usage: json_parse file [--react]" << std::endl;
|
||||
exit(2);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
if (argc != 2) {
|
||||
std::cerr << "Usage: json_parse file" << std::endl;
|
||||
if ((argc < 2) || (argc > 3)) {
|
||||
usage();
|
||||
return 2;
|
||||
}
|
||||
char const* filename = argv[1];
|
||||
std::shared_ptr<Reactor> reactor;
|
||||
if (argc == 3) {
|
||||
if (strcmp(argv[2], "--react") == 0) {
|
||||
reactor = std::make_shared<Reactor>();
|
||||
} else {
|
||||
usage();
|
||||
}
|
||||
}
|
||||
try {
|
||||
std::shared_ptr<char> buf;
|
||||
size_t size;
|
||||
QUtil::read_file_into_memory(filename, buf, size);
|
||||
std::string s(buf.get(), size);
|
||||
std::cout << JSON::parse(s).unparse() << std::endl;
|
||||
std::cout << JSON::parse(s, reactor.get()).unparse() << std::endl;
|
||||
} catch (std::exception& e) {
|
||||
std::cerr << "exception: " << filename << ": " << e.what() << std::endl;
|
||||
return 2;
|
||||
|
@ -32,7 +32,7 @@ if ($^O ne 'msys')
|
||||
|
||||
cleanup();
|
||||
|
||||
my $good = 9;
|
||||
my $good = 10;
|
||||
|
||||
for (my $i = 1; $i <= $good; ++$i)
|
||||
{
|
||||
@ -73,6 +73,11 @@ for (my $i = 1; $i <= $good; ++$i)
|
||||
{$td->FILE => "out.json"},
|
||||
{$td->STRING => ""});
|
||||
}
|
||||
|
||||
$td->runtest("good $n reactor",
|
||||
{$td->COMMAND => "json_parse good-$n.json --react"},
|
||||
{$td->FILE => "good-$n-react.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
}
|
||||
|
||||
my @bad = (
|
||||
@ -127,7 +132,7 @@ foreach my $d (@bad)
|
||||
|
||||
cleanup();
|
||||
|
||||
$td->report((2 * $good) + scalar(@bad));
|
||||
$td->report((3 * $good) + scalar(@bad));
|
||||
|
||||
sub cleanup
|
||||
{
|
||||
|
21
libtests/qtest/json_parse/good-01-react.out
Normal file
21
libtests/qtest/json_parse/good-01-react.out
Normal file
@ -0,0 +1,21 @@
|
||||
dictionary start
|
||||
dictionary item: a -> [6, 11): "bcd"
|
||||
array start
|
||||
dictionary item: e -> [18, 0): []
|
||||
array item: [19, 20): 1
|
||||
array item: [41, 42): 2
|
||||
array item: [44, 45): 3
|
||||
array item: [46, 47): 4
|
||||
array item: [48, 54): "five"
|
||||
dictionary start
|
||||
array item: [56, 0): {}
|
||||
dictionary item: six -> [64, 65): 7
|
||||
dictionary item: 8 -> [72, 73): 9
|
||||
container end: [56, 74): {}
|
||||
array item: [76, 80): null
|
||||
array item: [82, 86): true
|
||||
array item: [107, 112): false
|
||||
array item: [114, 134): "a\b\f\n\r\t\\\"/z"
|
||||
container end: [18, 135): []
|
||||
container end: [0, 136): {}
|
||||
{}
|
3
libtests/qtest/json_parse/good-02-react.out
Normal file
3
libtests/qtest/json_parse/good-02-react.out
Normal file
@ -0,0 +1,3 @@
|
||||
dictionary start
|
||||
container end: [0, 2): {}
|
||||
{}
|
3
libtests/qtest/json_parse/good-03-react.out
Normal file
3
libtests/qtest/json_parse/good-03-react.out
Normal file
@ -0,0 +1,3 @@
|
||||
array start
|
||||
container end: [0, 2): []
|
||||
[]
|
18
libtests/qtest/json_parse/good-04-react.out
Normal file
18
libtests/qtest/json_parse/good-04-react.out
Normal file
@ -0,0 +1,18 @@
|
||||
array start
|
||||
array start
|
||||
array item: [1, 0): []
|
||||
array start
|
||||
array item: [2, 0): []
|
||||
dictionary start
|
||||
array item: [3, 0): {}
|
||||
container end: [3, 5): {}
|
||||
container end: [2, 6): []
|
||||
dictionary start
|
||||
array item: [8, 0): {}
|
||||
dictionary start
|
||||
dictionary item: -> [13, 0): {}
|
||||
container end: [13, 15): {}
|
||||
container end: [8, 16): {}
|
||||
container end: [1, 17): []
|
||||
container end: [0, 18): []
|
||||
[]
|
2
libtests/qtest/json_parse/good-05-react.out
Normal file
2
libtests/qtest/json_parse/good-05-react.out
Normal file
@ -0,0 +1,2 @@
|
||||
top-level scalar
|
||||
"x"
|
2
libtests/qtest/json_parse/good-06-react.out
Normal file
2
libtests/qtest/json_parse/good-06-react.out
Normal file
@ -0,0 +1,2 @@
|
||||
top-level scalar
|
||||
123
|
2
libtests/qtest/json_parse/good-07-react.out
Normal file
2
libtests/qtest/json_parse/good-07-react.out
Normal file
@ -0,0 +1,2 @@
|
||||
top-level scalar
|
||||
-123
|
11
libtests/qtest/json_parse/good-08-react.out
Normal file
11
libtests/qtest/json_parse/good-08-react.out
Normal file
@ -0,0 +1,11 @@
|
||||
array start
|
||||
array item: [1, 2): 1
|
||||
array item: [4, 6): -2
|
||||
array item: [8, 11): 3.4
|
||||
array item: [13, 17): -5.6
|
||||
array item: [19, 23): -9e1
|
||||
array item: [25, 29): 10e2
|
||||
array item: [31, 37): 12.3e5
|
||||
array item: [39, 46): 12.6e-7
|
||||
container end: [0, 47): []
|
||||
[]
|
8
libtests/qtest/json_parse/good-09-react.out
Normal file
8
libtests/qtest/json_parse/good-09-react.out
Normal file
@ -0,0 +1,8 @@
|
||||
array start
|
||||
array item: [1, 7): "aπb"
|
||||
array item: [9, 23): "a\b\f\n\r\tc"
|
||||
array item: [25, 42): "aπbπc"
|
||||
array item: [44, 52): "π"
|
||||
array item: [54, 71): "a\u0018bʬc"
|
||||
container end: [0, 72): []
|
||||
[]
|
47
libtests/qtest/json_parse/good-10-react.out
Normal file
47
libtests/qtest/json_parse/good-10-react.out
Normal file
@ -0,0 +1,47 @@
|
||||
dictionary start
|
||||
array start
|
||||
dictionary item: a -> [9, 0): []
|
||||
array item: [10, 11): 1
|
||||
array item: [13, 14): 2
|
||||
dictionary start
|
||||
array item: [16, 0): {}
|
||||
dictionary item: x -> [22, 25): "y"
|
||||
container end: [16, 26): {}
|
||||
array item: [28, 29): 3
|
||||
dictionary start
|
||||
array item: [31, 0): {}
|
||||
dictionary item: keep -> [40, 61): "not in final output"
|
||||
container end: [31, 62): {
|
||||
"keep": "not in final output"
|
||||
}
|
||||
container end: [9, 63): []
|
||||
array start
|
||||
dictionary item: keep -> [75, 0): []
|
||||
array item: [76, 77): 1
|
||||
array item: [79, 83): null
|
||||
array item: [85, 86): 2
|
||||
array item: [88, 93): false
|
||||
array item: [95, 101): "keep"
|
||||
array item: [103, 104): 3
|
||||
array start
|
||||
array item: [106, 0): []
|
||||
array item: [107, 113): "this"
|
||||
array item: [115, 121): "keep"
|
||||
array item: [123, 128): "not"
|
||||
array item: [130, 137): "final"
|
||||
container end: [106, 138): [
|
||||
"keep"
|
||||
]
|
||||
container end: [75, 139): [
|
||||
"keep"
|
||||
]
|
||||
container end: [0, 141): {
|
||||
"keep": [
|
||||
"keep"
|
||||
]
|
||||
}
|
||||
{
|
||||
"keep": [
|
||||
"keep"
|
||||
]
|
||||
}
|
4
libtests/qtest/json_parse/good-10.json
Normal file
4
libtests/qtest/json_parse/good-10.json
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"a": [1, 2, {"x": "y"}, 3, {"keep": "not in final output"}],
|
||||
"keep": [1, null, 2, false, "keep", 3, ["this", "keep", "not", "final"]]
|
||||
}
|
27
libtests/qtest/json_parse/save-10.json
Normal file
27
libtests/qtest/json_parse/save-10.json
Normal file
@ -0,0 +1,27 @@
|
||||
{
|
||||
"a": [
|
||||
1,
|
||||
2,
|
||||
{
|
||||
"x": "y"
|
||||
},
|
||||
3,
|
||||
{
|
||||
"keep": "not in final output"
|
||||
}
|
||||
],
|
||||
"keep": [
|
||||
1,
|
||||
null,
|
||||
2,
|
||||
false,
|
||||
"keep",
|
||||
3,
|
||||
[
|
||||
"this",
|
||||
"keep",
|
||||
"not",
|
||||
"final"
|
||||
]
|
||||
]
|
||||
}
|
Loading…
Reference in New Issue
Block a user