Add JSON::parse

This commit is contained in:
Jay Berkenbilt 2022-01-17 18:40:38 -05:00
parent b9af421ef7
commit e8e8f6f43c
98 changed files with 1207 additions and 82 deletions

View File

@ -4,8 +4,20 @@
to QPDFObjectHandle with corresponding functions added to the C
API. Thanks to m-holger for the contribution.
2022-01-19 Jay Berkenbilt <ejb@ql.org>
* Add a JSONHandler class that allows sax-like, recursive handling
of JSON objects.
2022-01-17 Jay Berkenbilt <ejb@ql.org>
* Add JSON::parse. Now qpdf's JSON class implements a
general-purpose JSON parser and serializer, but there are better
options for general use. This is really designed for qpdf's
internal use and is set up to be compatible with qpdf's existing
API and to hook into a planned JSON-based API to the QPDFJob
class.
* Add isDictionary and isArray to JSON
2022-01-11 Jay Berkenbilt <ejb@ql.org>

3
TODO
View File

@ -335,9 +335,6 @@ I find it useful to make reference to them in this list.
also be possible to create a widget annotation and a form field.
Update the pdf-attach-file.cc example with new APIs when ready.
* If I do more with json, take a look at this C++ header-only JSON
library: https://github.com/nlohmann/json/releases
* Flattening of form XObjects seems like something that would be
useful in the library. We are seeing more cases of completely valid
PDF files with form XObjects that cause problems in other software.

View File

@ -22,13 +22,15 @@
#ifndef JSON_HH
#define JSON_HH
// This is a simple JSON serializer, primarily designed for
// serializing QPDF Objects as JSON. JSON objects contain their data
// as smart pointers. One JSON object is added to another, this
// pointer is copied. This means you can create temporary JSON objects
// on the stack, add them to other objects, and let them go out of
// scope safely. It also means that if the json JSON object is added
// in more than one place, all copies share underlying data.
// This is a simple JSON serializer and parser, primarily designed for
// serializing QPDF Objects as JSON. While it may work as a
// general-purpose JSON parser/serializer, there are better options.
// JSON objects contain their data as smart pointers. When one JSON
// object is added to another, this pointer is copied. This means you
// can create temporary JSON objects on the stack, add them to other
// objects, and let them go out of scope safely. It also means that if
// the same JSON object is added in more than one place, all copies
// share underlying data.
#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh>
@ -98,6 +100,10 @@ class JSON
QPDF_DLL
bool checkSchema(JSON schema, std::list<std::string>& errors);
// Create a JSON object from a string containing JSON text. The
// whole string must be exactly one JSON value (dictionary, array,
// string, number, true, false, or null), optionally surrounded by
// whitespace. Throws std::runtime_error if the text is not
// acceptable; for most errors the message includes a byte offset
// into the string.
QPDF_DLL
static JSON parse(std::string const&);
private:
static std::string encode_string(std::string const& utf8);

View File

@ -2,6 +2,7 @@
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <stdexcept>
#include <cstring>
JSON::Members::~Members()
{
@ -437,3 +438,774 @@ JSON::checkSchemaInternal(JSON_value* this_v, JSON_value* sch_v,
return errors.empty();
}
namespace {
    // Single-use helper behind JSON::parse. getToken() is the lexer:
    // it scans the input buffer and brackets the next token with
    // tok_start/tok_end. handleToken() is the parser: it checks the
    // token against parser_state and builds the JSON tree using
    // `stack` for the containers currently open.
    class JSONParser
    {
      public:
        JSONParser() = default;

        // Parse a complete JSON document held in s.
        PointerHolder<JSON> parse(std::string const& s);

      private:
        void getToken();
        void handleToken();
        static std::string decode_string(std::string const& json);

        // Where the parser is relative to the enclosing container.
        enum parser_state_e {
            ps_top,
            ps_dict_begin,
            ps_dict_after_key,
            ps_dict_after_colon,
            ps_dict_after_item,
            ps_dict_after_comma,
            ps_array_begin,
            ps_array_after_item,
            ps_array_after_comma,
            ps_done,
        };

        // What kind of token the lexer is in the middle of.
        enum lex_state_e {
            ls_top,
            ls_number,
            ls_alpha,
            ls_string,
            ls_backslash,
        };

        // Lexer state, including digit counts for the numeric
        // literal being scanned.
        lex_state_e lex_state = ls_top;
        size_t number_before_point = 0;
        size_t number_after_point = 0;
        size_t number_after_e = 0;
        bool number_saw_point = false;
        bool number_saw_e = false;

        // Input buffer [cstr, end), current token [tok_start,
        // tok_end), and the read position p.
        char const* cstr = nullptr;
        char const* end = nullptr;
        char const* tok_start = nullptr;
        char const* tok_end = nullptr;
        char const* p = nullptr;

        // Parser state. ps_stack holds the state to return to when
        // the container on top of `stack` is closed; dict_key holds
        // the most recently read dictionary key.
        parser_state_e parser_state = ps_top;
        std::vector<PointerHolder<JSON>> stack;
        std::vector<parser_state_e> ps_stack;
        std::string dict_key;
    };
}
// Convert a lexed JSON string token, including its surrounding
// quotation marks, into the UTF-8 text it denotes by resolving
// backslash escapes. A \uXXXX high surrogate immediately followed by
// a \uXXXX low surrogate is combined into a single code point so
// that characters outside the basic multilingual plane decode to
// valid UTF-8. An unpaired surrogate is passed to QUtil::toUTF8
// unchanged.
//
// Throws std::logic_error for malformed input, since getToken() is
// expected to have rejected it already.
std::string
JSONParser::decode_string(std::string const& str)
{
    // The string has already been validated when this private method
    // is called, so errors are logic errors instead of runtime
    // errors.
    size_t len = str.length();
    if ((len < 2) || (str.at(0) != '"') || (str.at(len-1) != '"'))
    {
        throw std::logic_error(
            "JSON Parse: decode_string called with other than \"...\"");
    }
    char const* s = str.c_str();
    // Move inside the quotation marks
    ++s;
    len -= 2;

    // Decode the four hex digits at s[pos] .. s[pos+3] into a 16-bit
    // code unit.
    auto hex4 = [s](size_t pos) -> unsigned long {
        std::string bytes =
            QUtil::hex_decode(std::string(s + pos, s + pos + 4));
        unsigned long high = static_cast<unsigned char>(bytes.at(0));
        unsigned long low = static_cast<unsigned char>(bytes.at(1));
        return (high << 8) + low;
    };

    std::string result;
    for (size_t i = 0; i < len; ++i)
    {
        if (s[i] == '\\')
        {
            if (i + 1 >= len)
            {
                throw std::logic_error("JSON parse: nothing after \\");
            }
            char ch = s[++i];
            switch (ch)
            {
              case '\\':
              case '\"':
              case '/':
                // "\/" is a legal escape for "/" in JSON.
                result.append(1, ch);
                break;
              case 'b':
                result.append(1, '\b');
                break;
              case 'f':
                result.append(1, '\f');
                break;
              case 'n':
                result.append(1, '\n');
                break;
              case 'r':
                result.append(1, '\r');
                break;
              case 't':
                result.append(1, '\t');
                break;
              case 'u':
                if (i + 4 >= len)
                {
                    throw std::logic_error(
                        "JSON parse: not enough characters after \\u");
                }
                {
                    unsigned long codepoint = hex4(i + 1);
                    // Leave i on the last hex digit of this escape.
                    i += 4;
                    bool const is_high_surrogate =
                        (codepoint >= 0xD800) && (codepoint <= 0xDBFF);
                    // A following escape occupies s[i+1] .. s[i+6].
                    if (is_high_surrogate && (i + 6 < len) &&
                        (s[i+1] == '\\') && (s[i+2] == 'u'))
                    {
                        unsigned long low = hex4(i + 3);
                        if ((low >= 0xDC00) && (low <= 0xDFFF))
                        {
                            codepoint = 0x10000 +
                                ((codepoint - 0xD800) << 10) +
                                (low - 0xDC00);
                            // Consume the low surrogate escape too.
                            i += 6;
                        }
                    }
                    result += QUtil::toUTF8(codepoint);
                }
                break;
              default:
                throw std::logic_error(
                    "JSON parse: bad character after \\");
                break;
            }
        }
        else
        {
            result.append(1, s[i]);
        }
    }
    return result;
}

// Lexer. Scan forward from p until one complete token has been
// bracketed by tok_start/tok_end or the input is exhausted. On
// return, lex_state records the kind of token found (ls_top means a
// single-character delimiter) and, for numbers, the number_* members
// describe the literal. p is left just past the characters consumed,
// which for numbers and keywords ended by whitespace includes that
// whitespace character; this is why reported offsets point after the
// token.
//
// Throws std::runtime_error on lexical errors. Both 'e' and 'E' are
// accepted as the exponent marker, a sign is accepted only directly
// after the exponent marker, and "\/" is accepted as an escape.
void JSONParser::getToken()
{
    while (p < end)
    {
        // Rejecting NUL here also keeps the strchr calls below from
        // matching the terminator of their search strings.
        if (*p == 0)
        {
            QTC::TC("libtests", "JSON parse null character");
            throw std::runtime_error(
                "JSON: null character at offset " +
                QUtil::int_to_string(p - cstr));
        }
        switch (lex_state)
        {
          case ls_top:
            if (*p == '"')
            {
                tok_start = p;
                tok_end = nullptr;
                lex_state = ls_string;
            }
            else if (QUtil::is_space(*p))
            {
                // ignore
            }
            else if ((*p >= 'a') && (*p <= 'z'))
            {
                tok_start = p;
                tok_end = nullptr;
                lex_state = ls_alpha;
            }
            else if (*p == '-')
            {
                tok_start = p;
                tok_end = nullptr;
                lex_state = ls_number;
                number_before_point = 0;
                number_after_point = 0;
                number_after_e = 0;
                number_saw_point = false;
                number_saw_e = false;
            }
            else if ((*p >= '0') && (*p <= '9'))
            {
                tok_start = p;
                tok_end = nullptr;
                lex_state = ls_number;
                number_before_point = 1;
                number_after_point = 0;
                number_after_e = 0;
                number_saw_point = false;
                number_saw_e = false;
            }
            else if (*p == '.')
            {
                tok_start = p;
                tok_end = nullptr;
                lex_state = ls_number;
                number_before_point = 0;
                number_after_point = 0;
                number_after_e = 0;
                number_saw_point = true;
                number_saw_e = false;
            }
            else if (strchr("{}[]:,", *p))
            {
                // Single-character token; lex_state stays ls_top.
                tok_start = p;
                tok_end = p + 1;
            }
            else
            {
                QTC::TC("libtests", "JSON parse bad character");
                throw std::runtime_error(
                    "JSON: offset " + QUtil::int_to_string(p - cstr) +
                    ": unexpected character " + std::string(p, 1));
            }
            break;

          case ls_number:
            if ((*p >= '0') && (*p <= '9'))
            {
                if (number_saw_e)
                {
                    ++number_after_e;
                }
                else if (number_saw_point)
                {
                    ++number_after_point;
                }
                else
                {
                    ++number_before_point;
                }
            }
            else if (*p == '.')
            {
                if (number_saw_e)
                {
                    QTC::TC("libtests", "JSON parse point after e");
                    throw std::runtime_error(
                        "JSON: offset " + QUtil::int_to_string(p - cstr) +
                        ": numeric literal: decimal point after e");
                }
                else if (number_saw_point)
                {
                    QTC::TC("libtests", "JSON parse duplicate point");
                    throw std::runtime_error(
                        "JSON: offset " + QUtil::int_to_string(p - cstr) +
                        ": numeric literal: decimal point already seen");
                }
                else
                {
                    number_saw_point = true;
                }
            }
            else if ((*p == 'e') || (*p == 'E'))
            {
                if (number_saw_e)
                {
                    QTC::TC("libtests", "JSON parse duplicate e");
                    throw std::runtime_error(
                        "JSON: offset " + QUtil::int_to_string(p - cstr) +
                        ": numeric literal: e already seen");
                }
                else
                {
                    number_saw_e = true;
                }
            }
            else if ((*p == '+') || (*p == '-'))
            {
                // A sign inside a number is legal only directly
                // after the exponent marker. p[-1] is within the
                // token because at least its first character has
                // already been consumed in this state.
                if ((p[-1] == 'e') || (p[-1] == 'E'))
                {
                    // okay
                }
                else
                {
                    QTC::TC("libtests", "JSON parse unexpected sign");
                    throw std::runtime_error(
                        "JSON: offset " + QUtil::int_to_string(p - cstr) +
                        ": numeric literal: unexpected sign");
                }
            }
            else if (QUtil::is_space(*p))
            {
                tok_end = p;
            }
            else if (strchr("{}[]:,", *p))
            {
                // The delimiter ends the number but is a token in
                // its own right, so back up to see it again.
                tok_end = p;
                --p;
            }
            else
            {
                QTC::TC("libtests", "JSON parse numeric bad character");
                throw std::runtime_error(
                    "JSON: offset " + QUtil::int_to_string(p - cstr) +
                    ": numeric literal: unexpected character " +
                    std::string(p, 1));
            }
            break;

          case ls_alpha:
            if ((*p >= 'a') && (*p <= 'z'))
            {
                // okay
            }
            else if (QUtil::is_space(*p))
            {
                tok_end = p;
            }
            else if (strchr("{}[]:,", *p))
            {
                // As for numbers: re-read the delimiter next time.
                tok_end = p;
                --p;
            }
            else
            {
                QTC::TC("libtests", "JSON parse keyword bad character");
                throw std::runtime_error(
                    "JSON: offset " + QUtil::int_to_string(p - cstr) +
                    ": keyword: unexpected character " + std::string(p, 1));
            }
            break;

          case ls_string:
            if (*p == '"')
            {
                tok_end = p + 1;
            }
            else if (*p == '\\')
            {
                lex_state = ls_backslash;
            }
            break;

          case ls_backslash:
            if (strchr("\\\"/bfnrt", *p))
            {
                lex_state = ls_string;
            }
            else if (*p == 'u')
            {
                // The four hex digits are p[1] .. p[4].
                if (p + 4 >= end)
                {
                    QTC::TC("libtests", "JSON parse premature end of u");
                    throw std::runtime_error(
                        "JSON: offset " + QUtil::int_to_string(p - cstr) +
                        ": \\u must be followed by four characters");
                }
                for (size_t i = 1; i <= 4; ++i)
                {
                    if (! QUtil::is_hex_digit(p[i]))
                    {
                        QTC::TC("libtests", "JSON parse bad hex after u");
                        throw std::runtime_error(
                            "JSON: offset " + QUtil::int_to_string(p - cstr) +
                            ": \\u must be followed by four hex digits");
                    }
                }
                p += 4;
                lex_state = ls_string;
            }
            else
            {
                QTC::TC("libtests", "JSON parse backslash bad character");
                throw std::runtime_error(
                    "JSON: offset " + QUtil::int_to_string(p - cstr) +
                    ": invalid character after backslash: " +
                    std::string(p, 1));
            }
            break;
        }
        ++p;
        if (tok_start && tok_end)
        {
            break;
        }
    }
    if (p == end)
    {
        // End of input with a token still open: numbers and keywords
        // simply end here; strings are unterminated.
        if (tok_start && (! tok_end))
        {
            switch (lex_state)
            {
              case ls_top:
                // Can't happen
                throw std::logic_error(
                    "tok_start set in ls_top while parsing " +
                    std::string(cstr));
                break;

              case ls_number:
              case ls_alpha:
                tok_end = p;
                break;

              case ls_string:
              case ls_backslash:
                QTC::TC("libtests", "JSON parse unterminated string");
                throw std::runtime_error(
                    "JSON: offset " + QUtil::int_to_string(p - cstr) +
                    ": unterminated string");
                break;
            }
        }
    }
}
void
JSONParser::handleToken()
{
if (! (tok_start && tok_end))
{
return;
}
// Get token value.
std::string value(tok_start, tok_end);
if (parser_state == ps_done)
{
QTC::TC("libtests", "JSON parse junk after object");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": material follows end of object: " + value);
}
// Git string value
std::string svalue;
if (lex_state == ls_string)
{
// Token includes the quotation marks
if (tok_end - tok_start < 2)
{
throw std::logic_error("JSON string length < 2");
}
svalue = decode_string(value);
}
// Based on the lexical state and value, figure out whether we are
// looking at an item or a delimiter. It will always be exactly
// one of those two or an error condition.
PointerHolder<JSON> item;
char delimiter = '\0';
switch (lex_state)
{
case ls_top:
switch (*tok_start)
{
case '{':
item = new JSON(JSON::makeDictionary());
break;
case '[':
item = new JSON(JSON::makeArray());
break;
default:
delimiter = *tok_start;
break;
}
break;
case ls_number:
if (number_saw_point && (number_after_point == 0))
{
QTC::TC("libtests", "JSON parse decimal with no digits");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": decimal point with no digits");
}
if ((number_before_point > 1) &&
((tok_start[0] == '0') ||
((tok_start[0] == '-') && (tok_start[1] == '0'))))
{
QTC::TC("libtests", "JSON parse leading zero");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": number with leading zero");
}
if ((number_before_point == 0) && (number_after_point == 0))
{
QTC::TC("libtests", "JSON parse number no digits");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": number with no digits");
}
item = new JSON(JSON::makeNumber(value));
break;
case ls_alpha:
if (value == "true")
{
item = new JSON(JSON::makeBool(true));
}
else if (value == "false")
{
item = new JSON(JSON::makeBool(false));
}
else if (value == "null")
{
item = new JSON(JSON::makeNull());
}
else
{
QTC::TC("libtests", "JSON parse invalid keyword");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": invalid keyword " + value);
}
break;
case ls_string:
item = new JSON(JSON::makeString(svalue));
break;
case ls_backslash:
throw std::logic_error(
"tok_end is set while state = ls_backslash");
break;
}
if ((item.getPointer() == nullptr) == (delimiter == '\0'))
{
throw std::logic_error(
"JSONParser::handleToken: logic error: exactly one of item"
" or delimiter must be set");
}
// See whether what we have is allowed at this point.
if (item.getPointer())
{
switch (parser_state)
{
case ps_done:
throw std::logic_error("can't happen; ps_done already handled");
break;
case ps_dict_after_key:
QTC::TC("libtests", "JSON parse expected colon");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": expected ':'");
break;
case ps_dict_after_item:
QTC::TC("libtests", "JSON parse expected , or }");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": expected ',' or '}'");
break;
case ps_array_after_item:
QTC::TC("libtests", "JSON parse expected, or ]");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": expected ',' or ']'");
break;
case ps_dict_begin:
case ps_dict_after_comma:
if (lex_state != ls_string)
{
QTC::TC("libtests", "JSON parse string as dict key");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": expect string as dictionary key");
}
break;
case ps_top:
case ps_dict_after_colon:
case ps_array_begin:
case ps_array_after_comma:
break;
// okay
}
}
else if (delimiter == '}')
{
if (! ((parser_state == ps_dict_begin) ||
(parser_state == ps_dict_after_item)))
{
QTC::TC("libtests", "JSON parse unexpected }");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": unexpected dictionary end delimiter");
}
}
else if (delimiter == ']')
{
if (! ((parser_state == ps_array_begin) ||
(parser_state == ps_array_after_item)))
{
QTC::TC("libtests", "JSON parse unexpected ]");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": unexpected array end delimiter");
}
}
else if (delimiter == ':')
{
if (parser_state != ps_dict_after_key)
{
QTC::TC("libtests", "JSON parse unexpected :");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": unexpected colon");
}
}
else if (delimiter == ',')
{
if (! ((parser_state == ps_dict_after_item) ||
(parser_state == ps_array_after_item)))
{
QTC::TC("libtests", "JSON parse unexpected ,");
throw std::runtime_error(
"JSON: offset " + QUtil::int_to_string(p - cstr) +
": unexpected comma");
}
}
else if (delimiter != '\0')
{
throw std::logic_error("JSONParser::handleToken: bad delimiter");
}
// Now we know we have a delimiter or item that is allowed. Do
// whatever we need to do with it.
parser_state_e next_state = ps_top;
if (delimiter == ':')
{
next_state = ps_dict_after_colon;
}
else if (delimiter == ',')
{
if (parser_state == ps_dict_after_item)
{
next_state = ps_dict_after_comma;
}
else if (parser_state == ps_array_after_item)
{
next_state = ps_array_after_comma;
}
else
{
throw std::logic_error(
"JSONParser::handleToken: unexpected parser"
" state for comma");
}
}
else if ((delimiter == '}') || (delimiter == ']'))
{
next_state = ps_stack.back();
ps_stack.pop_back();
if (next_state != ps_done)
{
stack.pop_back();
}
}
else if (delimiter != '\0')
{
throw std::logic_error(
"JSONParser::handleToken: unexpected delimiter in transition");
}
else if (item.getPointer())
{
PointerHolder<JSON> tos;
if (! stack.empty())
{
tos = stack.back();
}
switch (parser_state)
{
case ps_dict_begin:
case ps_dict_after_comma:
this->dict_key = svalue;
item = nullptr;
next_state = ps_dict_after_key;
break;
case ps_dict_after_colon:
tos->addDictionaryMember(dict_key, *item);
next_state = ps_dict_after_item;
break;
case ps_array_begin:
case ps_array_after_comma:
next_state = ps_array_after_item;
tos->addArrayElement(*item);
break;
case ps_top:
next_state = ps_done;
break;
case ps_dict_after_key:
case ps_dict_after_item:
case ps_array_after_item:
case ps_done:
throw std::logic_error(
"JSONParser::handleToken: unexpected parser state");
}
}
else
{
throw std::logic_error(
"JSONParser::handleToken: unexpected null item in transition");
}
// Prepare for next token
if (item.getPointer())
{
if (item->isDictionary())
{
stack.push_back(item);
ps_stack.push_back(next_state);
next_state = ps_dict_begin;
}
else if (item->isArray())
{
stack.push_back(item);
ps_stack.push_back(next_state);
next_state = ps_array_begin;
}
else if (parser_state == ps_top)
{
stack.push_back(item);
}
}
parser_state = next_state;
tok_start = nullptr;
tok_end = nullptr;
lex_state = ls_top;
}
// Run the lexer and parser over s and return the resulting top-level
// JSON value. s must outlive the call because the parser keeps raw
// pointers into its buffer while running. Throws std::runtime_error
// if the input ends before a complete value has been read, in
// addition to whatever getToken() and handleToken() throw.
PointerHolder<JSON>
JSONParser::parse(std::string const& s)
{
    this->cstr = s.c_str();
    this->end = this->cstr + s.length();

    // Alternate lexer and parser steps until the input is consumed.
    for (this->p = this->cstr; this->p < this->end; )
    {
        getToken();
        handleToken();
    }

    if (this->parser_state == ps_done)
    {
        // The finished top-level value is left on top of the stack.
        return this->stack.back();
    }
    QTC::TC("libtests", "JSON parse preature EOF");
    throw std::runtime_error("JSON: premature end of input");
}
// Public entry point: parse s with a fresh JSONParser and return a
// copy of the resulting JSON handle.
JSON
JSON::parse(std::string const& s)
{
    JSONParser parser;
    PointerHolder<JSON> parsed = parser.parse(s);
    return *parsed;
}

View File

@ -13,6 +13,7 @@ BINS_libtests = \
hex \
input_source \
json \
json_parse \
lzw \
main_from_wmain \
matrix \

View File

@ -91,67 +91,83 @@ static void check_schema(JSON& obj, JSON& schema, bool exp,
static void test_schema()
{
// Since we don't have a JSON parser, use the PDF parser as a
// shortcut for creating a complex JSON structure.
JSON schema = QPDFObjectHandle::parse(
"<<"
" /one <<"
" /a <<"
" /q (queue)"
" /r <<"
" /x (ecks)"
" /y (why)"
" >>"
" /s [ (esses) ]"
" >>"
" >>"
" /two ["
" <<"
" /goose (gander)"
" /glarp (enspliel)"
" >>"
" ]"
">>").getJSON();
JSON three = JSON::makeDictionary();
three.addDictionaryMember(
"<objid>",
QPDFObjectHandle::parse("<< /z (ebra) >>").getJSON());
schema.addDictionaryMember("/three", three);
JSON a = QPDFObjectHandle::parse("[(not a) (dictionary)]").getJSON();
JSON schema = JSON::parse(R"(
{
"one": {
"a": {
"q": "queue",
"r": {
"x": "ecks",
"y": "(bool) why"
},
"s": [
"esses"
]
}
},
"two": [
{
"goose": "gander",
"glarp": "enspliel"
}
],
"three": {
"<objid>": {
"z": "ebra",
"o": "(optional, string) optional"
}
}
}
)");
JSON a = JSON::parse(R"(["not a", "dictionary"])");
check_schema(a, schema, false, "top-level type mismatch");
JSON b = QPDFObjectHandle::parse(
"<<"
" /one <<"
" /a <<"
" /t (oops)"
" /r ["
" /x (ecks)"
" /y (why)"
" ]"
" /s << /z (esses) >>"
" >>"
" >>"
" /two ["
" <<"
" /goose (0 gander)"
" /glarp (0 enspliel)"
" >>"
" <<"
" /goose (1 gander)"
" /flarp (1 enspliel)"
" >>"
" 2"
" [ (three) ]"
" <<"
" /goose (4 gander)"
" /glarp (4 enspliel)"
" >>"
" ]"
" /three <<"
" /anything << /x (oops) >>"
" /else << /z (okay) >>"
" >>"
">>").getJSON();
JSON b = JSON::parse(R"(
{
"one": {
"a": {
"t": "oops",
"r": [
"x",
"ecks",
"y",
"why"
],
"s": {
"z": "esses"
}
}
},
"two": [
{
"goose": "0 gander",
"glarp": "0 enspliel"
},
{
"goose": "1 gander",
"flarp": "1 enspliel"
},
2,
[
"three"
],
{
"goose": "4 gander",
"glarp": 4
}
],
"three": {
"anything": {
"x": "oops",
"o": "okay"
},
"else": {
"z": "okay"
}
}
}
)");
check_schema(b, schema, false, "missing items");
check_schema(a, a, false, "top-level schema array error");
check_schema(b, b, false, "lower-level schema array error");

27
libtests/json_parse.cc Normal file
View File

@ -0,0 +1,27 @@
#include <qpdf/JSON.hh>
#include <qpdf/QUtil.hh>
#include <iostream>
int main(int argc, char* argv[])
{
if (argc != 2)
{
std::cerr << "Usage: json_parse file" << std::endl;
return 2;
}
char const* filename = argv[1];
try
{
PointerHolder<char> buf;
size_t size;
QUtil::read_file_into_memory(filename, buf, size);
std::string s(buf.getPointer(), size);
std::cout << JSON::parse(s).unparse() << std::endl;
}
catch (std::exception& e)
{
std::cerr << "exception: " << filename<< ": " << e.what() << std::endl;
return 2;
}
return 0;
}

View File

@ -61,3 +61,29 @@ QPDFArgParser duplicate option help 0
QPDFArgParser bad option for help 0
QPDFArgParser bad topic for help 0
QPDFArgParser invalid choice handler to unknown 0
JSON parse junk after object 0
JSON parse decimal with no digits 0
JSON parse invalid keyword 0
JSON parse expected colon 0
JSON parse expected , or } 0
JSON parse expected, or ] 0
JSON parse string as dict key 0
JSON parse unexpected } 0
JSON parse unexpected ] 0
JSON parse unexpected : 0
JSON parse unexpected , 0
JSON parse preature EOF 0
JSON parse null character 0
JSON parse bad character 0
JSON parse point after e 0
JSON parse duplicate point 0
JSON parse duplicate e 0
JSON parse unexpected sign 0
JSON parse numeric bad character 0
JSON parse keyword bad character 0
JSON parse backslash bad character 0
JSON parse unterminated string 0
JSON parse leading zero 0
JSON parse number no digits 0
JSON parse premature end of u 0
JSON parse bad hex after u 0

View File

@ -2,23 +2,24 @@
top-level object is supposed to be a dictionary
---
--- missing items
json key "./one./a": key "/q" is present in schema but missing in object
json key "./one./a./r" is supposed to be a dictionary
json key "./one./a./s" is supposed to be an array
json key "./one./a": key "/t" is not present in schema but appears in object
json key "./three./anything": key "/z" is present in schema but missing in object
json key "./three./anything": key "/x" is not present in schema but appears in object
json key "./two.1": key "/glarp" is present in schema but missing in object
json key "./two.1": key "/flarp" is not present in schema but appears in object
json key "./two.2" is supposed to be a dictionary
json key "./two.3" is supposed to be a dictionary
json key ".one.a": key "q" is present in schema but missing in object
json key ".one.a.r" is supposed to be a dictionary
json key ".one.a.s" is supposed to be an array
json key ".one.a": key "t" is not present in schema but appears in object
json key ".three.anything": key "z" is present in schema but missing in object
json key ".three.anything": key "x" is not present in schema but appears in object
json key ".three.else": key "o" is present in schema but missing in object
json key ".two.1": key "glarp" is present in schema but missing in object
json key ".two.1": key "flarp" is not present in schema but appears in object
json key ".two.2" is supposed to be a dictionary
json key ".two.3" is supposed to be a dictionary
---
--- top-level schema array error
top-level object schema array contains other than one item
---
--- lower-level schema array error
json key "./one./a./r" schema array contains other than one item
json key "./two" schema array contains other than one item
json key ".one.a.r" schema array contains other than one item
json key ".two" schema array contains other than one item
---
--- pass
---

View File

@ -0,0 +1,137 @@
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;
use File::Copy;
use File::Compare;
chdir("json_parse") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('json_parse');

# JSON.pm is optional. When it is available, parser output is
# compared semantically against the input; otherwise it is compared
# byte-for-byte against previously saved output.
my $json_mod = eval { require JSON; 1 } ? 1 : 0;
if (! $json_mod)
{
    $td->emphasize("JSON.pm not found -- using stored actual outputs");
}

cleanup();

# Inputs good-01.json .. good-NN.json must be accepted. Each one
# accounts for exactly two tests whichever branch is taken below.
my $good = 9;

foreach my $i (1 .. $good)
{
    my $n = sprintf("%02d", $i);
    unlink "out.json";
    my $r = system("json_parse good-$n.json > out.json 2>&1");
    my $accepted = $td->runtest("json_parse accepted $n",
                                {$td->STRING => "$r\n"},
                                {$td->STRING => "0\n"},
                                $td->NORMALIZE_NEWLINES);
    if (! $accepted)
    {
        # Show whatever the program printed by comparing it with
        # empty output.
        $td->runtest("skip checking output $n",
                     {$td->FILE => "out.json"},
                     {$td->STRING => ""});
        next;
    }
    if (! $json_mod)
    {
        $td->runtest("check output $n against saved",
                     {$td->FILE => "out.json"},
                     {$td->FILE => "save-$n.json"},
                     $td->NORMALIZE_NEWLINES);
        next;
    }
    my $same = $td->runtest("check output $n",
                            {$td->STRING => normalize_json("out.json")},
                            {$td->STRING => normalize_json("good-$n.json")},
                            $td->NORMALIZE_NEWLINES);
    # Refresh the saved output used when JSON.pm is not available.
    if ($same && compare("out.json", "save-$n.json"))
    {
        copy("out.json", "save-$n.json");
        $td->emphasize("updated save-$n.json from out.json");
    }
}

# Inputs bad-01.json .. bad-NN.json must be rejected with the message
# stored in the matching bad-NN.out. The position in this list gives
# the file number.
my @bad = (
    "junk after string",        # 1
    "junk after array",         # 2
    "junk after dictionary",    # 3
    "bad number",               # 4
    "invalid keyword",          # 5
    "missing colon",            # 6
    "missing comma in dict",    # 7
    "missing comma in array",   # 8
    "dict key not string",      # 9
    "unexpected } in array",    # 10
    "unexpected } at top",      # 11
    "unexpected } in dict",     # 12
    "unexpected ] in dict",     # 13
    "unexpected ] at top",      # 14
    "unexpected :",             # 15
    "unexpected ,",             # 16
    "premature end array",      # 17
    "null character",           # 18
    "unexpected character",     # 19
    "point in exponent",        # 20
    "duplicate point",          # 21
    "duplicate e",              # 22
    "stray +",                  # 23
    "bad character in number",  # 24
    "bad character in keyword", # 25
    "bad backslash character",  # 26
    "unterminated string",      # 27
    "unterminated after \\",    # 28
    "leading +",                # 29
    "decimal with no digits",   # 30
    "minus with no digits",     # 31
    "leading zero",             # 32
    "leading zero negative",    # 33
    "premature end after u",    # 34
    "bad hex digit",            # 35
    );

foreach my $idx (0 .. $#bad)
{
    my $d = $bad[$idx];
    my $n = sprintf("%02d", $idx + 1);
    $td->runtest("$n: $d",
                 {$td->COMMAND => "json_parse bad-$n.json"},
                 {$td->FILE => "bad-$n.out", $td->EXIT_STATUS => 2},
                 $td->NORMALIZE_NEWLINES);
}

cleanup();

my $expected_tests = (2 * $good) + scalar(@bad);
$td->report($expected_tests);
sub cleanup
{
    # Remove the scratch output file left behind by a test run.
    unlink("out.json");
}
sub normalize_json
{
    # Read the JSON file named by the argument and return it
    # re-encoded by JSON.pm in canonical, pretty-printed form, so that
    # two files with the same content compare equal as strings. Dies
    # if the file cannot be opened.
    my $file = shift;
    open(my $fh, '<', $file) or die "can't open $file: $!\n";
    # Slurp the whole file. Localizing $/ restores normal line-at-a-
    # time reading for the rest of the program when the block exits.
    my $encoded = do { local $/; <$fh> };
    close($fh);
    my $j = JSON->new->allow_nonref;
    $j->canonical();
    $j->utf8->pretty->encode($j->utf8->decode($encoded));
}

View File

@ -0,0 +1 @@
"a" junk

View File

@ -0,0 +1 @@
exception: bad-01.json: JSON: offset 9: material follows end of object: junk

View File

@ -0,0 +1 @@
["a"] junk

View File

@ -0,0 +1 @@
exception: bad-02.json: JSON: offset 11: material follows end of object: junk

View File

@ -0,0 +1 @@
{"a": "b"} junk

View File

@ -0,0 +1 @@
exception: bad-03.json: JSON: offset 16: material follows end of object: junk

View File

@ -0,0 +1,2 @@
[1, .]

View File

@ -0,0 +1 @@
exception: bad-04.json: JSON: offset 5: decimal point with no digits

View File

@ -0,0 +1 @@
[true, potato]

View File

@ -0,0 +1 @@
exception: bad-05.json: JSON: offset 13: invalid keyword potato

View File

@ -0,0 +1 @@
{"x" "y"}

View File

@ -0,0 +1 @@
exception: bad-06.json: JSON: offset 8: expected ':'

View File

@ -0,0 +1 @@
{"x": 3 "y"}

View File

@ -0,0 +1 @@
exception: bad-07.json: JSON: offset 11: expected ',' or '}'

View File

@ -0,0 +1 @@
["x" "y"]

View File

@ -0,0 +1 @@
exception: bad-08.json: JSON: offset 8: expected ',' or ']'

View File

@ -0,0 +1 @@
{5 : 5}

View File

@ -0,0 +1 @@
exception: bad-09.json: JSON: offset 3: expect string as dictionary key

View File

@ -0,0 +1 @@
["a"}

View File

@ -0,0 +1 @@
exception: bad-10.json: JSON: offset 5: unexpected dictionary end delimiter

View File

@ -0,0 +1 @@
}

View File

@ -0,0 +1 @@
exception: bad-11.json: JSON: offset 1: unexpected dictionary end delimiter

View File

@ -0,0 +1 @@
{""}

View File

@ -0,0 +1 @@
exception: bad-12.json: JSON: offset 4: unexpected dictionary end delimiter

View File

@ -0,0 +1 @@
{"": "x"]

View File

@ -0,0 +1 @@
exception: bad-13.json: JSON: offset 9: unexpected array end delimiter

View File

@ -0,0 +1 @@
]

View File

@ -0,0 +1 @@
exception: bad-14.json: JSON: offset 1: unexpected array end delimiter

View File

@ -0,0 +1 @@
["a": ]

View File

@ -0,0 +1 @@
exception: bad-15.json: JSON: offset 5: unexpected colon

View File

@ -0,0 +1 @@
[,]

View File

@ -0,0 +1 @@
exception: bad-16.json: JSON: offset 2: unexpected comma

View File

@ -0,0 +1 @@
[1, 2,

View File

@ -0,0 +1 @@
exception: bad-17.json: JSON: premature end of input

Binary file not shown.

View File

@ -0,0 +1 @@
exception: bad-18.json: JSON: null character at offset 5

View File

@ -0,0 +1 @@
/

View File

@ -0,0 +1 @@
exception: bad-19.json: JSON: offset 0: unexpected character /

View File

@ -0,0 +1 @@
3.14e5.6

View File

@ -0,0 +1 @@
exception: bad-20.json: JSON: offset 6: numeric literal: decimal point after e

View File

@ -0,0 +1 @@
3.14.159

View File

@ -0,0 +1 @@
exception: bad-21.json: JSON: offset 4: numeric literal: decimal point already seen

View File

@ -0,0 +1 @@
3e4e5

View File

@ -0,0 +1 @@
exception: bad-22.json: JSON: offset 3: numeric literal: e already seen

View File

@ -0,0 +1 @@
3+4

View File

@ -0,0 +1 @@
exception: bad-23.json: JSON: offset 1: numeric literal: unexpected sign

View File

@ -0,0 +1 @@
12x

View File

@ -0,0 +1 @@
exception: bad-24.json: JSON: offset 2: numeric literal: unexpected character x

View File

@ -0,0 +1 @@
abc1

View File

@ -0,0 +1 @@
exception: bad-25.json: JSON: offset 3: keyword: unexpected character 1

View File

@ -0,0 +1 @@
"abc\yd"

View File

@ -0,0 +1 @@
exception: bad-26.json: JSON: offset 5: invalid character after backslash: y

View File

@ -0,0 +1 @@
"abcd

View File

@ -0,0 +1 @@
exception: bad-27.json: JSON: offset 6: unterminated string

View File

@ -0,0 +1 @@
"abc-no-newline\

View File

@ -0,0 +1 @@
exception: bad-28.json: JSON: offset 16: unterminated string

View File

@ -0,0 +1 @@
+123.

View File

@ -0,0 +1 @@
exception: bad-29.json: JSON: offset 0: unexpected character +

View File

@ -0,0 +1 @@
123.

View File

@ -0,0 +1 @@
exception: bad-30.json: JSON: offset 5: decimal point with no digits

View File

@ -0,0 +1 @@
-

View File

@ -0,0 +1 @@
exception: bad-31.json: JSON: offset 2: number with no digits

View File

@ -0,0 +1 @@
0123

View File

@ -0,0 +1 @@
exception: bad-32.json: JSON: offset 5: number with leading zero

View File

@ -0,0 +1 @@
-0123

View File

@ -0,0 +1 @@
exception: bad-33.json: JSON: offset 6: number with leading zero

View File

@ -0,0 +1 @@
"a\u123

View File

@ -0,0 +1 @@
exception: bad-34.json: JSON: offset 3: \u must be followed by four characters

View File

@ -0,0 +1 @@
"a\u123qx"

View File

@ -0,0 +1 @@
exception: bad-35.json: JSON: offset 3: \u must be followed by four hex digits

View File

@ -0,0 +1,3 @@
{"a": "bcd", "e": [1,
2, 3,4,"five", {"six": 7, "8": 9}, null, true,
false]}

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1 @@
[[[{}], {"": {}}]]

View File

@ -0,0 +1 @@
"x"

View File

@ -0,0 +1 @@
123

View File

@ -0,0 +1 @@
-123

View File

@ -0,0 +1 @@
[1, -2, 3.4, -5.6, -9e1, 10e2, 12.3e5, 12.6e-7]

View File

@ -0,0 +1 @@
["aπb", "a\b\f\n\r\tc", "a\u03c0b\u03C0c", "\u03c0", "a\u0018b\u02acc"]

View File

@ -0,0 +1,17 @@
{
"a": "bcd",
"e": [
1,
2,
3,
4,
"five",
{
"8": 9,
"six": 7
},
null,
true,
false
]
}

View File

@ -0,0 +1 @@
{}

View File

@ -0,0 +1 @@
[]

View File

@ -0,0 +1,10 @@
[
[
[
{}
],
{
"": {}
}
]
]

View File

@ -0,0 +1 @@
"x"

View File

@ -0,0 +1 @@
123

View File

@ -0,0 +1 @@
-123

View File

@ -0,0 +1,10 @@
[
1,
-2,
3.4,
-5.6,
-9e1,
10e2,
12.3e5,
12.6e-7
]

View File

@ -0,0 +1,7 @@
[
"aπb",
"a\b\f\n\r\tc",
"aπbπc",
"π",
"a\u0018bʬc"
]