// Mirror of https://github.com/qpdf/qpdf.git
// Synced 2024-11-15 17:17:08 +00:00
// 1150 lines, 33 KiB, C++
#include <qpdf/JSON.hh>
|
|
|
|
#include <qpdf/QIntC.hh>
|
|
#include <qpdf/QTC.hh>
|
|
#include <qpdf/QUtil.hh>
|
|
#include <cstring>
|
|
#include <stdexcept>
|
|
|
|
JSON::Members::Members(std::shared_ptr<JSON_value> value) :
|
|
value(value),
|
|
start(0),
|
|
end(0)
|
|
{
|
|
}
|
|
|
|
// Wrap an existing value node in a JSON handle by allocating a fresh Members
// object that refers to it.
JSON::JSON(std::shared_ptr<JSON_value> value) :
    m(new Members(value))
{
}
|
|
|
|
std::string
|
|
JSON::JSON_dictionary::unparse(size_t depth) const
|
|
{
|
|
std::string result = "{";
|
|
bool first = true;
|
|
for (auto const& iter: members) {
|
|
if (first) {
|
|
first = false;
|
|
} else {
|
|
result.append(1, ',');
|
|
}
|
|
result.append(1, '\n');
|
|
result.append(2 * (1 + depth), ' ');
|
|
result +=
|
|
("\"" + iter.first + "\": " + iter.second->unparse(1 + depth));
|
|
}
|
|
if (!first) {
|
|
result.append(1, '\n');
|
|
result.append(2 * depth, ' ');
|
|
}
|
|
result.append(1, '}');
|
|
return result;
|
|
}
|
|
|
|
std::string
|
|
JSON::JSON_array::unparse(size_t depth) const
|
|
{
|
|
std::string result = "[";
|
|
bool first = true;
|
|
for (auto const& element: elements) {
|
|
if (first) {
|
|
first = false;
|
|
} else {
|
|
result.append(1, ',');
|
|
}
|
|
result.append(1, '\n');
|
|
result.append(2 * (1 + depth), ' ');
|
|
result += element->unparse(1 + depth);
|
|
}
|
|
if (!first) {
|
|
result.append(1, '\n');
|
|
result.append(2 * depth, ' ');
|
|
}
|
|
result.append(1, ']');
|
|
return result;
|
|
}
|
|
|
|
JSON::JSON_string::JSON_string(std::string const& utf8) :
|
|
utf8(utf8),
|
|
encoded(encode_string(utf8))
|
|
{
|
|
}
|
|
|
|
std::string
|
|
JSON::JSON_string::unparse(size_t) const
|
|
{
|
|
return "\"" + encoded + "\"";
|
|
}
|
|
|
|
JSON::JSON_number::JSON_number(long long value) :
|
|
encoded(QUtil::int_to_string(value))
|
|
{
|
|
}
|
|
|
|
JSON::JSON_number::JSON_number(double value) :
|
|
encoded(QUtil::double_to_string(value, 6))
|
|
{
|
|
}
|
|
|
|
JSON::JSON_number::JSON_number(std::string const& value) :
|
|
encoded(value)
|
|
{
|
|
}
|
|
|
|
std::string
|
|
JSON::JSON_number::unparse(size_t) const
|
|
{
|
|
return encoded;
|
|
}
|
|
|
|
JSON::JSON_bool::JSON_bool(bool val) :
|
|
value(val)
|
|
{
|
|
}
|
|
|
|
std::string
|
|
JSON::JSON_bool::unparse(size_t) const
|
|
{
|
|
return value ? "true" : "false";
|
|
}
|
|
|
|
std::string
|
|
JSON::JSON_null::unparse(size_t) const
|
|
{
|
|
return "null";
|
|
}
|
|
|
|
std::string
|
|
JSON::unparse() const
|
|
{
|
|
if (0 == this->m->value.get()) {
|
|
return "null";
|
|
} else {
|
|
return this->m->value->unparse(0);
|
|
}
|
|
}
|
|
|
|
std::string
|
|
JSON::encode_string(std::string const& str)
|
|
{
|
|
std::string result;
|
|
size_t len = str.length();
|
|
for (size_t i = 0; i < len; ++i) {
|
|
unsigned char ch = static_cast<unsigned char>(str.at(i));
|
|
switch (ch) {
|
|
case '\\':
|
|
result += "\\\\";
|
|
break;
|
|
case '\"':
|
|
result += "\\\"";
|
|
break;
|
|
case '\b':
|
|
result += "\\b";
|
|
break;
|
|
case '\f':
|
|
result += "\\f";
|
|
break;
|
|
case '\n':
|
|
result += "\\n";
|
|
break;
|
|
case '\r':
|
|
result += "\\r";
|
|
break;
|
|
case '\t':
|
|
result += "\\t";
|
|
break;
|
|
default:
|
|
if (ch < 32) {
|
|
result += "\\u" + QUtil::int_to_string_base(ch, 16, 4);
|
|
} else {
|
|
result.append(1, static_cast<char>(ch));
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
// Create a JSON value holding a new, empty dictionary.
JSON
JSON::makeDictionary()
{
    auto dictionary = std::make_shared<JSON_dictionary>();
    return JSON(dictionary);
}
|
|
|
|
// Add (or replace) a member of this dictionary and return a JSON handle to
// the stored value.
//
// key is stored in its JSON-escaped form (see encode_string). If val holds no
// value node, a JSON null is stored instead.
//
// Throws std::runtime_error if this JSON value is not a dictionary.
JSON
JSON::addDictionaryMember(std::string const& key, JSON const& val)
{
    auto* obj = dynamic_cast<JSON_dictionary*>(this->m->value.get());
    if (obj == nullptr) {
        throw std::runtime_error(
            "JSON::addDictionaryMember called on non-dictionary");
    }

    // Encode the key once and look the slot up once; the slot reference is
    // then reused for both the assignment and the return value.
    auto& slot = obj->members[encode_string(key)];
    if (val.m->value.get()) {
        slot = val.m->value;
    } else {
        slot = std::make_shared<JSON_null>();
    }
    return JSON(slot);
}
|
|
|
|
// Create a JSON value holding a new, empty array.
JSON
JSON::makeArray()
{
    auto array = std::make_shared<JSON_array>();
    return JSON(array);
}
|
|
|
|
// Append an element to this array and return a JSON handle to the stored
// value. If val holds no value node, a JSON null is appended instead.
//
// Throws std::runtime_error if this JSON value is not an array.
JSON
JSON::addArrayElement(JSON const& val)
{
    auto* target = dynamic_cast<JSON_array*>(m->value.get());
    if (target == nullptr) {
        throw std::runtime_error("JSON::addArrayElement called on non-array");
    }

    std::shared_ptr<JSON_value> element = val.m->value;
    if (!element) {
        element = std::make_shared<JSON_null>();
    }
    target->elements.push_back(element);
    return target->elements.back();
}
|
|
|
|
// Create a JSON string value from the given text.
JSON
JSON::makeString(std::string const& utf8)
{
    auto node = std::make_shared<JSON_string>(utf8);
    return JSON(node);
}
|
|
|
|
// Create a JSON number value from an integer.
JSON
JSON::makeInt(long long int value)
{
    auto node = std::make_shared<JSON_number>(value);
    return JSON(node);
}
|
|
|
|
// Create a JSON number value from a floating-point value.
JSON
JSON::makeReal(double value)
{
    auto node = std::make_shared<JSON_number>(value);
    return JSON(node);
}
|
|
|
|
// Create a JSON number value from text that is already in numeric form. The
// text is stored unchanged.
JSON
JSON::makeNumber(std::string const& encoded)
{
    auto node = std::make_shared<JSON_number>(encoded);
    return JSON(node);
}
|
|
|
|
// Create a JSON boolean value.
JSON
JSON::makeBool(bool value)
{
    auto node = std::make_shared<JSON_bool>(value);
    return JSON(node);
}
|
|
|
|
// Create a JSON null value.
JSON
JSON::makeNull()
{
    auto node = std::make_shared<JSON_null>();
    return JSON(node);
}
|
|
|
|
bool
|
|
JSON::isArray() const
|
|
{
|
|
return nullptr != dynamic_cast<JSON_array const*>(this->m->value.get());
|
|
}
|
|
|
|
bool
|
|
JSON::isDictionary() const
|
|
{
|
|
return nullptr !=
|
|
dynamic_cast<JSON_dictionary const*>(this->m->value.get());
|
|
}
|
|
|
|
bool
|
|
JSON::getString(std::string& utf8) const
|
|
{
|
|
auto v = dynamic_cast<JSON_string const*>(this->m->value.get());
|
|
if (v == nullptr) {
|
|
return false;
|
|
}
|
|
utf8 = v->utf8;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
JSON::getNumber(std::string& value) const
|
|
{
|
|
auto v = dynamic_cast<JSON_number const*>(this->m->value.get());
|
|
if (v == nullptr) {
|
|
return false;
|
|
}
|
|
value = v->encoded;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
JSON::getBool(bool& value) const
|
|
{
|
|
auto v = dynamic_cast<JSON_bool const*>(this->m->value.get());
|
|
if (v == nullptr) {
|
|
return false;
|
|
}
|
|
value = v->value;
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
JSON::isNull() const
|
|
{
|
|
if (dynamic_cast<JSON_null const*>(this->m->value.get())) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool
|
|
JSON::forEachDictItem(
|
|
std::function<void(std::string const& key, JSON value)> fn) const
|
|
{
|
|
auto v = dynamic_cast<JSON_dictionary const*>(this->m->value.get());
|
|
if (v == nullptr) {
|
|
return false;
|
|
}
|
|
for (auto const& k: v->members) {
|
|
fn(k.first, JSON(k.second));
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
JSON::forEachArrayItem(std::function<void(JSON value)> fn) const
|
|
{
|
|
auto v = dynamic_cast<JSON_array const*>(this->m->value.get());
|
|
if (v == nullptr) {
|
|
return false;
|
|
}
|
|
for (auto const& i: v->elements) {
|
|
fn(JSON(i));
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool
|
|
JSON::checkSchema(JSON schema, std::list<std::string>& errors)
|
|
{
|
|
return checkSchemaInternal(
|
|
this->m->value.get(), schema.m->value.get(), 0, errors, "");
|
|
}
|
|
|
|
bool
|
|
JSON::checkSchema(
|
|
JSON schema, unsigned long flags, std::list<std::string>& errors)
|
|
{
|
|
return checkSchemaInternal(
|
|
this->m->value.get(), schema.m->value.get(), flags, errors, "");
|
|
}
|
|
|
|
bool
|
|
JSON::checkSchemaInternal(
|
|
JSON_value* this_v,
|
|
JSON_value* sch_v,
|
|
unsigned long flags,
|
|
std::list<std::string>& errors,
|
|
std::string prefix)
|
|
{
|
|
JSON_array* this_arr = dynamic_cast<JSON_array*>(this_v);
|
|
JSON_dictionary* this_dict = dynamic_cast<JSON_dictionary*>(this_v);
|
|
|
|
JSON_array* sch_arr = dynamic_cast<JSON_array*>(sch_v);
|
|
JSON_dictionary* sch_dict = dynamic_cast<JSON_dictionary*>(sch_v);
|
|
|
|
JSON_string* sch_str = dynamic_cast<JSON_string*>(sch_v);
|
|
|
|
std::string err_prefix;
|
|
if (prefix.empty()) {
|
|
err_prefix = "top-level object";
|
|
} else {
|
|
err_prefix = "json key \"" + prefix + "\"";
|
|
}
|
|
|
|
std::string pattern_key;
|
|
if (sch_dict) {
|
|
if (!this_dict) {
|
|
QTC::TC("libtests", "JSON wanted dictionary");
|
|
errors.push_back(err_prefix + " is supposed to be a dictionary");
|
|
return false;
|
|
}
|
|
auto members = sch_dict->members;
|
|
std::string key;
|
|
if ((members.size() == 1) &&
|
|
((key = members.begin()->first, key.length() > 2) &&
|
|
(key.at(0) == '<') && (key.at(key.length() - 1) == '>'))) {
|
|
pattern_key = key;
|
|
}
|
|
}
|
|
|
|
if (sch_dict && (!pattern_key.empty())) {
|
|
auto pattern_schema = sch_dict->members[pattern_key].get();
|
|
for (auto const& iter: this_dict->members) {
|
|
std::string const& key = iter.first;
|
|
checkSchemaInternal(
|
|
this_dict->members[key].get(),
|
|
pattern_schema,
|
|
flags,
|
|
errors,
|
|
prefix + "." + key);
|
|
}
|
|
} else if (sch_dict) {
|
|
for (auto& iter: sch_dict->members) {
|
|
std::string const& key = iter.first;
|
|
if (this_dict->members.count(key)) {
|
|
checkSchemaInternal(
|
|
this_dict->members[key].get(),
|
|
iter.second.get(),
|
|
flags,
|
|
errors,
|
|
prefix + "." + key);
|
|
} else {
|
|
if (flags & f_optional) {
|
|
QTC::TC("libtests", "JSON optional key");
|
|
} else {
|
|
QTC::TC("libtests", "JSON key missing in object");
|
|
errors.push_back(
|
|
err_prefix + ": key \"" + key +
|
|
"\" is present in schema but missing in object");
|
|
}
|
|
}
|
|
}
|
|
for (auto const& iter: this_dict->members) {
|
|
std::string const& key = iter.first;
|
|
if (sch_dict->members.count(key) == 0) {
|
|
QTC::TC("libtests", "JSON key extra in object");
|
|
errors.push_back(
|
|
err_prefix + ": key \"" + key +
|
|
"\" is not present in schema but appears in object");
|
|
}
|
|
}
|
|
} else if (sch_arr) {
|
|
if (!this_arr) {
|
|
QTC::TC("libtests", "JSON wanted array");
|
|
errors.push_back(err_prefix + " is supposed to be an array");
|
|
return false;
|
|
}
|
|
if (sch_arr->elements.size() != 1) {
|
|
QTC::TC("libtests", "JSON schema array error");
|
|
errors.push_back(
|
|
err_prefix + " schema array contains other than one item");
|
|
return false;
|
|
}
|
|
int i = 0;
|
|
for (auto const& element: this_arr->elements) {
|
|
checkSchemaInternal(
|
|
element.get(),
|
|
sch_arr->elements.at(0).get(),
|
|
flags,
|
|
errors,
|
|
prefix + "." + QUtil::int_to_string(i));
|
|
++i;
|
|
}
|
|
} else if (!sch_str) {
|
|
QTC::TC("libtests", "JSON schema other type");
|
|
errors.push_back(
|
|
err_prefix + " schema value is not dictionary, array, or string");
|
|
return false;
|
|
}
|
|
|
|
return errors.empty();
|
|
}
|
|
|
|
namespace
|
|
{
|
|
class JSONParser
|
|
{
|
|
public:
|
|
JSONParser(JSON::Reactor* reactor) :
|
|
reactor(reactor),
|
|
lex_state(ls_top),
|
|
number_before_point(0),
|
|
number_after_point(0),
|
|
number_after_e(0),
|
|
number_saw_point(false),
|
|
number_saw_e(false),
|
|
cstr(nullptr),
|
|
end(nullptr),
|
|
tok_start(nullptr),
|
|
tok_end(nullptr),
|
|
p(nullptr),
|
|
parser_state(ps_top)
|
|
{
|
|
}
|
|
|
|
std::shared_ptr<JSON> parse(std::string const& s);
|
|
|
|
private:
|
|
void getToken();
|
|
void handleToken();
|
|
static std::string decode_string(std::string const& json);
|
|
|
|
enum parser_state_e {
|
|
ps_top,
|
|
ps_dict_begin,
|
|
ps_dict_after_key,
|
|
ps_dict_after_colon,
|
|
ps_dict_after_item,
|
|
ps_dict_after_comma,
|
|
ps_array_begin,
|
|
ps_array_after_item,
|
|
ps_array_after_comma,
|
|
ps_done,
|
|
};
|
|
|
|
enum lex_state_e {
|
|
ls_top,
|
|
ls_number,
|
|
ls_alpha,
|
|
ls_string,
|
|
ls_backslash,
|
|
};
|
|
|
|
JSON::Reactor* reactor;
|
|
lex_state_e lex_state;
|
|
size_t number_before_point;
|
|
size_t number_after_point;
|
|
size_t number_after_e;
|
|
bool number_saw_point;
|
|
bool number_saw_e;
|
|
char const* cstr;
|
|
char const* end;
|
|
char const* tok_start;
|
|
char const* tok_end;
|
|
char const* p;
|
|
parser_state_e parser_state;
|
|
std::vector<std::shared_ptr<JSON>> stack;
|
|
std::vector<parser_state_e> ps_stack;
|
|
std::string dict_key;
|
|
};
|
|
} // namespace
|
|
|
|
std::string
|
|
JSONParser::decode_string(std::string const& str)
|
|
{
|
|
// The string has already been validated when this private method
|
|
// is called, so errors are logic errors instead of runtime
|
|
// errors.
|
|
size_t len = str.length();
|
|
if ((len < 2) || (str.at(0) != '"') || (str.at(len - 1) != '"')) {
|
|
throw std::logic_error(
|
|
"JSON Parse: decode_string called with other than \"...\"");
|
|
}
|
|
char const* s = str.c_str();
|
|
// Move inside the quotation marks
|
|
++s;
|
|
len -= 2;
|
|
std::string result;
|
|
for (size_t i = 0; i < len; ++i) {
|
|
if (s[i] == '\\') {
|
|
if (i + 1 >= len) {
|
|
throw std::logic_error("JSON parse: nothing after \\");
|
|
}
|
|
char ch = s[++i];
|
|
switch (ch) {
|
|
case '\\':
|
|
case '\"':
|
|
case '/':
|
|
// \/ is allowed in json input, but so is /, so we
|
|
// don't map / to \/ in output.
|
|
result.append(1, ch);
|
|
break;
|
|
case 'b':
|
|
result.append(1, '\b');
|
|
break;
|
|
case 'f':
|
|
result.append(1, '\f');
|
|
break;
|
|
case 'n':
|
|
result.append(1, '\n');
|
|
break;
|
|
case 'r':
|
|
result.append(1, '\r');
|
|
break;
|
|
case 't':
|
|
result.append(1, '\t');
|
|
break;
|
|
case 'u':
|
|
if (i + 4 >= len) {
|
|
throw std::logic_error(
|
|
"JSON parse: not enough characters after \\u");
|
|
}
|
|
{
|
|
std::string hex =
|
|
QUtil::hex_decode(std::string(s + i + 1, s + i + 5));
|
|
i += 4;
|
|
unsigned char high = static_cast<unsigned char>(hex.at(0));
|
|
unsigned char low = static_cast<unsigned char>(hex.at(1));
|
|
unsigned long codepoint = high;
|
|
codepoint <<= 8;
|
|
codepoint += low;
|
|
result += QUtil::toUTF8(codepoint);
|
|
}
|
|
break;
|
|
default:
|
|
throw std::logic_error("JSON parse: bad character after \\");
|
|
break;
|
|
}
|
|
} else {
|
|
result.append(1, s[i]);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
void
|
|
JSONParser::getToken()
|
|
{
|
|
while (p < end) {
|
|
if (*p == 0) {
|
|
QTC::TC("libtests", "JSON parse null character");
|
|
throw std::runtime_error(
|
|
"JSON: null character at offset " +
|
|
QUtil::int_to_string(p - cstr));
|
|
}
|
|
switch (lex_state) {
|
|
case ls_top:
|
|
if (*p == '"') {
|
|
tok_start = p;
|
|
tok_end = nullptr;
|
|
lex_state = ls_string;
|
|
} else if (QUtil::is_space(*p)) {
|
|
// ignore
|
|
} else if ((*p >= 'a') && (*p <= 'z')) {
|
|
tok_start = p;
|
|
tok_end = nullptr;
|
|
lex_state = ls_alpha;
|
|
} else if (*p == '-') {
|
|
tok_start = p;
|
|
tok_end = nullptr;
|
|
lex_state = ls_number;
|
|
number_before_point = 0;
|
|
number_after_point = 0;
|
|
number_after_e = 0;
|
|
number_saw_point = false;
|
|
number_saw_e = false;
|
|
} else if ((*p >= '0') && (*p <= '9')) {
|
|
tok_start = p;
|
|
tok_end = nullptr;
|
|
lex_state = ls_number;
|
|
number_before_point = 1;
|
|
number_after_point = 0;
|
|
number_after_e = 0;
|
|
number_saw_point = false;
|
|
number_saw_e = false;
|
|
} else if (*p == '.') {
|
|
tok_start = p;
|
|
tok_end = nullptr;
|
|
lex_state = ls_number;
|
|
number_before_point = 0;
|
|
number_after_point = 0;
|
|
number_after_e = 0;
|
|
number_saw_point = true;
|
|
number_saw_e = false;
|
|
} else if (strchr("{}[]:,", *p)) {
|
|
tok_start = p;
|
|
tok_end = p + 1;
|
|
} else {
|
|
QTC::TC("libtests", "JSON parse bad character");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": unexpected character " + std::string(p, 1));
|
|
}
|
|
break;
|
|
|
|
case ls_number:
|
|
if ((*p >= '0') && (*p <= '9')) {
|
|
if (number_saw_e) {
|
|
++number_after_e;
|
|
} else if (number_saw_point) {
|
|
++number_after_point;
|
|
} else {
|
|
++number_before_point;
|
|
}
|
|
} else if (*p == '.') {
|
|
if (number_saw_e) {
|
|
QTC::TC("libtests", "JSON parse point after e");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": numeric literal: decimal point after e");
|
|
} else if (number_saw_point) {
|
|
QTC::TC("libtests", "JSON parse duplicate point");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": numeric literal: decimal point already seen");
|
|
} else {
|
|
number_saw_point = true;
|
|
}
|
|
} else if (*p == 'e') {
|
|
if (number_saw_e) {
|
|
QTC::TC("libtests", "JSON parse duplicate e");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": numeric literal: e already seen");
|
|
} else {
|
|
number_saw_e = true;
|
|
}
|
|
} else if ((*p == '+') || (*p == '-')) {
|
|
if (number_saw_e && (number_after_e == 0)) {
|
|
// okay
|
|
} else {
|
|
QTC::TC("libtests", "JSON parse unexpected sign");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": numeric literal: unexpected sign");
|
|
}
|
|
} else if (QUtil::is_space(*p)) {
|
|
tok_end = p;
|
|
} else if (strchr("{}[]:,", *p)) {
|
|
tok_end = p;
|
|
--p;
|
|
} else {
|
|
QTC::TC("libtests", "JSON parse numeric bad character");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": numeric literal: unexpected character " +
|
|
std::string(p, 1));
|
|
}
|
|
break;
|
|
|
|
case ls_alpha:
|
|
if ((*p >= 'a') && (*p <= 'z')) {
|
|
// okay
|
|
} else if (QUtil::is_space(*p)) {
|
|
tok_end = p;
|
|
} else if (strchr("{}[]:,", *p)) {
|
|
tok_end = p;
|
|
--p;
|
|
} else {
|
|
QTC::TC("libtests", "JSON parse keyword bad character");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": keyword: unexpected character " + std::string(p, 1));
|
|
}
|
|
break;
|
|
|
|
case ls_string:
|
|
if (*p == '"') {
|
|
tok_end = p + 1;
|
|
} else if (*p == '\\') {
|
|
lex_state = ls_backslash;
|
|
}
|
|
break;
|
|
|
|
case ls_backslash:
|
|
/* cSpell: ignore bfnrt */
|
|
if (strchr("\\\"/bfnrt", *p)) {
|
|
lex_state = ls_string;
|
|
} else if (*p == 'u') {
|
|
if (p + 4 >= end) {
|
|
QTC::TC("libtests", "JSON parse premature end of u");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": \\u must be followed by four characters");
|
|
}
|
|
for (size_t i = 1; i <= 4; ++i) {
|
|
if (!QUtil::is_hex_digit(p[i])) {
|
|
QTC::TC("libtests", "JSON parse bad hex after u");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": \\u must be followed by four hex digits");
|
|
}
|
|
}
|
|
p += 4;
|
|
lex_state = ls_string;
|
|
} else {
|
|
QTC::TC("libtests", "JSON parse backslash bad character");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": invalid character after backslash: " +
|
|
std::string(p, 1));
|
|
}
|
|
break;
|
|
}
|
|
++p;
|
|
if (tok_start && tok_end) {
|
|
break;
|
|
}
|
|
}
|
|
if (p == end) {
|
|
if (tok_start && (!tok_end)) {
|
|
switch (lex_state) {
|
|
case ls_top:
|
|
// Can't happen
|
|
throw std::logic_error(
|
|
"tok_start set in ls_top while parsing " +
|
|
std::string(cstr));
|
|
break;
|
|
|
|
case ls_number:
|
|
case ls_alpha:
|
|
tok_end = p;
|
|
break;
|
|
|
|
case ls_string:
|
|
case ls_backslash:
|
|
QTC::TC("libtests", "JSON parse unterminated string");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": unterminated string");
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
JSONParser::handleToken()
|
|
{
|
|
if (!(tok_start && tok_end)) {
|
|
return;
|
|
}
|
|
|
|
// Get token value.
|
|
std::string value(tok_start, tok_end);
|
|
|
|
if (parser_state == ps_done) {
|
|
QTC::TC("libtests", "JSON parse junk after object");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": material follows end of object: " + value);
|
|
}
|
|
|
|
// Git string value
|
|
std::string s_value;
|
|
if (lex_state == ls_string) {
|
|
// Token includes the quotation marks
|
|
if (tok_end - tok_start < 2) {
|
|
throw std::logic_error("JSON string length < 2");
|
|
}
|
|
s_value = decode_string(value);
|
|
}
|
|
// Based on the lexical state and value, figure out whether we are
|
|
// looking at an item or a delimiter. It will always be exactly
|
|
// one of those two or an error condition.
|
|
|
|
std::shared_ptr<JSON> item;
|
|
char delimiter = '\0';
|
|
switch (lex_state) {
|
|
case ls_top:
|
|
switch (*tok_start) {
|
|
case '{':
|
|
item = std::make_shared<JSON>(JSON::makeDictionary());
|
|
item->setStart(QIntC::to_size(tok_start - cstr));
|
|
if (reactor) {
|
|
reactor->dictionaryStart();
|
|
}
|
|
break;
|
|
|
|
case '[':
|
|
item = std::make_shared<JSON>(JSON::makeArray());
|
|
item->setStart(QIntC::to_size(tok_start - cstr));
|
|
if (reactor) {
|
|
reactor->arrayStart();
|
|
}
|
|
break;
|
|
|
|
default:
|
|
delimiter = *tok_start;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case ls_number:
|
|
if (number_saw_point && (number_after_point == 0)) {
|
|
QTC::TC("libtests", "JSON parse decimal with no digits");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": decimal point with no digits");
|
|
}
|
|
if ((number_before_point > 1) &&
|
|
((tok_start[0] == '0') ||
|
|
((tok_start[0] == '-') && (tok_start[1] == '0')))) {
|
|
QTC::TC("libtests", "JSON parse leading zero");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": number with leading zero");
|
|
}
|
|
if ((number_before_point == 0) && (number_after_point == 0)) {
|
|
QTC::TC("libtests", "JSON parse number no digits");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": number with no digits");
|
|
}
|
|
item = std::make_shared<JSON>(JSON::makeNumber(value));
|
|
break;
|
|
|
|
case ls_alpha:
|
|
if (value == "true") {
|
|
item = std::make_shared<JSON>(JSON::makeBool(true));
|
|
} else if (value == "false") {
|
|
item = std::make_shared<JSON>(JSON::makeBool(false));
|
|
} else if (value == "null") {
|
|
item = std::make_shared<JSON>(JSON::makeNull());
|
|
} else {
|
|
QTC::TC("libtests", "JSON parse invalid keyword");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": invalid keyword " + value);
|
|
}
|
|
break;
|
|
|
|
case ls_string:
|
|
item = std::make_shared<JSON>(JSON::makeString(s_value));
|
|
break;
|
|
|
|
case ls_backslash:
|
|
throw std::logic_error("tok_end is set while state = ls_backslash");
|
|
break;
|
|
}
|
|
|
|
if ((item.get() == nullptr) == (delimiter == '\0')) {
|
|
throw std::logic_error(
|
|
"JSONParser::handleToken: logic error: exactly one of item"
|
|
" or delimiter must be set");
|
|
}
|
|
|
|
// See whether what we have is allowed at this point.
|
|
|
|
if (item.get()) {
|
|
switch (parser_state) {
|
|
case ps_done:
|
|
throw std::logic_error("can't happen; ps_done already handled");
|
|
break;
|
|
|
|
case ps_dict_after_key:
|
|
QTC::TC("libtests", "JSON parse expected colon");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": expected ':'");
|
|
break;
|
|
|
|
case ps_dict_after_item:
|
|
QTC::TC("libtests", "JSON parse expected , or }");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": expected ',' or '}'");
|
|
break;
|
|
|
|
case ps_array_after_item:
|
|
QTC::TC("libtests", "JSON parse expected, or ]");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": expected ',' or ']'");
|
|
break;
|
|
|
|
case ps_dict_begin:
|
|
case ps_dict_after_comma:
|
|
if (lex_state != ls_string) {
|
|
QTC::TC("libtests", "JSON parse string as dict key");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": expect string as dictionary key");
|
|
}
|
|
break;
|
|
|
|
case ps_top:
|
|
case ps_dict_after_colon:
|
|
case ps_array_begin:
|
|
case ps_array_after_comma:
|
|
break;
|
|
// okay
|
|
}
|
|
} else if (delimiter == '}') {
|
|
if (!((parser_state == ps_dict_begin) ||
|
|
(parser_state == ps_dict_after_item)))
|
|
|
|
{
|
|
QTC::TC("libtests", "JSON parse unexpected }");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": unexpected dictionary end delimiter");
|
|
}
|
|
} else if (delimiter == ']') {
|
|
if (!((parser_state == ps_array_begin) ||
|
|
(parser_state == ps_array_after_item)))
|
|
|
|
{
|
|
QTC::TC("libtests", "JSON parse unexpected ]");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": unexpected array end delimiter");
|
|
}
|
|
} else if (delimiter == ':') {
|
|
if (parser_state != ps_dict_after_key) {
|
|
QTC::TC("libtests", "JSON parse unexpected :");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": unexpected colon");
|
|
}
|
|
} else if (delimiter == ',') {
|
|
if (!((parser_state == ps_dict_after_item) ||
|
|
(parser_state == ps_array_after_item))) {
|
|
QTC::TC("libtests", "JSON parse unexpected ,");
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": unexpected comma");
|
|
}
|
|
} else if (delimiter != '\0') {
|
|
throw std::logic_error("JSONParser::handleToken: bad delimiter");
|
|
}
|
|
|
|
// Now we know we have a delimiter or item that is allowed. Do
|
|
// whatever we need to do with it.
|
|
|
|
parser_state_e next_state = ps_top;
|
|
if (delimiter == ':') {
|
|
next_state = ps_dict_after_colon;
|
|
} else if (delimiter == ',') {
|
|
if (parser_state == ps_dict_after_item) {
|
|
next_state = ps_dict_after_comma;
|
|
} else if (parser_state == ps_array_after_item) {
|
|
next_state = ps_array_after_comma;
|
|
} else {
|
|
throw std::logic_error("JSONParser::handleToken: unexpected parser"
|
|
" state for comma");
|
|
}
|
|
} else if ((delimiter == '}') || (delimiter == ']')) {
|
|
next_state = ps_stack.back();
|
|
ps_stack.pop_back();
|
|
auto tos = stack.back();
|
|
tos->setEnd(QIntC::to_size(tok_end - cstr));
|
|
if (reactor) {
|
|
reactor->containerEnd(*tos);
|
|
}
|
|
if (next_state != ps_done) {
|
|
stack.pop_back();
|
|
}
|
|
} else if (delimiter != '\0') {
|
|
throw std::logic_error(
|
|
"JSONParser::handleToken: unexpected delimiter in transition");
|
|
} else if (item.get()) {
|
|
if (!(item->isArray() || item->isDictionary())) {
|
|
item->setStart(QIntC::to_size(tok_start - cstr));
|
|
item->setEnd(QIntC::to_size(tok_end - cstr));
|
|
}
|
|
|
|
std::shared_ptr<JSON> tos;
|
|
if (!stack.empty()) {
|
|
tos = stack.back();
|
|
}
|
|
switch (parser_state) {
|
|
case ps_dict_begin:
|
|
case ps_dict_after_comma:
|
|
this->dict_key = s_value;
|
|
item = nullptr;
|
|
next_state = ps_dict_after_key;
|
|
break;
|
|
|
|
case ps_dict_after_colon:
|
|
if (!reactor || !reactor->dictionaryItem(dict_key, *item)) {
|
|
tos->addDictionaryMember(dict_key, *item);
|
|
}
|
|
next_state = ps_dict_after_item;
|
|
break;
|
|
|
|
case ps_array_begin:
|
|
case ps_array_after_comma:
|
|
if (!reactor || !reactor->arrayItem(*item)) {
|
|
tos->addArrayElement(*item);
|
|
}
|
|
next_state = ps_array_after_item;
|
|
break;
|
|
|
|
case ps_top:
|
|
next_state = ps_done;
|
|
break;
|
|
|
|
case ps_dict_after_key:
|
|
case ps_dict_after_item:
|
|
case ps_array_after_item:
|
|
case ps_done:
|
|
throw std::logic_error(
|
|
"JSONParser::handleToken: unexpected parser state");
|
|
}
|
|
} else {
|
|
throw std::logic_error(
|
|
"JSONParser::handleToken: unexpected null item in transition");
|
|
}
|
|
|
|
// Prepare for next token
|
|
if (item.get()) {
|
|
if (item->isDictionary()) {
|
|
stack.push_back(item);
|
|
ps_stack.push_back(next_state);
|
|
next_state = ps_dict_begin;
|
|
} else if (item->isArray()) {
|
|
stack.push_back(item);
|
|
ps_stack.push_back(next_state);
|
|
next_state = ps_array_begin;
|
|
} else if (parser_state == ps_top) {
|
|
stack.push_back(item);
|
|
}
|
|
}
|
|
if (ps_stack.size() > 500) {
|
|
throw std::runtime_error(
|
|
"JSON: offset " + QUtil::int_to_string(p - cstr) +
|
|
": maximum object depth exceeded");
|
|
}
|
|
parser_state = next_state;
|
|
tok_start = nullptr;
|
|
tok_end = nullptr;
|
|
lex_state = ls_top;
|
|
}
|
|
|
|
std::shared_ptr<JSON>
|
|
JSONParser::parse(std::string const& s)
|
|
{
|
|
cstr = s.c_str();
|
|
end = cstr + s.length();
|
|
p = cstr;
|
|
|
|
while (p < end) {
|
|
getToken();
|
|
handleToken();
|
|
}
|
|
if (parser_state != ps_done) {
|
|
QTC::TC("libtests", "JSON parse premature EOF");
|
|
throw std::runtime_error("JSON: premature end of input");
|
|
}
|
|
auto const& tos = stack.back();
|
|
if (reactor && tos.get() && !(tos->isArray() || tos->isDictionary())) {
|
|
reactor->topLevelScalar();
|
|
}
|
|
return tos;
|
|
}
|
|
|
|
// Parse JSON text and return the resulting value. reactor is passed to the
// parser and may be null. Parse errors propagate as exceptions from the
// parser.
JSON
JSON::parse(std::string const& s, Reactor* reactor)
{
    JSONParser parser(reactor);
    std::shared_ptr<JSON> parsed = parser.parse(s);
    return *parsed;
}
|
|
|
|
void
|
|
JSON::setStart(size_t start)
|
|
{
|
|
this->m->start = start;
|
|
}
|
|
|
|
void
|
|
JSON::setEnd(size_t end)
|
|
{
|
|
this->m->end = end;
|
|
}
|
|
|
|
size_t
|
|
JSON::getStart() const
|
|
{
|
|
return this->m->start;
|
|
}
|
|
|
|
size_t
|
|
JSON::getEnd() const
|
|
{
|
|
return this->m->end;
|
|
}
|