From f8d1ab946205440ed3c44511ef42e5ad13fb9e5e Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sun, 24 Jul 2022 16:17:03 -0400 Subject: [PATCH] JSON schema -- accept single item in place of array When the schema wants a variable-length array, allow a single item as well as allowing an array. --- ChangeLog | 10 ++++++++++ TODO | 4 ---- include/qpdf/JSON.hh | 8 ++++++++ libqpdf/JSON.cc | 27 ++++++++++++++------------- libtests/json.cc | 20 +++++++++++--------- libtests/libtests.testcov | 2 +- libtests/qtest/json/json.out | 3 ++- 7 files changed, 46 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 612c359d..e0c20f9d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,15 @@ 2022-07-24 Jay Berkenbilt + * include/qpdf/JSON.hh: Schema validation: allow a single item to + appear anywhere that the schema has an array of a single item. + This makes it possible to change an element of the schema from an + item to an array to allow the data to accept an array where a + single value was previously required. This change is needed to + allow QPDFJob JSON to start accepting multiple items where a + single item used to be expected without breaking backward + compatibility. Without this change, the earlier fix to + removeAttachment would be a breaking change. + * QPDFObjectHandle: for the methods insertItem, appendItem, eraseItem, replaceKey, and removeKey, add a corresponding "AndGetNew" and/or "AndGetOld" methods. The ones that end with diff --git a/TODO b/TODO index 25ca9ad7..80c9ef0d 100644 --- a/TODO +++ b/TODO @@ -19,10 +19,6 @@ Pending changes: appimage build specifically is setting the runpath, which is actually desirable in this case. Make sure to understand and document this. Maybe add a check for it in the build. -* Make job JSON accept a single element and treat as an array of one - when an array is expected. This allows for making things repeatable - in the future without breaking compatibility and is needed for the - remote-attachment fix to be backward-compatible. * Decide what to do about #664 (get*Box) * Add an option --ignore-encryption to ignore encryption information and treat encrypted files as if they weren't encrypted. This should diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh index ef1632c2..ebeaaec8 100644 --- a/include/qpdf/JSON.hh +++ b/include/qpdf/JSON.hh @@ -184,6 +184,14 @@ class JSON // * The schema is a nested structure containing dictionaries, // single-element arrays, and strings only. // * Recursively walk the schema. + // * Whenever the schema has an array of length 1 and the object + // does not have an array in the corresponding location, + // validate the object against the array's single element. + // This effectively enables a single element to appear in + // place of an array and be treated as if it were an array of + // one element. This makes it possible to decide later that + // something that used to contain a single element now allows + // an array without invalidating any old data. // * If the current value is a dictionary, this object must have // a dictionary in the same place with the same keys. If flags // contains f_optional, a key in the schema does not have to diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 8f4e75a2..7bd2337f 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -538,26 +538,27 @@ JSON::checkSchemaInternal( } } } else if (sch_arr) { - if (!this_arr) { - QTC::TC("libtests", "JSON wanted array"); - errors.push_back(err_prefix + " is supposed to be an array"); - return false; - } if (sch_arr->elements.size() != 1) { QTC::TC("libtests", "JSON schema array error"); errors.push_back( err_prefix + " schema array contains other than one item"); return false; } - int i = 0; - for (auto const& element: this_arr->elements) { + if (this_arr) { + int i = 0; + for (auto const& element: this_arr->elements) { + checkSchemaInternal( + element.get(), + sch_arr->elements.at(0).get(), + flags, + errors, + prefix + "." + QUtil::int_to_string(i)); + ++i; + } + } else { + QTC::TC("libtests", "JSON schema array for single item"); checkSchemaInternal( - element.get(), - sch_arr->elements.at(0).get(), - flags, - errors, - prefix + "." + QUtil::int_to_string(i)); - ++i; + this_v, sch_arr->elements.at(0).get(), flags, errors, prefix); } } else if (!sch_str) { QTC::TC("libtests", "JSON schema other type"); diff --git a/libtests/json.cc b/libtests/json.cc index 9c5720f4..a509c2be 100644 --- a/libtests/json.cc +++ b/libtests/json.cc @@ -162,7 +162,9 @@ test_schema() "x": "ecks" }, "s": [ - "esses" + { + "ss": "esses" + } ] } }, @@ -236,6 +238,8 @@ test_schema() JSON bad_schema = JSON::parse(R"({"a": true, "b": "potato?"})"); check_schema(bad_schema, bad_schema, 0, false, "bad schema field type"); + // "two" exercises the case of the JSON containing a single + // element where the schema has an array. JSON good = JSON::parse(R"( { "one": { @@ -245,17 +249,15 @@ test_schema() "x": [1, null] }, "s": [ - null, - "anything" + {"ss": null}, + {"ss": "anything"} ] } }, - "two": [ - { - "glarp": "enspliel", - "goose": 3.14 - } - ], + "two": { + "glarp": "enspliel", + "goose": 3.14 + }, "three": { "": { "z": "ebra" diff --git a/libtests/libtests.testcov b/libtests/libtests.testcov index 21def9f3..7f71d8e0 100644 --- a/libtests/libtests.testcov +++ b/libtests/libtests.testcov @@ -36,7 +36,6 @@ Pl_PNGFilter decodePaeth 0 Pl_TIFFPredictor processRow 1 JSON wanted dictionary 0 JSON key missing in object 0 -JSON wanted array 0 JSON schema array error 0 JSON key extra in object 0 QPDFArgParser read args from stdin 0 @@ -93,3 +92,4 @@ JSON 16 high high 0 JSON 16 low not after high 0 JSON 16 dangling high 0 JSON parse duplicate key 0 +JSON schema array for single item 0 diff --git a/libtests/qtest/json/json.out b/libtests/qtest/json/json.out index c7cb85a7..811f8aed 100644 --- a/libtests/qtest/json/json.out +++ b/libtests/qtest/json/json.out @@ -4,7 +4,8 @@ top-level object is supposed to be a dictionary --- missing items json key ".one.a": key "q" is present in schema but missing in object json key ".one.a.r" is supposed to be a dictionary -json key ".one.a.s" is supposed to be an array +json key ".one.a.s": key "ss" is present in schema but missing in object +json key ".one.a.s": key "z" is not present in schema but appears in object json key ".one.a": key "t" is not present in schema but appears in object json key ".three.anything": key "z" is present in schema but missing in object json key ".three.anything": key "x" is not present in schema but appears in object