From 4c7cfd5cbc64c34b4532aad0d87e4c81e2277b02 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 14 May 2022 10:11:52 -0400 Subject: [PATCH] JSON reactor: improve handling of nested containers Call the parent container's item method before calling the child item's start method so we can easily know the current nesting level when nested items are added. --- include/qpdf/JSON.hh | 15 +++++++++++---- libqpdf/JSON.cc | 18 ++++++++++++------ libtests/qtest/json_parse/good-01-react.out | 4 ++-- libtests/qtest/json_parse/good-04-react.out | 6 +++--- libtests/qtest/json_parse/good-10-react.out | 10 +++++----- 5 files changed, 33 insertions(+), 20 deletions(-) diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh index 79f2a25f..e711a2df 100644 --- a/include/qpdf/JSON.hh +++ b/include/qpdf/JSON.hh @@ -220,7 +220,10 @@ class JSON // The start/end methods are called when parsing of a // dictionary or array is started or ended. The item methods // are called when an item is added to a dictionary or array. - // See important notes in "Item methods" below. + // When adding a container to another container, the item + // method is called with an empty container before the lower + // container's start method is called. See important notes in + // "Item methods" below. // During parsing of a JSON string, the parser is operating on // a single object at a time. 
When a dictionary or array is @@ -230,10 +233,10 @@ class JSON // following method calls // // dictionaryStart -- current object is the top-level dictionary + // dictionaryItem -- called with "a" and an empty array // arrayStart -- current object is the array // arrayItem -- called with the "1" object // containerEnd -- now current object is the dictionary again - // dictionaryItem -- called with "a" and the just-completed array // containerEnd -- current object is undefined // // If the top-level item in a JSON string is a scalar, the @@ -261,8 +264,12 @@ class JSON // NOTE: When a dictionary or an array is added to a // container, the dictionaryItem or arrayItem method is called // when the child item's start delimiter is encountered, so - // the JSON object passed in at that time will always be - // in its initial, empty state. + // the JSON object passed in at that time will always be in + // its initial, empty state. Additionally, the child item's + // start method is not called until after the parent item's + // item method is called. This makes it possible to keep track + // of the current depth level by incrementing level on start + // methods and decrementing on end methods. QPDF_DLL virtual bool diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 1c49f9ee..a2aff78b 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -949,17 +949,11 @@ JSONParser::handleToken() case '{': item = std::make_shared<JSON>(JSON::makeDictionary()); item->setStart(offset - token.length()); - if (reactor) { - reactor->dictionaryStart(); - } break; case '[': item = std::make_shared<JSON>(JSON::makeArray()); item->setStart(offset - token.length()); - if (reactor) { - reactor->arrayStart(); - } break; default: @@ -1187,6 +1181,18 @@ JSONParser::handleToken() "JSONParser::handleToken: unexpected null item in transition"); } + if (reactor && item.get()) { + // Calling container start method is postponed until after + // adding the containers to their parent containers, if any. 
+ // This makes it much easier to keep track of the current + // nesting level. + if (item->isDictionary()) { + reactor->dictionaryStart(); + } else if (item->isArray()) { + reactor->arrayStart(); + } + } + // Prepare for next token if (item.get()) { if (item->isDictionary()) { diff --git a/libtests/qtest/json_parse/good-01-react.out b/libtests/qtest/json_parse/good-01-react.out index d6167a6b..e3813bcc 100644 --- a/libtests/qtest/json_parse/good-01-react.out +++ b/libtests/qtest/json_parse/good-01-react.out @@ -1,14 +1,14 @@ dictionary start dictionary item: a -> [6, 11): "bcd" -array start dictionary item: e -> [18, 0): [] +array start array item: [19, 20): 1 array item: [41, 42): 2 array item: [44, 45): 3 array item: [46, 47): 4 array item: [48, 54): "five" -dictionary start array item: [56, 0): {} +dictionary start dictionary item: six -> [64, 65): 7 dictionary item: 8 -> [72, 73): 9 container end: [56, 74): {} diff --git a/libtests/qtest/json_parse/good-04-react.out b/libtests/qtest/json_parse/good-04-react.out index bd18ccfc..ded004b2 100644 --- a/libtests/qtest/json_parse/good-04-react.out +++ b/libtests/qtest/json_parse/good-04-react.out @@ -1,16 +1,16 @@ array start -array start array item: [1, 0): [] array start array item: [2, 0): [] -dictionary start +array start array item: [3, 0): {} +dictionary start container end: [3, 5): {} container end: [2, 6): [] -dictionary start array item: [8, 0): {} dictionary start dictionary item: -> [13, 0): {} +dictionary start container end: [13, 15): {} container end: [8, 16): {} container end: [1, 17): [] diff --git a/libtests/qtest/json_parse/good-10-react.out b/libtests/qtest/json_parse/good-10-react.out index 142d95d0..3cceeb2f 100644 --- a/libtests/qtest/json_parse/good-10-react.out +++ b/libtests/qtest/json_parse/good-10-react.out @@ -1,30 +1,30 @@ dictionary start -array start dictionary item: a -> [9, 0): [] +array start array item: [10, 11): 1 array item: [13, 14): 2 -dictionary start array item: [16, 0): {} 
+dictionary start dictionary item: x -> [22, 25): "y" container end: [16, 26): {} array item: [28, 29): 3 -dictionary start array item: [31, 0): {} +dictionary start dictionary item: keep -> [40, 61): "not in final output" container end: [31, 62): { "keep": "not in final output" } container end: [9, 63): [] -array start dictionary item: keep -> [75, 0): [] +array start array item: [76, 77): 1 array item: [79, 83): null array item: [85, 86): 2 array item: [88, 93): false array item: [95, 101): "keep" array item: [103, 104): 3 -array start array item: [106, 0): [] +array start array item: [107, 113): "this" array item: [115, 121): "keep" array item: [123, 128): "not"