From e259635986a799f0b72b6040aba8c1ed870e552a Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 4 May 2022 08:32:54 -0400 Subject: [PATCH] JSON: add write methods and implement unparse() in terms of those --- ChangeLog | 5 ++ TODO | 11 --- cSpell.json | 1 + include/qpdf/JSON.hh | 62 ++++++++++++-- libqpdf/JSON.cc | 169 +++++++++++++++++++++++++++------------ manual/release-notes.rst | 4 + 6 files changed, 181 insertions(+), 71 deletions(-) diff --git a/ChangeLog b/ChangeLog index be196fff..27b1d679 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2022-05-04 Jay Berkenbilt + * Enhance JSON by adding a write method that takes a Pipeline* and + depth, and add several helper methods to make it easier to write + large amounts of JSON incrementally without having to have the + whole thing in memory. + * json v1 output: make "pages" and "objects" consistent. Previously, "objects" always reflected the objects exactly as they appeared in the original file, while "pages" reflected objects diff --git a/TODO b/TODO index ec110f34..d1bf0117 100644 --- a/TODO +++ b/TODO @@ -51,11 +51,6 @@ library, when context is available, to have a pipeline rather than a FILE* or std::ostream. This makes it possible for people to capture output more flexibly. -JSON: rather than unparse() -> string, there should be write method -that takes a pipeline and a depth. Then rewrite all the unparse -methods to use it. This makes incremental write possible as well as -writing arbitrarily large amounts of output. - JSON::parse should work from an InputSource. BufferInputSource can already start with a std::string. @@ -64,12 +59,6 @@ writes data to the pipeline. It's writer should create a Pl_Base64 -> Pl_Concatenate in front of the pipeline passed to write and call the function with that. -Add methods needed to do incremental writes. Basically we need to -expose functionality the array and dictionary unparse methods. 
Maybe -we can have a DictionaryWriter and an ArrayWriter that deal with the -first/depth logic and have writeElement or writeEntry(key, value) -methods. - For json output, do not unparse to string. Use the writers instead. Write incrementally. This changes ordering only, but we should be able manually update the test output for those cases. Objects should be diff --git a/cSpell.json b/cSpell.json index 2a5a4db4..cba39d9a 100644 --- a/cSpell.json +++ b/cSpell.json @@ -328,6 +328,7 @@ "outfilename", "pacman", "paeth", + "pagelabel", "pagelabels", "pageno", "pageposfrom", diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh index e5fa629d..24d09cc2 100644 --- a/include/qpdf/JSON.hh +++ b/include/qpdf/JSON.hh @@ -45,12 +45,56 @@ #include #include +class Pipeline; + class JSON { public: QPDF_DLL std::string unparse() const; + // Write the JSON object through a pipeline. The `depth` parameter + // specifies how deeply nested this is in another JSON structure, + // which makes it possible to write clean-looking JSON + // incrementally. + QPDF_DLL + void write(Pipeline*, size_t depth = 0) const; + + // Helper methods for writing JSON incrementally. Several methods + // take a `bool& first` parameter. The open methods always set it + // to true, and the methods to output items always set it to + // false. This way, the item and close methods can always know + // whether or not a first item is being written. The intended mode + // of operation is to start with `bool first = true` (though it + // doesn't matter how it's initialized) and just pass the same + // `first` through to all the methods, letting the JSON object use + // it to keep track of when it's writing a first object and when + // it's not. + + // Open methods ignore the value of first and set it to true + QPDF_DLL + static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); + QPDF_DLL + static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); + // Close methods don't modify first. 
A true value indicates that + // we are closing an empty object. + QPDF_DLL + static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); + QPDF_DLL + static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); + // The item methods use the value of first to determine if this is + // the first item and always set it to false. + QPDF_DLL + static void writeDictionaryItem( + Pipeline*, + bool& first, + std::string const& key, + JSON const& value, + size_t depth = 0); + QPDF_DLL + static void writeArrayItem( + Pipeline*, bool& first, JSON const& element, size_t depth = 0); + // The JSON spec calls dictionaries "objects", but that creates // too much confusion when referring to instances of the JSON // class. @@ -224,29 +268,33 @@ class JSON private: static std::string encode_string(std::string const& utf8); + static void + writeClose(Pipeline* p, bool first, size_t depth, char const* delimeter); + static void writeIndent(Pipeline* p, size_t depth); + static void writeNext(Pipeline* p, bool& first, size_t depth); struct JSON_value { virtual ~JSON_value() = default; - virtual std::string unparse(size_t depth) const = 0; + virtual void write(Pipeline*, size_t depth) const = 0; }; struct JSON_dictionary: public JSON_value { virtual ~JSON_dictionary() = default; - virtual std::string unparse(size_t depth) const; + virtual void write(Pipeline*, size_t depth) const; std::map> members; }; struct JSON_array: public JSON_value { virtual ~JSON_array() = default; - virtual std::string unparse(size_t depth) const; + virtual void write(Pipeline*, size_t depth) const; std::vector> elements; }; struct JSON_string: public JSON_value { JSON_string(std::string const& utf8); virtual ~JSON_string() = default; - virtual std::string unparse(size_t depth) const; + virtual void write(Pipeline*, size_t depth) const; std::string utf8; std::string encoded; }; @@ -256,20 +304,20 @@ class JSON JSON_number(double val); JSON_number(std::string const& val); virtual 
~JSON_number() = default; - virtual std::string unparse(size_t depth) const; + virtual void write(Pipeline*, size_t depth) const; std::string encoded; }; struct JSON_bool: public JSON_value { JSON_bool(bool val); virtual ~JSON_bool() = default; - virtual std::string unparse(size_t depth) const; + virtual void write(Pipeline*, size_t depth) const; bool value; }; struct JSON_null: public JSON_value { virtual ~JSON_null() = default; - virtual std::string unparse(size_t depth) const; + virtual void write(Pipeline*, size_t depth) const; }; JSON(std::shared_ptr); diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 0f589804..3c5ddafd 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -1,5 +1,7 @@ #include +#include +#include #include #include #include @@ -18,51 +20,103 @@ JSON::JSON(std::shared_ptr value) : { } -std::string -JSON::JSON_dictionary::unparse(size_t depth) const +void +JSON::writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter) { - std::string result = "{"; - bool first = true; - for (auto const& iter: members) { - if (first) { - first = false; - } else { - result.append(1, ','); - } - result.append(1, '\n'); - result.append(2 * (1 + depth), ' '); - result += - ("\"" + iter.first + "\": " + iter.second->unparse(1 + depth)); - } if (!first) { - result.append(1, '\n'); - result.append(2 * depth, ' '); + *p << "\n"; + writeIndent(p, depth); } - result.append(1, '}'); - return result; + *p << delimiter; } -std::string -JSON::JSON_array::unparse(size_t depth) const +void +JSON::writeIndent(Pipeline* p, size_t depth) +{ + for (size_t i = 0; i < depth; ++i) { + *p << " "; + } +} + +void +JSON::writeNext(Pipeline* p, bool& first, size_t depth) +{ + if (first) { + first = false; + } else { + *p << ","; + } + *p << "\n"; + writeIndent(p, 1 + depth); +} + +void +JSON::writeDictionaryOpen(Pipeline* p, bool& first, size_t depth) +{ + *p << "{"; + first = true; +} + +void +JSON::writeArrayOpen(Pipeline* p, bool& first, size_t depth) +{ + *p << "["; + 
first = true; +} + +void +JSON::writeDictionaryClose(Pipeline* p, bool first, size_t depth) +{ + writeClose(p, first, depth, "}"); +} + +void +JSON::writeArrayClose(Pipeline* p, bool first, size_t depth) +{ + writeClose(p, first, depth, "]"); +} + +void +JSON::writeDictionaryItem( + Pipeline* p, + bool& first, + std::string const& key, + JSON const& value, + size_t depth) +{ + writeNext(p, first, depth); + *p << "\"" << key << "\": "; + value.write(p, 1 + depth); +} + +void +JSON::writeArrayItem( + Pipeline* p, bool& first, JSON const& element, size_t depth) +{ + writeNext(p, first, depth); + element.write(p, 1 + depth); +} + +void +JSON::JSON_dictionary::write(Pipeline* p, size_t depth) const { - std::string result = "["; bool first = true; + writeDictionaryOpen(p, first, depth); + for (auto const& iter: members) { + writeDictionaryItem(p, first, iter.first, iter.second, depth); + } + writeDictionaryClose(p, first, depth); +} + +void +JSON::JSON_array::write(Pipeline* p, size_t depth) const +{ + bool first = true; + writeArrayOpen(p, first, depth); for (auto const& element: elements) { - if (first) { - first = false; - } else { - result.append(1, ','); - } - result.append(1, '\n'); - result.append(2 * (1 + depth), ' '); - result += element->unparse(1 + depth); + writeArrayItem(p, first, element, depth); } - if (!first) { - result.append(1, '\n'); - result.append(2 * depth, ' '); - } - result.append(1, ']'); - return result; + writeArrayClose(p, first, depth); } JSON::JSON_string::JSON_string(std::string const& utf8) : @@ -71,10 +125,10 @@ JSON::JSON_string::JSON_string(std::string const& utf8) : { } -std::string -JSON::JSON_string::unparse(size_t) const +void +JSON::JSON_string::write(Pipeline* p, size_t) const { - return "\"" + encoded + "\""; + *p << "\"" << encoded << "\""; } JSON::JSON_number::JSON_number(long long value) : @@ -92,10 +146,10 @@ JSON::JSON_number::JSON_number(std::string const& value) : { } -std::string -JSON::JSON_number::unparse(size_t) const 
+void +JSON::JSON_number::write(Pipeline* p, size_t) const { - return encoded; + *p << encoded; } JSON::JSON_bool::JSON_bool(bool val) : @@ -103,26 +157,35 @@ JSON::JSON_bool::JSON_bool(bool val) : { } -std::string -JSON::JSON_bool::unparse(size_t) const +void +JSON::JSON_bool::write(Pipeline* p, size_t) const { - return value ? "true" : "false"; + *p << (value ? "true" : "false"); } -std::string -JSON::JSON_null::unparse(size_t) const +void +JSON::JSON_null::write(Pipeline* p, size_t) const { - return "null"; + *p << "null"; +} + +void +JSON::write(Pipeline* p, size_t depth) const +{ + if (0 == this->m->value.get()) { + *p << "null"; + } else { + this->m->value->write(p, depth); + } } std::string JSON::unparse() const { - if (0 == this->m->value.get()) { - return "null"; - } else { - return this->m->value->unparse(0); - } + std::string s; + Pl_String p("unparse", s); + write(&p, 0); + return s; } std::string diff --git a/manual/release-notes.rst b/manual/release-notes.rst index f313cd82..043245f4 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -123,6 +123,10 @@ For a detailed list of changes, please see the file - Add new ``Pipeline`` type ``Pl_String`` to append to a ``std::string``. + - Enhance JSON class to better support incrementally reading and + writing large amounts of data without having to keep everything + in memory. + - Other changes - In JSON v1 mode, the ``"objects"`` key now reflects the repaired