JSON: add write methods and implement unparse() in terms of those

This commit is contained in:
Jay Berkenbilt 2022-05-04 08:32:54 -04:00
parent 8b25de24c9
commit e259635986
6 changed files with 181 additions and 71 deletions

View File

@ -1,5 +1,10 @@
2022-05-04 Jay Berkenbilt <ejb@ql.org>
* Enhance JSON by adding a write method that takes a Pipeline* and
depth, and add several helper methods to make it easier to write
large amounts of JSON incrementally without having to have the
whole thing in memory.
* json v1 output: make "pages" and "objects" consistent.
Previously, "objects" always reflected the objects exactly as they
appeared in the original file, while "pages" reflected objects

11
TODO
View File

@ -51,11 +51,6 @@ library, when context is available, to have a pipeline rather than a
FILE* or std::ostream. This makes it possible for people to capture
output more flexibly.
JSON: rather than unparse() -> string, there should be a write method
that takes a pipeline and a depth. Then rewrite all the unparse
methods to use it. This makes incremental write possible as well as
writing arbitrarily large amounts of output.
JSON::parse should work from an InputSource. BufferInputSource can
already start with a std::string.
@ -64,12 +59,6 @@ writes data to the pipeline. Its writer should create a Pl_Base64 ->
Pl_Concatenate in front of the pipeline passed to write and call the
function with that.
Add methods needed to do incremental writes. Basically we need to
expose the functionality of the array and dictionary unparse methods. Maybe
we can have a DictionaryWriter and an ArrayWriter that deal with the
first/depth logic and have writeElement or writeEntry(key, value)
methods.
For json output, do not unparse to string. Use the writers instead.
Write incrementally. This changes ordering only, but we should be able
to manually update the test output for those cases. Objects should be

View File

@ -328,6 +328,7 @@
"outfilename",
"pacman",
"paeth",
"pagelabel",
"pagelabels",
"pageno",
"pageposfrom",

View File

@ -45,12 +45,56 @@
#include <string>
#include <vector>
class Pipeline;
class JSON
{
public:
QPDF_DLL
std::string unparse() const;
// Write the JSON object through a pipeline. The `depth` parameter
// specifies how deeply nested this is in another JSON structure,
// which makes it possible to write clean-looking JSON
// incrementally.
QPDF_DLL
void write(Pipeline*, size_t depth = 0) const;
// Helper methods for writing JSON incrementally. Several methods
// take a `bool& first` parameter. The open methods always set it
// to true, and the methods to output items always set it to
// false. This way, the item and close methods can always know
// whether or not a first item is being written. The intended mode
// of operation is to start with `bool first = true` (though it
// doesn't matter how it's initialized) and just pass the same
// `first` through to all the methods, letting the JSON object use
// it to keep track of when it's writing a first object and when
// it's not.
// Open methods ignore the value of first and set it to true
QPDF_DLL
static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0);
QPDF_DLL
static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0);
// Close methods don't modify first. A true value indicates that
// we are closing an empty object.
QPDF_DLL
static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0);
QPDF_DLL
static void writeArrayClose(Pipeline*, bool first, size_t depth = 0);
// The item methods use the value of first to determine if this is
// the first item and always set it to false.
QPDF_DLL
static void writeDictionaryItem(
Pipeline*,
bool& first,
std::string const& key,
JSON const& value,
size_t depth = 0);
QPDF_DLL
static void writeArrayItem(
Pipeline*, bool& first, JSON const& element, size_t depth = 0);
// The JSON spec calls dictionaries "objects", but that creates
// too much confusion when referring to instances of the JSON
// class.
@ -224,29 +268,33 @@ class JSON
private:
static std::string encode_string(std::string const& utf8);
// Shared helper behind writeDictionaryClose and writeArrayClose.
// Writes `delimiter` to the pipeline; when `first` is false (at
// least one item was written), a newline and the indentation for
// `depth` are written before it.
static void
writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter);
static void writeIndent(Pipeline* p, size_t depth);
static void writeNext(Pipeline* p, bool& first, size_t depth);
struct JSON_value
{
virtual ~JSON_value() = default;
virtual std::string unparse(size_t depth) const = 0;
virtual void write(Pipeline*, size_t depth) const = 0;
};
struct JSON_dictionary: public JSON_value
{
virtual ~JSON_dictionary() = default;
virtual std::string unparse(size_t depth) const;
virtual void write(Pipeline*, size_t depth) const;
std::map<std::string, std::shared_ptr<JSON_value>> members;
};
struct JSON_array: public JSON_value
{
virtual ~JSON_array() = default;
virtual std::string unparse(size_t depth) const;
virtual void write(Pipeline*, size_t depth) const;
std::vector<std::shared_ptr<JSON_value>> elements;
};
struct JSON_string: public JSON_value
{
JSON_string(std::string const& utf8);
virtual ~JSON_string() = default;
virtual std::string unparse(size_t depth) const;
virtual void write(Pipeline*, size_t depth) const;
std::string utf8;
std::string encoded;
};
@ -256,20 +304,20 @@ class JSON
JSON_number(double val);
JSON_number(std::string const& val);
virtual ~JSON_number() = default;
virtual std::string unparse(size_t depth) const;
virtual void write(Pipeline*, size_t depth) const;
std::string encoded;
};
struct JSON_bool: public JSON_value
{
JSON_bool(bool val);
virtual ~JSON_bool() = default;
virtual std::string unparse(size_t depth) const;
virtual void write(Pipeline*, size_t depth) const;
bool value;
};
struct JSON_null: public JSON_value
{
virtual ~JSON_null() = default;
virtual std::string unparse(size_t depth) const;
virtual void write(Pipeline*, size_t depth) const;
};
JSON(std::shared_ptr<JSON_value>);

View File

@ -1,5 +1,7 @@
#include <qpdf/JSON.hh>
#include <qpdf/Pipeline.hh>
#include <qpdf/Pl_String.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
@ -18,51 +20,103 @@ JSON::JSON(std::shared_ptr<JSON_value> value) :
{
}
std::string
JSON::JSON_dictionary::unparse(size_t depth) const
void
JSON::writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter)
{
std::string result = "{";
bool first = true;
for (auto const& iter: members) {
if (first) {
first = false;
} else {
result.append(1, ',');
}
result.append(1, '\n');
result.append(2 * (1 + depth), ' ');
result +=
("\"" + iter.first + "\": " + iter.second->unparse(1 + depth));
}
if (!first) {
result.append(1, '\n');
result.append(2 * depth, ' ');
*p << "\n";
writeIndent(p, depth);
}
result.append(1, '}');
return result;
*p << delimiter;
}
std::string
JSON::JSON_array::unparse(size_t depth) const
// Write one indentation unit to the pipeline for each level of
// nesting given by `depth`. Nothing is written when depth is 0.
void
JSON::writeIndent(Pipeline* p, size_t depth)
{
    for (size_t remaining = depth; remaining != 0; --remaining) {
        *p << " ";
    }
}
// Position the output for the next item of an open dictionary or
// array: write a separating comma unless this is the first item,
// then a newline and the indentation for one level below `depth`.
// `first` is always false on return.
void
JSON::writeNext(Pipeline* p, bool& first, size_t depth)
{
    if (!first) {
        // Every item after the first is preceded by a comma.
        *p << ",";
    }
    first = false;
    *p << "\n";
    writeIndent(p, 1 + depth);
}
// Write the opening brace of a dictionary and reset `first` to true
// so that the next item written is treated as the first one. The
// incoming value of `first` is ignored; `depth` is not used here.
void
JSON::writeDictionaryOpen(Pipeline* p, bool& first, size_t depth)
{
    Pipeline& out = *p;
    out << "{";
    first = true;
}
// Write the opening bracket of an array and reset `first` to true
// so that the next element written is treated as the first one. The
// incoming value of `first` is ignored; `depth` is not used here.
void
JSON::writeArrayOpen(Pipeline* p, bool& first, size_t depth)
{
    Pipeline& out = *p;
    out << "[";
    first = true;
}
// Close a dictionary by delegating to writeClose with "}" as the
// closing delimiter. `first` and `depth` are passed through
// unchanged.
void
JSON::writeDictionaryClose(Pipeline* p, bool first, size_t depth)
{
    char const* const closing = "}";
    writeClose(p, first, depth, closing);
}
// Close an array by delegating to writeClose with "]" as the
// closing delimiter. `first` and `depth` are passed through
// unchanged.
void
JSON::writeArrayClose(Pipeline* p, bool first, size_t depth)
{
    char const* const closing = "]";
    writeClose(p, first, depth, closing);
}
// Write one `"key": value` entry of a dictionary. writeNext handles
// the separator, newline, and indentation and clears `first`; the
// value is then written one level deeper than `depth`.
//
// The key is placed between double quotes exactly as given: no
// escaping is applied in this function.
void
JSON::writeDictionaryItem(
    Pipeline* p,
    bool& first,
    std::string const& key,
    JSON const& value,
    size_t depth)
{
    writeNext(p, first, depth);
    Pipeline& out = *p;
    out << "\"";
    out << key;
    out << "\": ";
    value.write(p, 1 + depth);
}
// Write one element of an array. writeNext handles the separator,
// newline, and indentation and clears `first`; the element is then
// written one level deeper than `depth`.
void
JSON::writeArrayItem(
    Pipeline* p, bool& first, JSON const& element, size_t depth)
{
    writeNext(p, first, depth);
    size_t const child_depth = 1 + depth;
    element.write(p, child_depth);
}
void
JSON::JSON_dictionary::write(Pipeline* p, size_t depth) const
{
std::string result = "[";
bool first = true;
writeDictionaryOpen(p, first, depth);
for (auto const& iter: members) {
writeDictionaryItem(p, first, iter.first, iter.second, depth);
}
writeDictionaryClose(p, first, depth);
}
void
JSON::JSON_array::write(Pipeline* p, size_t depth) const
{
bool first = true;
writeArrayOpen(p, first, depth);
for (auto const& element: elements) {
if (first) {
first = false;
} else {
result.append(1, ',');
}
result.append(1, '\n');
result.append(2 * (1 + depth), ' ');
result += element->unparse(1 + depth);
writeArrayItem(p, first, element, depth);
}
if (!first) {
result.append(1, '\n');
result.append(2 * depth, ' ');
}
result.append(1, ']');
return result;
writeArrayClose(p, first, depth);
}
JSON::JSON_string::JSON_string(std::string const& utf8) :
@ -71,10 +125,10 @@ JSON::JSON_string::JSON_string(std::string const& utf8) :
{
}
std::string
JSON::JSON_string::unparse(size_t) const
void
JSON::JSON_string::write(Pipeline* p, size_t) const
{
return "\"" + encoded + "\"";
*p << "\"" << encoded << "\"";
}
JSON::JSON_number::JSON_number(long long value) :
@ -92,10 +146,10 @@ JSON::JSON_number::JSON_number(std::string const& value) :
{
}
std::string
JSON::JSON_number::unparse(size_t) const
void
JSON::JSON_number::write(Pipeline* p, size_t) const
{
return encoded;
*p << encoded;
}
JSON::JSON_bool::JSON_bool(bool val) :
@ -103,26 +157,35 @@ JSON::JSON_bool::JSON_bool(bool val) :
{
}
std::string
JSON::JSON_bool::unparse(size_t) const
void
JSON::JSON_bool::write(Pipeline* p, size_t) const
{
return value ? "true" : "false";
*p << (value ? "true" : "false");
}
std::string
JSON::JSON_null::unparse(size_t) const
void
JSON::JSON_null::write(Pipeline* p, size_t) const
{
return "null";
*p << "null";
}
// Write this JSON value to the pipeline at the given nesting depth.
// A JSON object that holds no value is written as the literal
// `null`; otherwise the held value writes itself.
void
JSON::write(Pipeline* p, size_t depth) const
{
    if (this->m->value.get() == nullptr) {
        *p << "null";
        return;
    }
    this->m->value->write(p, depth);
}
std::string
JSON::unparse() const
{
if (0 == this->m->value.get()) {
return "null";
} else {
return this->m->value->unparse(0);
}
std::string s;
Pl_String p("unparse", s);
write(&p, 0);
return s;
}
std::string

View File

@ -123,6 +123,10 @@ For a detailed list of changes, please see the file
- Add new ``Pipeline`` type ``Pl_String`` to append to a
``std::string``.
- Enhance JSON class to better support incrementally reading and
writing large amounts of data without having to keep everything
in memory.
- Other changes
- In JSON v1 mode, the ``"objects"`` key now reflects the repaired