diff --git a/TODO b/TODO index 64d537a1..f3eaebaf 100644 --- a/TODO +++ b/TODO @@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work" Output JSON v2 ============== -XXX - * Reread from perspective of update * Test all ignore cases with QTC * Test case of correct file with dict before data/datafile * Have a test case if possible that exercises the object description which means we need some kind of semantic error that gets caught after creation. +* Test invalid data, invalid data file +* Tests: round-trip through json, round-trip through qpdf --qdf Try to never flatten pages tree. Make sure we do something reasonable with pages tree repair. The problem is that if pages tree repair is @@ -236,6 +236,11 @@ Other documentation fodder: You can't create a PDF from v1 json because +* Change: names are written in canonical form with a leading slash + just as they are treated in the code. In v1, they were written in + PDF syntax in the json file. Example: /text#2fplain in pdf will be + written as /text/plain in json v2 and as /text#2fplain in json v1. + * The PDF version header is not recorded * Strings cannot be unambiguously encoded/decoded diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index f3ce4684..146015dc 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -998,7 +998,8 @@ class QPDF class JSONReactor: public JSON::Reactor { public: - JSONReactor(QPDF&, std::string const& filename, bool must_be_complete); + JSONReactor( + QPDF&, std::shared_ptr is, bool must_be_complete); virtual ~JSONReactor() = default; virtual void dictionaryStart() override; virtual void arrayStart() override; @@ -1033,7 +1034,7 @@ class QPDF QPDFObjectHandle to_replace, QPDFObjectHandle replacement); QPDF& pdf; - std::string filename; + std::shared_ptr is; bool must_be_complete; bool errors; bool parse_error; diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc index 67d59a2d..26239317 100644 --- a/libqpdf/QPDF_Dictionary.cc +++ b/libqpdf/QPDF_Dictionary.cc @@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version) JSON j = JSON::makeDictionary(); for (auto& iter: this->items) { if (!iter.second.isNull()) { - j.addDictionaryMember( - QPDF_Name::normalizeName(iter.first), - iter.second.getJSON(json_version)); + std::string key = + (json_version == 1 ? QPDF_Name::normalizeName(iter.first) + : iter.first); + j.addDictionaryMember(key, iter.second.getJSON(json_version)); } } return j; diff --git a/libqpdf/QPDF_Name.cc b/libqpdf/QPDF_Name.cc index 8dc48faa..236d6133 100644 --- a/libqpdf/QPDF_Name.cc +++ b/libqpdf/QPDF_Name.cc @@ -42,7 +42,11 @@ QPDF_Name::unparse() JSON QPDF_Name::getJSON(int json_version) { - return JSON::makeString(normalizeName(this->name)); + if (json_version == 1) { + return JSON::makeString(normalizeName(this->name)); + } else { + return JSON::makeString(this->name); + } } QPDFObject::object_type_e diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index d71c75ba..1037a2cf 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -1,9 +1,11 @@ #include #include +#include #include #include #include +#include #include // This chart shows an example of the state transitions that would @@ -52,17 +54,40 @@ static char const* JSON_PDF = ( "9\n" "%%EOF\n"); +// Note use of [\\s\\S] rather than . to match any character since . +// doesn't match newlines. static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$"); static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$"); static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$"); -static std::regex UNICODE_RE("^u:(.*)$"); +static std::regex UNICODE_RE("^u:([\\s\\S]*)$"); static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$"); -static std::regex NAME_RE("^/.*$"); +static std::regex NAME_RE("^/[\\s\\S]*$"); + +static std::function +provide_data(std::shared_ptr is, size_t start, size_t end) +{ + return [is, start, end](Pipeline* p) { + Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode); + p = &decode; + size_t bytes = end - start; + char buf[8192]; + is->seek(QIntC::to_offset(start), SEEK_SET); + size_t len = 0; + while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) { + p->write(buf, len); + bytes -= len; + if (bytes == 0) { + break; + } + } + decode.finish(); + }; +} QPDF::JSONReactor::JSONReactor( - QPDF& pdf, std::string const& filename, bool must_be_complete) : + QPDF& pdf, std::shared_ptr is, bool must_be_complete) : pdf(pdf), - filename(filename), + is(is), must_be_complete(must_be_complete), errors(false), parse_error(false), @@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) replacement = pdf.reserveStream(tos.getObjectID(), tos.getGeneration()); replaceObject(tos, replacement); - replacement.replaceStreamData( - "", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ } } else { // Ignore unknown keys for forward compatibility @@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) throw std::logic_error("no object on stack in st_stream"); } auto tos = object_stack.back(); + auto uninitialized = QPDFObjectHandle(); if (!tos.isStream()) { // QXXXQ QTC in update mode error(value.getStart(), "this object is not a stream"); @@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) } } else if (key == "data") { this->saw_data = true; - // QXXXQ + std::string v; + if (!value.getString(v)) { + error(value.getStart(), "\"stream.data\" must be a string"); + } else { + // The range includes the quotes. + auto start = value.getStart() + 1; + auto end = value.getEnd() - 1; + if (end < start) { + throw std::logic_error("QPDF_json: JSON string length < 0"); + } + tos.replaceStreamData( + provide_data(is, start, end), uninitialized, uninitialized); + } } else if (key == "datafile") { this->saw_datafile = true; - // QXXXQ + std::string filename; + if (value.getString(filename)) { + tos.replaceStreamData( + QUtil::file_provider(filename), + uninitialized, + uninitialized); + } else { + error( + value.getStart(), + "\"stream.datafile\" must be a string containing a file " + "name"); + } } else { // Ignore unknown keys for forward compatibility. // QXXXQ QTC @@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value) // QXXXQ include object number in description result.setObjectDescription( &this->pdf, - this->filename + " offset " + QUtil::uint_to_string(value.getStart())); + this->is->getName() + " offset " + + QUtil::uint_to_string(value.getStart())); return result; } @@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr is) void QPDF::importJSON(std::shared_ptr is, bool must_be_complete) { - JSONReactor reactor(*this, is->getName(), must_be_complete); + JSONReactor reactor(*this, is, must_be_complete); try { JSON::parse(*is, &reactor); } catch (std::runtime_error& e) {