2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-01 01:40:51 +00:00

Support stream data -- not tested

There are no automated tests yet, but committing work so far in
preparation for some refactoring.
This commit is contained in:
Jay Berkenbilt 2022-05-17 19:18:02 -04:00
parent 63c7eefe9d
commit 0fe8d44762
5 changed files with 77 additions and 18 deletions

9
TODO
View File

@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work"
Output JSON v2
==============
XXX
* Reread from perspective of update
* Test all ignore cases with QTC
* Test case of correct file with dict before data/datafile
* Have a test case if possible that exercises the object description
which means we need some kind of semantic error that gets caught
after creation.
* Test invalid data, invalid data file
* Tests: round-trip through json, round-trip through qpdf --qdf
Try to never flatten pages tree. Make sure we do something reasonable
with pages tree repair. The problem is that if pages tree repair is
@ -236,6 +236,11 @@ Other documentation fodder:
You can't create a PDF from v1 json because
* Change: names are written in canonical form with a leading slash
just as they are treated in the code. In v1, they were written in
PDF syntax in the json file. Example: /text#2fplain in pdf will be
written as /text/plain in json v2 and as /text#2fplain in json v1.
* The PDF version header is not recorded
* Strings cannot be unambiguously encoded/decoded

View File

@ -998,7 +998,8 @@ class QPDF
class JSONReactor: public JSON::Reactor
{
public:
JSONReactor(QPDF&, std::string const& filename, bool must_be_complete);
JSONReactor(
QPDF&, std::shared_ptr<InputSource> is, bool must_be_complete);
virtual ~JSONReactor() = default;
virtual void dictionaryStart() override;
virtual void arrayStart() override;
@ -1033,7 +1034,7 @@ class QPDF
QPDFObjectHandle to_replace, QPDFObjectHandle replacement);
QPDF& pdf;
std::string filename;
std::shared_ptr<InputSource> is;
bool must_be_complete;
bool errors;
bool parse_error;

View File

@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version)
JSON j = JSON::makeDictionary();
for (auto& iter: this->items) {
if (!iter.second.isNull()) {
j.addDictionaryMember(
QPDF_Name::normalizeName(iter.first),
iter.second.getJSON(json_version));
std::string key =
(json_version == 1 ? QPDF_Name::normalizeName(iter.first)
: iter.first);
j.addDictionaryMember(key, iter.second.getJSON(json_version));
}
}
return j;

View File

@ -42,7 +42,11 @@ QPDF_Name::unparse()
JSON
QPDF_Name::getJSON(int json_version)
{
return JSON::makeString(normalizeName(this->name));
if (json_version == 1) {
return JSON::makeString(normalizeName(this->name));
} else {
return JSON::makeString(this->name);
}
}
QPDFObject::object_type_e

View File

@ -1,9 +1,11 @@
#include <qpdf/QPDF.hh>
#include <qpdf/FileInputSource.hh>
#include <qpdf/Pl_Base64.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <algorithm>
#include <regex>
// This chart shows an example of the state transitions that would
@ -52,17 +54,40 @@ static char const* JSON_PDF = (
"9\n"
"%%EOF\n");
// Note use of [\\s\\S] rather than . to match any character since .
// doesn't match newlines.
static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$");
static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$");
static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$");
static std::regex UNICODE_RE("^u:(.*)$");
static std::regex UNICODE_RE("^u:([\\s\\S]*)$");
static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$");
static std::regex NAME_RE("^/.*$");
static std::regex NAME_RE("^/[\\s\\S]*$");
static std::function<void(Pipeline*)>
provide_data(std::shared_ptr<InputSource> is, size_t start, size_t end)
{
return [is, start, end](Pipeline* p) {
Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);
p = &decode;
size_t bytes = end - start;
char buf[8192];
is->seek(QIntC::to_offset(start), SEEK_SET);
size_t len = 0;
while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {
p->write(buf, len);
bytes -= len;
if (bytes == 0) {
break;
}
}
decode.finish();
};
}
QPDF::JSONReactor::JSONReactor(
QPDF& pdf, std::string const& filename, bool must_be_complete) :
QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
pdf(pdf),
filename(filename),
is(is),
must_be_complete(must_be_complete),
errors(false),
parse_error(false),
@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
replacement =
pdf.reserveStream(tos.getObjectID(), tos.getGeneration());
replaceObject(tos, replacement);
replacement.replaceStreamData(
"", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ
}
} else {
// Ignore unknown keys for forward compatibility
@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
throw std::logic_error("no object on stack in st_stream");
}
auto tos = object_stack.back();
auto uninitialized = QPDFObjectHandle();
if (!tos.isStream()) {
// QXXXQ QTC in update mode
error(value.getStart(), "this object is not a stream");
@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
}
} else if (key == "data") {
this->saw_data = true;
// QXXXQ
std::string v;
if (!value.getString(v)) {
error(value.getStart(), "\"stream.data\" must be a string");
} else {
// The range includes the quotes.
auto start = value.getStart() + 1;
auto end = value.getEnd() - 1;
if (end < start) {
throw std::logic_error("QPDF_json: JSON string length < 0");
}
tos.replaceStreamData(
provide_data(is, start, end), uninitialized, uninitialized);
}
} else if (key == "datafile") {
this->saw_datafile = true;
// QXXXQ
std::string filename;
if (value.getString(filename)) {
tos.replaceStreamData(
QUtil::file_provider(filename),
uninitialized,
uninitialized);
} else {
error(
value.getStart(),
"\"stream.datafile\" must be a string containing a file "
"name");
}
} else {
// Ignore unknown keys for forward compatibility.
// QXXXQ QTC
@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value)
// QXXXQ include object number in description
result.setObjectDescription(
&this->pdf,
this->filename + " offset " + QUtil::uint_to_string(value.getStart()));
this->is->getName() + " offset " +
QUtil::uint_to_string(value.getStart()));
return result;
}
@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
void
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
{
JSONReactor reactor(*this, is->getName(), must_be_complete);
JSONReactor reactor(*this, is, must_be_complete);
try {
JSON::parse(*is, &reactor);
} catch (std::runtime_error& e) {