mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-31 10:58:25 +00:00
Support stream data -- not tested
There are no automated tests yet, but committing work so far in preparation for some refactoring.
This commit is contained in:
parent
63c7eefe9d
commit
0fe8d44762
9
TODO
9
TODO
@ -54,14 +54,14 @@ Soon: Break ground on "Document-level work"
|
||||
Output JSON v2
|
||||
==============
|
||||
|
||||
XXX
|
||||
|
||||
* Reread from perspective of update
|
||||
* Test all ignore cases with QTC
|
||||
* Test case of correct file with dict before data/datafile
|
||||
* Have a test case if possible that exercises the object description
|
||||
which means we need some kind of semantic error that gets caught
|
||||
after creation.
|
||||
* Test invalid data, invalid data file
|
||||
* Tests: round-trip through json, round-trip through qpdf --qdf
|
||||
|
||||
Try to never flatten pages tree. Make sure we do something reasonable
|
||||
with pages tree repair. The problem is that if pages tree repair is
|
||||
@ -236,6 +236,11 @@ Other documentation fodder:
|
||||
|
||||
You can't create a PDF from v1 json because
|
||||
|
||||
* Change: names are written in canonical form with a leading slash
|
||||
just as they are treated in the code. In v1, they were written in
|
||||
PDF syntax in the json file. Example: /text#2fplain in pdf will be
|
||||
written as /text/plain in json v2 and as /text#2fplain in json v1.
|
||||
|
||||
* The PDF version header is not recorded
|
||||
|
||||
* Strings cannot be unambiguously encoded/decoded
|
||||
|
@ -998,7 +998,8 @@ class QPDF
|
||||
class JSONReactor: public JSON::Reactor
|
||||
{
|
||||
public:
|
||||
JSONReactor(QPDF&, std::string const& filename, bool must_be_complete);
|
||||
JSONReactor(
|
||||
QPDF&, std::shared_ptr<InputSource> is, bool must_be_complete);
|
||||
virtual ~JSONReactor() = default;
|
||||
virtual void dictionaryStart() override;
|
||||
virtual void arrayStart() override;
|
||||
@ -1033,7 +1034,7 @@ class QPDF
|
||||
QPDFObjectHandle to_replace, QPDFObjectHandle replacement);
|
||||
|
||||
QPDF& pdf;
|
||||
std::string filename;
|
||||
std::shared_ptr<InputSource> is;
|
||||
bool must_be_complete;
|
||||
bool errors;
|
||||
bool parse_error;
|
||||
|
@ -37,9 +37,10 @@ QPDF_Dictionary::getJSON(int json_version)
|
||||
JSON j = JSON::makeDictionary();
|
||||
for (auto& iter: this->items) {
|
||||
if (!iter.second.isNull()) {
|
||||
j.addDictionaryMember(
|
||||
QPDF_Name::normalizeName(iter.first),
|
||||
iter.second.getJSON(json_version));
|
||||
std::string key =
|
||||
(json_version == 1 ? QPDF_Name::normalizeName(iter.first)
|
||||
: iter.first);
|
||||
j.addDictionaryMember(key, iter.second.getJSON(json_version));
|
||||
}
|
||||
}
|
||||
return j;
|
||||
|
@ -42,7 +42,11 @@ QPDF_Name::unparse()
|
||||
// Return this name as a JSON string.
//
// json_version: the qpdf JSON output version being generated.
//   * Version 1 emits the result of normalizeName() applied to the
//     stored name.
//   * Any other version emits the stored name unchanged.
//
// A stale unconditional return that preceded the version check has been
// removed; it made the version-specific branch below unreachable.
JSON
QPDF_Name::getJSON(int json_version)
{
    if (json_version == 1) {
        return JSON::makeString(normalizeName(this->name));
    } else {
        return JSON::makeString(this->name);
    }
}
|
||||
|
||||
QPDFObject::object_type_e
|
||||
|
@ -1,9 +1,11 @@
|
||||
#include <qpdf/QPDF.hh>
|
||||
|
||||
#include <qpdf/FileInputSource.hh>
|
||||
#include <qpdf/Pl_Base64.hh>
|
||||
#include <qpdf/QIntC.hh>
|
||||
#include <qpdf/QTC.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <algorithm>
|
||||
#include <regex>
|
||||
|
||||
// This chart shows an example of the state transitions that would
|
||||
@ -52,17 +54,40 @@ static char const* JSON_PDF = (
|
||||
"9\n"
|
||||
"%%EOF\n");
|
||||
|
||||
// Regular expressions used to classify string values and keys.
//
// Note use of [\\s\\S] rather than . to match any character since .
// doesn't match newlines. Each pattern is anchored with ^...$ and is
// defined exactly once; the earlier "."-based definitions of
// UNICODE_RE and NAME_RE were duplicates and have been dropped.

// A version number: digits, a dot, digits (e.g. "1.7").
static std::regex PDF_VERSION_RE("^\\d+\\.\\d+$");
// An object key of the form "obj:N G R"; captures N and G.
static std::regex OBJ_KEY_RE("^obj:(\\d+) (\\d+) R$");
// An indirect object reference of the form "N G R"; captures N and G.
static std::regex INDIRECT_OBJ_RE("^(\\d+) (\\d+) R$");
// A "u:"-prefixed string; captures everything after the prefix,
// including any newlines.
static std::regex UNICODE_RE("^u:([\\s\\S]*)$");
// A "b:"-prefixed string whose payload is zero or more pairs of hex
// digits; captures the payload.
static std::regex BINARY_RE("^b:((?:[0-9a-fA-F]{2})*)$");
// Any string that starts with "/", including ones containing newlines.
static std::regex NAME_RE("^/[\\s\\S]*$");
|
||||
|
||||
static std::function<void(Pipeline*)>
|
||||
provide_data(std::shared_ptr<InputSource> is, size_t start, size_t end)
|
||||
{
|
||||
return [is, start, end](Pipeline* p) {
|
||||
Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);
|
||||
p = &decode;
|
||||
size_t bytes = end - start;
|
||||
char buf[8192];
|
||||
is->seek(QIntC::to_offset(start), SEEK_SET);
|
||||
size_t len = 0;
|
||||
while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {
|
||||
p->write(buf, len);
|
||||
bytes -= len;
|
||||
if (bytes == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
decode.finish();
|
||||
};
|
||||
}
|
||||
|
||||
QPDF::JSONReactor::JSONReactor(
|
||||
QPDF& pdf, std::string const& filename, bool must_be_complete) :
|
||||
QPDF& pdf, std::shared_ptr<InputSource> is, bool must_be_complete) :
|
||||
pdf(pdf),
|
||||
filename(filename),
|
||||
is(is),
|
||||
must_be_complete(must_be_complete),
|
||||
errors(false),
|
||||
parse_error(false),
|
||||
@ -334,8 +359,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
|
||||
replacement =
|
||||
pdf.reserveStream(tos.getObjectID(), tos.getGeneration());
|
||||
replaceObject(tos, replacement);
|
||||
replacement.replaceStreamData(
|
||||
"", "<<>>"_qpdf, "<<>>"_qpdf); // QXXXQ
|
||||
}
|
||||
} else {
|
||||
// Ignore unknown keys for forward compatibility
|
||||
@ -369,6 +392,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
|
||||
throw std::logic_error("no object on stack in st_stream");
|
||||
}
|
||||
auto tos = object_stack.back();
|
||||
auto uninitialized = QPDFObjectHandle();
|
||||
if (!tos.isStream()) {
|
||||
// QXXXQ QTC in update mode
|
||||
error(value.getStart(), "this object is not a stream");
|
||||
@ -388,10 +412,33 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
|
||||
}
|
||||
} else if (key == "data") {
|
||||
this->saw_data = true;
|
||||
// QXXXQ
|
||||
std::string v;
|
||||
if (!value.getString(v)) {
|
||||
error(value.getStart(), "\"stream.data\" must be a string");
|
||||
} else {
|
||||
// The range includes the quotes.
|
||||
auto start = value.getStart() + 1;
|
||||
auto end = value.getEnd() - 1;
|
||||
if (end < start) {
|
||||
throw std::logic_error("QPDF_json: JSON string length < 0");
|
||||
}
|
||||
tos.replaceStreamData(
|
||||
provide_data(is, start, end), uninitialized, uninitialized);
|
||||
}
|
||||
} else if (key == "datafile") {
|
||||
this->saw_datafile = true;
|
||||
// QXXXQ
|
||||
std::string filename;
|
||||
if (value.getString(filename)) {
|
||||
tos.replaceStreamData(
|
||||
QUtil::file_provider(filename),
|
||||
uninitialized,
|
||||
uninitialized);
|
||||
} else {
|
||||
error(
|
||||
value.getStart(),
|
||||
"\"stream.datafile\" must be a string containing a file "
|
||||
"name");
|
||||
}
|
||||
} else {
|
||||
// Ignore unknown keys for forward compatibility.
|
||||
// QXXXQ QTC
|
||||
@ -471,7 +518,8 @@ QPDF::JSONReactor::makeObject(JSON const& value)
|
||||
// QXXXQ include object number in description
|
||||
result.setObjectDescription(
|
||||
&this->pdf,
|
||||
this->filename + " offset " + QUtil::uint_to_string(value.getStart()));
|
||||
this->is->getName() + " offset " +
|
||||
QUtil::uint_to_string(value.getStart()));
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -503,7 +551,7 @@ QPDF::updateFromJSON(std::shared_ptr<InputSource> is)
|
||||
void
|
||||
QPDF::importJSON(std::shared_ptr<InputSource> is, bool must_be_complete)
|
||||
{
|
||||
JSONReactor reactor(*this, is->getName(), must_be_complete);
|
||||
JSONReactor reactor(*this, is, must_be_complete);
|
||||
try {
|
||||
JSON::parse(*is, &reactor);
|
||||
} catch (std::runtime_error& e) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user