mirror of
https://github.com/qpdf/qpdf.git
synced 2024-09-28 04:59:05 +00:00
Add new private method QPDF::readStream
This commit is contained in:
parent
2643ed4dd9
commit
c422b918b1
@ -1008,6 +1008,7 @@ class QPDF
|
|||||||
void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
|
void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
|
||||||
QPDFObjectHandle readTrailer();
|
QPDFObjectHandle readTrailer();
|
||||||
QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
|
QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
|
||||||
|
void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
|
||||||
QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og);
|
QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og);
|
||||||
size_t recoverStreamLength(
|
size_t recoverStreamLength(
|
||||||
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);
|
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);
|
||||||
|
165
libqpdf/QPDF.cc
165
libqpdf/QPDF.cc
@ -1296,7 +1296,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
|
|||||||
std::shared_ptr<StringDecrypter> decrypter_ph;
|
std::shared_ptr<StringDecrypter> decrypter_ph;
|
||||||
StringDecrypter* decrypter = nullptr;
|
StringDecrypter* decrypter = nullptr;
|
||||||
if (m->encp->encrypted) {
|
if (m->encp->encrypted) {
|
||||||
decrypter_ph = std::make_shared<StringDecrypter>(this, og);
|
decrypter_ph = std::make_unique<StringDecrypter>(this, og);
|
||||||
decrypter = decrypter_ph.get();
|
decrypter = decrypter_ph.get();
|
||||||
}
|
}
|
||||||
auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this)
|
auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this)
|
||||||
@ -1309,85 +1309,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
|
|||||||
// check for stream
|
// check for stream
|
||||||
qpdf_offset_t cur_offset = m->file->tell();
|
qpdf_offset_t cur_offset = m->file->tell();
|
||||||
if (readToken(m->file).isWord("stream")) {
|
if (readToken(m->file).isWord("stream")) {
|
||||||
// The PDF specification states that the word "stream" should be followed by either a
|
readStream(object, og, offset);
|
||||||
// carriage return and a newline or by a newline alone. It specifically disallowed
|
|
||||||
// following it by a carriage return alone since, in that case, there would be no way to
|
|
||||||
// tell whether the NL in a CR NL sequence was part of the stream data. However, some
|
|
||||||
// readers, including Adobe reader, accept a carriage return by itself when followed by
|
|
||||||
// a non-newline character, so that's what we do here. We have also seen files that have
|
|
||||||
// extraneous whitespace between the stream keyword and the newline.
|
|
||||||
bool done = false;
|
|
||||||
while (!done) {
|
|
||||||
done = true;
|
|
||||||
char ch;
|
|
||||||
if (m->file->read(&ch, 1) == 0) {
|
|
||||||
// A premature EOF here will result in some other problem that will get reported
|
|
||||||
// at another time.
|
|
||||||
} else if (ch == '\n') {
|
|
||||||
// ready to read stream data
|
|
||||||
QTC::TC("qpdf", "QPDF stream with NL only");
|
|
||||||
} else if (ch == '\r') {
|
|
||||||
// Read another character
|
|
||||||
if (m->file->read(&ch, 1) != 0) {
|
|
||||||
if (ch == '\n') {
|
|
||||||
// Ready to read stream data
|
|
||||||
QTC::TC("qpdf", "QPDF stream with CRNL");
|
|
||||||
} else {
|
|
||||||
// Treat the \r by itself as the whitespace after endstream and start
|
|
||||||
// reading stream data in spite of not having seen a newline.
|
|
||||||
QTC::TC("qpdf", "QPDF stream with CR only");
|
|
||||||
m->file->unreadCh(ch);
|
|
||||||
warn(damagedPDF(
|
|
||||||
m->file->tell(),
|
|
||||||
"stream keyword followed by carriage return only"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (QUtil::is_space(ch)) {
|
|
||||||
warn(damagedPDF(
|
|
||||||
m->file->tell(), "stream keyword followed by extraneous whitespace"));
|
|
||||||
done = false;
|
|
||||||
} else {
|
|
||||||
QTC::TC("qpdf", "QPDF stream without newline");
|
|
||||||
m->file->unreadCh(ch);
|
|
||||||
warn(damagedPDF(
|
|
||||||
m->file->tell(), "stream keyword not followed by proper line terminator"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Must get offset before accessing any additional objects since resolving a previously
|
|
||||||
// unresolved indirect object will change file position.
|
|
||||||
qpdf_offset_t stream_offset = m->file->tell();
|
|
||||||
size_t length = 0;
|
|
||||||
|
|
||||||
try {
|
|
||||||
auto length_obj = object.getKey("/Length");
|
|
||||||
|
|
||||||
if (!length_obj.isInteger()) {
|
|
||||||
if (length_obj.isNull()) {
|
|
||||||
QTC::TC("qpdf", "QPDF stream without length");
|
|
||||||
throw damagedPDF(offset, "stream dictionary lacks /Length key");
|
|
||||||
}
|
|
||||||
QTC::TC("qpdf", "QPDF stream length not integer");
|
|
||||||
throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
|
|
||||||
}
|
|
||||||
|
|
||||||
length = toS(length_obj.getUIntValue());
|
|
||||||
// Seek in two steps to avoid potential integer overflow
|
|
||||||
m->file->seek(stream_offset, SEEK_SET);
|
|
||||||
m->file->seek(toO(length), SEEK_CUR);
|
|
||||||
if (!readToken(m->file).isWord("endstream")) {
|
|
||||||
QTC::TC("qpdf", "QPDF missing endstream");
|
|
||||||
throw damagedPDF("expected endstream");
|
|
||||||
}
|
|
||||||
} catch (QPDFExc& e) {
|
|
||||||
if (m->attempt_recovery) {
|
|
||||||
warn(e);
|
|
||||||
length = recoverStreamLength(m->file, og, stream_offset);
|
|
||||||
} else {
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
|
|
||||||
} else {
|
} else {
|
||||||
m->file->seek(cur_offset, SEEK_SET);
|
m->file->seek(cur_offset, SEEK_SET);
|
||||||
}
|
}
|
||||||
@ -1398,6 +1320,89 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
|
|||||||
return object;
|
return object;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// After reading stream dictionary and stream keyword, read rest of stream.
|
||||||
|
void
|
||||||
|
QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
|
||||||
|
{
|
||||||
|
// The PDF specification states that the word "stream" should be followed by either a carriage
|
||||||
|
// return and a newline or by a newline alone. It specifically disallowed following it by a
|
||||||
|
// carriage return alone since, in that case, there would be no way to tell whether the NL in a
|
||||||
|
// CR NL sequence was part of the stream data. However, some readers, including Adobe reader,
|
||||||
|
// accept a carriage return by itself when followed by a non-newline character, so that's what
|
||||||
|
// we do here. We have also seen files that have extraneous whitespace between the stream
|
||||||
|
// keyword and the newline.
|
||||||
|
bool done = false;
|
||||||
|
while (!done) {
|
||||||
|
done = true;
|
||||||
|
char ch;
|
||||||
|
if (m->file->read(&ch, 1) == 0) {
|
||||||
|
// A premature EOF here will result in some other problem that will get reported at
|
||||||
|
// another time.
|
||||||
|
} else if (ch == '\n') {
|
||||||
|
// ready to read stream data
|
||||||
|
QTC::TC("qpdf", "QPDF stream with NL only");
|
||||||
|
} else if (ch == '\r') {
|
||||||
|
// Read another character
|
||||||
|
if (m->file->read(&ch, 1) != 0) {
|
||||||
|
if (ch == '\n') {
|
||||||
|
// Ready to read stream data
|
||||||
|
QTC::TC("qpdf", "QPDF stream with CRNL");
|
||||||
|
} else {
|
||||||
|
// Treat the \r by itself as the whitespace after endstream and start reading
|
||||||
|
// stream data in spite of not having seen a newline.
|
||||||
|
QTC::TC("qpdf", "QPDF stream with CR only");
|
||||||
|
m->file->unreadCh(ch);
|
||||||
|
warn(damagedPDF(
|
||||||
|
m->file->tell(), "stream keyword followed by carriage return only"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (QUtil::is_space(ch)) {
|
||||||
|
warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
|
||||||
|
done = false;
|
||||||
|
} else {
|
||||||
|
QTC::TC("qpdf", "QPDF stream without newline");
|
||||||
|
m->file->unreadCh(ch);
|
||||||
|
warn(damagedPDF(
|
||||||
|
m->file->tell(), "stream keyword not followed by proper line terminator"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must get offset before accessing any additional objects since resolving a previously
|
||||||
|
// unresolved indirect object will change file position.
|
||||||
|
qpdf_offset_t stream_offset = m->file->tell();
|
||||||
|
size_t length = 0;
|
||||||
|
|
||||||
|
try {
|
||||||
|
auto length_obj = object.getKey("/Length");
|
||||||
|
|
||||||
|
if (!length_obj.isInteger()) {
|
||||||
|
if (length_obj.isNull()) {
|
||||||
|
QTC::TC("qpdf", "QPDF stream without length");
|
||||||
|
throw damagedPDF(offset, "stream dictionary lacks /Length key");
|
||||||
|
}
|
||||||
|
QTC::TC("qpdf", "QPDF stream length not integer");
|
||||||
|
throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");
|
||||||
|
}
|
||||||
|
|
||||||
|
length = toS(length_obj.getUIntValue());
|
||||||
|
// Seek in two steps to avoid potential integer overflow
|
||||||
|
m->file->seek(stream_offset, SEEK_SET);
|
||||||
|
m->file->seek(toO(length), SEEK_CUR);
|
||||||
|
if (!readToken(m->file).isWord("endstream")) {
|
||||||
|
QTC::TC("qpdf", "QPDF missing endstream");
|
||||||
|
throw damagedPDF("expected endstream");
|
||||||
|
}
|
||||||
|
} catch (QPDFExc& e) {
|
||||||
|
if (m->attempt_recovery) {
|
||||||
|
warn(e);
|
||||||
|
length = recoverStreamLength(m->file, og, stream_offset);
|
||||||
|
} else {
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
|
||||||
|
}
|
||||||
|
|
||||||
QPDFObjectHandle
|
QPDFObjectHandle
|
||||||
QPDF::readObjectInStream(std::shared_ptr<InputSource> input, QPDFObjGen og)
|
QPDF::readObjectInStream(std::shared_ptr<InputSource> input, QPDFObjGen og)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user