2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-09-28 04:59:05 +00:00

Add new private method QPDF::readStream

This commit is contained in:
m-holger 2023-07-08 11:38:23 +01:00
parent 2643ed4dd9
commit c422b918b1
2 changed files with 86 additions and 80 deletions

View File

@ -1008,6 +1008,7 @@ class QPDF
void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
QPDFObjectHandle readTrailer(); QPDFObjectHandle readTrailer();
QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og); QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>, QPDFObjGen og);
size_t recoverStreamLength( size_t recoverStreamLength(
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset); std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);

View File

@ -1296,7 +1296,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
std::shared_ptr<StringDecrypter> decrypter_ph; std::shared_ptr<StringDecrypter> decrypter_ph;
StringDecrypter* decrypter = nullptr; StringDecrypter* decrypter = nullptr;
if (m->encp->encrypted) { if (m->encp->encrypted) {
decrypter_ph = std::make_shared<StringDecrypter>(this, og); decrypter_ph = std::make_unique<StringDecrypter>(this, og);
decrypter = decrypter_ph.get(); decrypter = decrypter_ph.get();
} }
auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this) auto object = QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter, this)
@ -1309,20 +1309,35 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
// check for stream // check for stream
qpdf_offset_t cur_offset = m->file->tell(); qpdf_offset_t cur_offset = m->file->tell();
if (readToken(m->file).isWord("stream")) { if (readToken(m->file).isWord("stream")) {
// The PDF specification states that the word "stream" should be followed by either a readStream(object, og, offset);
// carriage return and a newline or by a newline alone. It specifically disallowed } else {
// following it by a carriage return alone since, in that case, there would be no way to m->file->seek(cur_offset, SEEK_SET);
// tell whether the NL in a CR NL sequence was part of the stream data. However, some }
// readers, including Adobe reader, accept a carriage return by itself when followed by }
// a non-newline character, so that's what we do here. We have also seen files that have
// extraneous whitespace between the stream keyword and the newline. // Override last_offset so that it points to the beginning of the object we just read
m->file->setLastOffset(offset);
return object;
}
// After reading stream dictionary and stream keyword, read rest of stream.
void
QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
{
// The PDF specification states that the word "stream" should be followed by either a carriage
// return and a newline or by a newline alone. It specifically disallows following it by a
// carriage return alone since, in that case, there would be no way to tell whether the NL in a
// CR NL sequence was part of the stream data. However, some readers, including Adobe reader,
// accept a carriage return by itself when followed by a non-newline character, so that's what
// we do here. We have also seen files that have extraneous whitespace between the stream
// keyword and the newline.
bool done = false; bool done = false;
while (!done) { while (!done) {
done = true; done = true;
char ch; char ch;
if (m->file->read(&ch, 1) == 0) { if (m->file->read(&ch, 1) == 0) {
// A premature EOF here will result in some other problem that will get reported // A premature EOF here will result in some other problem that will get reported at
// at another time. // another time.
} else if (ch == '\n') { } else if (ch == '\n') {
// ready to read stream data // ready to read stream data
QTC::TC("qpdf", "QPDF stream with NL only"); QTC::TC("qpdf", "QPDF stream with NL only");
@ -1333,18 +1348,16 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
// Ready to read stream data // Ready to read stream data
QTC::TC("qpdf", "QPDF stream with CRNL"); QTC::TC("qpdf", "QPDF stream with CRNL");
} else { } else {
// Treat the \r by itself as the whitespace after endstream and start // Treat the \r by itself as the whitespace after endstream and start reading
// reading stream data in spite of not having seen a newline. // stream data in spite of not having seen a newline.
QTC::TC("qpdf", "QPDF stream with CR only"); QTC::TC("qpdf", "QPDF stream with CR only");
m->file->unreadCh(ch); m->file->unreadCh(ch);
warn(damagedPDF( warn(damagedPDF(
m->file->tell(), m->file->tell(), "stream keyword followed by carriage return only"));
"stream keyword followed by carriage return only"));
} }
} }
} else if (QUtil::is_space(ch)) { } else if (QUtil::is_space(ch)) {
warn(damagedPDF( warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
m->file->tell(), "stream keyword followed by extraneous whitespace"));
done = false; done = false;
} else { } else {
QTC::TC("qpdf", "QPDF stream without newline"); QTC::TC("qpdf", "QPDF stream without newline");
@ -1388,14 +1401,6 @@ QPDF::readObject(std::string const& description, QPDFObjGen og)
} }
} }
object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length)); object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length));
} else {
m->file->seek(cur_offset, SEEK_SET);
}
}
// Override last_offset so that it points to the beginning of the object we just read
m->file->setLastOffset(offset);
return object;
} }
QPDFObjectHandle QPDFObjectHandle