From aabd3f6f9b09e844958fd4ee07bde5e8df546fc4 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sat, 8 Jul 2023 12:17:48 +0100 Subject: [PATCH] Add private method QPDF::validateStreamLineEnd --- include/qpdf/QPDF.hh | 1 + libqpdf/QPDF.cc | 96 +++++++++++++++++++++++--------------------- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index ead09a0a..61510c17 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1009,6 +1009,7 @@ class QPDF QPDFObjectHandle readTrailer(); QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og); void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); + void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset); QPDFObjectHandle readObjectInStream(std::shared_ptr, QPDFObjGen og); size_t recoverStreamLength( std::shared_ptr input, QPDFObjGen const& og, qpdf_offset_t stream_offset); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 3fbb4da1..05929764 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1324,51 +1324,7 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) void QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) { - // The PDF specification states that the word "stream" should be followed by either a carriage - // return and a newline or by a newline alone. It specifically disallowed following it by a - // carriage return alone since, in that case, there would be no way to tell whether the NL in a - // CR NL sequence was part of the stream data. However, some readers, including Adobe reader, - // accept a carriage return by itself when followed by a non-newline character, so that's what - // we do here. We have also seen files that have extraneous whitespace between the stream - // keyword and the newline. - while (true) { - char ch; - if (m->file->read(&ch, 1) == 0) { - // A premature EOF here will result in some other problem that will get reported at - // another time. - break; - } - if (ch == '\n') { - // ready to read stream data - QTC::TC("qpdf", "QPDF stream with NL only"); - break; - } - if (ch == '\r') { - // Read another character - if (m->file->read(&ch, 1) != 0) { - if (ch == '\n') { - // Ready to read stream data - QTC::TC("qpdf", "QPDF stream with CRNL"); - } else { - // Treat the \r by itself as the whitespace after endstream and start reading - // stream data in spite of not having seen a newline. - QTC::TC("qpdf", "QPDF stream with CR only"); - m->file->unreadCh(ch); - warn(damagedPDF( - m->file->tell(), "stream keyword followed by carriage return only")); - } - } - break; - } - if (!QUtil::is_space(ch)) { - QTC::TC("qpdf", "QPDF stream without newline"); - m->file->unreadCh(ch); - warn(damagedPDF( - m->file->tell(), "stream keyword not followed by proper line terminator")); - break; - } - warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace")); - } + validateStreamLineEnd(object, og, offset); // Must get offset before accessing any additional objects since resolving a previously // unresolved indirect object will change file position. @@ -1406,6 +1362,56 @@ QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) object = newIndirect(og, QPDF_Stream::create(this, og, object, stream_offset, length)); } +void +QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) +{ + // The PDF specification states that the word "stream" should be followed by either a carriage + // return and a newline or by a newline alone. It specifically disallowed following it by a + // carriage return alone since, in that case, there would be no way to tell whether the NL in a + // CR NL sequence was part of the stream data. However, some readers, including Adobe reader, + // accept a carriage return by itself when followed by a non-newline character, so that's what + // we do here. We have also seen files that have extraneous whitespace between the stream + // keyword and the newline. + while (true) { + char ch; + if (m->file->read(&ch, 1) == 0) { + // A premature EOF here will result in some other problem that will get reported at + // another time. + return; + } + if (ch == '\n') { + // ready to read stream data + QTC::TC("qpdf", "QPDF stream with NL only"); + return; + } + if (ch == '\r') { + // Read another character + if (m->file->read(&ch, 1) != 0) { + if (ch == '\n') { + // Ready to read stream data + QTC::TC("qpdf", "QPDF stream with CRNL"); + } else { + // Treat the \r by itself as the whitespace after endstream and start reading + // stream data in spite of not having seen a newline. + QTC::TC("qpdf", "QPDF stream with CR only"); + m->file->unreadCh(ch); + warn(damagedPDF( + m->file->tell(), "stream keyword followed by carriage return only")); + } + } + return; + } + if (!QUtil::is_space(ch)) { + QTC::TC("qpdf", "QPDF stream without newline"); + m->file->unreadCh(ch); + warn(damagedPDF( + m->file->tell(), "stream keyword not followed by proper line terminator")); + return; + } + warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace")); + } +} + QPDFObjectHandle QPDF::readObjectInStream(std::shared_ptr input, QPDFObjGen og) {