diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 6e567569..f97029ea 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1061,7 +1061,7 @@ class QPDF QPDFObjectHandle readObjectInStream(std::shared_ptr& input, int obj); size_t recoverStreamLength( std::shared_ptr input, QPDFObjGen const& og, qpdf_offset_t stream_offset); - QPDFTokenizer::Token readToken(std::shared_ptr, size_t max_len = 0); + QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); QPDFObjectHandle readObjectAtOffset( bool attempt_recovery, @@ -1088,14 +1088,11 @@ class QPDF qpdf_offset_t end_before_space, qpdf_offset_t end_after_space); static QPDFExc damagedPDF( - std::shared_ptr const& input, + InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message); - QPDFExc damagedPDF( - std::shared_ptr const& input, - qpdf_offset_t offset, - std::string const& message); + QPDFExc damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message); QPDFExc damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message); QPDFExc damagedPDF(std::string const& object, std::string const& message); QPDFExc damagedPDF(qpdf_offset_t offset, std::string const& message); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 6200a192..6a3bc80e 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -419,7 +419,7 @@ QPDF::findHeader() bool QPDF::findStartxref() { - if (readToken(m->file).isWord("startxref") && readToken(m->file).isInteger()) { + if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) { // Position in front of offset token m->file->seek(m->file->getLastOffset(), SEEK_SET); return true; @@ -457,7 +457,7 @@ QPDF::parse(char const* password) PatternFinder sf(*this, &QPDF::findStartxref); qpdf_offset_t xref_offset = 0; if (m->file->findLast("startxref", start_offset, 0, sf)) { - xref_offset = QUtil::string_to_ll(readToken(m->file).getValue().c_str()); + xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); } try { @@ -575,12 +575,12 @@ QPDF::reconstruct_xref(QPDFExc& e) // Don't allow very long tokens here during recovery. All the interesting tokens are covered. static size_t const MAX_LEN = 10; while (m->file->tell() < eof) { - QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); + QPDFTokenizer::Token t1 = readToken(*m->file, MAX_LEN); qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); if (t1.isInteger()) { auto pos = m->file->tell(); - QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); - if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { + QPDFTokenizer::Token t2 = readToken(*m->file, MAX_LEN); + if ((t2.isInteger()) && (readToken(*m->file, MAX_LEN).isWord("obj"))) { int obj = QUtil::string_to_int(t1.getValue().c_str()); int gen = QUtil::string_to_int(t2.getValue().c_str()); if (obj <= m->xref_table_max_id) { @@ -989,7 +989,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) } } qpdf_offset_t pos = m->file->tell(); - if (readToken(m->file).isWord("trailer")) { + if (readToken(*m->file).isWord("trailer")) { break; } else { m->file->seek(pos, SEEK_SET); @@ -1465,12 +1465,12 @@ QPDF::readTrailer() qpdf_offset_t offset = m->file->tell(); bool empty = false; auto object = - QPDFParser(m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false); + QPDFParser(*m->file, "trailer", m->tokenizer, nullptr, this, true).parse(empty, false); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. warn(damagedPDF("trailer", "empty object treated as null")); - } else if (object.isDictionary() && readToken(m->file).isWord("stream")) { + } else if (object.isDictionary() && readToken(*m->file).isWord("stream")) { warn(damagedPDF("trailer", m->file->tell(), "stream keyword found in trailer")); } // Override last_offset so that it points to the beginning of the object we just read @@ -1488,18 +1488,18 @@ QPDF::readObject(std::string const& description, QPDFObjGen og) StringDecrypter decrypter{this, og}; StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; auto object = - QPDFParser(m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true) + QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true) .parse(empty, false); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. - warn(damagedPDF(m->file, m->file->getLastOffset(), "empty object treated as null")); + warn(damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null")); return object; } - auto token = readToken(m->file); + auto token = readToken(*m->file); if (object.isDictionary() && token.isWord("stream")) { readStream(object, og, offset); - token = readToken(m->file); + token = readToken(*m->file); } if (!token.isWord("endobj")) { QTC::TC("qpdf", "QPDF err expected endobj"); @@ -1535,7 +1535,7 @@ QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) // Seek in two steps to avoid potential integer overflow m->file->seek(stream_offset, SEEK_SET); m->file->seek(toO(length), SEEK_CUR); - if (!readToken(m->file).isWord("endstream")) { + if (!readToken(*m->file).isWord("endstream")) { QTC::TC("qpdf", "QPDF missing endstream"); throw damagedPDF("expected endstream"); } @@ -1608,12 +1608,12 @@ QPDF::readObjectInStream(std::shared_ptr& input, int obj) m->last_object_description += " 0"; bool empty = false; - auto object = QPDFParser(input, m->last_object_description, m->tokenizer, nullptr, this, true) + auto object = QPDFParser(*input, m->last_object_description, m->tokenizer, nullptr, this, true) .parse(empty, false); if (empty) { // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // actual PDF files and Adobe Reader appears to ignore them. - warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null")); + warn(damagedPDF(*input, input->getLastOffset(), "empty object treated as null")); } return object; } @@ -1622,7 +1622,7 @@ bool QPDF::findEndstream() { // Find endstream or endobj. Position the input at that token. - auto t = readToken(m->file, 20); + auto t = readToken(*m->file, 20); if (t.isWord("endobj") || t.isWord("endstream")) { m->file->seek(m->file->getLastOffset(), SEEK_SET); return true; @@ -1635,14 +1635,14 @@ QPDF::recoverStreamLength( std::shared_ptr input, QPDFObjGen const& og, qpdf_offset_t stream_offset) { // Try to reconstruct stream length by looking for endstream or endobj - warn(damagedPDF(input, stream_offset, "attempting to recover stream length")); + warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); PatternFinder ef(*this, &QPDF::findEndstream); size_t length = 0; if (m->file->findFirst("end", stream_offset, 0, ef)) { length = toS(m->file->tell() - stream_offset); // Reread endstream but, if it was endobj, don't skip that. - QPDFTokenizer::Token t = readToken(m->file); + QPDFTokenizer::Token t = readToken(*m->file); if (t.getValue() == "endobj") { m->file->seek(m->file->getLastOffset(), SEEK_SET); } @@ -1674,10 +1674,10 @@ QPDF::recoverStreamLength( if (length == 0) { warn(damagedPDF( - input, stream_offset, "unable to recover stream data; treating stream as empty")); + *input, stream_offset, "unable to recover stream data; treating stream as empty")); } else { - warn( - damagedPDF(input, stream_offset, "recovered stream length: " + std::to_string(length))); + warn(damagedPDF( + *input, stream_offset, "recovered stream length: " + std::to_string(length))); } QTC::TC("qpdf", "QPDF recovered stream length"); @@ -1685,7 +1685,7 @@ QPDF::recoverStreamLength( } QPDFTokenizer::Token -QPDF::readToken(std::shared_ptr input, size_t max_len) +QPDF::readToken(InputSource& input, size_t max_len) { return m->tokenizer.readToken(input, m->last_object_description, true, max_len); } @@ -1728,20 +1728,20 @@ QPDF::readObjectAtOffset( m->file->seek(offset, SEEK_SET); try { - QPDFTokenizer::Token tobjid = readToken(m->file); + QPDFTokenizer::Token tobjid = readToken(*m->file); bool objidok = tobjid.isInteger(); QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0); if (!objidok) { QTC::TC("qpdf", "QPDF expected n n obj"); throw damagedPDF(offset, "expected n n obj"); } - QPDFTokenizer::Token tgen = readToken(m->file); + QPDFTokenizer::Token tgen = readToken(*m->file); bool genok = tgen.isInteger(); QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0); if (!genok) { throw damagedPDF(offset, "expected n n obj"); } - QPDFTokenizer::Token tobj = readToken(m->file); + QPDFTokenizer::Token tobj = readToken(*m->file); bool objok = tobj.isWord("obj"); QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0); @@ -1954,11 +1954,11 @@ QPDF::resolveObjectsInStream(int obj_stream_number) bp.get())); for (int i = 0; i < n; ++i) { - QPDFTokenizer::Token tnum = readToken(input); - QPDFTokenizer::Token toffset = readToken(input); + QPDFTokenizer::Token tnum = readToken(*input); + QPDFTokenizer::Token toffset = readToken(*input); if (!(tnum.isInteger() && toffset.isInteger())) { throw damagedPDF( - input, + *input, m->last_object_description, input->getLastOffset(), "expected integer in object stream header"); @@ -1972,7 +1972,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) if (num == obj_stream_number) { QTC::TC("qpdf", "QPDF ignore self-referential object stream"); warn(damagedPDF( - input, + *input, m->last_object_description, input->getLastOffset(), "object stream claims to contain itself")); @@ -2726,7 +2726,7 @@ QPDF::pipeStreamData( file->seek(offset, SEEK_SET); auto buf = std::make_unique(length); if (auto read = file->read(buf.get(), length); read != length) { - throw damagedPDF(file, "", offset + toO(read), "unexpected EOF reading stream data"); + throw damagedPDF(*file, "", offset + toO(read), "unexpected EOF reading stream data"); } pipeline->write(buf.get(), length); attempted_finish = true; @@ -2742,7 +2742,7 @@ QPDF::pipeStreamData( qpdf_for_warning.warn( // line-break damagedPDF( - file, + *file, "", file->getLastOffset(), ("error decoding stream data for object " + og.unparse(' ') + ": " + @@ -2751,7 +2751,7 @@ QPDF::pipeStreamData( qpdf_for_warning.warn( // line-break damagedPDF( - file, + *file, "", file->getLastOffset(), "stream will be re-processed without filtering to avoid data loss")); @@ -2825,19 +2825,15 @@ QPDF::stopOnError(std::string const& message) // Return an exception of type qpdf_e_damaged_pdf. QPDFExc QPDF::damagedPDF( - std::shared_ptr const& input, - std::string const& object, - qpdf_offset_t offset, - std::string const& message) + InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message) { - return {qpdf_e_damaged_pdf, input->getName(), object, offset, message}; + return {qpdf_e_damaged_pdf, input.getName(), object, offset, message}; } // Return an exception of type qpdf_e_damaged_pdf. The object is taken from // m->last_object_description. QPDFExc -QPDF::damagedPDF( - std::shared_ptr const& input, qpdf_offset_t offset, std::string const& message) +QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message) { return damagedPDF(input, m->last_object_description, offset, message); } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 3ea9613d..ce77526a 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -2164,7 +2164,7 @@ QPDFObjectHandle::parseContentStream_data( qpdf_offset_t offset = input->getLastOffset(); input->seek(offset, SEEK_SET); auto obj = - QPDFParser(input, "content", tokenizer, nullptr, context, false).parse(empty, true); + QPDFParser(*input, "content", tokenizer, nullptr, context, false).parse(empty, true); if (!obj.isInitialized()) { // EOF break; @@ -2223,7 +2223,7 @@ QPDFObjectHandle::parse( StringDecrypter* decrypter, QPDF* context) { - return QPDFParser(input, object_description, tokenizer, decrypter, context, false) + return QPDFParser(*input, object_description, tokenizer, decrypter, context, false) .parse(empty, false); } diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 08f94c64..a9d3e04b 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -33,9 +33,9 @@ QPDFParser::parse(bool& empty, bool content_stream) QPDF::ParseGuard pg(context); empty = false; - start = input->tell(); + start = input.tell(); - if (!tokenizer.nextToken(*input, object_description)) { + if (!tokenizer.nextToken(input, object_description)) { warn(tokenizer.getErrorMessage()); } @@ -101,7 +101,7 @@ QPDFParser::parse(bool& empty, bool content_stream) } else if (value == "endobj") { // We just saw endobj without having read anything. Treat this as a null and do // not move the input source's offset. - input->seek(input->getLastOffset(), SEEK_SET); + input.seek(input.getLastOffset(), SEEK_SET); empty = true; return {QPDF_Null::create()}; } else { @@ -138,7 +138,7 @@ QPDFParser::parseRemainder(bool content_stream) bool b_contents = false; while (true) { - if (!tokenizer.nextToken(*input, object_description)) { + if (!tokenizer.nextToken(input, object_description)) { warn(tokenizer.getErrorMessage()); } ++good_count; // optimistically @@ -151,7 +151,7 @@ QPDFParser::parseRemainder(bool content_stream) // Process the oldest buffered integer. addInt(int_count); } - last_offset_buffer[int_count % 2] = input->getLastOffset(); + last_offset_buffer[int_count % 2] = input.getLastOffset(); int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str()); continue; @@ -309,7 +309,7 @@ QPDFParser::parseRemainder(bool content_stream) case QPDFTokenizer::tt_integer: if (!content_stream) { // Buffer token in case it is part of an indirect reference. - last_offset_buffer[1] = input->getLastOffset(); + last_offset_buffer[1] = input.getLastOffset(); int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str()); int_count = 1; } else { @@ -351,7 +351,7 @@ QPDFParser::parseRemainder(bool content_stream) if (decrypter) { if (b_contents) { frame->contents_string = val; - frame->contents_offset = input->getLastOffset(); + frame->contents_offset = input.getLastOffset(); b_contents = false; } std::string s{val}; @@ -419,7 +419,7 @@ void QPDFParser::addScalar(Args&&... args) { auto obj = T::create(args...); - obj->setDescription(context, description, input->getLastOffset()); + obj->setDescription(context, description, input.getLastOffset()); add(std::move(obj)); } @@ -506,11 +506,11 @@ QPDFParser::warnDuplicateKey() void QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const { - warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), object_description, offset, msg)); + warn(QPDFExc(qpdf_e_damaged_pdf, input.getName(), object_description, offset, msg)); } void QPDFParser::warn(std::string const& msg) const { - warn(input->getLastOffset(), msg); + warn(input.getLastOffset(), msg); } diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index c1f3044d..9ffefd24 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -118,9 +118,10 @@ QPDF::isLinearized() ++p; } - QPDFTokenizer::Token t1 = readToken(m->file); - if (t1.isInteger() && readToken(m->file).isInteger() && readToken(m->file).isWord("obj") && - (readToken(m->file).getType() == QPDFTokenizer::tt_dict_open)) { + QPDFTokenizer::Token t1 = readToken(*m->file); + if (t1.isInteger() && readToken(*m->file).isInteger() && + readToken(*m->file).isWord("obj") && + readToken(*m->file).getType() == QPDFTokenizer::tt_dict_open) { lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str())); } } diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index e036b022..1eecf975 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -12,7 +12,7 @@ class QPDFParser public: QPDFParser() = delete; QPDFParser( - std::shared_ptr input, + InputSource& input, std::string const& object_description, QPDFTokenizer& tokenizer, QPDFObjectHandle::StringDecrypter* decrypter, @@ -24,7 +24,7 @@ class QPDFParser decrypter(decrypter), context(context), description(std::make_shared( - std::string(input->getName() + ", " + object_description + " at offset $PO"))), + std::string(input.getName() + ", " + object_description + " at offset $PO"))), parse_pdf(parse_pdf) { } @@ -39,9 +39,9 @@ class QPDFParser struct StackFrame { - StackFrame(std::shared_ptr const& input, parser_state_e state) : + StackFrame(InputSource& input, parser_state_e state) : state(state), - offset(input->tell()) + offset(input.tell()) { } @@ -72,7 +72,7 @@ class QPDFParser // NB the offset includes any leading whitespace. QPDFObjectHandle withDescription(Args&&... args); void setDescription(std::shared_ptr& obj, qpdf_offset_t parsed_offset); - std::shared_ptr input; + InputSource& input; std::string const& object_description; QPDFTokenizer& tokenizer; QPDFObjectHandle::StringDecrypter* decrypter;