From 863d95e5676b0c03539c1cd9bacb31039d53b433 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 4 Oct 2022 20:50:36 +0100 Subject: [PATCH] Add new method QPDFTokenizer::nextToken --- include/qpdf/QPDFTokenizer.hh | 12 +++++++ libqpdf/QPDFTokenizer.cc | 64 +++++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 0fd34fb9..db95eff6 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -204,6 +204,18 @@ class QPDFTokenizer QPDF_DLL void expectInlineImage(std::shared_ptr<InputSource> input); + // Read a token from an input source. Context describes the + // context in which the token is being read and is used in the + // exception thrown if there is an error. After a token is read, + // the position of the input source returned by input->tell() + // points to just after the token, and the input source's "last + // offset" as returned by input->getLastOffset() points to the + // beginning of the token. Returns false if the token is bad + // or if scanning produced an error message for any reason. 
+ QPDF_DLL + bool nextToken( + InputSource& input, std::string const& context, size_t max_len = 0); + private: QPDFTokenizer(QPDFTokenizer const&) = delete; QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index b828691c..fe36d768 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -805,7 +805,9 @@ QPDFTokenizer::presentEOF() void QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input) { - if (this->state != st_before_token) { + if (this->state == st_token_ready) { + reset(); + } else if (this->state != st_before_token) { throw std::logic_error("QPDFTokenizer::expectInlineImage called" " when tokenizer is in improper state"); } @@ -941,11 +943,40 @@ QPDFTokenizer::readToken( bool allow_bad, size_t max_len) { - qpdf_offset_t offset = input->fastTell(); + nextToken(*input, context, max_len); + + Token token; + bool unread_char; + char char_to_unread; + getToken(token, unread_char, char_to_unread); + + if (token.getType() == tt_bad) { + if (allow_bad) { + QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); + } else { + throw QPDFExc( + qpdf_e_damaged_pdf, + input->getName(), + context, + input->getLastOffset(), + token.getErrorMessage()); + } + } + return token; +} + +bool +QPDFTokenizer::nextToken( + InputSource& input, std::string const& context, size_t max_len) +{ + if (this->state != st_inline_image) { + reset(); + } + qpdf_offset_t offset = input.fastTell(); while (this->state != st_token_ready) { char ch; - if (!input->fastRead(ch)) { + if (!input.fastRead(ch)) { presentEOF(); if ((this->type == tt_eof) && (!this->allow_eof)) { @@ -954,7 +985,7 @@ QPDFTokenizer::readToken( // exercised. 
this->type = tt_bad; this->error_message = "unexpected EOF"; - offset = input->getLastOffset(); + offset = input.getLastOffset(); } } else { handleCharacter(ch); @@ -976,28 +1007,11 @@ QPDFTokenizer::readToken( } } - Token token; - bool unread_char; - char char_to_unread; - getToken(token, unread_char, char_to_unread); - input->fastUnread(unread_char); + input.fastUnread(!this->in_token && !this->before_token); - if (token.getType() != tt_eof) { - input->setLastOffset(offset); + if (this->type != tt_eof) { + input.setLastOffset(offset); } - if (token.getType() == tt_bad) { - if (allow_bad) { - QTC::TC("qpdf", "QPDFTokenizer allowing bad token"); - } else { - throw QPDFExc( - qpdf_e_damaged_pdf, - input->getName(), - context, - offset, - token.getErrorMessage()); - } - } - - return token; + return this->error_message.empty(); }