mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
commit
0b3debaf86
@ -191,6 +191,8 @@ class QPDFTokenizer
|
||||
// returns a tt_inline_image token.
|
||||
QPDF_DLL
|
||||
void expectInlineImage(std::shared_ptr<InputSource> input);
|
||||
QPDF_DLL
|
||||
void expectInlineImage(InputSource& input);
|
||||
|
||||
private:
|
||||
friend class QPDFParser;
|
||||
@ -217,7 +219,7 @@ class QPDFTokenizer
|
||||
|
||||
bool isSpace(char);
|
||||
bool isDelimiter(char);
|
||||
void findEI(std::shared_ptr<InputSource> input);
|
||||
void findEI(InputSource& input);
|
||||
|
||||
enum state_e {
|
||||
st_top,
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <qpdf/ContentNormalizer.hh>
|
||||
|
||||
#include <qpdf/QPDF_Name.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
|
||||
ContentNormalizer::ContentNormalizer() :
|
||||
@ -11,7 +12,6 @@ ContentNormalizer::ContentNormalizer() :
|
||||
void
|
||||
ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
|
||||
{
|
||||
std::string value = token.getRawValue();
|
||||
QPDFTokenizer::token_type_e token_type = token.getType();
|
||||
|
||||
if (token_type == QPDFTokenizer::tt_bad) {
|
||||
@ -24,40 +24,48 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
|
||||
switch (token_type) {
|
||||
case QPDFTokenizer::tt_space:
|
||||
{
|
||||
size_t len = value.length();
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
char ch = value.at(i);
|
||||
if (ch == '\r') {
|
||||
if ((i + 1 < len) && (value.at(i + 1) == '\n')) {
|
||||
// ignore
|
||||
} else {
|
||||
std::string const& value = token.getRawValue();
|
||||
auto size = value.size();
|
||||
size_t pos = 0;
|
||||
auto r_pos = value.find('\r');
|
||||
while (r_pos != std::string::npos) {
|
||||
if (pos != r_pos) {
|
||||
write(&value[pos], r_pos - pos);
|
||||
}
|
||||
if (++r_pos >= size) {
|
||||
write("\n");
|
||||
return;
|
||||
}
|
||||
if (value[r_pos] != '\n') {
|
||||
write("\n");
|
||||
}
|
||||
} else {
|
||||
write(&ch, 1);
|
||||
pos = r_pos;
|
||||
r_pos = value.find('\r', pos);
|
||||
}
|
||||
if (pos < size) {
|
||||
write(&value[pos], size - pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
return;
|
||||
|
||||
case QPDFTokenizer::tt_string:
|
||||
// Replacing string and name tokens in this way normalizes their representation as this will
|
||||
// automatically handle quoting of unprintable characters, etc.
|
||||
writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, token.getValue()));
|
||||
write(QPDFObjectHandle::newString(token.getValue()).unparse());
|
||||
break;
|
||||
|
||||
case QPDFTokenizer::tt_name:
|
||||
writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, token.getValue()));
|
||||
write(QPDF_Name::normalizeName(token.getValue()));
|
||||
break;
|
||||
|
||||
default:
|
||||
writeToken(token);
|
||||
break;
|
||||
return;
|
||||
}
|
||||
|
||||
value = token.getRawValue();
|
||||
if (((token_type == QPDFTokenizer::tt_string) || (token_type == QPDFTokenizer::tt_name)) &&
|
||||
((value.find('\r') != std::string::npos) || (value.find('\n') != std::string::npos))) {
|
||||
// tt_string or tt_name
|
||||
std::string const& value = token.getRawValue();
|
||||
if (value.find('\r') != std::string::npos || value.find('\n') != std::string::npos) {
|
||||
write("\n");
|
||||
}
|
||||
}
|
||||
|
@ -36,20 +36,17 @@ void
|
||||
Pl_QPDFTokenizer::finish()
|
||||
{
|
||||
m->buf.finish();
|
||||
auto input = std::shared_ptr<InputSource>(
|
||||
// line-break
|
||||
new BufferInputSource("tokenizer data", m->buf.getBuffer(), true));
|
||||
|
||||
auto input = BufferInputSource("tokenizer data", m->buf.getBuffer(), true);
|
||||
std::string empty;
|
||||
while (true) {
|
||||
QPDFTokenizer::Token token =
|
||||
m->tokenizer.readToken(input, "offset " + std::to_string(input->tell()), true);
|
||||
auto token = m->tokenizer.readToken(input, empty, true);
|
||||
m->filter->handleToken(token);
|
||||
if (token.getType() == QPDFTokenizer::tt_eof) {
|
||||
break;
|
||||
} else if (token.isWord("ID")) {
|
||||
// Read the space after the ID.
|
||||
char ch = ' ';
|
||||
input->read(&ch, 1);
|
||||
input.read(&ch, 1);
|
||||
m->filter->handleToken(
|
||||
// line-break
|
||||
QPDFTokenizer::Token(QPDFTokenizer::tt_space, std::string(1, ch)));
|
||||
|
@ -148,7 +148,7 @@ QPDFObjectHandle::TokenFilter::write(std::string const& str)
|
||||
// Writes a token by fetching its raw value and passing that value's bytes and length to write().
// NOTE(review): two declarations of `value` follow (by value, then by const reference). This looks
// like the removed and the added line of a rendered diff hunk shown together; compilable code can
// keep only one of them -- confirm against the upstream file.
void
|
||||
QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
|
||||
{
|
||||
std::string value = token.getRawValue();
|
||||
std::string const& value = token.getRawValue();
|
||||
write(value.c_str(), value.length());
|
||||
}
|
||||
|
||||
|
@ -27,7 +27,7 @@ namespace
|
||||
class QPDFWordTokenFinder: public InputSource::Finder
|
||||
{
|
||||
public:
|
||||
QPDFWordTokenFinder(std::shared_ptr<InputSource> is, std::string const& str) :
|
||||
QPDFWordTokenFinder(InputSource& is, std::string const& str) :
|
||||
is(is),
|
||||
str(str)
|
||||
{
|
||||
@ -36,7 +36,7 @@ namespace
|
||||
bool check() override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<InputSource> is;
|
||||
InputSource& is;
|
||||
std::string str;
|
||||
};
|
||||
} // namespace
|
||||
@ -48,21 +48,21 @@ QPDFWordTokenFinder::check()
|
||||
// delimiter or EOF.
|
||||
QPDFTokenizer tokenizer;
|
||||
QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
|
||||
qpdf_offset_t pos = is->tell();
|
||||
qpdf_offset_t pos = is.tell();
|
||||
if (!(t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str))) {
|
||||
QTC::TC("qpdf", "QPDFTokenizer finder found wrong word");
|
||||
return false;
|
||||
}
|
||||
qpdf_offset_t token_start = is->getLastOffset();
|
||||
qpdf_offset_t token_start = is.getLastOffset();
|
||||
char next;
|
||||
bool next_okay = false;
|
||||
if (is->read(&next, 1) == 0) {
|
||||
if (is.read(&next, 1) == 0) {
|
||||
QTC::TC("qpdf", "QPDFTokenizer inline image at EOF");
|
||||
next_okay = true;
|
||||
} else {
|
||||
next_okay = is_delimiter(next);
|
||||
}
|
||||
is->seek(pos, SEEK_SET);
|
||||
is.seek(pos, SEEK_SET);
|
||||
if (!next_okay) {
|
||||
return false;
|
||||
}
|
||||
@ -763,12 +763,18 @@ QPDFTokenizer::presentEOF()
|
||||
|
||||
// Overload taking a shared_ptr: dereferences it and forwards to expectInlineImage(InputSource&).
// NOTE(review): `input` is dereferenced without a null check, so an empty shared_ptr would be
// undefined behavior here -- confirm that callers never pass one.
void
|
||||
QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input)
|
||||
{
|
||||
expectInlineImage(*input);
|
||||
}
|
||||
|
||||
void
|
||||
QPDFTokenizer::expectInlineImage(InputSource& input)
|
||||
{
|
||||
if (this->state == st_token_ready) {
|
||||
reset();
|
||||
} else if (this->state != st_before_token) {
|
||||
throw std::logic_error("QPDFTokenizer::expectInlineImage called"
|
||||
" when tokenizer is in improper state");
|
||||
throw std::logic_error(
|
||||
"QPDFTokenizer::expectInlineImage called when tokenizer is in improper state");
|
||||
}
|
||||
findEI(input);
|
||||
this->before_token = false;
|
||||
@ -777,14 +783,10 @@ QPDFTokenizer::expectInlineImage(std::shared_ptr<InputSource> input)
|
||||
}
|
||||
|
||||
void
|
||||
QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
|
||||
QPDFTokenizer::findEI(InputSource& input)
|
||||
{
|
||||
if (!input.get()) {
|
||||
return;
|
||||
}
|
||||
|
||||
qpdf_offset_t last_offset = input->getLastOffset();
|
||||
qpdf_offset_t pos = input->tell();
|
||||
qpdf_offset_t last_offset = input.getLastOffset();
|
||||
qpdf_offset_t pos = input.tell();
|
||||
|
||||
// Use QPDFWordTokenFinder to find EI surrounded by delimiters. Then read the next several
|
||||
// tokens or up to EOF. If we find any suspicious-looking tokens, this is probably still part
|
||||
@ -797,10 +799,10 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
|
||||
bool first_try = true;
|
||||
while (!okay) {
|
||||
QPDFWordTokenFinder f(input, "EI");
|
||||
if (!input->findFirst("EI", input->tell(), 0, f)) {
|
||||
if (!input.findFirst("EI", input.tell(), 0, f)) {
|
||||
break;
|
||||
}
|
||||
this->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2);
|
||||
inline_image_bytes = QIntC::to_size(input.tell() - pos - 2);
|
||||
|
||||
QPDFTokenizer check;
|
||||
bool found_bad = false;
|
||||
@ -858,8 +860,8 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
|
||||
QTC::TC("qpdf", "QPDFTokenizer found EI after more than one try");
|
||||
}
|
||||
|
||||
input->seek(pos, SEEK_SET);
|
||||
input->setLastOffset(last_offset);
|
||||
input.seek(pos, SEEK_SET);
|
||||
input.setLastOffset(last_offset);
|
||||
}
|
||||
|
||||
bool
|
||||
@ -902,7 +904,7 @@ QPDFTokenizer::readToken(
|
||||
throw QPDFExc(
|
||||
qpdf_e_damaged_pdf,
|
||||
input.getName(),
|
||||
context,
|
||||
context.empty() ? "offset " + std::to_string(input.getLastOffset()) : context,
|
||||
input.getLastOffset(),
|
||||
token.getErrorMessage());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user