2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-23 03:18:59 +00:00

Allow QPDFTokenizer::readToken to return bad tokens

Sometimes we want to ignore bad tokens rather than having them throw
an exception. The corresponding coverage case (the QTC::TC call in
QPDFTokenizer::readToken) is commented out here and is added in a
later commit.
This commit is contained in:
Jay Berkenbilt 2017-08-08 21:44:37 -04:00
parent 8320d16cd2
commit ef8ae5449d
7 changed files with 114 additions and 10 deletions

View File

@ -666,7 +666,8 @@ class QPDF
size_t recoverStreamLength( size_t recoverStreamLength(
PointerHolder<InputSource> input, int objid, int generation, PointerHolder<InputSource> input, int objid, int generation,
qpdf_offset_t stream_offset); qpdf_offset_t stream_offset);
QPDFTokenizer::Token readToken(PointerHolder<InputSource>); QPDFTokenizer::Token readToken(PointerHolder<InputSource>,
bool allow_bad = false);
QPDFObjectHandle readObjectAtOffset( QPDFObjectHandle readObjectAtOffset(
bool attempt_recovery, bool attempt_recovery,

View File

@ -138,7 +138,8 @@ class QPDFTokenizer
// exception thrown if there is an error. // exception thrown if there is an error.
QPDF_DLL QPDF_DLL
Token readToken(PointerHolder<InputSource> input, Token readToken(PointerHolder<InputSource> input,
std::string const& context); std::string const& context,
bool allow_bad = false);
private: private:
void reset(); void reset();

View File

@ -1329,9 +1329,10 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
} }
QPDFTokenizer::Token QPDFTokenizer::Token
QPDF::readToken(PointerHolder<InputSource> input) QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad)
{ {
return this->tokenizer.readToken(input, this->last_object_description); return this->tokenizer.readToken(
input, this->last_object_description, allow_bad);
} }
QPDFObjectHandle QPDFObjectHandle

View File

@ -475,7 +475,8 @@ QPDFTokenizer::betweenTokens()
QPDFTokenizer::Token QPDFTokenizer::Token
QPDFTokenizer::readToken(PointerHolder<InputSource> input, QPDFTokenizer::readToken(PointerHolder<InputSource> input,
std::string const& context) std::string const& context,
bool allow_bad)
{ {
qpdf_offset_t offset = input->tell(); qpdf_offset_t offset = input->tell();
Token token; Token token;
@ -514,13 +515,20 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
input->unreadCh(char_to_unread); input->unreadCh(char_to_unread);
} }
input->setLastOffset(offset);
if (token.getType() == tt_bad) if (token.getType() == tt_bad)
{
if (allow_bad)
{
// QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
}
else
{ {
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
context, offset, token.getErrorMessage()); context, offset, token.getErrorMessage());
} }
}
input->setLastOffset(offset);
return token; return token;
} }

View File

@ -232,7 +232,7 @@ foreach my $d (@bug_tests)
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Miscellaneous Tests ---"); $td->notify("--- Miscellaneous Tests ---");
$n_tests += 85; $n_tests += 86;
$td->runtest("qpdf version", $td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"}, {$td->COMMAND => "qpdf --version"},
@ -662,6 +662,13 @@ $td->runtest("combine show and --pages",
$td->EXIT_STATUS => 0}, $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("ignore bad token",
{$td->COMMAND =>
"qpdf --show-xref bad-token-startxref.pdf"},
{$td->FILE => "bad-token-startxref.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Single Page ---"); $td->notify("--- Single Page ---");

View File

@ -0,0 +1,6 @@
1/0: uncompressed; offset = 9
2/0: uncompressed; offset = 63
3/0: uncompressed; offset = 135
4/0: uncompressed; offset = 307
5/0: uncompressed; offset = 403
6/0: uncompressed; offset = 438

View File

@ -0,0 +1,80 @@
%PDF-1.3
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [
3 0 R
]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
[
/PDF
/Text
]
endobj
6 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00000 n
0000000063 00000 n
0000000135 00000 n
0000000307 00000 n
0000000403 00000 n
0000000438 00000 n
trailer <<
/Size 7
/Root 1 0 R
>>
startxref(
startxref
556
%%EOF