diff --git a/ChangeLog b/ChangeLog index e95e2370..e9dea347 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,34 +1,49 @@ 2018-02-04 Jay Berkenbilt * Significant lexer (tokenizer) enhancements. These are changes to - the QPDFTokenizer class. These changes are of concern only to - people who are operating with PDF files at the lexical layer - using qpdf. They have little or no impact on most high-level - interfaces or the command-line tool. - * New token types tt_space and tt_comment to recognize - whitespace and comments. this makes it possible to tokenize a - PDF file or stream and preserve everything about it. - * For backward compatibility, space and comment tokens are not - returned by the tokenizer unless - QPDFTokenizer.includeIgnorable() is called. - * Better handling of null bytes. These are now included in space - tokens rather than being their own "tt_word" tokens. This - should have no impact on any correct PDF file and has no - impact on output, but it may change offsets in some error - messages when trying to parse contents of bad files. Under - default operation, qpdf does not attempt to parse content - streams, so this change is mostly invisible. - * Bug fix to handling of bad tokens at ends of streams. Now, - when allowEOF() has been called, these are treated as bad tokens - (tt_bad or an exception, depending on invocation), and a - separate tt_eof token is returned. Before the bad token - contents were returned as the value of a tt_eof token. tt_eof - tokens are always empty now. - * Fix a bug that would, on rare occasions, report the offset in an - error message in the wrong space because of spaces or comments - adjacent to a bad token. - * Clarify in comments exactly where the input source is - positioned surrounding calls to readToken and getToken. + the QPDFTokenizer class. These changes are of concern only to + people who are operating with PDF files at the lexical layer using + qpdf. 
They have little or no impact on most high-level interfaces + or the command-line tool. + + New token types tt_space and tt_comment to recognize whitespace + and comments. This makes it possible to tokenize a PDF file or + stream and preserve everything about it. + + For backward compatibility, space and comment tokens are not + returned by the tokenizer unless QPDFTokenizer.includeIgnorable() + is called. + + Better handling of null bytes. These are now included in space + tokens rather than being their own "tt_word" tokens. This should + have no impact on any correct PDF file and has no impact on + output, but it may change offsets in some error messages when + trying to parse contents of bad files. Under default operation, + qpdf does not attempt to parse content streams, so this change is + mostly invisible. + + Bug fix to handling of bad tokens at ends of streams. Now, when + allowEOF() has been called, these are treated as bad tokens + (tt_bad or an exception, depending on invocation), and a + separate tt_eof token is returned. Before, the bad token + contents were returned as the value of a tt_eof token. tt_eof + tokens are always empty now. + + Fix a bug that would, on rare occasions, report the offset in an + error message in the wrong space because of spaces or comments + adjacent to a bad token. + + Clarify in comments exactly where the input source is positioned + surrounding calls to readToken and getToken. + + * Add a new token type for inline images. This token type is only + returned by QPDFTokenizer immediately following a call to + expectInlineImage(). This change includes internal refactoring of + a handful of places that all separately handled inline images. The + logic of detecting inline images in content streams is now handled + in one place in the code. Also we are more flexible about what + characters may surround the EI operator that marks the end of an + inline image. 
2018-02-04 Jay Berkenbilt diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index 7d7f6132..fe2e95f7 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -34,7 +34,8 @@ class QPDFTokenizer public: // Token type tt_eof is only returned of allowEOF() is called on // the tokenizer. tt_eof was introduced in QPDF version 4.1. - // tt_space and tt_comment were added in QPDF version 8. + // tt_space, tt_comment, and tt_inline_image were added in QPDF + // version 8. enum token_type_e { tt_bad, @@ -54,6 +55,7 @@ class QPDFTokenizer tt_eof, tt_space, tt_comment, + tt_inline_image, }; class Token @@ -128,11 +130,17 @@ class QPDFTokenizer QPDF_DLL void includeIgnorable(); - // Mode of operation: + // There are two modes of operation: push and pull. The pull + // method is easier but requires an input source. The push method + // is more complicated but can be used to tokenize a stream of + // incoming characters in a pipeline. - // Keep presenting characters and calling getToken() until - // getToken() returns true. When it does, be sure to check - // unread_ch and to unread ch if it is true. + // Push mode: + + // Keep presenting characters with presentCharacter() and + // presentEOF() and calling getToken() until getToken() returns + // true. When it does, be sure to check unread_ch and to unread ch + // if it is true. // It these are called when a token is available, an exception // will be thrown. @@ -155,15 +163,30 @@ class QPDFTokenizer QPDF_DLL bool betweenTokens(); - // Read a token from an input source. Context describes the + // Pull mode: + + // Read a token from an input source. Context describes the // context in which the token is being read and is used in the - // exception thrown if there is an error. + // exception thrown if there is an error. 
After a token is read, + // the position of the input source returned by input->tell() + // points to just after the token, and the input source's "last + // offset" as returned by input->getLastOffset() points to the + // beginning of the token. QPDF_DLL Token readToken(PointerHolder input, std::string const& context, bool allow_bad = false, size_t max_len = 0); + // Calling this method puts the tokenizer in a state for reading + // inline images. In that state, it will return all data up to and + // including the next EI token. After you call this method, the + // next call to readToken (or the token created next time getToken + // returns true) will either be tt_inline_image or tt_bad. This is + // the only way readToken returns a tt_inline_image token. + QPDF_DLL + void expectInlineImage(); + private: // Do not implement copy or assignment QPDFTokenizer(QPDFTokenizer const&); @@ -171,10 +194,11 @@ class QPDFTokenizer void resolveLiteral(); bool isSpace(char); + bool isDelimiter(char); enum state_e { st_top, st_in_space, st_in_comment, st_in_string, st_lt, st_gt, - st_literal, st_in_hexstring, st_token_ready + st_literal, st_in_hexstring, st_inline_image, st_token_ready }; class Members diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 776019c8..078b1af0 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -69,6 +69,12 @@ QPDFTokenizer::isSpace(char ch) return ((ch == '\0') || QUtil::is_space(ch)); } +bool +QPDFTokenizer::isDelimiter(char ch) +{ + return (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0); +} + void QPDFTokenizer::resolveLiteral() { @@ -95,7 +101,7 @@ QPDFTokenizer::resolveLiteral() if (ch == '\0') { this->m->type = tt_bad; - QTC::TC("qpdf", "QPDF_Tokenizer null in name"); + QTC::TC("qpdf", "QPDFTokenizer null in name"); this->m->error_message = "null character not allowed in name token"; nval += "#00"; @@ -108,7 +114,7 @@ QPDFTokenizer::resolveLiteral() } else { - QTC::TC("qpdf", "QPDF_Tokenizer bad name"); + 
QTC::TC("qpdf", "QPDFTokenizer bad name"); this->m->type = tt_bad; this->m->error_message = "invalid name token"; nval += *p; @@ -209,7 +215,7 @@ QPDFTokenizer::presentCharacter(char ch) if (ch == ')') { this->m->type = tt_bad; - QTC::TC("qpdf", "QPDF_Tokenizer bad )"); + QTC::TC("qpdf", "QPDFTokenizer bad )"); this->m->error_message = "unexpected )"; this->m->state = st_token_ready; } @@ -301,7 +307,7 @@ QPDFTokenizer::presentCharacter(char ch) { this->m->val = ">"; this->m->type = tt_bad; - QTC::TC("qpdf", "QPDF_Tokenizer bad >"); + QTC::TC("qpdf", "QPDFTokenizer bad >"); this->m->error_message = "unexpected >"; this->m->unread_char = true; this->m->char_to_unread = ch; @@ -403,7 +409,7 @@ QPDFTokenizer::presentCharacter(char ch) } else if (this->m->state == st_literal) { - if (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0) + if (isDelimiter(ch)) { // A C-locale whitespace character or delimiter terminates // token. It is important to unread the whitespace @@ -423,6 +429,25 @@ QPDFTokenizer::presentCharacter(char ch) this->m->val += ch; } } + else if (this->m->state == st_inline_image) + { + size_t len = this->m->val.length(); + if ((len >= 4) && + isDelimiter(this->m->val.at(len-4)) && + (this->m->val.at(len-3) == 'E') && + (this->m->val.at(len-2) == 'I') && + isDelimiter(this->m->val.at(len-1))) + { + this->m->type = tt_inline_image; + this->m->unread_char = true; + this->m->char_to_unread = ch; + this->m->state = st_token_ready; + } + else + { + this->m->val += ch; + } + } else { handled = false; @@ -468,7 +493,7 @@ QPDFTokenizer::presentCharacter(char ch) else { this->m->type = tt_bad; - QTC::TC("qpdf", "QPDF_Tokenizer bad hexstring character"); + QTC::TC("qpdf", "QPDFTokenizer bad hexstring character"); this->m->error_message = std::string("invalid character (") + ch + ") in hexstring"; this->m->state = st_token_ready; @@ -495,9 +520,23 @@ QPDFTokenizer::presentCharacter(char ch) void QPDFTokenizer::presentEOF() { + if (this->m->state == st_inline_image) + { + 
size_t len = this->m->val.length(); + if ((len >= 3) && + isDelimiter(this->m->val.at(len-3)) && + (this->m->val.at(len-2) == 'E') && + (this->m->val.at(len-1) == 'I')) + { + QTC::TC("qpdf", "QPDFTokenizer inline image at EOF"); + this->m->type = tt_inline_image; + this->m->state = st_token_ready; + } + } + if (this->m->state == st_literal) { - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading appendable token"); + QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); resolveLiteral(); } else if ((this->m->include_ignorable) && (this->m->state == st_in_space)) @@ -514,7 +553,7 @@ QPDFTokenizer::presentEOF() } else if (this->m->state != st_token_ready) { - QTC::TC("qpdf", "QPDF_Tokenizer EOF reading token"); + QTC::TC("qpdf", "QPDFTokenizer EOF reading token"); this->m->type = tt_bad; this->m->error_message = "EOF while reading token"; } @@ -522,6 +561,17 @@ QPDFTokenizer::presentEOF() this->m->state = st_token_ready; } +void +QPDFTokenizer::expectInlineImage() +{ + if (this->m->state != st_top) + { + throw std::logic_error("QPDFTokenizer::expectInlineImage called" + " when tokenizer is in improper state"); + } + this->m->state = st_inline_image; +} + bool QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch) { @@ -572,7 +622,7 @@ QPDFTokenizer::readToken(PointerHolder input, presented_eof = true; if ((this->m->type == tt_eof) && (! 
this->m->allow_eof)) { - QTC::TC("qpdf", "QPDF_Tokenizer EOF when not allowed"); + QTC::TC("qpdf", "QPDFTokenizer EOF when not allowed"); this->m->type = tt_bad; this->m->error_message = "unexpected EOF"; offset = input->getLastOffset(); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 914f6887..11ab767c 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -64,11 +64,11 @@ QPDF stream length not integer 0 QPDF missing endstream 0 QPDFObjectHandle bad dictionary close 0 QPDF can't find xref 0 -QPDF_Tokenizer bad ) 0 -QPDF_Tokenizer bad > 0 -QPDF_Tokenizer bad hexstring character 0 -QPDF_Tokenizer null in name 0 -QPDF_Tokenizer bad name 0 +QPDFTokenizer bad ) 0 +QPDFTokenizer bad > 0 +QPDFTokenizer bad hexstring character 0 +QPDFTokenizer null in name 0 +QPDFTokenizer bad name 0 QPDF_Stream invalid filter 0 QPDF UseOutlines but no Outlines 0 QPDFObjectHandle clone bool 0 @@ -233,8 +233,8 @@ QPDFWriter copy use_aes 1 QPDFObjectHandle indirect without context 0 QPDFObjectHandle trailing data in parse 0 qpdf pages encryption password 0 -QPDF_Tokenizer EOF reading token 0 -QPDF_Tokenizer EOF reading appendable token 0 +QPDFTokenizer EOF reading token 0 +QPDFTokenizer EOF reading appendable token 0 QPDFWriter extra header text no newline 0 QPDFWriter extra header text add newline 0 QPDF bogus 0 offset 0 @@ -302,4 +302,5 @@ qpdf-c called qpdf_set_compress_streams 0 qpdf-c called qpdf_set_preserve_unreferenced_objects 0 qpdf-c called qpdf_set_newline_before_endstream 0 QPDF_Stream TIFF predictor 0 -QPDF_Tokenizer EOF when not allowed 0 +QPDFTokenizer EOF when not allowed 0 +QPDFTokenizer inline image at EOF 0 diff --git a/qpdf/qtest/qpdf/tokens-maxlen.out b/qpdf/qtest/qpdf/tokens-maxlen.out index 01283fc8..6eb8072b 100644 --- a/qpdf/qtest/qpdf/tokens-maxlen.out +++ b/qpdf/qtest/qpdf/tokens-maxlen.out @@ -222,307 +222,311 @@ skipping to endstream 7601: word: endstream 7610: space: \x0a 7611: word: endobj -7617: space: \x0a\x0a -7619: integer: 46 -7621: space: 
-7622: integer: 0 -7623: space: -7624: word: obj -7627: space: \x0a -7628: integer: 68 -7630: space: \x0a -7631: word: endobj -7637: space: \x0a\x0a -7639: comment: %% Contents for page 6 -7661: space: \x0a -7662: comment: %% Original object ID: 42 0 -7689: space: \x0a -7690: integer: 47 -7692: space: -7693: integer: 0 -7694: space: -7695: word: obj -7698: space: \x0a -7699: dict_open: << -7701: space: \x0a -7704: name: /Length -7711: space: -7712: integer: 48 -7714: space: -7715: integer: 0 -7716: space: -7717: word: R -7718: space: \x0a -7719: dict_close: >> -7721: space: \x0a -7722: word: stream +7617: space: \x0a +7618: comment: %QDF: ignore_newline +7638: space: \x0a\x0a +7640: integer: 46 +7642: space: +7643: integer: 0 +7644: space: +7645: word: obj +7648: space: \x0a +7649: integer: 67 +7651: space: \x0a +7652: word: endobj +7658: space: \x0a\x0a +7660: comment: %% Contents for page 6 +7682: space: \x0a +7683: comment: %% Original object ID: 42 0 +7710: space: \x0a +7711: integer: 47 +7713: space: +7714: integer: 0 +7715: space: +7716: word: obj +7719: space: \x0a +7720: dict_open: << +7722: space: \x0a +7725: name: /Length +7732: space: +7733: integer: 48 +7735: space: +7736: integer: 0 +7737: space: +7738: word: R +7739: space: \x0a +7740: dict_close: >> +7742: space: \x0a +7743: word: stream skipping to endstream -7773: word: endstream -7782: space: \x0a -7783: word: endobj -7789: space: \x0a\x0a -7791: integer: 48 -7793: space: -7794: integer: 0 -7795: space: -7796: word: obj -7799: space: \x0a -7800: integer: 44 -7802: space: \x0a -7803: word: endobj -7809: space: \x0a\x0a -7811: comment: %% Contents for page 7 -7833: space: \x0a -7834: comment: %% Original object ID: 43 0 -7861: space: \x0a -7862: integer: 49 -7864: space: -7865: integer: 0 -7866: space: -7867: word: obj -7870: space: \x0a -7871: dict_open: << -7873: space: \x0a -7876: name: /Length -7883: space: -7884: integer: 50 -7886: space: -7887: integer: 0 -7888: space: -7889: word: R -7890: 
space: \x0a -7891: dict_close: >> -7893: space: \x0a -7894: word: stream +7794: word: endstream +7803: space: \x0a +7804: word: endobj +7810: space: \x0a\x0a +7812: integer: 48 +7814: space: +7815: integer: 0 +7816: space: +7817: word: obj +7820: space: \x0a +7821: integer: 44 +7823: space: \x0a +7824: word: endobj +7830: space: \x0a\x0a +7832: comment: %% Contents for page 7 +7854: space: \x0a +7855: comment: %% Original object ID: 43 0 +7882: space: \x0a +7883: integer: 49 +7885: space: +7886: integer: 0 +7887: space: +7888: word: obj +7891: space: \x0a +7892: dict_open: << +7894: space: \x0a +7897: name: /Length +7904: space: +7905: integer: 50 +7907: space: +7908: integer: 0 +7909: space: +7910: word: R +7911: space: \x0a +7912: dict_close: >> +7914: space: \x0a +7915: word: stream skipping to endstream -7945: word: endstream -7954: space: \x0a -7955: word: endobj -7961: space: \x0a\x0a -7963: integer: 50 -7965: space: -7966: integer: 0 -7967: space: -7968: word: obj -7971: space: \x0a -7972: integer: 44 -7974: space: \x0a -7975: word: endobj -7981: space: \x0a\x0a -7983: comment: %% Contents for page 8 -8005: space: \x0a -8006: comment: %% Original object ID: 44 0 -8033: space: \x0a -8034: integer: 51 -8036: space: -8037: integer: 0 -8038: space: -8039: word: obj -8042: space: \x0a -8043: dict_open: << -8045: space: \x0a -8048: name: /Length -8055: space: -8056: integer: 52 -8058: space: -8059: integer: 0 -8060: space: -8061: word: R -8062: space: \x0a -8063: dict_close: >> -8065: space: \x0a -8066: word: stream +8241: word: endstream +8250: space: \x0a +8251: word: endobj +8257: space: \x0a +8258: comment: %QDF: ignore_newline +8278: space: \x0a\x0a +8280: integer: 50 +8282: space: +8283: integer: 0 +8284: space: +8285: word: obj +8288: space: \x0a +8289: integer: 318 +8292: space: \x0a +8293: word: endobj +8299: space: \x0a\x0a +8301: comment: %% Contents for page 8 +8323: space: \x0a +8324: comment: %% Original object ID: 44 0 +8351: space: \x0a +8352: 
integer: 51 +8354: space: +8355: integer: 0 +8356: space: +8357: word: obj +8360: space: \x0a +8361: dict_open: << +8363: space: \x0a +8366: name: /Length +8373: space: +8374: integer: 52 +8376: space: +8377: integer: 0 +8378: space: +8379: word: R +8380: space: \x0a +8381: dict_close: >> +8383: space: \x0a +8384: word: stream skipping to endstream -8117: word: endstream -8126: space: \x0a -8127: word: endobj -8133: space: \x0a\x0a -8135: integer: 52 -8137: space: -8138: integer: 0 -8139: space: -8140: word: obj -8143: space: \x0a -8144: integer: 44 -8146: space: \x0a -8147: word: endobj -8153: space: \x0a\x0a -8155: comment: %% Contents for page 9 -8177: space: \x0a -8178: comment: %% Original object ID: 45 0 -8205: space: \x0a -8206: integer: 53 -8208: space: -8209: integer: 0 -8210: space: -8211: word: obj -8214: space: \x0a -8215: dict_open: << -8217: space: \x0a -8220: name: /Length -8227: space: -8228: integer: 54 -8230: space: -8231: integer: 0 -8232: space: -8233: word: R -8234: space: \x0a -8235: dict_close: >> -8237: space: \x0a -8238: word: stream -skipping to endstream -8289: word: endstream -8298: space: \x0a -8299: word: endobj -8305: space: \x0a\x0a -8307: integer: 54 -8309: space: -8310: integer: 0 -8311: space: -8312: word: obj -8315: space: \x0a -8316: integer: 44 -8318: space: \x0a -8319: word: endobj -8325: space: \x0a\x0a -8327: comment: %% Contents for page 10 -8350: space: \x0a -8351: comment: %% Original object ID: 46 0 -8378: space: \x0a -8379: integer: 55 -8381: space: -8382: integer: 0 -8383: space: -8384: word: obj -8387: space: \x0a -8388: dict_open: << -8390: space: \x0a -8393: name: /Length -8400: space: -8401: integer: 56 -8403: space: -8404: integer: 0 -8405: space: -8406: word: R -8407: space: \x0a -8408: dict_close: >> -8410: space: \x0a -8411: word: stream -skipping to endstream -8462: word: endstream -8471: space: \x0a -8472: word: endobj -8478: space: \x0a\x0a -8480: integer: 56 -8482: space: -8483: integer: 0 -8484: space: 
-8485: word: obj -8488: space: \x0a -8489: integer: 44 -8491: space: \x0a -8492: word: endobj -8498: space: \x0a\x0a -8500: comment: %% Contents for page 11 +8435: word: endstream +8444: space: \x0a +8445: word: endobj +8451: space: \x0a\x0a +8453: integer: 52 +8455: space: +8456: integer: 0 +8457: space: +8458: word: obj +8461: space: \x0a +8462: integer: 44 +8464: space: \x0a +8465: word: endobj +8471: space: \x0a\x0a +8473: comment: %% Contents for page 9 +8495: space: \x0a +8496: comment: %% Original object ID: 45 0 8523: space: \x0a -8524: comment: %% Original object ID: 47 0 -8551: space: \x0a -8552: integer: 57 -8554: space: -8555: integer: 0 -8556: space: -8557: word: obj -8560: space: \x0a -8561: dict_open: << -8563: space: \x0a -8566: name: /Length -8573: space: -8574: integer: 58 -8576: space: -8577: integer: 0 -8578: space: -8579: word: R -8580: space: \x0a -8581: dict_close: >> -8583: space: \x0a -8584: word: stream +8524: integer: 53 +8526: space: +8527: integer: 0 +8528: space: +8529: word: obj +8532: space: \x0a +8533: dict_open: << +8535: space: \x0a +8538: name: /Length +8545: space: +8546: integer: 54 +8548: space: +8549: integer: 0 +8550: space: +8551: word: R +8552: space: \x0a +8553: dict_close: >> +8555: space: \x0a +8556: word: stream skipping to endstream -8635: word: endstream -8644: space: \x0a -8645: word: endobj -8651: space: \x0a\x0a -8653: integer: 58 -8655: space: -8656: integer: 0 -8657: space: -8658: word: obj -8661: space: \x0a -8662: integer: 44 -8664: space: \x0a -8665: word: endobj -8671: space: \x0a\x0a -8673: integer: 59 -8675: space: -8676: integer: 0 -8677: space: -8678: word: obj -8681: space: \x0a -8682: dict_open: << -8684: space: \x0a -8687: name: /Type -8692: space: -8693: name: /XRef -8698: space: \x0a -8701: name: /Length -8708: space: -8709: integer: 240 -8712: space: \x0a -8715: name: /W -8717: space: -8718: array_open: [ -8719: space: -8720: integer: 1 +8607: word: endstream +8616: space: \x0a +8617: word: endobj 
+8623: space: \x0a\x0a +8625: integer: 54 +8627: space: +8628: integer: 0 +8629: space: +8630: word: obj +8633: space: \x0a +8634: integer: 44 +8636: space: \x0a +8637: word: endobj +8643: space: \x0a\x0a +8645: comment: %% Contents for page 10 +8668: space: \x0a +8669: comment: %% Original object ID: 46 0 +8696: space: \x0a +8697: integer: 55 +8699: space: +8700: integer: 0 +8701: space: +8702: word: obj +8705: space: \x0a +8706: dict_open: << +8708: space: \x0a +8711: name: /Length +8718: space: +8719: integer: 56 8721: space: -8722: integer: 2 +8722: integer: 0 8723: space: -8724: integer: 1 -8725: space: -8726: array_close: ] -8727: space: \x0a -8730: name: /Root -8735: space: -8736: integer: 2 -8737: space: -8738: integer: 0 -8739: space: -8740: word: R -8741: space: \x0a -8744: name: /Size -8749: space: -8750: integer: 60 -8752: space: \x0a -8755: name: /ID -8758: space: -8759: array_open: [ -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>) -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: ) -8828: array_close: ] -8829: space: \x0a -8830: dict_close: >> -8832: space: \x0a -8833: word: stream +8724: word: R +8725: space: \x0a +8726: dict_close: >> +8728: space: \x0a +8729: word: stream skipping to endstream -9081: word: endstream -9090: space: \x0a -9091: word: endobj -9097: space: \x0a\x0a -9099: word: startxref -9108: space: \x0a -9109: integer: 8673 -9113: space: \x0a -9114: comment: %%EOF -9119: space: \x0a -9120: eof +8780: word: endstream +8789: space: \x0a +8790: word: endobj +8796: space: \x0a\x0a +8798: integer: 56 +8800: space: +8801: integer: 0 +8802: space: +8803: word: obj +8806: space: \x0a +8807: integer: 44 +8809: space: \x0a +8810: word: endobj +8816: space: \x0a\x0a +8818: comment: %% Contents for page 11 +8841: space: \x0a +8842: comment: %% Original object ID: 47 0 +8869: space: \x0a +8870: integer: 57 +8872: space: +8873: integer: 0 +8874: space: 
+8875: word: obj +8878: space: \x0a +8879: dict_open: << +8881: space: \x0a +8884: name: /Length +8891: space: +8892: integer: 58 +8894: space: +8895: integer: 0 +8896: space: +8897: word: R +8898: space: \x0a +8899: dict_close: >> +8901: space: \x0a +8902: word: stream +skipping to endstream +8953: word: endstream +8962: space: \x0a +8963: word: endobj +8969: space: \x0a\x0a +8971: integer: 58 +8973: space: +8974: integer: 0 +8975: space: +8976: word: obj +8979: space: \x0a +8980: integer: 44 +8982: space: \x0a +8983: word: endobj +8989: space: \x0a\x0a +8991: integer: 59 +8993: space: +8994: integer: 0 +8995: space: +8996: word: obj +8999: space: \x0a +9000: dict_open: << +9002: space: \x0a +9005: name: /Type +9010: space: +9011: name: /XRef +9016: space: \x0a +9019: name: /Length +9026: space: +9027: integer: 240 +9030: space: \x0a +9033: name: /W +9035: space: +9036: array_open: [ +9037: space: +9038: integer: 1 +9039: space: +9040: integer: 2 +9041: space: +9042: integer: 1 +9043: space: +9044: array_close: ] +9045: space: \x0a +9048: name: /Root +9053: space: +9054: integer: 2 +9055: space: +9056: integer: 0 +9057: space: +9058: word: R +9059: space: \x0a +9062: name: /Size +9067: space: +9068: integer: 60 +9070: space: \x0a +9073: name: /ID +9076: space: +9077: array_open: [ +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>) +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: ) +9146: array_close: ] +9147: space: \x0a +9148: dict_close: >> +9150: space: \x0a +9151: word: stream +skipping to endstream +9399: word: endstream +9408: space: \x0a +9409: word: endobj +9415: space: \x0a\x0a +9417: word: startxref +9426: space: \x0a +9427: integer: 8991 +9431: space: \x0a +9432: comment: %%EOF +9437: space: \x0a +9438: eof --- END FILE --- --- BEGIN PAGE 1 --- 0: word: BT @@ -595,9 +599,7 @@ skipping to endstream 103: dict_close: >> 105: space: \x0a 106: word: ID -skipping to 
EI -352: word: EI -354: space: \x0a +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a 355: word: BT 357: space: \x0a 360: name: /F1 @@ -743,13 +745,11 @@ skipping to EI 47: word: ET 49: space: \x0a\x00\x0a 52: name: /ThisMustBeLast -67: space: \x0a -68: eof +67: eof --- END PAGE 5 --- --- BEGIN PAGE 6 --- 0: word: ID -skipping to EI -EI not found +EI not found; resuming normal scanning 2: space: \x0a 5: name: /F1 8: space: @@ -772,27 +772,37 @@ EI not found 44: eof --- END PAGE 6 --- --- BEGIN PAGE 7 --- -0: word: BT -2: space: \x0a -5: name: /F1 -8: space: -9: integer: 24 -11: space: -12: word: Tf -14: space: \x0a -17: integer: 72 +0: name: /potato +7: space: \x0a +8: word: BI +10: space: \x0a +11: name: /CS +14: space: +15: name: /G +17: name: /W 19: space: -20: integer: 720 -23: space: -24: word: Td -26: space: \x0a -29: string: Potato (raw: (Potato)) -37: space: -38: word: Tj -40: space: \x0a -41: word: ET -43: space: \x0a -44: eof +20: integer: 66 +22: name: /H +24: space: +25: integer: 47 +27: name: /BPC +31: space: +32: integer: 8 +33: name: /F +35: name: /Fl +38: name: /DP +41: dict_open: << +43: name: /Predictor +53: space: +54: integer: 15 +56: name: /Columns +64: space: +65: integer: 66 +67: dict_close: >> +69: space: \x0a +70: word: ID +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 
\x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI +318: eof --- END PAGE 7 --- --- BEGIN PAGE 8 --- 0: word: BT diff --git a/qpdf/qtest/qpdf/tokens-no-ignorable.out b/qpdf/qtest/qpdf/tokens-no-ignorable.out index 2ad1e3a6..3bbef579 100644 --- a/qpdf/qtest/qpdf/tokens-no-ignorable.out +++ b/qpdf/qtest/qpdf/tokens-no-ignorable.out @@ -101,152 +101,152 @@ skipping to endstream skipping to endstream 7601: word: endstream 7611: word: endobj -7619: integer: 46 -7622: integer: 0 -7624: word: obj -7628: integer: 68 -7631: word: endobj -7690: integer: 47 -7693: integer: 0 -7695: word: obj -7699: dict_open: << -7704: name: /Length -7712: integer: 48 -7715: integer: 0 -7717: word: R -7719: dict_close: >> -7722: word: stream +7640: integer: 46 +7643: integer: 0 +7645: word: obj +7649: integer: 67 +7652: word: endobj +7711: integer: 47 +7714: integer: 0 +7716: word: obj +7720: dict_open: << +7725: name: /Length +7733: integer: 48 +7736: integer: 0 +7738: word: R +7740: dict_close: >> +7743: word: stream skipping to endstream -7773: word: endstream -7783: word: endobj -7791: integer: 48 -7794: integer: 0 -7796: word: obj -7800: integer: 44 -7803: word: endobj -7862: integer: 49 -7865: integer: 0 -7867: word: obj -7871: dict_open: << -7876: name: /Length -7884: integer: 50 -7887: integer: 0 -7889: word: R -7891: dict_close: >> -7894: word: stream +7794: word: endstream +7804: word: endobj 
+7812: integer: 48 +7815: integer: 0 +7817: word: obj +7821: integer: 44 +7824: word: endobj +7883: integer: 49 +7886: integer: 0 +7888: word: obj +7892: dict_open: << +7897: name: /Length +7905: integer: 50 +7908: integer: 0 +7910: word: R +7912: dict_close: >> +7915: word: stream skipping to endstream -7945: word: endstream -7955: word: endobj -7963: integer: 50 -7966: integer: 0 -7968: word: obj -7972: integer: 44 -7975: word: endobj -8034: integer: 51 -8037: integer: 0 -8039: word: obj -8043: dict_open: << -8048: name: /Length -8056: integer: 52 -8059: integer: 0 -8061: word: R -8063: dict_close: >> -8066: word: stream +8241: word: endstream +8251: word: endobj +8280: integer: 50 +8283: integer: 0 +8285: word: obj +8289: integer: 318 +8293: word: endobj +8352: integer: 51 +8355: integer: 0 +8357: word: obj +8361: dict_open: << +8366: name: /Length +8374: integer: 52 +8377: integer: 0 +8379: word: R +8381: dict_close: >> +8384: word: stream skipping to endstream -8117: word: endstream -8127: word: endobj -8135: integer: 52 -8138: integer: 0 -8140: word: obj -8144: integer: 44 -8147: word: endobj -8206: integer: 53 -8209: integer: 0 -8211: word: obj -8215: dict_open: << -8220: name: /Length -8228: integer: 54 -8231: integer: 0 -8233: word: R -8235: dict_close: >> -8238: word: stream +8435: word: endstream +8445: word: endobj +8453: integer: 52 +8456: integer: 0 +8458: word: obj +8462: integer: 44 +8465: word: endobj +8524: integer: 53 +8527: integer: 0 +8529: word: obj +8533: dict_open: << +8538: name: /Length +8546: integer: 54 +8549: integer: 0 +8551: word: R +8553: dict_close: >> +8556: word: stream skipping to endstream -8289: word: endstream -8299: word: endobj -8307: integer: 54 -8310: integer: 0 -8312: word: obj -8316: integer: 44 -8319: word: endobj -8379: integer: 55 -8382: integer: 0 -8384: word: obj -8388: dict_open: << -8393: name: /Length -8401: integer: 56 -8404: integer: 0 -8406: word: R -8408: dict_close: >> -8411: word: stream +8607: word: 
endstream +8617: word: endobj +8625: integer: 54 +8628: integer: 0 +8630: word: obj +8634: integer: 44 +8637: word: endobj +8697: integer: 55 +8700: integer: 0 +8702: word: obj +8706: dict_open: << +8711: name: /Length +8719: integer: 56 +8722: integer: 0 +8724: word: R +8726: dict_close: >> +8729: word: stream skipping to endstream -8462: word: endstream -8472: word: endobj -8480: integer: 56 -8483: integer: 0 -8485: word: obj -8489: integer: 44 -8492: word: endobj -8552: integer: 57 -8555: integer: 0 -8557: word: obj -8561: dict_open: << -8566: name: /Length -8574: integer: 58 -8577: integer: 0 -8579: word: R -8581: dict_close: >> -8584: word: stream +8780: word: endstream +8790: word: endobj +8798: integer: 56 +8801: integer: 0 +8803: word: obj +8807: integer: 44 +8810: word: endobj +8870: integer: 57 +8873: integer: 0 +8875: word: obj +8879: dict_open: << +8884: name: /Length +8892: integer: 58 +8895: integer: 0 +8897: word: R +8899: dict_close: >> +8902: word: stream skipping to endstream -8635: word: endstream -8645: word: endobj -8653: integer: 58 -8656: integer: 0 -8658: word: obj -8662: integer: 44 -8665: word: endobj -8673: integer: 59 -8676: integer: 0 -8678: word: obj -8682: dict_open: << -8687: name: /Type -8693: name: /XRef -8701: name: /Length -8709: integer: 240 -8715: name: /W -8718: array_open: [ -8720: integer: 1 -8722: integer: 2 -8724: integer: 1 -8726: array_close: ] -8730: name: /Root -8736: integer: 2 -8738: integer: 0 -8740: word: R -8744: name: /Size -8750: integer: 60 -8755: name: /ID -8759: array_open: [ -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>) -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: ) -8828: array_close: ] -8830: dict_close: >> -8833: word: stream +8953: word: endstream +8963: word: endobj +8971: integer: 58 +8974: integer: 0 +8976: word: obj +8980: integer: 44 +8983: word: endobj +8991: integer: 59 +8994: integer: 0 +8996: 
word: obj +9000: dict_open: << +9005: name: /Type +9011: name: /XRef +9019: name: /Length +9027: integer: 240 +9033: name: /W +9036: array_open: [ +9038: integer: 1 +9040: integer: 2 +9042: integer: 1 +9044: array_close: ] +9048: name: /Root +9054: integer: 2 +9056: integer: 0 +9058: word: R +9062: name: /Size +9068: integer: 60 +9073: name: /ID +9077: array_open: [ +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>) +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: ) +9146: array_close: ] +9148: dict_close: >> +9151: word: stream skipping to endstream -9081: word: endstream -9091: word: endobj -9099: word: startxref -9109: integer: 8673 -9120: eof +9399: word: endstream +9409: word: endobj +9417: word: startxref +9427: integer: 8991 +9438: eof --- END FILE --- --- BEGIN PAGE 1 --- 0: word: BT @@ -291,8 +291,7 @@ skipping to endstream 101: integer: 66 103: dict_close: >> 106: word: ID -skipping to EI -352: word: EI +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a 355: word: BT 360: name: /F1 364: integer: 24 @@ -374,12 +373,11 @@ skipping to EI 44: word: Tj 47: word: ET 52: name: /ThisMustBeLast -68: eof +67: eof --- END PAGE 5 --- --- BEGIN PAGE 6 --- 0: word: ID -skipping to EI -EI not found +EI not found; resuming normal scanning 5: 
name: /F1 9: integer: 24 12: word: Tf @@ -392,17 +390,28 @@ EI not found 44: eof --- END PAGE 6 --- --- BEGIN PAGE 7 --- -0: word: BT -5: name: /F1 -9: integer: 24 -12: word: Tf -17: integer: 72 -20: integer: 720 -24: word: Td -29: string: Potato (raw: (Potato)) -38: word: Tj -41: word: ET -44: eof +0: name: /potato +8: word: BI +11: name: /CS +15: name: /G +17: name: /W +20: integer: 66 +22: name: /H +25: integer: 47 +27: name: /BPC +32: integer: 8 +33: name: /F +35: name: /Fl +38: name: /DP +41: dict_open: << +43: name: /Predictor +54: integer: 15 +56: name: /Columns +65: integer: 66 +67: dict_close: >> +70: word: ID +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI +318: eof --- END PAGE 7 --- --- BEGIN PAGE 8 --- 0: word: BT diff --git a/qpdf/qtest/qpdf/tokens.out b/qpdf/qtest/qpdf/tokens.out index 2e08f3e0..ee1f6d3e 100644 --- a/qpdf/qtest/qpdf/tokens.out +++ b/qpdf/qtest/qpdf/tokens.out @@ -222,307 +222,311 @@ skipping to endstream 7601: word: endstream 7610: space: \x0a 7611: word: endobj -7617: space: \x0a\x0a -7619: integer: 46 -7621: space: -7622: integer: 0 -7623: space: -7624: word: obj -7627: space: \x0a -7628: integer: 68 -7630: space: \x0a -7631: word: endobj -7637: space: \x0a\x0a -7639: comment: %% Contents for page 6 -7661: space: \x0a -7662: comment: %% Original object ID: 42 0 -7689: space: \x0a -7690: integer: 47 
-7692: space: -7693: integer: 0 -7694: space: -7695: word: obj -7698: space: \x0a -7699: dict_open: << -7701: space: \x0a -7704: name: /Length -7711: space: -7712: integer: 48 -7714: space: -7715: integer: 0 -7716: space: -7717: word: R -7718: space: \x0a -7719: dict_close: >> -7721: space: \x0a -7722: word: stream +7617: space: \x0a +7618: comment: %QDF: ignore_newline +7638: space: \x0a\x0a +7640: integer: 46 +7642: space: +7643: integer: 0 +7644: space: +7645: word: obj +7648: space: \x0a +7649: integer: 67 +7651: space: \x0a +7652: word: endobj +7658: space: \x0a\x0a +7660: comment: %% Contents for page 6 +7682: space: \x0a +7683: comment: %% Original object ID: 42 0 +7710: space: \x0a +7711: integer: 47 +7713: space: +7714: integer: 0 +7715: space: +7716: word: obj +7719: space: \x0a +7720: dict_open: << +7722: space: \x0a +7725: name: /Length +7732: space: +7733: integer: 48 +7735: space: +7736: integer: 0 +7737: space: +7738: word: R +7739: space: \x0a +7740: dict_close: >> +7742: space: \x0a +7743: word: stream skipping to endstream -7773: word: endstream -7782: space: \x0a -7783: word: endobj -7789: space: \x0a\x0a -7791: integer: 48 -7793: space: -7794: integer: 0 -7795: space: -7796: word: obj -7799: space: \x0a -7800: integer: 44 -7802: space: \x0a -7803: word: endobj -7809: space: \x0a\x0a -7811: comment: %% Contents for page 7 -7833: space: \x0a -7834: comment: %% Original object ID: 43 0 -7861: space: \x0a -7862: integer: 49 -7864: space: -7865: integer: 0 -7866: space: -7867: word: obj -7870: space: \x0a -7871: dict_open: << -7873: space: \x0a -7876: name: /Length -7883: space: -7884: integer: 50 -7886: space: -7887: integer: 0 -7888: space: -7889: word: R -7890: space: \x0a -7891: dict_close: >> -7893: space: \x0a -7894: word: stream +7794: word: endstream +7803: space: \x0a +7804: word: endobj +7810: space: \x0a\x0a +7812: integer: 48 +7814: space: +7815: integer: 0 +7816: space: +7817: word: obj +7820: space: \x0a +7821: integer: 44 +7823: space: 
\x0a +7824: word: endobj +7830: space: \x0a\x0a +7832: comment: %% Contents for page 7 +7854: space: \x0a +7855: comment: %% Original object ID: 43 0 +7882: space: \x0a +7883: integer: 49 +7885: space: +7886: integer: 0 +7887: space: +7888: word: obj +7891: space: \x0a +7892: dict_open: << +7894: space: \x0a +7897: name: /Length +7904: space: +7905: integer: 50 +7907: space: +7908: integer: 0 +7909: space: +7910: word: R +7911: space: \x0a +7912: dict_close: >> +7914: space: \x0a +7915: word: stream skipping to endstream -7945: word: endstream -7954: space: \x0a -7955: word: endobj -7961: space: \x0a\x0a -7963: integer: 50 -7965: space: -7966: integer: 0 -7967: space: -7968: word: obj -7971: space: \x0a -7972: integer: 44 -7974: space: \x0a -7975: word: endobj -7981: space: \x0a\x0a -7983: comment: %% Contents for page 8 -8005: space: \x0a -8006: comment: %% Original object ID: 44 0 -8033: space: \x0a -8034: integer: 51 -8036: space: -8037: integer: 0 -8038: space: -8039: word: obj -8042: space: \x0a -8043: dict_open: << -8045: space: \x0a -8048: name: /Length -8055: space: -8056: integer: 52 -8058: space: -8059: integer: 0 -8060: space: -8061: word: R -8062: space: \x0a -8063: dict_close: >> -8065: space: \x0a -8066: word: stream +8241: word: endstream +8250: space: \x0a +8251: word: endobj +8257: space: \x0a +8258: comment: %QDF: ignore_newline +8278: space: \x0a\x0a +8280: integer: 50 +8282: space: +8283: integer: 0 +8284: space: +8285: word: obj +8288: space: \x0a +8289: integer: 318 +8292: space: \x0a +8293: word: endobj +8299: space: \x0a\x0a +8301: comment: %% Contents for page 8 +8323: space: \x0a +8324: comment: %% Original object ID: 44 0 +8351: space: \x0a +8352: integer: 51 +8354: space: +8355: integer: 0 +8356: space: +8357: word: obj +8360: space: \x0a +8361: dict_open: << +8363: space: \x0a +8366: name: /Length +8373: space: +8374: integer: 52 +8376: space: +8377: integer: 0 +8378: space: +8379: word: R +8380: space: \x0a +8381: dict_close: >> +8383: 
space: \x0a +8384: word: stream skipping to endstream -8117: word: endstream -8126: space: \x0a -8127: word: endobj -8133: space: \x0a\x0a -8135: integer: 52 -8137: space: -8138: integer: 0 -8139: space: -8140: word: obj -8143: space: \x0a -8144: integer: 44 -8146: space: \x0a -8147: word: endobj -8153: space: \x0a\x0a -8155: comment: %% Contents for page 9 -8177: space: \x0a -8178: comment: %% Original object ID: 45 0 -8205: space: \x0a -8206: integer: 53 -8208: space: -8209: integer: 0 -8210: space: -8211: word: obj -8214: space: \x0a -8215: dict_open: << -8217: space: \x0a -8220: name: /Length -8227: space: -8228: integer: 54 -8230: space: -8231: integer: 0 -8232: space: -8233: word: R -8234: space: \x0a -8235: dict_close: >> -8237: space: \x0a -8238: word: stream -skipping to endstream -8289: word: endstream -8298: space: \x0a -8299: word: endobj -8305: space: \x0a\x0a -8307: integer: 54 -8309: space: -8310: integer: 0 -8311: space: -8312: word: obj -8315: space: \x0a -8316: integer: 44 -8318: space: \x0a -8319: word: endobj -8325: space: \x0a\x0a -8327: comment: %% Contents for page 10 -8350: space: \x0a -8351: comment: %% Original object ID: 46 0 -8378: space: \x0a -8379: integer: 55 -8381: space: -8382: integer: 0 -8383: space: -8384: word: obj -8387: space: \x0a -8388: dict_open: << -8390: space: \x0a -8393: name: /Length -8400: space: -8401: integer: 56 -8403: space: -8404: integer: 0 -8405: space: -8406: word: R -8407: space: \x0a -8408: dict_close: >> -8410: space: \x0a -8411: word: stream -skipping to endstream -8462: word: endstream -8471: space: \x0a -8472: word: endobj -8478: space: \x0a\x0a -8480: integer: 56 -8482: space: -8483: integer: 0 -8484: space: -8485: word: obj -8488: space: \x0a -8489: integer: 44 -8491: space: \x0a -8492: word: endobj -8498: space: \x0a\x0a -8500: comment: %% Contents for page 11 +8435: word: endstream +8444: space: \x0a +8445: word: endobj +8451: space: \x0a\x0a +8453: integer: 52 +8455: space: +8456: integer: 0 +8457: 
space: +8458: word: obj +8461: space: \x0a +8462: integer: 44 +8464: space: \x0a +8465: word: endobj +8471: space: \x0a\x0a +8473: comment: %% Contents for page 9 +8495: space: \x0a +8496: comment: %% Original object ID: 45 0 8523: space: \x0a -8524: comment: %% Original object ID: 47 0 -8551: space: \x0a -8552: integer: 57 -8554: space: -8555: integer: 0 -8556: space: -8557: word: obj -8560: space: \x0a -8561: dict_open: << -8563: space: \x0a -8566: name: /Length -8573: space: -8574: integer: 58 -8576: space: -8577: integer: 0 -8578: space: -8579: word: R -8580: space: \x0a -8581: dict_close: >> -8583: space: \x0a -8584: word: stream +8524: integer: 53 +8526: space: +8527: integer: 0 +8528: space: +8529: word: obj +8532: space: \x0a +8533: dict_open: << +8535: space: \x0a +8538: name: /Length +8545: space: +8546: integer: 54 +8548: space: +8549: integer: 0 +8550: space: +8551: word: R +8552: space: \x0a +8553: dict_close: >> +8555: space: \x0a +8556: word: stream skipping to endstream -8635: word: endstream -8644: space: \x0a -8645: word: endobj -8651: space: \x0a\x0a -8653: integer: 58 -8655: space: -8656: integer: 0 -8657: space: -8658: word: obj -8661: space: \x0a -8662: integer: 44 -8664: space: \x0a -8665: word: endobj -8671: space: \x0a\x0a -8673: integer: 59 -8675: space: -8676: integer: 0 -8677: space: -8678: word: obj -8681: space: \x0a -8682: dict_open: << -8684: space: \x0a -8687: name: /Type -8692: space: -8693: name: /XRef -8698: space: \x0a -8701: name: /Length -8708: space: -8709: integer: 240 -8712: space: \x0a -8715: name: /W -8717: space: -8718: array_open: [ -8719: space: -8720: integer: 1 +8607: word: endstream +8616: space: \x0a +8617: word: endobj +8623: space: \x0a\x0a +8625: integer: 54 +8627: space: +8628: integer: 0 +8629: space: +8630: word: obj +8633: space: \x0a +8634: integer: 44 +8636: space: \x0a +8637: word: endobj +8643: space: \x0a\x0a +8645: comment: %% Contents for page 10 +8668: space: \x0a +8669: comment: %% Original object 
ID: 46 0 +8696: space: \x0a +8697: integer: 55 +8699: space: +8700: integer: 0 +8701: space: +8702: word: obj +8705: space: \x0a +8706: dict_open: << +8708: space: \x0a +8711: name: /Length +8718: space: +8719: integer: 56 8721: space: -8722: integer: 2 +8722: integer: 0 8723: space: -8724: integer: 1 -8725: space: -8726: array_close: ] -8727: space: \x0a -8730: name: /Root -8735: space: -8736: integer: 2 -8737: space: -8738: integer: 0 -8739: space: -8740: word: R -8741: space: \x0a -8744: name: /Size -8749: space: -8750: integer: 60 -8752: space: \x0a -8755: name: /ID -8758: space: -8759: array_open: [ -8760: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>) -8794: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: ) -8828: array_close: ] -8829: space: \x0a -8830: dict_close: >> -8832: space: \x0a -8833: word: stream +8724: word: R +8725: space: \x0a +8726: dict_close: >> +8728: space: \x0a +8729: word: stream skipping to endstream -9081: word: endstream -9090: space: \x0a -9091: word: endobj -9097: space: \x0a\x0a -9099: word: startxref -9108: space: \x0a -9109: integer: 8673 -9113: space: \x0a -9114: comment: %%EOF -9119: space: \x0a -9120: eof +8780: word: endstream +8789: space: \x0a +8790: word: endobj +8796: space: \x0a\x0a +8798: integer: 56 +8800: space: +8801: integer: 0 +8802: space: +8803: word: obj +8806: space: \x0a +8807: integer: 44 +8809: space: \x0a +8810: word: endobj +8816: space: \x0a\x0a +8818: comment: %% Contents for page 11 +8841: space: \x0a +8842: comment: %% Original object ID: 47 0 +8869: space: \x0a +8870: integer: 57 +8872: space: +8873: integer: 0 +8874: space: +8875: word: obj +8878: space: \x0a +8879: dict_open: << +8881: space: \x0a +8884: name: /Length +8891: space: +8892: integer: 58 +8894: space: +8895: integer: 0 +8896: space: +8897: word: R +8898: space: \x0a +8899: dict_close: >> +8901: space: \x0a +8902: word: stream +skipping to endstream +8953: 
word: endstream +8962: space: \x0a +8963: word: endobj +8969: space: \x0a\x0a +8971: integer: 58 +8973: space: +8974: integer: 0 +8975: space: +8976: word: obj +8979: space: \x0a +8980: integer: 44 +8982: space: \x0a +8983: word: endobj +8989: space: \x0a\x0a +8991: integer: 59 +8993: space: +8994: integer: 0 +8995: space: +8996: word: obj +8999: space: \x0a +9000: dict_open: << +9002: space: \x0a +9005: name: /Type +9010: space: +9011: name: /XRef +9016: space: \x0a +9019: name: /Length +9026: space: +9027: integer: 240 +9030: space: \x0a +9033: name: /W +9035: space: +9036: array_open: [ +9037: space: +9038: integer: 1 +9039: space: +9040: integer: 2 +9041: space: +9042: integer: 1 +9043: space: +9044: array_close: ] +9045: space: \x0a +9048: name: /Root +9053: space: +9054: integer: 2 +9055: space: +9056: integer: 0 +9057: space: +9058: word: R +9059: space: \x0a +9062: name: /Size +9067: space: +9068: integer: 60 +9070: space: \x0a +9073: name: /ID +9076: space: +9077: array_open: [ +9078: string: \x88\x04\x8e\x17\xc9a\xe0\x94\xff\xec\xe9\x8c\xb8\x8cF\xd0 (raw: <88048e17c961e094ffece98cb88c46d0>) +9112: string: \xed\xd6\x0f\xe8\xee\x87\xf8\x871\xa8o\x81\x9f\xe6Q\x99 (raw: ) +9146: array_close: ] +9147: space: \x0a +9148: dict_close: >> +9150: space: \x0a +9151: word: stream +skipping to endstream +9399: word: endstream +9408: space: \x0a +9409: word: endobj +9415: space: \x0a\x0a +9417: word: startxref +9426: space: \x0a +9427: integer: 8991 +9431: space: \x0a +9432: comment: %%EOF +9437: space: \x0a +9438: eof --- END FILE --- --- BEGIN PAGE 1 --- 0: word: BT @@ -595,9 +599,7 @@ skipping to endstream 103: dict_close: >> 105: space: \x0a 106: word: ID -skipping to EI -352: word: EI -354: space: \x0a +108: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae 
\xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e \xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI\x0a 355: word: BT 357: space: \x0a 360: name: /F1 @@ -743,13 +745,11 @@ skipping to EI 47: word: ET 49: space: \x0a\x00\x0a 52: name: /ThisMustBeLast -67: space: \x0a -68: eof +67: eof --- END PAGE 5 --- --- BEGIN PAGE 6 --- 0: word: ID -skipping to EI -EI not found +EI not found; resuming normal scanning 2: space: \x0a 5: name: /F1 8: space: @@ -772,27 +772,37 @@ EI not found 44: eof --- END PAGE 6 --- --- BEGIN PAGE 7 --- -0: word: BT -2: space: \x0a -5: name: /F1 -8: space: -9: integer: 24 -11: space: -12: word: Tf -14: space: \x0a -17: integer: 72 +0: name: /potato +7: space: \x0a +8: word: BI +10: space: \x0a +11: name: /CS +14: space: +15: name: /G +17: name: /W 19: space: -20: integer: 720 -23: space: -24: word: Td -26: space: \x0a -29: string: Potato (raw: (Potato)) -37: space: -38: word: Tj -40: space: \x0a -41: word: ET -43: space: \x0a -44: eof +20: integer: 66 +22: name: /H +24: space: +25: integer: 47 +27: name: /BPC +31: space: +32: integer: 8 +33: name: /F +35: name: /Fl +38: name: /DP +41: dict_open: << +43: name: /Predictor +53: space: +54: integer: 15 +56: name: /Columns +64: space: +65: integer: 66 +67: dict_close: >> +69: space: \x0a +70: word: ID +72: inline-image: x\x9c\xc5\xd6I\x0e\xc3 \x0c\x05P|\xffC;U\xc8`\xc0\xd37\x91Z\xa9\x0b\xa6\x17\x02\xc4\x98\xda\xe6\x8f\x1b}D\xf0\xef_\xb4\xf8\x1c\xc9W\xa9\x84\x9c\xc4-\x94\x88>\xff\x87\xc0\x8d>\x94^\x01&\xae\xa1u\xe2]\x80"!\x87\x95\x08\x96\x05*\xac&\x8fE|Sy\xae \xf0d-\x80<\x9d\x19B\x010B\x05\xfa@N\x11\xea+<\x1fhl\xe8K\xd0\xee/56L\xa0\x89\x90\xe3\x19\x1e 
\xa3\x96\xb9\xa6>0\x06>\x15Y\x81\xf9!c\xec\\x0eY\x0c\xd8\x0f%Y\xf0\x01\xa5\xd68?&\xa0\xd6\xeb\x88}j\x92\xfb\xe8\x1d;\xab\x8d3\x9d\xc2\xd6l\x14p\xdbsH\xf6\xfbt\xfa\x01Q\x02\xd8Tt*h\xccU\xfa\xe3w\x07\xcd\xd5\xd0%\xa8)p\x96\xb3"\x95DiRj\xb9\x96D\x18YNU\x11\xd3\xd9Av\x92F\xe0&\x0d\x90\xcd\xd4u#c\x95\xc6W\x09\xf4\xdf\x89\x03W\x93O\x0d\x0aEI +318: eof --- END PAGE 7 --- --- BEGIN PAGE 8 --- 0: word: BT diff --git a/qpdf/qtest/qpdf/tokens.pdf b/qpdf/qtest/qpdf/tokens.pdf index b444db5f..a7157eef 100644 Binary files a/qpdf/qtest/qpdf/tokens.pdf and b/qpdf/qtest/qpdf/tokens.pdf differ diff --git a/qpdf/test_tokenizer.cc b/qpdf/test_tokenizer.cc index 6e694bfb..aa6c8acc 100644 --- a/qpdf/test_tokenizer.cc +++ b/qpdf/test_tokenizer.cc @@ -88,6 +88,8 @@ static char const* tokenTypeName(QPDFTokenizer::token_type_e ttype) return "space"; case QPDFTokenizer::tt_comment: return "comment"; + case QPDFTokenizer::tt_inline_image: + return "inline-image"; } return 0; } @@ -131,7 +133,6 @@ dump_tokens(PointerHolder is, std::string const& label, bool skip_streams, bool skip_inline_images) { Finder f1(is, "endstream"); - Finder f2(is, "EI"); std::cout << "--- BEGIN " << label << " ---" << std::endl; bool done = false; QPDFTokenizer tokenizer; @@ -140,10 +141,20 @@ dump_tokens(PointerHolder is, std::string const& label, { tokenizer.includeIgnorable(); } + qpdf_offset_t inline_image_offset = 0; while (! done) { QPDFTokenizer::Token token = - tokenizer.readToken(is, "test", true, max_len); + tokenizer.readToken(is, "test", true, + inline_image_offset ? 
0 : max_len); + if (inline_image_offset && (token.getType() == QPDFTokenizer::tt_bad)) + { + std::cout << "EI not found; resuming normal scanning" << std::endl; + is->seek(inline_image_offset, SEEK_SET); + inline_image_offset = 0; + continue; + } + inline_image_offset = 0; qpdf_offset_t offset = is->getLastOffset(); std::cout << offset << ": " @@ -170,7 +181,8 @@ dump_tokens(PointerHolder is, std::string const& label, else if (skip_inline_images && (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID"))) { - try_skipping(tokenizer, is, max_len, "EI", f2); + tokenizer.expectInlineImage(); + inline_image_offset = is->tell(); } else if (token.getType() == QPDFTokenizer::tt_eof) {