From b30deaeeaba3941d7615bc2cc89c664b1273e5df Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 23 Oct 2020 06:40:27 -0400 Subject: [PATCH] Avoid merging adjacent tokens when concatenating contents (fixes #444) --- ChangeLog | 6 + TODO | 1 - libqpdf/QPDFObjectHandle.cc | 53 +++- manual/qpdf-manual.xml | 39 +-- qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf.test | 18 +- qpdf/qtest/qpdf/coalesce-out.pdf | Bin 1623 -> 2951 bytes qpdf/qtest/qpdf/coalesce-out.qdf | Bin 2192 -> 3520 bytes qpdf/qtest/qpdf/coalesce.pdf | Bin 2445 -> 3769 bytes qpdf/qtest/qpdf/coalesce.qdf | Bin 2801 -> 4126 bytes qpdf/qtest/qpdf/normalize-warnings.out | 16 +- ...lit-1-2.pdf => split-tokens-split-1-2.pdf} | 0 ...lesce-split.out => split-tokens-split.out} | 2 +- qpdf/qtest/qpdf/split-tokens.pdf | 217 ++++++++++++++++ qpdf/qtest/qpdf/split-tokens.qdf | 231 ++++++++++++++++++ qpdf/qtest/qpdf/token-filters-out.pdf | Bin 2178 -> 3505 bytes 16 files changed, 541 insertions(+), 43 deletions(-) rename qpdf/qtest/qpdf/{coalesce-split-1-2.pdf => split-tokens-split-1-2.pdf} (100%) rename qpdf/qtest/qpdf/{coalesce-split.out => split-tokens-split.out} (88%) create mode 100644 qpdf/qtest/qpdf/split-tokens.pdf create mode 100644 qpdf/qtest/qpdf/split-tokens.qdf diff --git a/ChangeLog b/ChangeLog index f7ba4f6a..fd057636 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2020-10-23 Jay Berkenbilt + * Bug fix: when concatenating content streams, insert a newline if + needed to prevent the last token from the old stream from being + merged with the first token of the new stream. Qpdf was mistakenly + concatenating the streams without regard to the specification that + content streams are to be broken on token boundaries. Fixes #444. + * Bug fix: fix-qdf: properly handle empty streams with ignore newline. diff --git a/TODO b/TODO index cd6f4c88..2e3898ff 100644 --- a/TODO +++ b/TODO @@ -4,7 +4,6 @@ Candidates for upcoming release * Open "next" issues * bugs * #473: zsh completion with directories - * #444: concatenated stream/whitespace bug * Non-bugs * #446: recognize edited QDF files * #436: parsing of document with form xobject diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 85493680..472ff4e8 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -165,6 +165,47 @@ QPDFObjectHandle::ParserCallbacks::terminateParsing() throw TerminateParsing(); } +class LastChar: public Pipeline +{ + public: + LastChar(Pipeline* next); + virtual ~LastChar() = default; + virtual void write(unsigned char* data, size_t len); + virtual void finish(); + unsigned char getLastChar(); + + private: + unsigned char last_char; +}; + +LastChar::LastChar(Pipeline* next) : + Pipeline("lastchar", next), + last_char(0) +{ +} + +void +LastChar::write(unsigned char* data, size_t len) +{ + if (len > 0) + { + this->last_char = data[len - 1]; + } + getNext()->write(data, len); +} + +void +LastChar::finish() +{ + getNext()->finish(); +} + +unsigned char +LastChar::getLastChar() +{ + return this->last_char; +} + QPDFObjectHandle::QPDFObjectHandle() : initialized(false), qpdf(0), @@ -1600,21 +1641,31 @@ QPDFObjectHandle::pipeContentStreams( std::vector streams = arrayOrStreamToStreamArray( description, all_description); + bool need_newline = false; for (std::vector::iterator iter = streams.begin(); iter != streams.end(); ++iter) { + if (need_newline) + { + p->write(QUtil::unsigned_char_pointer("\n"), 1); + } + LastChar lc(p); QPDFObjectHandle stream = *iter; std::string og = QUtil::int_to_string(stream.getObjectID()) + " " + QUtil::int_to_string(stream.getGeneration()); std::string w_description = "content stream object " + og; - if (! stream.pipeStreamData(p, 0, qpdf_dl_specialized)) + if (! stream.pipeStreamData(&lc, 0, qpdf_dl_specialized)) { QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent"); throw QPDFExc(qpdf_e_damaged_pdf, "content stream", w_description, 0, "errors while decoding content stream"); } + lc.finish(); + need_newline = (lc.getLastChar() != static_cast('\n')); + QTC::TC("qpdf", "QPDFObjectHandle need_newline", + need_newline ? 0 : 1); } } diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 866a5016..659fbd08 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -2090,14 +2090,9 @@ outfile.pdf option causes qpdf to combine them into a single stream. Use of this option is never necessary for ordinary usage, but it can help when working with some files in some cases. For - example, some PDF writers split page contents into small - streams at arbitrary points that may fall in the middle of - lexical tokens within the content, and some PDF readers may - get confused on such files. If you use qpdf to coalesce the - content streams, such readers may be able to work with the - file more easily. This can also be combined with QDF mode or - content normalization to make it easier to look at all of a - page's contents at once. + example, this can also be combined with QDF mode or content + normalization to make it easier to look at all of a page's + contents at once. @@ -2398,25 +2393,15 @@ outfile.pdf You should not use this for “production” PDF files. - This paragraph discusses edge cases of content normalization that - are not of concern to most users and are not relevant when content - normalization is not enabled. When normalizing content, if qpdf - runs into any lexical errors, it will print a warning indicating - that content may be damaged. The only situation in which qpdf is - known to cause damage during content normalization is when a - page's contents are split across multiple streams and streams are - split in the middle of a lexical token such as a string, name, or - inline image. There may be some pathological cases in which qpdf - could damage content without noticing this, such as if the partial - tokens at the end of one stream and the beginning of the next - stream are both valid, but usually qpdf will be able to detect - this case. For slightly increased safety, you can specify - in addition to - or . - This will cause qpdf to combine all the content streams into one, - thus recombining any split tokens. However doing this will prevent - you from being able to see the original layout of the content - streams. If you must inspect the original content streams in an + When normalizing content, if qpdf runs into any lexical errors, it + will print a warning indicating that content may be damaged. The + only situation in which qpdf is known to cause damage during + content normalization is when a page's contents are split across + multiple streams and streams are split in the middle of a lexical + token such as a string, name, or inline image. Note that files + that do this are invalid since the PDF specification states that + content streams are not to be split in the middle of a token. If + you want to inspect the original content streams in an uncompressed format, you can always run with for a QDF file without content normalization, or alternatively diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 621ec53a..ced20279 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -455,3 +455,4 @@ qpdf found shared resources in leaf 0 qpdf found shared xobject in leaf 0 QPDF copy foreign with data 1 QPDF copy foreign with foreign_stream 1 +QPDFObjectHandle need_newline 1 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index a0ff2a57..75021b56 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -1591,13 +1591,21 @@ $td->runtest("type checks with object streams", # ---------- $td->notify("--- Coalesce contents ---"); -$n_tests += 6; +$n_tests += 8; $td->runtest("qdf with normalize warnings", {$td->COMMAND => - "qpdf --qdf --static-id coalesce.pdf a.pdf"}, + "qpdf --qdf --static-id split-tokens.pdf a.pdf"}, {$td->FILE => "normalize-warnings.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "split-tokens.qdf"}); +$td->runtest("coalesce to qdf", + {$td->COMMAND => + "qpdf --qdf --static-id coalesce.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "coalesce.qdf"}); @@ -1831,12 +1839,12 @@ $td->runtest("unreferenced resources with bad token", {$td->COMMAND => "qpdf --qdf --static-id --split-pages=2" . " --remove-unreferenced-resources=yes" . - " coalesce.pdf split-out-bad-token.pdf"}, - {$td->FILE => "coalesce-split.out", $td->EXIT_STATUS => 3}, + " split-tokens.pdf split-out-bad-token.pdf"}, + {$td->FILE => "split-tokens-split.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); $td->runtest("check output", {$td->FILE => "split-out-bad-token-1-2.pdf"}, - {$td->FILE => "coalesce-split-1-2.pdf"}); + {$td->FILE => "split-tokens-split-1-2.pdf"}); $td->runtest("shared images in form xobject", {$td->COMMAND => "qpdf --qdf --static-id --split-pages". diff --git a/qpdf/qtest/qpdf/coalesce-out.pdf b/qpdf/qtest/qpdf/coalesce-out.pdf index 78505aba68718f41001c042f12565a66488b0abc..a0dae39d963f12d3e8fa41d3e98a0e66950b419f 100644 GIT binary patch delta 1804 zcmZXVc|6mN1IL}2xgSD8-*U^-W{jdi16!iO5Gxk|VheEx43QAK!xm!cWCb7~C-7}A)0^M3P=^+)Vn0Ix@DRB|rIfc~<0hBVF?nPcsuH5|8 ztWiH`ISfnSdTDudckG-Ueoc&SPk*YvxlA!#UC)3N=(5}@_32sy9W`x2HMq(20 z_vhdTDuWg3EuV2uH+4Fpc(tdd9<$n4l}gP$-zwEbpS@CER`zHu4vR_sK!klV&~+wJlNA|dZ#Sr+5D#wW|} zi>rF+NeXpIS{drL^9m<4yi9X5`>~ILGFVgUR4Wz-ChSET<5Ube-l=z zLb9uJmhTFVXXbf~Nf%d}>|JrAPde$UD1@-Pt z8|>vwg9Akr^z_Np!>n5AA6t3L6Wy{k-AMF}k+S#@e%?1fBUBM54t?6G8{TJd(lerSmK0E-lPx-?T_t;sS)elUmYi05|ALS< z=UP^C)9E%VLsm4wosUt^Q68>68J&L1B~q_Y9`~1rst`$-Bt`g4V8@wSM>J0ibVLim z%?8g^uDIwG=U^FpWhJNn?5Ug_IU^{%EnAip$rqIQKoPn=J}5YVC0j zQbKKVjz3$Qm;0jZnXm3JiRcq&AEaM4yX}vcej50j$qskes4kCe;bQg zlb4#+L?Q-$oe0>yAS+jU=uC(lY50+ zQc5B)D&mv9IZphAGlp#?(p9-Qyk$*JnmH&R1 z>7G!lKNw$B9lu6%yNxrqM+-*!#ikp+K>o#$3r+n@;J5HYO8Vz$35d<#B06M7+jaA^ z8{R%yc~!G)CE-Y5?#_y|R7OmO+$r)=@hVVrzG_q&csh_T3%1080RV#12VjN(F6fXCp9FsR zB)EV>)^-Yy@{J1D0dS#U8ypx9Kzzw$-#9QNl0*cBL;-}GAjBXYCwNK#2h&Fy1JM5q qxCVg#11QV@YWQ9G6XexTQGN*trwx`m~WVjGkc+oy6&W7S%t*6&&qT)9r+LyB&L&0I+* zMgu3-Uk-kPue5FCGjd*dUwEf)YUZ<`bHZauIfcd3b}q9sV6zjAZ2YO1{3eDklII4$ zYUBsTrPnO%)fQZP-BFu0>Gum+>(#x+a}Qn15h?JyUF`AgcgZitK&BfZC0ZG0LVrCj zXFq%Og6ay*f@zzTrn+PXW$m2iA`$5qDtP&(W7#CP2Wq?%&R!{1PM&%!ob${5PUi5* z{=8hSo=Y$EAK2W&p~MtoU}0vdU;qLNc?w)$hJk^lxf!~cp@9LWn4ysghB_l-!^yw6 Rnz#*(%#65HRbBnvxBw4Fs3rga diff --git a/qpdf/qtest/qpdf/coalesce-out.qdf b/qpdf/qtest/qpdf/coalesce-out.qdf index 9a7129f34d81aad58264190126ab92cf6dbd4698..822fdd1709f25ed6b0dd92b5e266a43f61ac5bf4 100644 GIT binary patch delta 1815 zcmZvddpy$%AIDuY!w`+!`c>}7X%{wIA-BeAXzrO&L}+}8hb6>Gu{iUur4IBYB!2<9-q0oRS z5_Fj-*e_4MABf)q26KfY8*di%qOM;rd9(F?K4wa-o`%|3{)*WY-jnb1;s*?0FwC!e9 z4(WKC=VbN}9|UF$OlZ?>2KY!x@9+_R=_jXK)y}gvpKf1z=7Q2nv1Sq+mpxn0ZX>=y zI7U<7m@2xB7qmP~85@%|O=eFQ6lp?B{Gpos)yMwyZmEya<|kzvwKYZ*QmIAh+V~z- zNw;$vEOhy5vVYeV{ea`vs*r}xx*?^C{F`$aoB>ov72prdBTffrmopb{i;rfqJVz7? z%EPr<3BN-7a&7S(SyAurqNqU-AheqKesAZc?;QQOS%fC;&8sHl^Qjs~nl$L~qlx=j zl?p#LSc_vFO56@C{@PG+e6XnSs~;Md2+~1jPcW?zj&B#^?!^lc^q6yXZ5)w4h_P)U z)Klz9rJkxzQ(NX zS&rTMscSyRTotCIXRK);_E|f8OtWV8N`Ic_*lTK9<=xW~`uwo0`nq45Y)l5C$sQu@ z)qIV?%EQs=H(Vl(*ihHMJhddkCBx;(AIZEpbK8jevEG(w36w?OiLxaZqXH(eUBsgE zifD|<-#Qna^mQY7(=x*D1Kw?`F;7N_xcyB7+K%r+WTov&w37ay5u+4uy^Xk z%|LwkYVx|gO*Mou;L{ykRKWkE!G0nxSI6qax2z7HGzi_;=^UtrTj#4kS(#P-tl^cb z?KqC%{nVD7$-C9FW@r**NI-e1QqZl6|C zuDtZ8*y0__+$0*2ePHc-g?vJ$UoJ$>+4N!C3?;ikEZOW1TM(X{2nkE|VJyjQ#8Cn7JaWDr$7Y(A(MF7nhqs~>tuvXn3EQf!DY zlDYOf(fCf_%l;du+IoVSb}Sn*?;-H&1cv@1S;Fy&kS{g^hphIf{}n(|1F|Lak$wqH@oKbqoo(zMH@LMa{ut98>%Dm=hKozrvZi5)1@9vkI(U(nw0`uPH_s-fE4r4Gu!%K4dGiZhvX~R>!x!R$& zfV6Z~urnX-Ibd}?4PsA^@@h%pl`nld(eczR!0xf3a^}(A&4Z zc4~d9eQ|g9DnmgWx*tpeU>B&=3vpmr zWH==_3Ls~MNx&r#Xe@@qlHiHakSI6;fdA)!FaZ2t14p7zdyDix42?zq#7r^BpBU!< eFs$)U|BT_LrhDCVP>PH(0$~i+(K+RW1OFRd=u%Pu delta 495 zcmX>gJwb4T8gsouw1R$OQBh*0esMugW{EuFxb>`Gy4K~x`}i$CWKM?HuWXrfM0ZMu-T(Fjy>?UL z7}eG-EPWK)prqJ7m17#K)*7{b*P7tUbqXI+bQ^5uN;)wbII;e6@DqHcZ6lwN^TPYW zJAG3#p9P&09!tt8ES|P=nVkWfooHm^PsQXnF?^9cH~3W}KQJ!6W?`?k;M(hs+N?>x zU&vao?lqo!=vt0Qf!FO~k8i(AelZ3z-3Tes$~Y7H>v1{z*{c^+S7;VY+pILzB{L{% z=QJ0INWW0Q%Qqd%Cb>OO;gXq}B33 zd9;2#`jic*3jN`{V;s}19->yKFm4G&r@j=$}V*#P>owg=Z< z&8w2GUM+gH@pd+I$kWM*uNzK(S&jDX@q?5m0+jGQvE zj8CDx#<}f`OsVOy(I@zYl1#$a;BZ%JnOd_w4CkVuJNdPuDN%-8BSQWwv&FnC9+fbgm#_#M{Q%-4%% zf(Z1xg=wvimERe+omzkFf()LDjCChTJ z&!Eui(#o;`yLT3LHXyw;WBwNJP%7PdP&}_RRIwTRqjXP}DTX1;?Y6~@=mP=UMpIjN zcAk68P)_NEDWG4yXn;SRAXixlf*w8?yPH-n{&kH$Khh@2Y}3VD=`V~4;NJf1g+#}J zl;P=P8AdRx*YnYLVzyzFNSB%x23L=$mTA7UGtZ-PRaxbR{P1ekgvm;MQiJGff>Nqc zoK!P-eZdE`8$V2CQB@|Gs#h+xjrUJrXUa>`tUA>aSKLq9i%$rWr;I^4Hs+YfMuqex zR<^>(OJZ_)(pf$=_NBDinjacWw0k26PF$^(Z27+OV-YFW?7}s3r0sumR^$ui4;3T4 zCk#jHn}*enbT>utAq;v>mn_(6=4IeoxeVGvxFfayQ1-D_QwblQ6x!}gs1U;xuhV4C zAIL4f!J7OUBduKpF9{ZO%i?Qe| zWuwzy(%OavLCE@cTR%DUD*MQzrD>^;@~&A*R->p+;o;qs^9DD(@zT#EyK?-;LeE9v zTRZx(PFg?Lp^{!kVV9)ECRKD%-9L`_Y@U^rD!=e6&-`sea#lPp=g`VlnOJP9SC*)V ztao&>$yIh@dhIAB1)BYkx!=pcI#S0BVs% zytu@KCDlol1Nwaupi`MabmuI%QqgVmy_@bYT)YT(}TpuN)1&McY z4^+a$zj(WypagI_9n;H6QeF{)eVK8A26d+r)bd5+EWHN=%={bI*Rqe@4N#D6crGK^# z#7s`cQ#d#4Pv%~{e@j5TJ<#Nr){lzTpCUMII;rae+5KHY<5eFae`1L_`d)g7Yxoax zu+!97)ap-RO%el*8d>R8FCQ#CW6qn1SQA*AlLB=mVWR;zb3GK?vQOAvL>Bm7_P09& zxb>}%kT*}nNFC>!Xbd`zfAL#)U{3GvWHLpEgl-((l%xn43?;?Sagcmo=WUpZG`X=< zPMhW&+;0wK*{Q^=uXlas__q3#ysXEeO!goC(5s)hI!at;r$A-X`3`-kZI9hg*CQ_K z`1@tQ&h4JI$R9wV{0A09)TWSBSNxjuk;Y0(^Xj~X%bcCrQ0HEwtI47k6q0Mxjp5RT z52rgm6|(}>GG0_WW_PStbwCezEa%Smta7@0R#(hcCtBxschBtTFRh4x%`jlxo>tU^ zQF{&v@d^aP5W0vx!JuiGJW>E+Pbc<1U;t3F2_gL>A@H7c@BqY`|_MShplf}*}>aOl6f WBqHsAV62c9TpJEnRyMOngZ~2WK!NrE delta 658 zcmdlf+bg``0Mq13PL0X$nK+n?jVG_=RG2Kutje8OlCP-{lEvj3qA*#W%Ye&5!9XED zDT~X{5~#$jUca~?C$mH$R>8@WOW!$ILEl|JT*1st-$TK~T;D0cS;0czP2Vj?-zC7t zMn9k^H6=5-B)>?(&{W?!Kc_S|uNbJ<&W_8|MWJHO(QBT3hZT5O18V*|TZf)VIB?l~ zV$@3RW#UXnX54z#FJ0^M;eGs;A2KJySGLSKqC2I-?tgvzfnK{Qag1u~7M4DWZBSBd zpUN?fRcnn}ziUl!<{}a47bF+(F$Q%o^SOAIj+6HL7( wW)QJ>B#RAAEsQZVo0(uZ#?agZQ=PdPrWnv8lhyb-xD720Ex1%wUH#p-04qhz(EtDd diff --git a/qpdf/qtest/qpdf/coalesce.qdf b/qpdf/qtest/qpdf/coalesce.qdf index 5007dc12717516f8d14e27ca4b144a912c9e3dfa..ab5b08cc6e986afc54814b0d3903cb9bc7ea3e1b 100644 GIT binary patch delta 1996 zcmZvdXIRr$8;9A1u*(zy|0=uGgoK1d0of1@BWwvnrhp(2kRf2eqE-@7Nh>HC3`>Tx zBFK0V$`ZkpjUt<{Bn;UrNO^m`AKLPs=gT?2bDrxy&!>AuiVBpPb5ln7L6k*qRZ0jq zh)WwvDdp9nlya*RP3^&9U~M!2JuCzg(18I+7!>dh1{-0(5Mw(4Vh(Wvw9ycY!&``v zjWK|Nm_f{fz!(!C`q_QQ`j8CbjmfsKb_miR`Asw z9uf0$@q$LV0#tH4*AH$c1n`dz3q`5HktJiNh_wa@wM{22gT_ZKR;S+&#_j+^S?tlR zx@Faf`1qo_o%f5GW1db4;g&t6xjIFCWPK*h2r{HLnUeEUlP~crC7EvDgTr0O=v~9NMw^ zAaVls^G%{1^w}##g@upT zVsO~xmLS*%xVo>TY77k2zr(WLPb1E<3U9candz7oDpZNl*Z)ANf^e%NcpbL4%(sgc zg4}2i3)2}dls_1CUR>2oO_ZrhRHZ1H&&!-pcGb&F>%~3xqYO?dk&FkKFkaWtQD*Tc z>pRu9^Twa4vh^hI?hrNKvqrKaja~J_66LwK z7EosD(#o;`b8u1XLO@0tb@?v$SQ^cFR4lJ7RIwfVlXQQU35G7j>AJ^>7yShf}^>Qa3Nougj_ZjvwU%=NrdRaUtxDJTlJ_VZ@8ba7n>0v&KZGnY|JrHtqK`y z1KA1_ZwV=t_b>3MF|VaJ)c@FQtUVa%=ET9MX37s$o{UVrX&0`UBW?eivm#F@Z>Xr- zN4N19J(IA8iN3Z-9)y1Xxsnw-%{(fO!J*$3!WpIgyRwh9no9WitRTB5sY(=6yiJk0 z%$HmIU|_k5oW2)BWDHop>^;_Hm)lyCv){n23t~dolQu<-t3}9z?mYnodCW(R7E>`< z%7*8@rFV`CfRIgHwtjMEo6O_S*XAWZ%e!PLSxusPgvJk2FYDj-#!J7J=*{t;3cVPO zXLJu^o#4MTqVB(q#;!?=&Z?kMeLqe3>|T(Nti19o*YZ6BVpbw9M`~lQTr@7tD@#P! zR_95_Tu??MSCa0%99C#j0x-hwk+C@z^aFDg*AzroWZC15DyS(2pcDm#lcO8Q$nWJs zyf{RKB-A6SNA!loLpfepqz_1+;4NmdyTu#BGzD+`f`i}lD;>CL+R+<8HK!T7jbF#1 z!?k(0i&Hp=`@(LyBA888Z`!K1Kvh@t8^-kG>kDko=z)L?e9_N_>grK~F3(fU1PBjt zPgTOizInTzCIzs195X5-B)uX9hB6ZZ^&8J7spX3#SbC4}%MCByGN6HDIWZsOR>w|L z%qF5>WZQ+CKv3c9p|2bdPOJ;xdeL@mpAg>ar>?;y@)3?V&~p4HYr0Y@Bz%{o%Kl)C z#Lmtpl2~_|&g5Qwa+hDME7156^WFfjNW!AreVC5oWF9yAmXR{jvLrODrUh*JYauMT*>1 z8mm)t3Ffy1vg}c+XlinK?fAa-th}s;R3`Ixe`x$yj_wkd#W_&f+zu-Y7w?1JBNKHgewXENnC=1+L z+z2${FRwW8Xi)0-B>}{tN$h>V0H9_Q65$yUq7K*xf)9m*2OypVf@cgRj#rBd0XyvR zrVx2RhevfvJRgV?^~e*TzvOG-BBGE0^uG(Z27v$CK%r;^;@=qz3DY^6p;1RN6zulYW~WR@tzDmZy^={pB2=)3EOE0~$-dnlNg>pKNFD_H2e>AU6Vy9C(S=m!*~ zrer3UQQgC{)Zjdd-vXumTTjK+S(=>(CPk2QHgWj9SUP zOq}V+j9bt8rE6V2ypP}VL*``o%9c4tbfR#iyhpy#_6x=TM z`1ZTx7bDkXe`fW`JlvejMy8-3GoS3ot>YNTbR(ohE8|S)ugB%=XRlsRU7=YpZL`u; zm&~B7ozq+-BK<-IFW+=5o8o@0y7K@EKPv1f+}WcU}AzHW@u=PA!dR_ovEQ2hB`Ad z1F%>TSTmY^t)9U~L diff --git a/qpdf/qtest/qpdf/normalize-warnings.out b/qpdf/qtest/qpdf/normalize-warnings.out index 57f038f4..287a583c 100644 --- a/qpdf/qtest/qpdf/normalize-warnings.out +++ b/qpdf/qtest/qpdf/normalize-warnings.out @@ -1,9 +1,9 @@ -WARNING: coalesce.pdf (offset 671): content normalization encountered bad tokens -WARNING: coalesce.pdf (offset 671): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents -WARNING: coalesce.pdf (offset 671): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. -WARNING: coalesce.pdf (offset 823): content normalization encountered bad tokens -WARNING: coalesce.pdf (offset 823): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. -WARNING: coalesce.pdf (offset 962): content normalization encountered bad tokens -WARNING: coalesce.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents -WARNING: coalesce.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +WARNING: split-tokens.pdf (offset 671): content normalization encountered bad tokens +WARNING: split-tokens.pdf (offset 671): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents +WARNING: split-tokens.pdf (offset 671): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +WARNING: split-tokens.pdf (offset 823): content normalization encountered bad tokens +WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. +WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens +WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents +WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/coalesce-split-1-2.pdf b/qpdf/qtest/qpdf/split-tokens-split-1-2.pdf similarity index 100% rename from qpdf/qtest/qpdf/coalesce-split-1-2.pdf rename to qpdf/qtest/qpdf/split-tokens-split-1-2.pdf diff --git a/qpdf/qtest/qpdf/coalesce-split.out b/qpdf/qtest/qpdf/split-tokens-split.out similarity index 88% rename from qpdf/qtest/qpdf/coalesce-split.out rename to qpdf/qtest/qpdf/split-tokens-split.out index 5e18173c..0a76a46a 100644 --- a/qpdf/qtest/qpdf/coalesce-split.out +++ b/qpdf/qtest/qpdf/split-tokens-split.out @@ -1,4 +1,4 @@ -WARNING: coalesce.pdf, object 3 0 at offset 181: Bad token found while scanning content stream; not attempting to remove unreferenced objects from this page +WARNING: split-tokens.pdf, object 3 0 at offset 181: Bad token found while scanning content stream; not attempting to remove unreferenced objects from this page WARNING: empty PDF: content normalization encountered bad tokens WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual. diff --git a/qpdf/qtest/qpdf/split-tokens.pdf b/qpdf/qtest/qpdf/split-tokens.pdf new file mode 100644 index 00000000..ba5d959b --- /dev/null +++ b/qpdf/qtest/qpdf/split-tokens.pdf @@ -0,0 +1,217 @@ +%PDF-1.3 +% +%QDF-1.0 + +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /Count 2 + /Kids [ + 3 0 R + 4 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents [ + 5 0 R + 7 0 R + 9 0 R + 11 0 R + ] + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 13 0 R + >> + /ProcSet 14 0 R + >> + /Type /Page +>> +endobj + +%% Page 2 +4 0 obj +<< + /Contents 15 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 17 0 R + >> + /ProcSet 18 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +5 0 obj +<< + /Length 6 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Pot +endstream +endobj + +%QDF: ignore_newline +6 0 obj +33 +endobj + +%% Contents for page 1 +7 0 obj +<< + /Length 8 0 R +>> +stream +ato) Tj +ET [ /array +endstream +endobj + +%QDF: ignore_newline +8 0 obj +19 +endobj + +%% Contents for page 1 +9 0 obj +<< + /Length 10 0 R +>> +stream +/split ] BI +/CS /G/W 66/H 47/BPC 8/F/Fl/DP<> +ID xI P|C;U`7Z Ę}D_W->>^&u]"!*&E|Sy d-<B0B@N+<hlK/56L >0>Y!c\Y %Y8?&}j;3lpsHt +endstream +endobj + +%QDF: ignore_newline +10 0 obj +253 +endobj + +%% Contents for page 1 +11 0 obj +<< + /Length 12 0 R +>> +stream +QTt*hUw%)p"DiRjDYNUAvF& u#cW ߉WO +EI +endstream +endobj + +%QDF: ignore_newline +12 0 obj +66 +endobj + +13 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +14 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 2 +15 0 obj +<< + /Length 16 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +16 0 obj +44 +endobj + +17 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +18 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 19 +0000000000 65535 f +0000000025 00000 n +0000000079 00000 n +0000000171 00000 n +0000000416 00000 n +0000000634 00000 n +0000000744 00000 n +0000000786 00000 n +0000000882 00000 n +0000000924 00000 n +0000001255 00000 n +0000001299 00000 n +0000001444 00000 n +0000001464 00000 n +0000001583 00000 n +0000001642 00000 n +0000001743 00000 n +0000001763 00000 n +0000001882 00000 n +trailer << + /Root 1 0 R + /Size 19 + /ID [<6af379f20e8dcd4e724869daec3ba023>] +>> +startxref +1918 +%%EOF diff --git a/qpdf/qtest/qpdf/split-tokens.qdf b/qpdf/qtest/qpdf/split-tokens.qdf new file mode 100644 index 00000000..5007dc12 --- /dev/null +++ b/qpdf/qtest/qpdf/split-tokens.qdf @@ -0,0 +1,231 @@ +%PDF-1.3 +% +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 2 + /Kids [ + 3 0 R + 4 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents [ + 5 0 R + 7 0 R + 9 0 R + 11 0 R + ] + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 13 0 R + >> + /ProcSet 14 0 R + >> + /Type /Page +>> +endobj + +%% Page 2 +%% Original object ID: 4 0 +4 0 obj +<< + /Contents 15 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 17 0 R + >> + /ProcSet 18 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 5 0 +5 0 obj +<< + /Length 6 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Pot +endstream +endobj + +%QDF: ignore_newline +6 0 obj +33 +endobj + +%% Contents for page 1 +%% Original object ID: 7 0 +7 0 obj +<< + /Length 8 0 R +>> +stream +ato) Tj +ET [ /array +endstream +endobj + +%QDF: ignore_newline +8 0 obj +19 +endobj + +%% Contents for page 1 +%% Original object ID: 9 0 +9 0 obj +<< + /Length 10 0 R +>> +stream +/split ] BI +/CS /G/W 66/H 47/BPC 8/F/Fl/DP<> +ID xI P|C;U`7Z Ę}D_W->>^&u]"!*&E|Sy d-<B0B@N+<hlK/56L >0>Y!c\Y %Y8?&}j;3lpsHt +endstream +endobj + +%QDF: ignore_newline +10 0 obj +253 +endobj + +%% Contents for page 1 +%% Original object ID: 11 0 +11 0 obj +<< + /Length 12 0 R +>> +stream +QTt*hUw%)p"DiRjDYNUAvF& +u#cW ߉WO +EI +endstream +endobj + +%QDF: ignore_newline +12 0 obj +65 +endobj + +%% Original object ID: 13 0 +13 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 14 0 +14 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 2 +%% Original object ID: 15 0 +15 0 obj +<< + /Length 16 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +16 0 obj +44 +endobj + +%% Original object ID: 17 0 +17 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 18 0 +18 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 19 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000252 00000 n +0000000524 00000 n +0000000769 00000 n +0000000879 00000 n +0000000948 00000 n +0000001044 00000 n +0000001113 00000 n +0000001444 00000 n +0000001516 00000 n +0000001660 00000 n +0000001708 00000 n +0000001855 00000 n +0000001942 00000 n +0000002043 00000 n +0000002091 00000 n +0000002238 00000 n +trailer << + /Root 1 0 R + /Size 19 + /ID [<31415926535897932384626433832795>] +>> +startxref +2274 +%%EOF diff --git a/qpdf/qtest/qpdf/token-filters-out.pdf b/qpdf/qtest/qpdf/token-filters-out.pdf index 6d24497c6d1445a9702d0989adb7a2d5b299dc06..8f5f14c3291a3d05d2a92c6140365700493bf570 100644 GIT binary patch delta 1804 zcmZXNdpHvc1IAr5!;ovPQ|5M@He)k|T*qo??wL_Uh?6 z3>+Kd7RpUpS zY{)51EFG6f?wIrH^apm*4wzFY7~6WYtRH*hM#Y<*4~qrk7kqs9LCm*}yYThB_avM( z=ehTp+T@i3i)QZ%uUh}gVQ65$TYn|}_sbrY5As|P9_PG|GngAY>PPR%1q*e{v3Kw>1Ia+DAVp3WlkrT zuJ+b8U|0LAGbsh9Sh8Kr*-I4+#)Gv)95(ZPB>bbPVThuBA{;QZ!*|=yndOri*Zq#q zbWKY$^b)PDKho-(812TA^~b@Keu+UZHi(iaC_w<;2@Pt9j5Bd)aqZq(lb3WgI%br%608G z$lM4x?kP7TIcG-(6rXj%#J6katqm7xPP`^%)!jWUX283W+tU0?o4px3j^HEI-N4h} z)ak@!-}HzvE>`yZ%U4T`C{C0md?IiYtsJ6TCI&m=#1Pg)C#zOGj7#%z-9pyASB2xv z-a#(98R*4uXQc%F>GiUh%59p;`9md@_lB42skwWJ-NR(JX9GuiJxbaei}zbZEde~( zditiUeFKQf4(JarFXjEw>O7TL2(dl+J-3G|2|%)X-9y#Tn>_U=Yx9aw6Pkbx%LIwjdBr z7!J?FSNvvcXc#Z$_aqA~NxF}Fq!%ssJ=p&kC7j>yomUsD7!)VTDM*R1ZatB1PzFkI z2_BVD8(B=Vp#zhIF`trG$B)s_6Y+4W`@&6dB;y6=o6tS&b(yrM9ar{AG3}v-M!Z8L zvU&@>ICQeHH>*}5WJ#&|@9xp0+1XSI{}$_b$>oQ)CFFV|?EmQgqUH8Aj^AURvprfg zG9WeG_yzhmmQ-vNWQn+m|Ey+mlA4U#{4Kgeeze`FFt73Tqm>u6%l0yEgyG#;iPoy< z$?#hxftvnB$J{R?%R{b(d7J|KbG9dTkCBrUkBZH-M;^t$`fD(vnEf9znPMJ`Zs+bQ zP$aC!@1`#CkzzsT&(>Tj)j*{J-n%f^jc-DI zRepEZ$FOWzV5`oYdVq|zMm_UmX(-t!^hndiH z#C4c2#aJR_xETr#hW#t>6)^CB0R~5yn*79&=H@>!bEN4{42Ar8XJLZ)|D7oe`R^}n TRU$*m6lQ?}LLiQAXyAVVw`);% delta 499 zcmdle-6Xg{mAPIaT0uXts3@^gzqlYLvqT|Q!O4?L-#J)8-(5dk!OTqGL&3yc-zmUZ z!9w3n-z`VqCBVi;KcFZzB{R7szevH*RNpy2r!+UO7^v9Jj?2?Up<>R_Yo2_E6?j+! zYW_Q0hn`3{aM^rf)JpDU;!HqYmHjJYfW(FI)x7@x(zmSC7l=zoLGN3_zAw!wvo@sdEtHG zoxZ7=&w|bgk0s?47Ejx`%+7$#PBgOdr(*J(7`{lJ8~mz~9~hTjv#?iNaP4(RZPujU zFJ!G(_ZrVVbS+1u!0UFg$G6`lzZe6VZiJL*Wt<89^|+kMtHZC0A zbDE1pq+h7u<(rOWliVJt@lH5alRnFZVl{!zcUma=Cg=p3N!GY;0~i`69RC z=KGw7nd%Jis;aBM8y5h^(W=b=