mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 02:49:00 +00:00
be less conservative when skipping over inline images in content normalization
git-svn-id: svn+q:///qpdf/trunk@1050 71b93d88-0707-0410-a8cf-f5a4172ac649
This commit is contained in:
parent
a8f2248729
commit
6405d3928f
@ -1,3 +1,10 @@
|
||||
2011-04-30 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* libqpdf/Pl_QPDFTokenizer.cc (processChar): When an inline image
|
||||
is detected, suspend normalization only up to the end of the
|
||||
inline image rather than for the remainder of the content stream.
|
||||
(Fixes qpdf-Bugs 3152169.)
|
||||
|
||||
2011-01-31 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* libqpdf/QPDF.cc (readObjectAtOffset): use -1 rather than 0 when
|
||||
|
12
TODO
12
TODO
@ -1,3 +1,11 @@
|
||||
Next
|
||||
====
|
||||
|
||||
* Look for %PDF header somewhere within the first 1024 bytes of the
|
||||
file. Also accept headers of the form "%!PS-Adobe-N.n PDF-M.m".
|
||||
See Implementation notes 13 and 14 in appendix H of the PDF 1.7
|
||||
specification. This is bug 3267974.
|
||||
|
||||
General
|
||||
=======
|
||||
|
||||
@ -174,6 +182,10 @@ Index: QPDFWriter.cc
|
||||
providing some mechanism to recover earlier versions of a file
|
||||
embedded prior to appended sections.
|
||||
|
||||
* From a suggestion in bug 3152169, consider having an option to
|
||||
re-encode inline images with an ASCII encoding.
|
||||
|
||||
|
||||
Splitting by Pages
|
||||
==================
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <qpdf/Pl_QPDFTokenizer.hh>
|
||||
#include <qpdf/QPDF_String.hh>
|
||||
#include <qpdf/QPDF_Name.hh>
|
||||
#include <qpdf/QTC.hh>
|
||||
#include <stdexcept>
|
||||
#include <string.h>
|
||||
|
||||
@ -11,8 +12,9 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
|
||||
last_char_was_cr(false),
|
||||
unread_char(false),
|
||||
char_to_unread('\0'),
|
||||
pass_through(false)
|
||||
in_inline_image(false)
|
||||
{
|
||||
memset(this->image_buf, 0, IMAGE_BUF_SIZE);
|
||||
}
|
||||
|
||||
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
|
||||
@ -56,11 +58,34 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
|
||||
void
|
||||
Pl_QPDFTokenizer::processChar(char ch)
|
||||
{
|
||||
if (this->pass_through)
|
||||
if (this->in_inline_image)
|
||||
{
|
||||
// We're not normalizing anymore -- just write this without
|
||||
// looking at it.
|
||||
writeNext(&ch, 1);
|
||||
// Scan through the input looking for EI surrounded by
|
||||
// whitespace. If that pattern appears in the inline image's
|
||||
// representation, we're hosed, but this situation seems
|
||||
// excessively unlikely, and this code path is only followed
|
||||
// during content stream normalization, which is pretty much
|
||||
// used for debugging and human inspection of PDF files.
|
||||
memmove(this->image_buf,
|
||||
this->image_buf + 1,
|
||||
IMAGE_BUF_SIZE - 1);
|
||||
this->image_buf[IMAGE_BUF_SIZE - 1] = ch;
|
||||
if (strchr(" \t\n\v\f\r", this->image_buf[0]) &&
|
||||
(this->image_buf[1] == 'E') &&
|
||||
(this->image_buf[2] == 'I') &&
|
||||
strchr(" \t\n\v\f\r", this->image_buf[3]))
|
||||
{
|
||||
// We've found an EI operator. We've already written the
|
||||
// EI operator to output; terminate with a newline
|
||||
// character and resume normal processing.
|
||||
writeNext("\n", 1);
|
||||
this->in_inline_image = false;
|
||||
QTC::TC("qpdf", "Pl_QPDFTokenizer found EI");
|
||||
}
|
||||
else
|
||||
{
|
||||
writeNext(&ch, 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@ -75,18 +100,10 @@ Pl_QPDFTokenizer::processChar(char ch)
|
||||
this->newline_after_next_token = false;
|
||||
}
|
||||
if ((token.getType() == QPDFTokenizer::tt_word) &&
|
||||
(token.getValue() == "BI"))
|
||||
(token.getValue() == "ID"))
|
||||
{
|
||||
// Uh oh.... we're not sophisticated enough to handle
|
||||
// inline images safely. We'd have to set up all the
|
||||
// filters and pipe the image data through it until the
|
||||
// filtered output was the right size for an image of the
|
||||
// specified dimensions. Then we'd either have to write
|
||||
// out raw image data or continue to write filtered data,
|
||||
// resuming normalization when we get to the end.
|
||||
// Instead, for now, we'll just turn off normalization for
|
||||
// the remainder of this stream.
|
||||
this->pass_through = true;
|
||||
// Suspend normal scanning until we find an EI token.
|
||||
this->in_inline_image = true;
|
||||
if (this->unread_char)
|
||||
{
|
||||
writeNext(&this->char_to_unread, 1);
|
||||
@ -156,7 +173,7 @@ void
|
||||
Pl_QPDFTokenizer::finish()
|
||||
{
|
||||
this->tokenizer.presentEOF();
|
||||
if (! this->pass_through)
|
||||
if (! this->in_inline_image)
|
||||
{
|
||||
QPDFTokenizer::Token token;
|
||||
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
|
||||
|
@ -33,7 +33,9 @@ class Pl_QPDFTokenizer: public Pipeline
|
||||
bool last_char_was_cr;
|
||||
bool unread_char;
|
||||
char char_to_unread;
|
||||
bool pass_through;
|
||||
bool in_inline_image;
|
||||
static int const IMAGE_BUF_SIZE = 4; // must be >= 4
|
||||
char image_buf[IMAGE_BUF_SIZE];
|
||||
};
|
||||
|
||||
#endif // __PL_QPDFTOKENIZER_HH__
|
||||
|
@ -187,3 +187,4 @@ QPDF_Stream getRawStreamData 0
|
||||
QPDF_Stream getStreamData 0
|
||||
QPDF_Stream expand filter abbreviation 0
|
||||
qpdf-c called qpdf_read_memory 0
|
||||
Pl_QPDFTokenizer found EI 0
|
||||
|
@ -1257,8 +1257,8 @@ my @flags = (["-qdf", # 1
|
||||
"no arguments"],
|
||||
);
|
||||
|
||||
$n_tests += (@files * @flags * 2 * 3);
|
||||
$n_compare_pdfs += (@files * @flags * 2);
|
||||
$n_tests += 1 + (@files * @flags * 2 * 3);
|
||||
$n_compare_pdfs += 1 + (@files * @flags * 2);
|
||||
$n_acroread += (@files * @flags * 2);
|
||||
|
||||
foreach my $file (@files)
|
||||
@ -1311,6 +1311,14 @@ foreach my $file (@files)
|
||||
}
|
||||
}
|
||||
|
||||
# inline-images-cr.pdf is xbkm938-dies.pdf from PDF collection
|
||||
$td->runtest("convert inline-images-cr to qdf",
|
||||
{$td->COMMAND => "qpdf --static-id --no-original-object-ids" .
|
||||
" --qdf inline-images-cr.pdf a.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||
|
||||
compare_pdfs("inline-images-cr.pdf", "a.pdf");
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- fix-qdf Tests ---");
|
||||
|
BIN
qpdf/qtest/qpdf/inline-images-cr.pdf
Normal file
BIN
qpdf/qtest/qpdf/inline-images-cr.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user