mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-31 02:48:31 +00:00
Externalize inline images (fixes #278)
This commit is contained in:
parent
22bcdbe786
commit
5211bcb5ea
12
ChangeLog
12
ChangeLog
@ -1,5 +1,17 @@
|
||||
2019-01-31 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add new options --externalize-inline-images, which converts
|
||||
inline images larger than a specified size to regular images, and
|
||||
--ii-min-bytes, which tweaks that size.
|
||||
|
||||
* When optimizing images, inline images are now included in the
|
||||
optimization, first being converted to regular images. Use
|
||||
--keep-inline-images to exclude them from optimization. Fixes #278.
|
||||
|
||||
* Add method QPDFPageObjectHelper::externalizeInlineImages, which
|
||||
converts inline images whose size is at least a specified amount
|
||||
to regular images.
|
||||
|
||||
* Remove traces of acroread, which hasn't been available in Linux
|
||||
for a long time.
|
||||
|
||||
|
22
TODO
22
TODO
@ -1,3 +1,8 @@
|
||||
Now
|
||||
===
|
||||
|
||||
* Deal with compiler warnings
|
||||
|
||||
Soon
|
||||
====
|
||||
|
||||
@ -96,23 +101,6 @@ directory or that are otherwise not publicly accessible. This includes
|
||||
things sent to me by email that are specifically not public. Even so,
|
||||
I find it useful to make reference to them in this list
|
||||
|
||||
* Do something better for inline images (see #278)
|
||||
* Figure out a way to add an expectInlineImage method that takes
|
||||
the offset of the EI image so an external system can locate the
|
||||
end tag. Hopefully Both QPDFObjectHandle and Pl_QPDFTokenizer can
|
||||
do this. Somewhere we might want something that uses an input
|
||||
source to do it, but for the pipeline, it will also have to be
|
||||
possible to do it as we go.
|
||||
* Improve location of EI to handle EI embedded in the image data;
|
||||
consider trying to parse after EI and, if errors, keep looking.
|
||||
Will have to look at what happens with random binary characters
|
||||
regarding token type.
|
||||
* Add a method to replace inline images with real images. Look at
|
||||
existing code for adding new resources used with form XObjects
|
||||
and reuse if possible
|
||||
* Have image optimization replace inline images that are of more
|
||||
than a certain size prior to optimizing
|
||||
|
||||
* Add support for writing name and number trees
|
||||
|
||||
* Figure out how to render Gajić correctly in the PDF version of the
|
||||
|
@ -73,6 +73,11 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
|
||||
QPDF_DLL
|
||||
std::map<std::string, QPDFObjectHandle> getPageImages();
|
||||
|
||||
// Convert each inline image to an external (normal) image if the
|
||||
// size is at least the specified number of bytes.
|
||||
QPDF_DLL
|
||||
void externalizeInlineImages(size_t min_size = 0);
|
||||
|
||||
// Return the annotations in the page's "/Annots" list, if any. If
|
||||
// only_subtype is non-empty, only include annotations of the
|
||||
// given subtype.
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <qpdf/QTC.hh>
|
||||
#include <qpdf/QPDF.hh>
|
||||
#include <qpdf/Pl_Concatenate.hh>
|
||||
#include <qpdf/Pl_Buffer.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <qpdf/QPDFExc.hh>
|
||||
#include <qpdf/QPDFMatrix.hh>
|
||||
@ -36,6 +37,251 @@ ContentProvider::provideStreamData(int, int, Pipeline* p)
|
||||
concat.manualFinish();
|
||||
}
|
||||
|
||||
class InlineImageTracker: public QPDFObjectHandle::TokenFilter
|
||||
{
|
||||
public:
|
||||
InlineImageTracker(QPDF*, size_t min_size, QPDFObjectHandle resources);
|
||||
virtual ~InlineImageTracker()
|
||||
{
|
||||
}
|
||||
virtual void handleToken(QPDFTokenizer::Token const&);
|
||||
QPDFObjectHandle convertIIDict(QPDFObjectHandle odict);
|
||||
|
||||
QPDF* qpdf;
|
||||
size_t min_size;
|
||||
QPDFObjectHandle resources;
|
||||
std::string dict_str;
|
||||
std::string bi_str;
|
||||
int min_suffix;
|
||||
bool any_images;
|
||||
enum { st_top, st_bi } state;
|
||||
};
|
||||
|
||||
InlineImageTracker::InlineImageTracker(QPDF* qpdf, size_t min_size,
|
||||
QPDFObjectHandle resources) :
|
||||
qpdf(qpdf),
|
||||
min_size(min_size),
|
||||
resources(resources),
|
||||
min_suffix(1),
|
||||
any_images(false),
|
||||
state(st_top)
|
||||
{
|
||||
}
|
||||
|
||||
// One row of an abbreviation table: a name as it may be abbreviated
// inside an inline image dictionary and the full-length name used in
// an image XObject dictionary.
struct IINameExpansion
{
    char const* abbreviated;
    char const* full;
};

// Search the first n_entries rows of table for name. Returns the
// corresponding full-length name, or the empty string if name is not
// one of the abbreviations in the table.
static std::string
expand_ii_name(IINameExpansion const* table, size_t n_entries,
               std::string const& name)
{
    for (size_t i = 0; i < n_entries; ++i)
    {
        if (name == table[i].abbreviated)
        {
            return table[i].full;
        }
    }
    return std::string();
}

// Create the dictionary for a regular image XObject from the
// dictionary of an inline image. /Type and /Subtype are supplied,
// abbreviated keys are replaced with their full-length equivalents,
// and abbreviated color space and filter names are expanded. Values
// that are not recognized abbreviations are copied as they are.
QPDFObjectHandle
InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
{
    static IINameExpansion const key_table[] = {
        {"/BPC", "/BitsPerComponent"},
        {"/CS", "/ColorSpace"},
        {"/D", "/Decode"},
        {"/DP", "/DecodeParms"},
        {"/F", "/Filter"},
        {"/H", "/Height"},
        {"/IM", "/ImageMask"},
        {"/I", "/Interpolate"},
        {"/W", "/Width"},
    };
    static IINameExpansion const colorspace_table[] = {
        {"/G", "/DeviceGray"},
        {"/RGB", "/DeviceRGB"},
        {"/CMYK", "/DeviceCMYK"},
        {"/I", "/Indexed"},
    };
    static IINameExpansion const filter_table[] = {
        {"/AHx", "/ASCIIHexDecode"},
        {"/A85", "/ASCII85Decode"},
        {"/LZW", "/LZWDecode"},
        {"/Fl", "/FlateDecode"},
        {"/RL", "/RunLengthDecode"},
        {"/CCF", "/CCITTFaxDecode"},
        {"/DCT", "/DCTDecode"},
    };
    static size_t const n_keys =
        sizeof(key_table) / sizeof(key_table[0]);
    static size_t const n_colorspaces =
        sizeof(colorspace_table) / sizeof(colorspace_table[0]);
    static size_t const n_filters =
        sizeof(filter_table) / sizeof(filter_table[0]);

    QPDFObjectHandle result = QPDFObjectHandle::newDictionary();
    result.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    result.replaceKey("/Subtype", QPDFObjectHandle::newName("/Image"));

    std::set<std::string> okeys = odict.getKeys();
    for (std::set<std::string>::const_iterator kiter = okeys.begin();
         kiter != okeys.end(); ++kiter)
    {
        std::string const& okey = *kiter;
        QPDFObjectHandle value = odict.getKey(okey);

        // Use the full-length key when the original is an
        // abbreviation; otherwise keep the original key.
        std::string key = expand_ii_name(key_table, n_keys, okey);
        if (key.empty())
        {
            key = okey;
        }

        if (key == "/ColorSpace")
        {
            // Only a color space given directly as a name is
            // examined; anything else is copied untouched.
            if (value.isName())
            {
                std::string full = expand_ii_name(
                    colorspace_table, n_colorspaces, value.getName());
                if (! full.empty())
                {
                    value = QPDFObjectHandle::newName(full);
                }
            }
        }
        else if (key == "/Filter")
        {
            // A filter may be a single name or an array of names.
            // Treat both as a list, expand each abbreviated entry,
            // and then restore the original shape.
            bool const is_name = value.isName();
            bool const is_array = (! is_name) && value.isArray();
            std::vector<QPDFObjectHandle> filters;
            if (is_name)
            {
                filters.push_back(value);
            }
            else if (is_array)
            {
                filters = value.getArrayAsVector();
            }
            for (size_t i = 0; i < filters.size(); ++i)
            {
                if (! filters[i].isName())
                {
                    continue;
                }
                std::string full = expand_ii_name(
                    filter_table, n_filters, filters[i].getName());
                if (! full.empty())
                {
                    filters[i] = QPDFObjectHandle::newName(full);
                }
            }
            if (is_name)
            {
                value = filters.at(0);
            }
            else if (is_array)
            {
                value = QPDFObjectHandle::newArray(filters);
            }
        }

        result.replaceKey(key, value);
    }
    return result;
}
|
||||
|
||||
void
|
||||
InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
|
||||
{
|
||||
if (state == st_bi)
|
||||
{
|
||||
if (token.getType() == QPDFTokenizer::tt_inline_image)
|
||||
{
|
||||
std::string image_data(token.getValue());
|
||||
size_t len = image_data.length();
|
||||
// The token ends with delimiter followed by EI, so it
|
||||
// will always be at least 3 bytes long. We want to
|
||||
// exclude the EI and preceding delimiter.
|
||||
len = (len >= 3 ? len - 3 : 0);
|
||||
if (len >= this->min_size)
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
|
||||
Pl_Buffer b("image_data");
|
||||
b.write(QUtil::unsigned_char_pointer(image_data), len);
|
||||
b.finish();
|
||||
QPDFObjectHandle dict =
|
||||
convertIIDict(QPDFObjectHandle::parse(dict_str));
|
||||
dict.replaceKey("/Length", QPDFObjectHandle::newInteger(len));
|
||||
std::string name = resources.getUniqueResourceName(
|
||||
"/IIm", this->min_suffix);
|
||||
QPDFObjectHandle image = QPDFObjectHandle::newStream(
|
||||
this->qpdf, b.getBuffer());
|
||||
image.replaceDict(dict);
|
||||
resources.getKey("/XObject").replaceKey(name, image);
|
||||
write(name);
|
||||
write(" Do\n");
|
||||
any_images = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image");
|
||||
write(bi_str);
|
||||
writeToken(token);
|
||||
}
|
||||
state = st_top;
|
||||
}
|
||||
else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID"))
|
||||
{
|
||||
bi_str += token.getValue();
|
||||
dict_str += " >>";
|
||||
}
|
||||
else
|
||||
{
|
||||
bi_str += token.getValue();
|
||||
dict_str += token.getValue();
|
||||
}
|
||||
}
|
||||
else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "BI"))
|
||||
{
|
||||
bi_str = token.getValue();
|
||||
dict_str = "<< ";
|
||||
state = st_bi;
|
||||
}
|
||||
else
|
||||
{
|
||||
writeToken(token);
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing to release; defined out of line.
QPDFPageObjectHelper::Members::~Members()
{
}
|
||||
@ -112,13 +358,32 @@ QPDFPageObjectHelper::getMediaBox(bool copy_if_shared)
|
||||
return getAttribute("/MediaBox", copy_if_shared);
|
||||
}
|
||||
|
||||
|
||||
std::map<std::string, QPDFObjectHandle>
|
||||
QPDFPageObjectHelper::getPageImages()
|
||||
{
|
||||
return this->oh.getPageImages();
|
||||
}
|
||||
|
||||
void
|
||||
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size)
|
||||
{
|
||||
QPDFObjectHandle resources = getAttribute("/Resources", true);
|
||||
// Calling mergeResources also ensures that /XObject becomes
|
||||
// direct and is not shared with other pages.
|
||||
resources.mergeResources(
|
||||
QPDFObjectHandle::parse("<< /XObject << >> >>"));
|
||||
InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
|
||||
Pl_Buffer b("new page content");
|
||||
filterPageContents(&iit, &b);
|
||||
if (iit.any_images)
|
||||
{
|
||||
getObjectHandle().replaceKey(
|
||||
"/Contents",
|
||||
QPDFObjectHandle::newStream(
|
||||
this->oh.getOwningQPDF(), b.getBuffer()));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<QPDFAnnotationObjectHelper>
|
||||
QPDFPageObjectHelper::getAnnotations(std::string const& only_subtype)
|
||||
{
|
||||
|
@ -1746,7 +1746,11 @@ outfile.pdf</option>
|
||||
<option>--verbose</option>. See also the
|
||||
<option>--oi-min-width</option>,
|
||||
<option>--oi-min-height</option>, and
|
||||
<option>--oi-min-area</option> options.
|
||||
<option>--oi-min-area</option> options. By default, starting
|
||||
in qpdf 8.4, inline images are converted to regular images
|
||||
and optimized as well. Use
|
||||
<option>--keep-inline-images</option> to prevent inline images
|
||||
from being included.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@ -1780,6 +1784,43 @@ outfile.pdf</option>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--externalize-inline-images</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Convert inline images to regular images. By default, images
|
||||
whose data is at least 1,024 bytes are converted when this
|
||||
option is selected. Use <option>--ii-min-bytes</option> to
|
||||
change the size threshold. This option is implicitly selected
|
||||
when <option>--optimize-images</option> is selected. Use
|
||||
<option>--keep-inline-images</option> to exclude inline images
|
||||
from image optimization.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--ii-min-bytes=<replaceable>bytes</replaceable></option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Avoid converting inline images whose size is below the
|
||||
specified minimum size to regular images. If omitted, the
|
||||
default is 1,024 bytes. Use 0 for no minimum.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--keep-inline-images</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Prevent inline images from being included in image
|
||||
optimization. This option has no effect when
|
||||
<option>--optimize-images</option> is not specified.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--qdf</option></term>
|
||||
<listitem>
|
||||
@ -4321,6 +4362,18 @@ print "\n";
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
New options <option>--externalize-inline-images</option>,
|
||||
<option>--ii-min-bytes</option>, and
|
||||
<option>--keep-inline-images</option> control qpdf's
|
||||
handling of inline images and possible conversion of them to
|
||||
regular images. By default,
|
||||
<option>--optimize-images</option> now also applies to
|
||||
inline images. These options are discussed in <xref
|
||||
linkend="ref.advanced-transformation"/>.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Add options <option>--overlay</option> and
|
||||
@ -4415,6 +4468,14 @@ print "\n";
|
||||
not compressed.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
When the tokenizer returns inline image tokens, delimiters
|
||||
following <literal>ID</literal> and <literal>EI</literal>
|
||||
operators are no longer excluded. This makes it possible to
|
||||
reliably extract the actual image data.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</listitem>
|
||||
<listitem>
|
||||
@ -4422,6 +4483,13 @@ print "\n";
|
||||
Library Enhancements
|
||||
</para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Add method
|
||||
<function>QPDFPageObjectHelper::externalizeInlineImages</function>
|
||||
to convert inline images to regular images.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Add method
|
||||
|
65
qpdf/qpdf.cc
65
qpdf/qpdf.cc
@ -161,9 +161,12 @@ struct Options
|
||||
json(false),
|
||||
check(false),
|
||||
optimize_images(false),
|
||||
externalize_inline_images(false),
|
||||
keep_inline_images(false),
|
||||
oi_min_width(128), // Default values for these
|
||||
oi_min_height(128), // oi flags are in --help
|
||||
oi_min_area(16384), // and in the manual.
|
||||
ii_min_bytes(1024), //
|
||||
underlay("underlay"),
|
||||
overlay("overlay"),
|
||||
under_overlay(0),
|
||||
@ -254,9 +257,12 @@ struct Options
|
||||
std::set<std::string> json_objects;
|
||||
bool check;
|
||||
bool optimize_images;
|
||||
bool externalize_inline_images;
|
||||
bool keep_inline_images;
|
||||
size_t oi_min_width;
|
||||
size_t oi_min_height;
|
||||
size_t oi_min_area;
|
||||
size_t ii_min_bytes;
|
||||
UnderOverlay underlay;
|
||||
UnderOverlay overlay;
|
||||
UnderOverlay* under_overlay;
|
||||
@ -659,9 +665,12 @@ class ArgParser
|
||||
void argJsonObject(char* parameter);
|
||||
void argCheck();
|
||||
void argOptimizeImages();
|
||||
void argExternalizeInlineImages();
|
||||
void argKeepInlineImages();
|
||||
void argOiMinWidth(char* parameter);
|
||||
void argOiMinHeight(char* parameter);
|
||||
void argOiMinArea(char* parameter);
|
||||
void argIiMinBytes(char* parameter);
|
||||
void arg40Print(char* parameter);
|
||||
void arg40Modify(char* parameter);
|
||||
void arg40Extract(char* parameter);
|
||||
@ -894,12 +903,17 @@ ArgParser::initOptionTable()
|
||||
&ArgParser::argJsonObject, "trailer|obj[,gen]");
|
||||
(*t)["check"] = oe_bare(&ArgParser::argCheck);
|
||||
(*t)["optimize-images"] = oe_bare(&ArgParser::argOptimizeImages);
|
||||
(*t)["externalize-inline-images"] =
|
||||
oe_bare(&ArgParser::argExternalizeInlineImages);
|
||||
(*t)["keep-inline-images"] = oe_bare(&ArgParser::argKeepInlineImages);
|
||||
(*t)["oi-min-width"] = oe_requiredParameter(
|
||||
&ArgParser::argOiMinWidth, "minimum-width");
|
||||
(*t)["oi-min-height"] = oe_requiredParameter(
|
||||
&ArgParser::argOiMinHeight, "minimum-height");
|
||||
(*t)["oi-min-area"] = oe_requiredParameter(
|
||||
&ArgParser::argOiMinArea, "minimum-area");
|
||||
(*t)["ii-min-bytes"] = oe_requiredParameter(
|
||||
&ArgParser::argIiMinBytes, "minimum-bytes");
|
||||
(*t)["overlay"] = oe_bare(&ArgParser::argOverlay);
|
||||
(*t)["underlay"] = oe_bare(&ArgParser::argUnderlay);
|
||||
|
||||
@ -1308,6 +1322,12 @@ ArgParser::argHelp()
|
||||
<< " default is 128. Use 0 to mean no minimum\n"
|
||||
<< "--oi-min-area=a do not optimize images whose pixel count is below a\n"
|
||||
<< " default is 16,384. Use 0 to mean no minimum\n"
|
||||
<< "--externalize-inline-images convert inline images to regular images; by\n"
|
||||
<< " default, images of at least 1,024 bytes are\n"
|
||||
<< " externalized\n"
|
||||
<< "--ii-min-bytes=bytes specify minimum size of inline images to be\n"
|
||||
<< " converted to regular images\n"
|
||||
<< "--keep-inline-images exclude inline images from image optimization\n"
|
||||
<< "--qdf turns on \"QDF mode\" (below)\n"
|
||||
<< "--linearize-pass1=file write intermediate pass of linearized file\n"
|
||||
<< " for debugging\n"
|
||||
@ -1965,6 +1985,18 @@ ArgParser::argOptimizeImages()
|
||||
o.optimize_images = true;
|
||||
}
|
||||
|
||||
void
|
||||
ArgParser::argExternalizeInlineImages()
|
||||
{
|
||||
o.externalize_inline_images = true;
|
||||
}
|
||||
|
||||
void
|
||||
ArgParser::argKeepInlineImages()
|
||||
{
|
||||
o.keep_inline_images = true;
|
||||
}
|
||||
|
||||
void
|
||||
ArgParser::argOiMinWidth(char* parameter)
|
||||
{
|
||||
@ -1983,6 +2015,12 @@ ArgParser::argOiMinArea(char* parameter)
|
||||
o.oi_min_area = QUtil::string_to_int(parameter);
|
||||
}
|
||||
|
||||
void
|
||||
ArgParser::argIiMinBytes(char* parameter)
|
||||
{
|
||||
o.ii_min_bytes = QUtil::string_to_int(parameter);
|
||||
}
|
||||
|
||||
void
|
||||
ArgParser::arg40Print(char* parameter)
|
||||
{
|
||||
@ -2933,6 +2971,10 @@ ArgParser::doFinalChecks()
|
||||
{
|
||||
usage("no output file may be given for this option");
|
||||
}
|
||||
if (o.optimize_images && (! o.keep_inline_images))
|
||||
{
|
||||
o.externalize_inline_images = true;
|
||||
}
|
||||
|
||||
if (o.require_outfile && (strcmp(o.outfilename, "-") == 0))
|
||||
{
|
||||
@ -3764,10 +3806,7 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
|
||||
QPDFObjectHandle w_obj = dict.getKey("/Width");
|
||||
QPDFObjectHandle h_obj = dict.getKey("/Height");
|
||||
QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace");
|
||||
QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent");
|
||||
if (! (w_obj.isInteger() &&
|
||||
h_obj.isInteger() &&
|
||||
components_obj.isInteger()))
|
||||
if (! (w_obj.isNumber() && h_obj.isNumber()))
|
||||
{
|
||||
if (o.verbose && (! description.empty()))
|
||||
{
|
||||
@ -3777,8 +3816,12 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
|
||||
}
|
||||
return result;
|
||||
}
|
||||
JDIMENSION w = w_obj.getIntValue();
|
||||
JDIMENSION h = h_obj.getIntValue();
|
||||
// Files have been seen in the wild whose width and height are
|
||||
// floating point, which is goofy, but we can deal with it.
|
||||
JDIMENSION w = static_cast<JDIMENSION>(
|
||||
w_obj.isInteger() ? w_obj.getIntValue() : w_obj.getNumericValue());
|
||||
JDIMENSION h = static_cast<JDIMENSION>(
|
||||
h_obj.isInteger() ? h_obj.getIntValue() : h_obj.getNumericValue());
|
||||
std::string colorspace = (colorspace_obj.isName() ?
|
||||
colorspace_obj.getName() :
|
||||
"");
|
||||
@ -4198,6 +4241,16 @@ static void handle_under_overlay(QPDF& pdf, Options& o)
|
||||
static void handle_transformations(QPDF& pdf, Options& o)
|
||||
{
|
||||
QPDFPageDocumentHelper dh(pdf);
|
||||
if (o.externalize_inline_images)
|
||||
{
|
||||
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
|
||||
for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
|
||||
iter != pages.end(); ++iter)
|
||||
{
|
||||
QPDFPageObjectHelper& ph(*iter);
|
||||
ph.externalizeInlineImages(o.ii_min_bytes);
|
||||
}
|
||||
}
|
||||
if (o.optimize_images)
|
||||
{
|
||||
int pageno = 0;
|
||||
|
@ -436,3 +436,5 @@ QPDFTokenizer found EI the old way 0
|
||||
QPDFTokenizer found EI by byte count 0
|
||||
QPDFTokenizer inline image at EOF the old way 0
|
||||
QPDFTokenizer found EI after more than one try 0
|
||||
QPDFPageObjectHelper externalize inline image 0
|
||||
QPDFPageObjectHelper keep inline image 0
|
||||
|
@ -679,7 +679,7 @@ $td->runtest("check pass1 file",
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Inline Images ---");
|
||||
$n_tests += 2;
|
||||
$n_tests += 8;
|
||||
|
||||
# The file large-inline-image.pdf is a hand-crafted file with several
|
||||
# inline images of various sizes including one that is two megabytes,
|
||||
@ -696,6 +696,69 @@ $td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "large-inline-image.qdf"});
|
||||
|
||||
$td->runtest("eof in inline image",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --qdf --static-id eof-in-inline-image.pdf a.pdf"},
|
||||
{$td->FILE => "eof-inline-qdf.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "eof-in-inline-image.qdf"});
|
||||
$td->runtest("externalize eof in inline image",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --qdf --externalize-inline-images" .
|
||||
" --static-id eof-in-inline-image.pdf a.pdf"},
|
||||
{$td->FILE => "eof-inline-qdf.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "eof-in-inline-image-ii.qdf"});
|
||||
$td->runtest("externalize damaged image",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --externalize-inline-images" .
|
||||
" --compress-streams=n --static-id" .
|
||||
" damaged-inline-image.pdf a.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "damaged-inline-image-out.pdf"});
|
||||
|
||||
my @eii_tests = (
|
||||
['inline-images', 80],
|
||||
['large-inline-image', 1024],
|
||||
);
|
||||
$n_tests += 4 * scalar(@eii_tests);
|
||||
$n_compare_pdfs += 2 * scalar(@eii_tests);
|
||||
|
||||
foreach my $d (@eii_tests)
|
||||
{
|
||||
my ($file, $threshold) = @$d;
|
||||
$td->runtest("inline image $file (all)",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --qdf --static-id --externalize-inline-images" .
|
||||
" --ii-min-bytes=0 $file.pdf a.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "$file-ii-all.pdf"});
|
||||
compare_pdfs("$file.pdf", "a.pdf");
|
||||
|
||||
$td->runtest("inline image $file (some)",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --qdf --static-id --externalize-inline-images" .
|
||||
" --ii-min-bytes=$threshold $file.pdf a.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "$file-ii-some.pdf"});
|
||||
compare_pdfs("$file.pdf", "a.pdf");
|
||||
}
|
||||
|
||||
# QXXXQ externalize tests with min size
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Tokenizer ---");
|
||||
@ -2019,6 +2082,12 @@ my @image_opt = (
|
||||
'--oi-min-width=0 --oi-min-height=0 --oi-min-area=30000'],
|
||||
['small-images', 'min-area-all',
|
||||
'--oi-min-width=0 --oi-min-height=0 --oi-min-area=30000'],
|
||||
['large-inline-image', 'inline-images',
|
||||
'--ii-min-bytes=0'],
|
||||
['large-inline-image', 'inline-images-all-size',
|
||||
'--oi-min-width=0 --oi-min-height=0 --oi-min-area=0 --ii-min-bytes=0'],
|
||||
['large-inline-image', 'inline-images-keep-some', ''],
|
||||
['large-inline-image', 'inline-images-keep-all', '--keep-inline-images'],
|
||||
);
|
||||
|
||||
$n_tests += 2 * scalar(@image_opt);
|
||||
|
BIN
qpdf/qtest/qpdf/damaged-inline-image-out.pdf
Normal file
BIN
qpdf/qtest/qpdf/damaged-inline-image-out.pdf
Normal file
Binary file not shown.
BIN
qpdf/qtest/qpdf/damaged-inline-image.pdf
Normal file
BIN
qpdf/qtest/qpdf/damaged-inline-image.pdf
Normal file
Binary file not shown.
BIN
qpdf/qtest/qpdf/eof-in-inline-image-ii.qdf
Normal file
BIN
qpdf/qtest/qpdf/eof-in-inline-image-ii.qdf
Normal file
Binary file not shown.
BIN
qpdf/qtest/qpdf/eof-in-inline-image.qdf
Normal file
BIN
qpdf/qtest/qpdf/eof-in-inline-image.qdf
Normal file
Binary file not shown.
4
qpdf/qtest/qpdf/eof-inline-qdf.out
Normal file
4
qpdf/qtest/qpdf/eof-inline-qdf.out
Normal file
@ -0,0 +1,4 @@
|
||||
WARNING: eof-in-inline-image.pdf (offset 299): content normalization encountered bad tokens
|
||||
WARNING: eof-in-inline-image.pdf (offset 299): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
|
||||
WARNING: eof-in-inline-image.pdf (offset 299): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
|
||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
98566
qpdf/qtest/qpdf/inline-images-ii-all.pdf
Normal file
98566
qpdf/qtest/qpdf/inline-images-ii-all.pdf
Normal file
File diff suppressed because one or more lines are too long
79969
qpdf/qtest/qpdf/inline-images-ii-some.pdf
Normal file
79969
qpdf/qtest/qpdf/inline-images-ii-some.pdf
Normal file
File diff suppressed because one or more lines are too long
BIN
qpdf/qtest/qpdf/large-inline-image-ii-all.pdf
Normal file
BIN
qpdf/qtest/qpdf/large-inline-image-ii-all.pdf
Normal file
Binary file not shown.
BIN
qpdf/qtest/qpdf/large-inline-image-ii-some.pdf
Normal file
BIN
qpdf/qtest/qpdf/large-inline-image-ii-some.pdf
Normal file
Binary file not shown.
@ -0,0 +1,79 @@
|
||||
{
|
||||
"pages": [
|
||||
{
|
||||
"contents": [
|
||||
"7 0 R"
|
||||
],
|
||||
"images": [
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/DCTDecode"
|
||||
],
|
||||
"filterable": false,
|
||||
"height": 56,
|
||||
"name": "/IIm1",
|
||||
"object": "8 0 R",
|
||||
"width": 49
|
||||
},
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/DCTDecode"
|
||||
],
|
||||
"filterable": false,
|
||||
"height": 675,
|
||||
"name": "/IIm2",
|
||||
"object": "9 0 R",
|
||||
"width": 1200
|
||||
},
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/DCTDecode"
|
||||
],
|
||||
"filterable": false,
|
||||
"height": 56,
|
||||
"name": "/IIm3",
|
||||
"object": "10 0 R",
|
||||
"width": 49
|
||||
},
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceGray",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/FlateDecode"
|
||||
],
|
||||
"filterable": true,
|
||||
"height": 8,
|
||||
"name": "/IIm4",
|
||||
"object": "11 0 R",
|
||||
"width": 8
|
||||
}
|
||||
],
|
||||
"label": null,
|
||||
"object": "4 0 R",
|
||||
"outlines": [],
|
||||
"pageposfrom1": 1
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"decodelevel": "generalized"
|
||||
},
|
||||
"version": 1
|
||||
}
|
@ -0,0 +1,5 @@
|
||||
qpdf: image /IIm1 on page 1: optimizing image reduces size from 2391 to ...
|
||||
qpdf: image /IIm2 on page 1: optimizing image reduces size from 2134996 to ...
|
||||
qpdf: image /IIm3 on page 1: not optimizing because unable to decode data or data already uses DCT
|
||||
qpdf: image /IIm4 on page 1: not optimizing because DCT compression does not reduce image size
|
||||
qpdf: wrote file a.pdf
|
79
qpdf/qtest/qpdf/optimize-images-inline-images-json.out
Normal file
79
qpdf/qtest/qpdf/optimize-images-inline-images-json.out
Normal file
@ -0,0 +1,79 @@
|
||||
{
|
||||
"pages": [
|
||||
{
|
||||
"contents": [
|
||||
"7 0 R"
|
||||
],
|
||||
"images": [
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/FlateDecode"
|
||||
],
|
||||
"filterable": true,
|
||||
"height": 56,
|
||||
"name": "/IIm1",
|
||||
"object": "8 0 R",
|
||||
"width": 49
|
||||
},
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/DCTDecode"
|
||||
],
|
||||
"filterable": false,
|
||||
"height": 675,
|
||||
"name": "/IIm2",
|
||||
"object": "9 0 R",
|
||||
"width": 1200
|
||||
},
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/DCTDecode"
|
||||
],
|
||||
"filterable": false,
|
||||
"height": 56,
|
||||
"name": "/IIm3",
|
||||
"object": "10 0 R",
|
||||
"width": 49
|
||||
},
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceGray",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/FlateDecode"
|
||||
],
|
||||
"filterable": true,
|
||||
"height": 8,
|
||||
"name": "/IIm4",
|
||||
"object": "11 0 R",
|
||||
"width": 8
|
||||
}
|
||||
],
|
||||
"label": null,
|
||||
"object": "4 0 R",
|
||||
"outlines": [],
|
||||
"pageposfrom1": 1
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"decodelevel": "generalized"
|
||||
},
|
||||
"version": 1
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
{
|
||||
"pages": [
|
||||
{
|
||||
"contents": [
|
||||
"7 0 R"
|
||||
],
|
||||
"images": [],
|
||||
"label": null,
|
||||
"object": "4 0 R",
|
||||
"outlines": [],
|
||||
"pageposfrom1": 1
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"decodelevel": "generalized"
|
||||
},
|
||||
"version": 1
|
||||
}
|
@ -0,0 +1 @@
|
||||
qpdf: wrote file a.pdf
|
@ -0,0 +1,64 @@
|
||||
{
|
||||
"pages": [
|
||||
{
|
||||
"contents": [
|
||||
"7 0 R"
|
||||
],
|
||||
"images": [
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/FlateDecode"
|
||||
],
|
||||
"filterable": true,
|
||||
"height": 56,
|
||||
"name": "/IIm1",
|
||||
"object": "8 0 R",
|
||||
"width": 49
|
||||
},
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/DCTDecode"
|
||||
],
|
||||
"filterable": false,
|
||||
"height": 675,
|
||||
"name": "/IIm2",
|
||||
"object": "9 0 R",
|
||||
"width": 1200
|
||||
},
|
||||
{
|
||||
"bitspercomponent": 8,
|
||||
"colorspace": "/DeviceRGB",
|
||||
"decodeparms": [
|
||||
null
|
||||
],
|
||||
"filter": [
|
||||
"/DCTDecode"
|
||||
],
|
||||
"filterable": false,
|
||||
"height": 56,
|
||||
"name": "/IIm3",
|
||||
"object": "10 0 R",
|
||||
"width": 49
|
||||
}
|
||||
],
|
||||
"label": null,
|
||||
"object": "4 0 R",
|
||||
"outlines": [],
|
||||
"pageposfrom1": 1
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"decodelevel": "generalized"
|
||||
},
|
||||
"version": 1
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
qpdf: image /IIm1 on page 1: not optimizing because image is smaller than requested minimum dimensions
|
||||
qpdf: image /IIm2 on page 1: optimizing image reduces size from 2134996 to ...
|
||||
qpdf: image /IIm3 on page 1: not optimizing because unable to decode data or data already uses DCT
|
||||
qpdf: wrote file a.pdf
|
5
qpdf/qtest/qpdf/optimize-images-inline-images.out
Normal file
5
qpdf/qtest/qpdf/optimize-images-inline-images.out
Normal file
@ -0,0 +1,5 @@
|
||||
qpdf: image /IIm1 on page 1: not optimizing because image is smaller than requested minimum dimensions
|
||||
qpdf: image /IIm2 on page 1: optimizing image reduces size from 2134996 to ...
|
||||
qpdf: image /IIm3 on page 1: not optimizing because unable to decode data or data already uses DCT
|
||||
qpdf: image /IIm4 on page 1: not optimizing because image is smaller than requested minimum dimensions
|
||||
qpdf: wrote file a.pdf
|
Loading…
x
Reference in New Issue
Block a user