mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Add flags to control zlib compression level (fixes #113)
This commit is contained in:
parent
dac0598b94
commit
2794bfb1a6
14
ChangeLog
14
ChangeLog
@ -1,8 +1,22 @@
|
||||
2019-08-23 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add --recompress-flate option to qpdf and
|
||||
QPDFWriter::setRecompressFlate to cause QPDFWriter to recompress
|
||||
streams that are already compressed with /FlateDecode.
|
||||
|
||||
* Add option Pl_Flate::setCompressionLevel to globally set the
|
||||
zlib compression level used by all Pl_Flate pipelines.
|
||||
|
||||
* Add --compression-level flag to qpdf to set the zlib compression
|
||||
level. When combined with --recompress-flate, this will cause most
|
||||
of qpdf's streams to use the maximum compression level. This
|
||||
results in only a very small amount of savings in size that comes
|
||||
at a fairly significant performance cost, but it could be useful
|
||||
for archival files or other cases where every byte counts and
|
||||
creation time doesn't matter so much. Note that using
|
||||
--object-streams=generate in combination with these options gives
|
||||
you the biggest advantage. Fixes #113.
|
||||
|
||||
2019-08-22 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* In QPDFObjectHandle::ParserCallbacks, in addition to
|
||||
|
@ -189,10 +189,11 @@ class QPDFWriter
|
||||
// filters on the input. When combined with
|
||||
// setCompressStreams(true), which is the default, the effect of this
|
||||
// is that streams filtered with these older and less efficient
|
||||
// filters will be recompressed with the Flate filter. As a
|
||||
// special case, if a stream is already compressed with
|
||||
// filters will be recompressed with the Flate filter. By default,
|
||||
// as a special case, if a stream is already compressed with
|
||||
// FlateDecode and setCompressStreams is enabled, the original
|
||||
// compressed data will be preserved.
|
||||
// compressed data will be preserved. This behavior can be
|
||||
// overridden by calling setRecompressFlate(true).
|
||||
//
|
||||
// qpdf_dl_specialized: In addition to uncompressing the
|
||||
// generalized compression formats, supported non-lossy
|
||||
@ -209,6 +210,15 @@ class QPDFWriter
|
||||
QPDF_DLL
|
||||
void setDecodeLevel(qpdf_stream_decode_level_e);
|
||||
|
||||
// By default, when both the input and output contents of a stream
|
||||
// are compressed with Flate, qpdf does not uncompress and
|
||||
// recompress the stream. Passing true here causes it to do so.
|
||||
// This can be useful if recompressing all streams with a higher
|
||||
// compression level, which can be set by calling the static
|
||||
// method Pl_Flate::setCompressionLevel.
|
||||
QPDF_DLL
|
||||
void setRecompressFlate(bool);
|
||||
|
||||
// Set value of content stream normalization. The default is
|
||||
// "false". If true, we attempt to normalize newlines inside of
|
||||
// content streams. Some constructs such as inline images may
|
||||
@ -597,6 +607,7 @@ class QPDFWriter
|
||||
bool compress_streams_set;
|
||||
qpdf_stream_decode_level_e stream_decode_level;
|
||||
bool stream_decode_level_set;
|
||||
bool recompress_flate;
|
||||
bool qdf_mode;
|
||||
bool preserve_unreferenced_objects;
|
||||
bool newline_before_endstream;
|
||||
|
@ -37,6 +37,7 @@ QPDFWriter::Members::Members(QPDF& pdf) :
|
||||
compress_streams_set(false),
|
||||
stream_decode_level(qpdf_dl_none),
|
||||
stream_decode_level_set(false),
|
||||
recompress_flate(false),
|
||||
qdf_mode(false),
|
||||
preserve_unreferenced_objects(false),
|
||||
newline_before_endstream(false),
|
||||
@ -206,6 +207,12 @@ QPDFWriter::setDecodeLevel(qpdf_stream_decode_level_e val)
|
||||
this->m->stream_decode_level_set = true;
|
||||
}
|
||||
|
||||
// Record whether streams that are already compressed with
// /FlateDecode should be uncompressed and recompressed on output.
// The value is stored in the recompress_flate member, which
// unparseObject consults before deciding to leave an existing Flate
// stream untouched.
void
|
||||
QPDFWriter::setRecompressFlate(bool val)
|
||||
{
|
||||
this->m->recompress_flate = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::setContentNormalization(bool val)
|
||||
{
|
||||
@ -1716,13 +1723,14 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
|
||||
if (this->m->compress_streams)
|
||||
{
|
||||
// Don't filter if the stream is already compressed with
|
||||
// FlateDecode. We don't want to make it worse by getting
|
||||
// rid of a predictor or otherwise messing with it. We
|
||||
// should also avoid messing with anything that's
|
||||
// compressed with a lossy compression scheme, but we
|
||||
// don't support any of those right now.
|
||||
// FlateDecode. This way we don't make it worse if the
|
||||
// original file used a better Flate algorithm, and we
|
||||
// don't spend time and CPU cycles uncompressing and
|
||||
// recompressing stuff. This can be overridden with
|
||||
// setRecompressFlate(true).
|
||||
QPDFObjectHandle filter_obj = stream_dict.getKey("/Filter");
|
||||
if ((! object.isDataModified()) &&
|
||||
if ((! this->m->recompress_flate) &&
|
||||
(! object.isDataModified()) &&
|
||||
filter_obj.isName() &&
|
||||
((filter_obj.getName() == "/FlateDecode") ||
|
||||
(filter_obj.getName() == "/Fl")))
|
||||
|
@ -26,7 +26,8 @@ endif
|
||||
|
||||
$(OUTDOC).pdf: $(OUTDOC).fo qpdf/build/qpdf
|
||||
$(FOP) $< -pdf $@.tmp
|
||||
qpdf/build/qpdf --linearize $@.tmp $@
|
||||
qpdf/build/qpdf --linearize --object-streams=generate \
|
||||
--recompress-flate --compression-level=9 $@.tmp $@
|
||||
|
||||
$(OUTDOC).html: $(INDOC).xml manual/html.xsl $(VALIDATE)
|
||||
$(XSLTPROC) --output $@ manual/html.xsl $<
|
||||
|
@ -1433,27 +1433,32 @@ outfile.pdf</option>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>generalized</option>: decode streams filtered with
|
||||
supported generalized filters: <option>/LZWDecode</option>,
|
||||
<option>/FlateDecode</option>,
|
||||
<option>/ASCII85Decode</option>, and
|
||||
<option>/ASCIIHexDecode</option>. We define generalized
|
||||
supported generalized filters:
|
||||
<literal>/LZWDecode</literal>,
|
||||
<literal>/FlateDecode</literal>,
|
||||
<literal>/ASCII85Decode</literal>, and
|
||||
<literal>/ASCIIHexDecode</literal>. We define generalized
|
||||
filters as those to be used for general-purpose compression
|
||||
or encoding, as opposed to filters specifically designed
|
||||
for image data.
|
||||
for image data. Note that, by default, streams already
|
||||
compressed with <literal>/FlateDecode</literal> are not
|
||||
uncompressed and recompressed unless you also specify
|
||||
<option>--recompress-flate</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>specialized</option>: in addition to generalized,
|
||||
decode streams with supported non-lossy specialized
|
||||
filters; currently this is just <option>/RunLengthDecode</option>
|
||||
filters; currently this is just
|
||||
<literal>/RunLengthDecode</literal>
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<option>all</option>: in addition to generalized and
|
||||
specialized, decode streams with supported lossy filters;
|
||||
currently this is just <option>/DCTDecode</option> (JPEG)
|
||||
currently this is just <literal>/DCTDecode</literal> (JPEG)
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
@ -1476,7 +1481,10 @@ outfile.pdf</option>
|
||||
<option>compress</option>: recompress stream data when
|
||||
possible (default); equivalent to
|
||||
<option>--compress-streams=y</option>
|
||||
<option>--decode-level=generalized</option>
|
||||
<option>--decode-level=generalized</option>. Does not
|
||||
recompress streams already compressed with
|
||||
<literal>/FlateDecode</literal> unless
|
||||
<option>--recompress-flate</option> is also specified.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
@ -1498,6 +1506,37 @@ outfile.pdf</option>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--recompress-flate</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
By default, streams already compressed with
|
||||
<literal>/FlateDecode</literal> are left alone rather than
|
||||
being uncompressed and recompressed. This option causes qpdf
|
||||
to uncompress and recompress the streams. There is a
|
||||
significant performance cost to using this option, but you
|
||||
probably want to use it if you specify
|
||||
<option>--compression-level</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--compression-level=<replaceable>level</replaceable></option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
When writing new streams that are compressed with
|
||||
<literal>/FlateDecode</literal>, use the specified compression
|
||||
level. The value of <option>level</option> should be a number
|
||||
from 1 to 9 and is passed directly to zlib, which implements
|
||||
deflate compression. Note that qpdf doesn't uncompress and
|
||||
recompress streams by default. To have this option apply to
|
||||
already compressed streams, you should also specify
|
||||
<option>--recompress-flate</option>. If your goal is to shrink
|
||||
the size of PDF files, you should also use
|
||||
<option>--object-streams=generate</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--normalize-content=[yn]</option></term>
|
||||
<listitem>
|
||||
@ -4449,7 +4488,7 @@ print "\n";
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Library Enhancements
|
||||
Library and CLI Enhancements
|
||||
</para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
@ -4508,6 +4547,41 @@ print "\n";
|
||||
bytes of the combined contents.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Static method
|
||||
<function>Pl_Flate::setCompressionLevel</function> can be
|
||||
called to set the zlib compression level globally used by
|
||||
all instances of Pl_Flate in deflate mode.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
The method
|
||||
<function>QPDFWriter::setRecompressFlate</function> can be
|
||||
called to tell <classname>QPDFWriter</classname> to
|
||||
uncompress and recompress streams already compressed with
|
||||
<literal>/FlateDecode</literal>.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
CLI enhancement: the <option>--recompress-flate</option> option
|
||||
instructs <command>qpdf</command> to recompress streams that
|
||||
are already compressed with <literal>/FlateDecode</literal>.
|
||||
Useful with <option>--compression-level</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
CLI enhancement: the
|
||||
<option>--compression-level=<replaceable>level</replaceable></option>
|
||||
option sets the zlib compression level used for any streams
|
||||
compressed by <literal>/FlateDecode</literal>. Most
|
||||
effective when combined with
|
||||
<option>--recompress-flate</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
The underlying implementation of QPDF arrays has been
|
||||
@ -5699,9 +5773,9 @@ print "\n";
|
||||
<listitem>
|
||||
<para>
|
||||
Disregard data check errors when uncompressing
|
||||
<option>/FlateDecode</option> streams. This is consistent with
|
||||
most other PDF readers and allows qpdf to recover data from
|
||||
another class of malformed PDF files.
|
||||
<literal>/FlateDecode</literal> streams. This is consistent
|
||||
with most other PDF readers and allows qpdf to recover data
|
||||
from another class of malformed PDF files.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
|
36
qpdf/qpdf.cc
36
qpdf/qpdf.cc
@ -13,6 +13,7 @@
|
||||
#include <qpdf/Pl_Discard.hh>
|
||||
#include <qpdf/Pl_DCT.hh>
|
||||
#include <qpdf/Pl_Count.hh>
|
||||
#include <qpdf/Pl_Flate.hh>
|
||||
#include <qpdf/PointerHolder.hh>
|
||||
|
||||
#include <qpdf/QPDF.hh>
|
||||
@ -124,6 +125,9 @@ struct Options
|
||||
stream_data_mode(qpdf_s_compress),
|
||||
compress_streams(true),
|
||||
compress_streams_set(false),
|
||||
recompress_flate(false),
|
||||
recompress_flate_set(false),
|
||||
compression_level(-1),
|
||||
decode_level(qpdf_dl_generalized),
|
||||
decode_level_set(false),
|
||||
normalize_set(false),
|
||||
@ -217,6 +221,9 @@ struct Options
|
||||
qpdf_stream_data_e stream_data_mode;
|
||||
bool compress_streams;
|
||||
bool compress_streams_set;
|
||||
bool recompress_flate;
|
||||
bool recompress_flate_set;
|
||||
int compression_level;
|
||||
qpdf_stream_decode_level_e decode_level;
|
||||
bool decode_level_set;
|
||||
bool normalize_set;
|
||||
@ -632,6 +639,8 @@ class ArgParser
|
||||
void argCollate();
|
||||
void argStreamData(char* parameter);
|
||||
void argCompressStreams(char* parameter);
|
||||
void argRecompressFlate();
|
||||
void argCompressionLevel(char* parameter);
|
||||
void argDecodeLevel(char* parameter);
|
||||
void argNormalizeContent(char* parameter);
|
||||
void argSuppressRecovery();
|
||||
@ -847,6 +856,9 @@ ArgParser::initOptionTable()
|
||||
&ArgParser::argStreamData, stream_data_choices);
|
||||
(*t)["compress-streams"] = oe_requiredChoices(
|
||||
&ArgParser::argCompressStreams, yn);
|
||||
(*t)["recompress-flate"] = oe_bare(&ArgParser::argRecompressFlate);
|
||||
(*t)["compression-level"] = oe_requiredParameter(
|
||||
&ArgParser::argCompressionLevel, "level");
|
||||
char const* decode_level_choices[] =
|
||||
{"none", "generalized", "specialized", "all", 0};
|
||||
(*t)["decode-level"] = oe_requiredChoices(
|
||||
@ -1328,6 +1340,9 @@ ArgParser::argHelp()
|
||||
<< "--stream-data=option controls transformation of stream data (below)\n"
|
||||
<< "--compress-streams=[yn] controls whether to compress streams on output\n"
|
||||
<< "--decode-level=option controls how to filter streams from the input\n"
|
||||
<< "--recompress-flate recompress streams already compressed with Flate\n"
|
||||
<< "--compression-level=n set zlib compression level; most effective with\n"
|
||||
<< " --recompress-flate --object-streams=generate\n"
|
||||
<< "--normalize-content=[yn] enables or disables normalization of content streams\n"
|
||||
<< "--object-streams=mode controls handing of object streams\n"
|
||||
<< "--preserve-unreferenced preserve unreferenced objects\n"
|
||||
@ -1724,6 +1739,19 @@ ArgParser::argCompressStreams(char* parameter)
|
||||
o.compress_streams = (strcmp(parameter, "y") == 0);
|
||||
}
|
||||
|
||||
// Handler for the bare --recompress-flate option. Marks the option
// as explicitly given (recompress_flate_set) and turns recompression
// on (recompress_flate); set_writer_options forwards the value to
// QPDFWriter::setRecompressFlate only when the "_set" flag is true.
void
|
||||
ArgParser::argRecompressFlate()
|
||||
{
|
||||
o.recompress_flate_set = true;
|
||||
o.recompress_flate = true;
|
||||
}
|
||||
|
||||
// Handler for --compression-level=level. Converts the parameter to
// an int with QUtil::string_to_int and stores it in
// o.compression_level. set_writer_options passes the value to
// Pl_Flate::setCompressionLevel only when it is >= 0, so a negative
// value behaves as if the option had not been given.
// NOTE(review): no range check is visible here even though the
// manual documents levels 1 through 9 -- confirm whether
// out-of-range values should be rejected at parse time.
void
|
||||
ArgParser::argCompressionLevel(char* parameter)
|
||||
{
|
||||
o.compression_level = QUtil::string_to_int(parameter);
|
||||
}
|
||||
|
||||
void
|
||||
ArgParser::argDecodeLevel(char* parameter)
|
||||
{
|
||||
@ -4889,6 +4917,10 @@ static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w)
|
||||
|
||||
static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
|
||||
{
|
||||
if (o.compression_level >= 0)
|
||||
{
|
||||
Pl_Flate::setCompressionLevel(o.compression_level);
|
||||
}
|
||||
if (o.qdf_mode)
|
||||
{
|
||||
w.setQDFMode(true);
|
||||
@ -4913,6 +4945,10 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
|
||||
{
|
||||
w.setCompressStreams(o.compress_streams);
|
||||
}
|
||||
if (o.recompress_flate_set)
|
||||
{
|
||||
w.setRecompressFlate(o.recompress_flate);
|
||||
}
|
||||
if (o.decode_level_set)
|
||||
{
|
||||
w.setDecodeLevel(o.decode_level);
|
||||
|
@ -3876,8 +3876,20 @@ $td->runtest("convert inline-images to qdf",
|
||||
compare_pdfs("inline-images.pdf", "a.pdf");
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Compression Level ---");
|
||||
$n_tests += 4;
|
||||
|
||||
check_pdf("recompress with level",
|
||||
"qpdf --static-id --recompress-flate --compression-level=9" .
|
||||
" --object-streams=generate minimal.pdf",
|
||||
"minimal-9.pdf", 0);
|
||||
check_pdf("recompress with level",
|
||||
"qpdf --static-id --recompress-flate --compression-level=1" .
|
||||
" --object-streams=generate minimal.pdf",
|
||||
"minimal-1.pdf", 0);
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Specialized filtering Tests ---");
|
||||
$n_tests += 3;
|
||||
|
BIN
qpdf/qtest/qpdf/minimal-1.pdf
Normal file
BIN
qpdf/qtest/qpdf/minimal-1.pdf
Normal file
Binary file not shown.
BIN
qpdf/qtest/qpdf/minimal-9.pdf
Normal file
BIN
qpdf/qtest/qpdf/minimal-9.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user