Add --remove-unreferenced-resources option

This commit is contained in:
Jay Berkenbilt 2020-04-04 11:32:27 -04:00
parent 8190462394
commit 1e766dcda2
3 changed files with 57 additions and 6 deletions

View File

@ -1,3 +1,20 @@
2020-04-04 Jay Berkenbilt <ejb@ql.org>
* Add new option --remove-unreferenced-resources that takes auto,
yes, or no as its value. This tells qpdf whether to attempt to
remove unreferenced resources from pages when doing page splitting
operations. Prior to this change, the default was to attempt to
remove unreferenced resources, but this operation was very slow,
especially for large and complex files. The new default is "auto",
which tells qpdf to analyze the file for shared resources. This is
a relatively quick test. If no shared resources are found, then we
don't attempt to remove unreferenced resources, because
unreferenced resources never occur in files without shared
resources. To force qpdf to look for and remove unreferenced
resources, use --remove-unreferenced-resources=yes. The option
--preserve-unreferenced-resources is now a synonym for
--remove-unreferenced-resources=no.
2020-04-03 Jay Berkenbilt <ejb@ql.org>
* Allow qpdf to be built on systems without wchar_t. All "normal"

View File

@ -94,6 +94,8 @@ struct UnderOverlay
std::vector<int> repeat_pagenos;
};
// Setting that records whether unreferenced page resources should be
// removed. One enumerator exists for each value accepted by the
// --remove-unreferenced-resources option.
enum remove_unref_e
{
    re_auto, // --remove-unreferenced-resources=auto
    re_yes,  // --remove-unreferenced-resources=yes
    re_no    // --remove-unreferenced-resources=no
};
struct Options
{
Options() :
@ -144,7 +146,7 @@ struct Options
ignore_xref_streams(false),
qdf_mode(false),
preserve_unreferenced_objects(false),
preserve_unreferenced_page_resources(false),
remove_unreferenced_page_resources(re_auto),
keep_files_open(true),
keep_files_open_set(false),
keep_files_open_threshold(200), // default known in help and docs
@ -243,7 +245,7 @@ struct Options
bool ignore_xref_streams;
bool qdf_mode;
bool preserve_unreferenced_objects;
bool preserve_unreferenced_page_resources;
remove_unref_e remove_unreferenced_page_resources;
bool keep_files_open;
bool keep_files_open_set;
size_t keep_files_open_threshold;
@ -739,6 +741,7 @@ class ArgParser
void argQdf();
void argPreserveUnreferenced();
void argPreserveUnreferencedResources();
void argRemoveUnreferencedResources(char* parameter);
void argKeepFilesOpen(char* parameter);
void argKeepFilesOpenThreshold(char* parameter);
void argNewlineBeforeEndstream();
@ -970,6 +973,10 @@ ArgParser::initOptionTable()
&ArgParser::argPreserveUnreferenced);
(*t)["preserve-unreferenced-resources"] = oe_bare(
&ArgParser::argPreserveUnreferencedResources);
char const* remove_unref_choices[] = {
"auto", "yes", "no", 0};
(*t)["remove-unreferenced-resources"] = oe_requiredChoices(
&ArgParser::argRemoveUnreferencedResources, remove_unref_choices);
(*t)["keep-files-open"] = oe_requiredChoices(
&ArgParser::argKeepFilesOpen, yn);
(*t)["keep-files-open-threshold"] = oe_requiredParameter(
@ -1459,7 +1466,9 @@ ArgParser::argHelp()
<< "--object-streams=mode controls handing of object streams\n"
<< "--preserve-unreferenced preserve unreferenced objects\n"
<< "--preserve-unreferenced-resources\n"
<< " preserve unreferenced page resources\n"
<< " synonym for --remove-unreferenced-resources=no\n"
<< "--remove-unreferenced-resources={auto,yes,no}\n"
<< " whether to remove unreferenced page resources\n"
<< "--newline-before-endstream always put a newline before endstream\n"
<< "--coalesce-contents force all pages' content to be a single stream\n"
<< "--flatten-annotations=option\n"
@ -1973,7 +1982,30 @@ ArgParser::argPreserveUnreferenced()
// Handler for the bare --preserve-unreferenced-resources option, which the
// help text describes as a synonym for --remove-unreferenced-resources=no.
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// the first assignment below is presumably the removed line and the second
// its replacement -- confirm against the actual file.
void
ArgParser::argPreserveUnreferencedResources()
{
o.preserve_unreferenced_page_resources = true;
o.remove_unreferenced_page_resources = re_no;
}
// Handler for --remove-unreferenced-resources={auto,yes,no}.
//
// Maps the textual choice in `parameter` onto the corresponding
// remove_unref_e value and stores it in
// o.remove_unreferenced_page_resources. Any other value is reported
// through usage().
void
ArgParser::argRemoveUnreferencedResources(char* parameter)
{
    if (strcmp(parameter, "auto") == 0)
    {
        o.remove_unreferenced_page_resources = re_auto;
    }
    else if (strcmp(parameter, "yes") == 0)
    {
        o.remove_unreferenced_page_resources = re_yes;
    }
    else if (strcmp(parameter, "no") == 0)
    {
        o.remove_unreferenced_page_resources = re_no;
    }
    else
    {
        // If this happens, it means remove_unref_choices in
        // ArgParser::initOptionTable is wrong. The message must name
        // the option exactly as it is registered in the option table
        // (--remove-unreferenced-resources), not the name of the
        // Options member it sets.
        usage("invalid value for --remove-unreferenced-resources");
    }
}
void
@ -4838,7 +4870,7 @@ static void handle_page_specs(QPDF& pdf, Options& o)
page_spec.range));
}
if (! o.preserve_unreferenced_page_resources)
if (o.remove_unreferenced_page_resources != re_no)
{
for (std::map<std::string, QPDF*>::iterator iter =
page_spec_qpdfs.begin();
@ -5336,7 +5368,7 @@ static void do_split_pages(QPDF& pdf, Options& o)
before = std::string(o.outfilename) + "-";
}
if (! o.preserve_unreferenced_page_resources)
if (o.remove_unreferenced_page_resources != re_no)
{
QPDFPageDocumentHelper dh(pdf);
dh.removeUnreferencedResources();

View File

@ -2120,6 +2120,7 @@ $td->runtest("check output",
# Runs qpdf with --remove-unreferenced-resources=yes on shared-images.pdf,
# taking pages 1,3 from "." and pages 1,2 from "./shared-images.pdf" and
# writing a.pdf. Expects empty output and exit status 0.
# NOTE(review): the option is presumably passed explicitly so the test does
# not depend on the "auto" default -- confirm.
$td->runtest("split with shared resources",
{$td->COMMAND =>
"qpdf --qdf --static-id" .
" --remove-unreferenced-resources=yes" .
" shared-images.pdf --pages . 1,3" .
" ./shared-images.pdf 1,2 -- a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
@ -2130,6 +2131,7 @@ $td->runtest("check output",
# Runs qpdf with --remove-unreferenced-resources=yes on shared-images.pdf,
# taking pages 1,3 and pages 1,2 both from "." and writing a.pdf.
# Expects empty output and exit status 0.
# NOTE(review): the option is presumably passed explicitly so the test does
# not depend on the "auto" default -- confirm.
$td->runtest("split with really shared resources",
{$td->COMMAND =>
"qpdf --qdf --static-id" .
" --remove-unreferenced-resources=yes" .
" shared-images.pdf --pages . 1,3" .
" . 1,2 -- a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});