2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-31 17:30:54 +00:00
qpdf/qpdf/qpdf.cc
Jay Berkenbilt 9b28933647 Check object ownership when adding
When adding a QPDFObjectHandle to an array or dictionary, if possible,
check if the new object belongs to the same QPDF. This makes it much
easier to find incorrect code than waiting for the situation to be
detected when the file is written.
2021-11-04 12:29:42 -04:00

6717 lines
221 KiB
C++

#include <iostream>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <ctype.h>
#include <memory>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <qpdf/ClosedFileInputSource.hh>
#include <qpdf/FileInputSource.hh>
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/Pl_Discard.hh>
#include <qpdf/Pl_DCT.hh>
#include <qpdf/Pl_Count.hh>
#include <qpdf/Pl_Flate.hh>
#include <qpdf/PointerHolder.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QPDFPageLabelDocumentHelper.hh>
#include <qpdf/QPDFOutlineDocumentHelper.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/QPDFSystemError.hh>
#include <qpdf/QPDFCryptoProvider.hh>
#include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
#include <qpdf/QPDFWriter.hh>
#include <qpdf/QIntC.hh>
static int constexpr EXIT_ERROR = 2;
static int EXIT_WARNING = 3; // may be changed to 0 at runtime
// For is-encrypted and requires-password
static int constexpr EXIT_IS_NOT_ENCRYPTED = 2;
static int constexpr EXIT_CORRECT_PASSWORD = 3;
static char const* whoami = 0;
static std::string expected_version = "10.3.2";
struct PageSpec
{
PageSpec(std::string const& filename,
char const* password,
char const* range) :
filename(filename),
password(password),
range(range)
{
}
std::string filename;
char const* password;
char const* range;
};
struct RotationSpec
{
RotationSpec(int angle = 0, bool relative = false) :
angle(angle),
relative(relative)
{
}
int angle;
bool relative;
};
enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto };
struct UnderOverlay
{
UnderOverlay(char const* which) :
which(which),
filename(0),
password(0),
to_nr("1-z"),
from_nr("1-z"),
repeat_nr("")
{
}
std::string which;
char const* filename;
char const* password;
char const* to_nr;
char const* from_nr;
char const* repeat_nr;
PointerHolder<QPDF> pdf;
std::vector<int> to_pagenos;
std::vector<int> from_pagenos;
std::vector<int> repeat_pagenos;
};
struct AddAttachment
{
AddAttachment() :
replace(false)
{
}
std::string path;
std::string key;
std::string filename;
std::string creationdate;
std::string moddate;
std::string mimetype;
std::string description;
bool replace;
};
struct CopyAttachmentFrom
{
std::string path;
std::string password;
std::string prefix;
};
enum remove_unref_e { re_auto, re_yes, re_no };
struct Options
{
Options() :
password(0),
linearize(false),
decrypt(false),
split_pages(0),
verbose(false),
progress(false),
suppress_warnings(false),
copy_encryption(false),
encryption_file(0),
encryption_file_password(0),
encrypt(false),
password_is_hex_key(false),
suppress_password_recovery(false),
password_mode(pm_auto),
allow_insecure(false),
keylen(0),
r2_print(true),
r2_modify(true),
r2_extract(true),
r2_annotate(true),
r3_accessibility(true),
r3_extract(true),
r3_assemble(true),
r3_annotate_and_form(true),
r3_form_filling(true),
r3_modify_other(true),
r3_print(qpdf_r3p_full),
force_V4(false),
force_R5(false),
cleartext_metadata(false),
use_aes(false),
stream_data_set(false),
stream_data_mode(qpdf_s_compress),
compress_streams(true),
compress_streams_set(false),
recompress_flate(false),
recompress_flate_set(false),
compression_level(-1),
decode_level(qpdf_dl_generalized),
decode_level_set(false),
normalize_set(false),
normalize(false),
suppress_recovery(false),
object_stream_set(false),
object_stream_mode(qpdf_o_preserve),
ignore_xref_streams(false),
qdf_mode(false),
preserve_unreferenced_objects(false),
remove_unreferenced_page_resources(re_auto),
keep_files_open(true),
keep_files_open_set(false),
keep_files_open_threshold(200), // default known in help and docs
newline_before_endstream(false),
coalesce_contents(false),
flatten_annotations(false),
flatten_annotations_required(0),
flatten_annotations_forbidden(an_invisible | an_hidden),
generate_appearances(false),
show_npages(false),
deterministic_id(false),
static_id(false),
static_aes_iv(false),
suppress_original_object_id(false),
show_encryption(false),
show_encryption_key(false),
check_linearization(false),
show_linearization(false),
show_xref(false),
show_trailer(false),
show_obj(0),
show_gen(0),
show_raw_stream_data(false),
show_filtered_stream_data(false),
show_pages(false),
show_page_images(false),
collate(0),
flatten_rotation(false),
list_attachments(false),
json(false),
check(false),
optimize_images(false),
externalize_inline_images(false),
keep_inline_images(false),
remove_page_labels(false),
oi_min_width(128), // Default values for these
oi_min_height(128), // oi flags are in --help
oi_min_area(16384), // and in the manual.
ii_min_bytes(1024), //
underlay("underlay"),
overlay("overlay"),
under_overlay(0),
require_outfile(true),
replace_input(false),
check_is_encrypted(false),
check_requires_password(false),
infilename(0),
outfilename(0)
{
}
char const* password;
std::shared_ptr<char> password_alloc;
bool linearize;
bool decrypt;
int split_pages;
bool verbose;
bool progress;
bool suppress_warnings;
bool copy_encryption;
char const* encryption_file;
char const* encryption_file_password;
bool encrypt;
bool password_is_hex_key;
bool suppress_password_recovery;
password_mode_e password_mode;
bool allow_insecure;
std::string user_password;
std::string owner_password;
int keylen;
bool r2_print;
bool r2_modify;
bool r2_extract;
bool r2_annotate;
bool r3_accessibility;
bool r3_extract;
bool r3_assemble;
bool r3_annotate_and_form;
bool r3_form_filling;
bool r3_modify_other;
qpdf_r3_print_e r3_print;
bool force_V4;
bool force_R5;
bool cleartext_metadata;
bool use_aes;
bool stream_data_set;
qpdf_stream_data_e stream_data_mode;
bool compress_streams;
bool compress_streams_set;
bool recompress_flate;
bool recompress_flate_set;
int compression_level;
qpdf_stream_decode_level_e decode_level;
bool decode_level_set;
bool normalize_set;
bool normalize;
bool suppress_recovery;
bool object_stream_set;
qpdf_object_stream_e object_stream_mode;
bool ignore_xref_streams;
bool qdf_mode;
bool preserve_unreferenced_objects;
remove_unref_e remove_unreferenced_page_resources;
bool keep_files_open;
bool keep_files_open_set;
size_t keep_files_open_threshold;
bool newline_before_endstream;
std::string linearize_pass1;
bool coalesce_contents;
bool flatten_annotations;
int flatten_annotations_required;
int flatten_annotations_forbidden;
bool generate_appearances;
std::string min_version;
std::string force_version;
bool show_npages;
bool deterministic_id;
bool static_id;
bool static_aes_iv;
bool suppress_original_object_id;
bool show_encryption;
bool show_encryption_key;
bool check_linearization;
bool show_linearization;
bool show_xref;
bool show_trailer;
int show_obj;
int show_gen;
bool show_raw_stream_data;
bool show_filtered_stream_data;
bool show_pages;
bool show_page_images;
size_t collate;
bool flatten_rotation;
bool list_attachments;
std::string attachment_to_show;
std::list<std::string> attachments_to_remove;
std::list<AddAttachment> attachments_to_add;
std::list<CopyAttachmentFrom> attachments_to_copy;
bool json;
std::set<std::string> json_keys;
std::set<std::string> json_objects;
bool check;
bool optimize_images;
bool externalize_inline_images;
bool keep_inline_images;
bool remove_page_labels;
size_t oi_min_width;
size_t oi_min_height;
size_t oi_min_area;
size_t ii_min_bytes;
UnderOverlay underlay;
UnderOverlay overlay;
UnderOverlay* under_overlay;
std::vector<PageSpec> page_specs;
std::map<std::string, RotationSpec> rotations;
bool require_outfile;
bool replace_input;
bool check_is_encrypted;
bool check_requires_password;
char const* infilename;
char const* outfilename;
};
struct QPDFPageData
{
QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range);
QPDFPageData(QPDFPageData const& other, int page);
std::string filename;
QPDF* qpdf;
std::vector<QPDFObjectHandle> orig_pages;
std::vector<int> selected_pages;
};
class DiscardContents: public QPDFObjectHandle::ParserCallbacks
{
public:
virtual ~DiscardContents() {}
virtual void handleObject(QPDFObjectHandle) {}
virtual void handleEOF() {}
};
class ProgressReporter: public QPDFWriter::ProgressReporter
{
public:
ProgressReporter(char const* filename) :
filename(filename)
{
}
virtual ~ProgressReporter()
{
}
virtual void reportProgress(int);
private:
std::string filename;
};
void
ProgressReporter::reportProgress(int percentage)
{
std::cout << whoami << ": " << filename << ": write progress: "
<< percentage << "%" << std::endl;
}
static JSON json_schema(std::set<std::string>* keys = 0)
{
// Style: use all lower-case keys with no dashes or underscores.
// Choose array or dictionary based on indexing. For example, we
// use a dictionary for objects because we want to index by object
// ID and an array for pages because we want to index by position.
// The pages in the pages array contain references back to the
// original object, which can be resolved in the objects
// dictionary. When a PDF construct that maps back to an original
// object is represented separately, use "object" as the key that
// references the original object.
// This JSON object doubles as a schema and as documentation for
// our JSON output. Any schema mismatch is a bug in qpdf. This
// helps to enforce our policy of consistently providing a known
// structure where every documented key will always be present,
// which makes it easier to consume our JSON. This is discussed in
// more depth in the manual.
JSON schema = JSON::makeDictionary();
schema.addDictionaryMember(
"version", JSON::makeString(
"JSON format serial number; increased for non-compatible changes"));
JSON j_params = schema.addDictionaryMember(
"parameters", JSON::makeDictionary());
j_params.addDictionaryMember(
"decodelevel", JSON::makeString(
"decode level used to determine stream filterability"));
bool all_keys = ((keys == 0) || keys->empty());
// The list of selectable top-level keys id duplicated in three
// places: json_schema, do_json, and initOptionTable.
if (all_keys || keys->count("objects"))
{
schema.addDictionaryMember(
"objects", JSON::makeString(
"dictionary of original objects;"
" keys are 'trailer' or 'n n R'"));
}
if (all_keys || keys->count("objectinfo"))
{
JSON objectinfo = schema.addDictionaryMember(
"objectinfo", JSON::makeDictionary());
JSON details = objectinfo.addDictionaryMember(
"<object-id>", JSON::makeDictionary());
JSON stream = details.addDictionaryMember(
"stream", JSON::makeDictionary());
stream.addDictionaryMember(
"is",
JSON::makeString("whether the object is a stream"));
stream.addDictionaryMember(
"length",
JSON::makeString("if stream, its length, otherwise null"));
stream.addDictionaryMember(
"filter",
JSON::makeString("if stream, its filters, otherwise null"));
}
if (all_keys || keys->count("pages"))
{
JSON page = schema.addDictionaryMember("pages", JSON::makeArray()).
addArrayElement(JSON::makeDictionary());
page.addDictionaryMember(
"object",
JSON::makeString("reference to original page object"));
JSON image = page.addDictionaryMember("images", JSON::makeArray()).
addArrayElement(JSON::makeDictionary());
image.addDictionaryMember(
"name",
JSON::makeString("name of image in XObject table"));
image.addDictionaryMember(
"object",
JSON::makeString("reference to image stream"));
image.addDictionaryMember(
"width",
JSON::makeString("image width"));
image.addDictionaryMember(
"height",
JSON::makeString("image height"));
image.addDictionaryMember(
"colorspace",
JSON::makeString("color space"));
image.addDictionaryMember(
"bitspercomponent",
JSON::makeString("bits per component"));
image.addDictionaryMember("filter", JSON::makeArray()).
addArrayElement(
JSON::makeString("filters applied to image data"));
image.addDictionaryMember("decodeparms", JSON::makeArray()).
addArrayElement(
JSON::makeString("decode parameters for image data"));
image.addDictionaryMember(
"filterable",
JSON::makeString("whether image data can be decoded"
" using the decode level qpdf was invoked with"));
page.addDictionaryMember("contents", JSON::makeArray()).
addArrayElement(
JSON::makeString("reference to each content stream"));
page.addDictionaryMember(
"label",
JSON::makeString("page label dictionary, or null if none"));
JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()).
addArrayElement(JSON::makeDictionary());
outline.addDictionaryMember(
"object",
JSON::makeString("reference to outline that targets this page"));
outline.addDictionaryMember(
"title",
JSON::makeString("outline title"));
outline.addDictionaryMember(
"dest",
JSON::makeString("outline destination dictionary"));
page.addDictionaryMember(
"pageposfrom1",
JSON::makeString("position of page in document numbering from 1"));
}
if (all_keys || keys->count("pagelabels"))
{
JSON labels = schema.addDictionaryMember(
"pagelabels", JSON::makeArray()).
addArrayElement(JSON::makeDictionary());
labels.addDictionaryMember(
"index",
JSON::makeString("starting page position starting from zero"));
labels.addDictionaryMember(
"label",
JSON::makeString("page label dictionary"));
}
if (all_keys || keys->count("outlines"))
{
JSON outlines = schema.addDictionaryMember(
"outlines", JSON::makeArray()).
addArrayElement(JSON::makeDictionary());
outlines.addDictionaryMember(
"object",
JSON::makeString("reference to this outline"));
outlines.addDictionaryMember(
"title",
JSON::makeString("outline title"));
outlines.addDictionaryMember(
"dest",
JSON::makeString("outline destination dictionary"));
outlines.addDictionaryMember(
"kids",
JSON::makeString("array of descendent outlines"));
outlines.addDictionaryMember(
"open",
JSON::makeString("whether the outline is displayed expanded"));
outlines.addDictionaryMember(
"destpageposfrom1",
JSON::makeString("position of destination page in document"
" numbered from 1; null if not known"));
}
if (all_keys || keys->count("acroform"))
{
JSON acroform = schema.addDictionaryMember(
"acroform", JSON::makeDictionary());
acroform.addDictionaryMember(
"hasacroform",
JSON::makeString("whether the document has interactive forms"));
acroform.addDictionaryMember(
"needappearances",
JSON::makeString("whether the form fields' appearance"
" streams need to be regenerated"));
JSON fields = acroform.addDictionaryMember(
"fields", JSON::makeArray()).
addArrayElement(JSON::makeDictionary());
fields.addDictionaryMember(
"object",
JSON::makeString("reference to this form field"));
fields.addDictionaryMember(
"parent",
JSON::makeString("reference to this field's parent"));
fields.addDictionaryMember(
"pageposfrom1",
JSON::makeString("position of containing page numbered from 1"));
fields.addDictionaryMember(
"fieldtype",
JSON::makeString("field type"));
fields.addDictionaryMember(
"fieldflags",
JSON::makeString(
"form field flags from /Ff --"
" see pdf_form_field_flag_e in qpdf/Constants.h"));
fields.addDictionaryMember(
"fullname",
JSON::makeString("full name of field"));
fields.addDictionaryMember(
"partialname",
JSON::makeString("partial name of field"));
fields.addDictionaryMember(
"alternativename",
JSON::makeString(
"alternative name of field --"
" this is the one usually shown to users"));
fields.addDictionaryMember(
"mappingname",
JSON::makeString("mapping name of field"));
fields.addDictionaryMember(
"value",
JSON::makeString("value of field"));
fields.addDictionaryMember(
"defaultvalue",
JSON::makeString("default value of field"));
fields.addDictionaryMember(
"quadding",
JSON::makeString(
"field quadding --"
" number indicating left, center, or right"));
fields.addDictionaryMember(
"ischeckbox",
JSON::makeString("whether field is a checkbox"));
fields.addDictionaryMember(
"isradiobutton",
JSON::makeString("whether field is a radio button --"
" buttons in a single group share a parent"));
fields.addDictionaryMember(
"ischoice",
JSON::makeString("whether field is a list, combo, or dropdown"));
fields.addDictionaryMember(
"istext",
JSON::makeString("whether field is a text field"));
JSON j_choices = fields.addDictionaryMember(
"choices",
JSON::makeString("for choices fields, the list of"
" choices presented to the user"));
JSON annotation = fields.addDictionaryMember(
"annotation", JSON::makeDictionary());
annotation.addDictionaryMember(
"object",
JSON::makeString("reference to the annotation object"));
annotation.addDictionaryMember(
"appearancestate",
JSON::makeString("appearance state --"
" can be used to determine value for"
" checkboxes and radio buttons"));
annotation.addDictionaryMember(
"annotationflags",
JSON::makeString(
"annotation flags from /F --"
" see pdf_annotation_flag_e in qpdf/Constants.h"));
}
if (all_keys || keys->count("encrypt"))
{
JSON encrypt = schema.addDictionaryMember(
"encrypt", JSON::makeDictionary());
encrypt.addDictionaryMember(
"encrypted",
JSON::makeString("whether the document is encrypted"));
encrypt.addDictionaryMember(
"userpasswordmatched",
JSON::makeString("whether supplied password matched user password;"
" always false for non-encrypted files"));
encrypt.addDictionaryMember(
"ownerpasswordmatched",
JSON::makeString("whether supplied password matched owner password;"
" always false for non-encrypted files"));
JSON capabilities = encrypt.addDictionaryMember(
"capabilities", JSON::makeDictionary());
capabilities.addDictionaryMember(
"accessibility",
JSON::makeString("allow extraction for accessibility?"));
capabilities.addDictionaryMember(
"extract",
JSON::makeString("allow extraction?"));
capabilities.addDictionaryMember(
"printlow",
JSON::makeString("allow low resolution printing?"));
capabilities.addDictionaryMember(
"printhigh",
JSON::makeString("allow high resolution printing?"));
capabilities.addDictionaryMember(
"modifyassembly",
JSON::makeString("allow modifying document assembly?"));
capabilities.addDictionaryMember(
"modifyforms",
JSON::makeString("allow modifying forms?"));
capabilities.addDictionaryMember(
"moddifyannotations",
JSON::makeString("allow modifying annotations?"));
capabilities.addDictionaryMember(
"modifyother",
JSON::makeString("allow other modifications?"));
capabilities.addDictionaryMember(
"modify",
JSON::makeString("allow all modifications?"));
JSON parameters = encrypt.addDictionaryMember(
"parameters", JSON::makeDictionary());
parameters.addDictionaryMember(
"R",
JSON::makeString("R value from Encrypt dictionary"));
parameters.addDictionaryMember(
"V",
JSON::makeString("V value from Encrypt dictionary"));
parameters.addDictionaryMember(
"P",
JSON::makeString("P value from Encrypt dictionary"));
parameters.addDictionaryMember(
"bits",
JSON::makeString("encryption key bit length"));
parameters.addDictionaryMember(
"key",
JSON::makeString("encryption key; will be null"
" unless --show-encryption-key was specified"));
parameters.addDictionaryMember(
"method",
JSON::makeString("overall encryption method:"
" none, mixed, RC4, AESv2, AESv3"));
parameters.addDictionaryMember(
"streammethod",
JSON::makeString("encryption method for streams"));
parameters.addDictionaryMember(
"stringmethod",
JSON::makeString("encryption method for string"));
parameters.addDictionaryMember(
"filemethod",
JSON::makeString("encryption method for attachments"));
}
if (all_keys || keys->count("attachments"))
{
JSON attachments = schema.addDictionaryMember(
"attachments", JSON::makeDictionary());
JSON details = attachments.addDictionaryMember(
"<attachment-key>", JSON::makeDictionary());
details.addDictionaryMember(
"filespec",
JSON::makeString("object containing the file spec"));
details.addDictionaryMember(
"preferredname",
JSON::makeString("most preferred file name"));
details.addDictionaryMember(
"preferredcontents",
JSON::makeString("most preferred embedded file stream"));
}
return schema;
}
static void parse_object_id(std::string const& objspec,
bool& trailer, int& obj, int& gen)
{
if (objspec == "trailer")
{
trailer = true;
}
else
{
trailer = false;
obj = QUtil::string_to_int(objspec.c_str());
size_t comma = objspec.find(',');
if ((comma != std::string::npos) && (comma + 1 < objspec.length()))
{
gen = QUtil::string_to_int(
objspec.substr(1 + comma, std::string::npos).c_str());
}
}
}
static bool file_exists(char const* filename)
{
try
{
fclose(QUtil::safe_fopen(filename, "rb"));
return true;
}
catch (std::runtime_error&)
{
// can't open the file
}
return false;
}
// This is not a general-purpose argument parser. It is tightly
// crafted to work with qpdf. qpdf's command-line syntax is very
// complex because of its long history, and it doesn't really follow
// any kind of normal standard for arguments, but I don't want to
// break compatibility by changing what constitutes a valid command.
// This class is intended to simplify the argument parsing code and
// also to make it possible to add bash completion support while
// guaranteeing consistency with the actual argument syntax.
class ArgParser
{
public:
ArgParser(int argc, char* argv[], Options& o);
void parseOptions();
private:
typedef void (ArgParser::*bare_arg_handler_t)();
typedef void (ArgParser::*param_arg_handler_t)(char* parameter);
struct OptionEntry
{
OptionEntry() :
parameter_needed(false),
bare_arg_handler(0),
param_arg_handler(0)
{
}
bool parameter_needed;
std::string parameter_name;
std::set<std::string> choices;
bare_arg_handler_t bare_arg_handler;
param_arg_handler_t param_arg_handler;
};
friend struct OptionEntry;
OptionEntry oe_positional(param_arg_handler_t);
OptionEntry oe_bare(bare_arg_handler_t);
OptionEntry oe_requiredParameter(param_arg_handler_t, char const* name);
OptionEntry oe_optionalParameter(param_arg_handler_t);
OptionEntry oe_requiredChoices(param_arg_handler_t, char const** choices);
void completionCommon(bool zsh);
void argHelp();
void argVersion();
void argCopyright();
void argCompletionBash();
void argCompletionZsh();
void argJsonHelp();
void argShowCrypto();
void argPositional(char* arg);
void argPassword(char* parameter);
void argPasswordFile(char* parameter);
void argEmpty();
void argLinearize();
void argEncrypt();
void argDecrypt();
void argPasswordIsHexKey();
void argAllowInsecure();
void argPasswordMode(char* parameter);
void argSuppressPasswordRecovery();
void argCopyEncryption(char* parameter);
void argEncryptionFilePassword(char* parameter);
void argPages();
void argUnderlay();
void argOverlay();
void argRotate(char* parameter);
void argCollate(char* parameter);
void argFlattenRotation();
void argListAttachments();
void argShowAttachment(char* parameter);
void argRemoveAttachment(char* parameter);
void argAddAttachment();
void argCopyAttachments();
void argStreamData(char* parameter);
void argCompressStreams(char* parameter);
void argRecompressFlate();
void argCompressionLevel(char* parameter);
void argDecodeLevel(char* parameter);
void argNormalizeContent(char* parameter);
void argSuppressRecovery();
void argObjectStreams(char* parameter);
void argIgnoreXrefStreams();
void argQdf();
void argPreserveUnreferenced();
void argPreserveUnreferencedResources();
void argRemoveUnreferencedResources(char* parameter);
void argKeepFilesOpen(char* parameter);
void argKeepFilesOpenThreshold(char* parameter);
void argNewlineBeforeEndstream();
void argLinearizePass1(char* parameter);
void argCoalesceContents();
void argFlattenAnnotations(char* parameter);
void argGenerateAppearances();
void argMinVersion(char* parameter);
void argForceVersion(char* parameter);
void argSplitPages(char* parameter);
void argVerbose();
void argProgress();
void argNoWarn();
void argWarningExitZero();
void argDeterministicId();
void argStaticId();
void argStaticAesIv();
void argNoOriginalObjectIds();
void argShowEncryption();
void argShowEncryptionKey();
void argCheckLinearization();
void argShowLinearization();
void argShowXref();
void argShowObject(char* parameter);
void argRawStreamData();
void argFilteredStreamData();
void argShowNpages();
void argShowPages();
void argWithImages();
void argJson();
void argJsonKey(char* parameter);
void argJsonObject(char* parameter);
void argCheck();
void argOptimizeImages();
void argExternalizeInlineImages();
void argKeepInlineImages();
void argRemovePageLabels();
void argOiMinWidth(char* parameter);
void argOiMinHeight(char* parameter);
void argOiMinArea(char* parameter);
void argIiMinBytes(char* parameter);
void arg40Print(char* parameter);
void arg40Modify(char* parameter);
void arg40Extract(char* parameter);
void arg40Annotate(char* parameter);
void arg128Accessibility(char* parameter);
void arg128Extract(char* parameter);
void arg128Print(char* parameter);
void arg128Modify(char* parameter);
void arg128ClearTextMetadata();
void arg128Assemble(char* parameter);
void arg128Annotate(char* parameter);
void arg128Form(char* parameter);
void arg128ModOther(char* parameter);
void arg128UseAes(char* parameter);
void arg128ForceV4();
void arg256ForceR5();
void argEndEncrypt();
void argUOpositional(char* arg);
void argUOto(char* parameter);
void argUOfrom(char* parameter);
void argUOrepeat(char* parameter);
void argUOpassword(char* parameter);
void argEndUnderOverlay();
void argReplaceInput();
void argIsEncrypted();
void argRequiresPassword();
void argAApositional(char* arg);
void argAAKey(char* parameter);
void argAAFilename(char* parameter);
void argAACreationDate(char* parameter);
void argAAModDate(char* parameter);
void argAAMimeType(char* parameter);
void argAADescription(char* parameter);
void argAAReplace();
void argEndAddAttachment();
void argCApositional(char* arg);
void argCAprefix(char* parameter);
void argCApassword(char* parameter);
void argEndCopyAttachments();
void usage(std::string const& message);
void checkCompletion();
void initOptionTable();
void handleHelpArgs();
void handleArgFileArguments();
void handleBashArguments();
void readArgsFromFile(char const* filename);
void doFinalChecks();
void addOptionsToCompletions();
void addChoicesToCompletions(std::string const&, std::string const&);
void handleCompletion();
std::vector<PageSpec> parsePagesOptions();
void parseUnderOverlayOptions(UnderOverlay*);
void parseRotationParameter(std::string const&);
std::vector<int> parseNumrange(char const* range, int max,
bool throw_error = false);
int argc;
char** argv;
Options& o;
int cur_arg;
bool bash_completion;
bool zsh_completion;
std::string bash_prev;
std::string bash_cur;
std::string bash_line;
std::set<std::string> completions;
std::map<std::string, OptionEntry>* option_table;
std::map<std::string, OptionEntry> help_option_table;
std::map<std::string, OptionEntry> main_option_table;
std::map<std::string, OptionEntry> encrypt40_option_table;
std::map<std::string, OptionEntry> encrypt128_option_table;
std::map<std::string, OptionEntry> encrypt256_option_table;
std::map<std::string, OptionEntry> under_overlay_option_table;
std::map<std::string, OptionEntry> add_attachment_option_table;
std::map<std::string, OptionEntry> copy_attachments_option_table;
std::vector<PointerHolder<char> > new_argv;
std::vector<PointerHolder<char> > bash_argv;
PointerHolder<char*> argv_ph;
PointerHolder<char*> bash_argv_ph;
};
ArgParser::ArgParser(int argc, char* argv[], Options& o) :
argc(argc),
argv(argv),
o(o),
cur_arg(0),
bash_completion(false),
zsh_completion(false)
{
option_table = &main_option_table;
initOptionTable();
}
ArgParser::OptionEntry
ArgParser::oe_positional(param_arg_handler_t h)
{
OptionEntry oe;
oe.param_arg_handler = h;
return oe;
}
ArgParser::OptionEntry
ArgParser::oe_bare(bare_arg_handler_t h)
{
OptionEntry oe;
oe.parameter_needed = false;
oe.bare_arg_handler = h;
return oe;
}
ArgParser::OptionEntry
ArgParser::oe_requiredParameter(param_arg_handler_t h, char const* name)
{
OptionEntry oe;
oe.parameter_needed = true;
oe.parameter_name = name;
oe.param_arg_handler = h;
return oe;
}
ArgParser::OptionEntry
ArgParser::oe_optionalParameter(param_arg_handler_t h)
{
OptionEntry oe;
oe.parameter_needed = false;
oe.param_arg_handler = h;
return oe;
}
ArgParser::OptionEntry
ArgParser::oe_requiredChoices(param_arg_handler_t h, char const** choices)
{
OptionEntry oe;
oe.parameter_needed = true;
oe.param_arg_handler = h;
for (char const** i = choices; *i; ++i)
{
oe.choices.insert(*i);
}
return oe;
}
void
ArgParser::initOptionTable()
{
std::map<std::string, OptionEntry>* t = &this->help_option_table;
(*t)["help"] = oe_bare(&ArgParser::argHelp);
(*t)["version"] = oe_bare(&ArgParser::argVersion);
(*t)["copyright"] = oe_bare(&ArgParser::argCopyright);
(*t)["completion-bash"] = oe_bare(&ArgParser::argCompletionBash);
(*t)["completion-zsh"] = oe_bare(&ArgParser::argCompletionZsh);
(*t)["json-help"] = oe_bare(&ArgParser::argJsonHelp);
(*t)["show-crypto"] = oe_bare(&ArgParser::argShowCrypto);
t = &this->main_option_table;
char const* yn[] = {"y", "n", 0};
(*t)[""] = oe_positional(&ArgParser::argPositional);
(*t)["password"] = oe_requiredParameter(
&ArgParser::argPassword, "password");
(*t)["password-file"] = oe_requiredParameter(
&ArgParser::argPasswordFile, "password-file");
(*t)["empty"] = oe_bare(&ArgParser::argEmpty);
(*t)["linearize"] = oe_bare(&ArgParser::argLinearize);
(*t)["encrypt"] = oe_bare(&ArgParser::argEncrypt);
(*t)["decrypt"] = oe_bare(&ArgParser::argDecrypt);
(*t)["password-is-hex-key"] = oe_bare(&ArgParser::argPasswordIsHexKey);
(*t)["suppress-password-recovery"] =
oe_bare(&ArgParser::argSuppressPasswordRecovery);
char const* password_mode_choices[] =
{"bytes", "hex-bytes", "unicode", "auto", 0};
(*t)["password-mode"] = oe_requiredChoices(
&ArgParser::argPasswordMode, password_mode_choices);
(*t)["copy-encryption"] = oe_requiredParameter(
&ArgParser::argCopyEncryption, "file");
(*t)["encryption-file-password"] = oe_requiredParameter(
&ArgParser::argEncryptionFilePassword, "password");
(*t)["pages"] = oe_bare(&ArgParser::argPages);
(*t)["rotate"] = oe_requiredParameter(
&ArgParser::argRotate, "[+|-]angle:page-range");
char const* stream_data_choices[] =
{"compress", "preserve", "uncompress", 0};
(*t)["collate"] = oe_optionalParameter(&ArgParser::argCollate);
(*t)["flatten-rotation"] = oe_bare(&ArgParser::argFlattenRotation);
(*t)["list-attachments"] = oe_bare(&ArgParser::argListAttachments);
(*t)["show-attachment"] = oe_requiredParameter(
&ArgParser::argShowAttachment, "attachment-key");
(*t)["remove-attachment"] = oe_requiredParameter(
&ArgParser::argRemoveAttachment, "attachment-key");
(*t)["add-attachment"] = oe_bare(&ArgParser::argAddAttachment);
(*t)["copy-attachments-from"] = oe_bare(&ArgParser::argCopyAttachments);
(*t)["stream-data"] = oe_requiredChoices(
&ArgParser::argStreamData, stream_data_choices);
(*t)["compress-streams"] = oe_requiredChoices(
&ArgParser::argCompressStreams, yn);
(*t)["recompress-flate"] = oe_bare(&ArgParser::argRecompressFlate);
(*t)["compression-level"] = oe_requiredParameter(
&ArgParser::argCompressionLevel, "level");
char const* decode_level_choices[] =
{"none", "generalized", "specialized", "all", 0};
(*t)["decode-level"] = oe_requiredChoices(
&ArgParser::argDecodeLevel, decode_level_choices);
(*t)["normalize-content"] = oe_requiredChoices(
&ArgParser::argNormalizeContent, yn);
(*t)["suppress-recovery"] = oe_bare(&ArgParser::argSuppressRecovery);
char const* object_streams_choices[] = {
"disable", "preserve", "generate", 0};
(*t)["object-streams"] = oe_requiredChoices(
&ArgParser::argObjectStreams, object_streams_choices);
(*t)["ignore-xref-streams"] = oe_bare(&ArgParser::argIgnoreXrefStreams);
(*t)["qdf"] = oe_bare(&ArgParser::argQdf);
(*t)["preserve-unreferenced"] = oe_bare(
&ArgParser::argPreserveUnreferenced);
(*t)["preserve-unreferenced-resources"] = oe_bare(
&ArgParser::argPreserveUnreferencedResources);
char const* remove_unref_choices[] = {
"auto", "yes", "no", 0};
(*t)["remove-unreferenced-resources"] = oe_requiredChoices(
&ArgParser::argRemoveUnreferencedResources, remove_unref_choices);
(*t)["keep-files-open"] = oe_requiredChoices(
&ArgParser::argKeepFilesOpen, yn);
(*t)["keep-files-open-threshold"] = oe_requiredParameter(
&ArgParser::argKeepFilesOpenThreshold, "count");
(*t)["newline-before-endstream"] = oe_bare(
&ArgParser::argNewlineBeforeEndstream);
(*t)["linearize-pass1"] = oe_requiredParameter(
&ArgParser::argLinearizePass1, "filename");
(*t)["coalesce-contents"] = oe_bare(&ArgParser::argCoalesceContents);
char const* flatten_choices[] = {"all", "print", "screen", 0};
(*t)["flatten-annotations"] = oe_requiredChoices(
&ArgParser::argFlattenAnnotations, flatten_choices);
(*t)["generate-appearances"] =
oe_bare(&ArgParser::argGenerateAppearances);
(*t)["min-version"] = oe_requiredParameter(
&ArgParser::argMinVersion, "version");
(*t)["force-version"] = oe_requiredParameter(
&ArgParser::argForceVersion, "version");
(*t)["split-pages"] = oe_optionalParameter(&ArgParser::argSplitPages);
(*t)["verbose"] = oe_bare(&ArgParser::argVerbose);
(*t)["progress"] = oe_bare(&ArgParser::argProgress);
(*t)["no-warn"] = oe_bare(&ArgParser::argNoWarn);
(*t)["warning-exit-0"] = oe_bare(&ArgParser::argWarningExitZero);
(*t)["deterministic-id"] = oe_bare(&ArgParser::argDeterministicId);
(*t)["static-id"] = oe_bare(&ArgParser::argStaticId);
(*t)["static-aes-iv"] = oe_bare(&ArgParser::argStaticAesIv);
(*t)["no-original-object-ids"] = oe_bare(
&ArgParser::argNoOriginalObjectIds);
(*t)["show-encryption"] = oe_bare(&ArgParser::argShowEncryption);
(*t)["show-encryption-key"] = oe_bare(&ArgParser::argShowEncryptionKey);
(*t)["check-linearization"] = oe_bare(&ArgParser::argCheckLinearization);
(*t)["show-linearization"] = oe_bare(&ArgParser::argShowLinearization);
(*t)["show-xref"] = oe_bare(&ArgParser::argShowXref);
(*t)["show-object"] = oe_requiredParameter(
&ArgParser::argShowObject, "trailer|obj[,gen]");
(*t)["raw-stream-data"] = oe_bare(&ArgParser::argRawStreamData);
(*t)["filtered-stream-data"] = oe_bare(&ArgParser::argFilteredStreamData);
(*t)["show-npages"] = oe_bare(&ArgParser::argShowNpages);
(*t)["show-pages"] = oe_bare(&ArgParser::argShowPages);
(*t)["with-images"] = oe_bare(&ArgParser::argWithImages);
(*t)["json"] = oe_bare(&ArgParser::argJson);
// The list of selectable top-level keys id duplicated in three
// places: json_schema, do_json, and initOptionTable.
char const* json_key_choices[] = {
"objects", "objectinfo", "pages", "pagelabels", "outlines",
"acroform", "encrypt", "attachments", 0};
(*t)["json-key"] = oe_requiredChoices(
&ArgParser::argJsonKey, json_key_choices);
(*t)["json-object"] = oe_requiredParameter(
&ArgParser::argJsonObject, "trailer|obj[,gen]");
(*t)["check"] = oe_bare(&ArgParser::argCheck);
(*t)["optimize-images"] = oe_bare(&ArgParser::argOptimizeImages);
(*t)["externalize-inline-images"] =
oe_bare(&ArgParser::argExternalizeInlineImages);
(*t)["keep-inline-images"] = oe_bare(&ArgParser::argKeepInlineImages);
(*t)["remove-page-labels"] = oe_bare(&ArgParser::argRemovePageLabels);
(*t)["oi-min-width"] = oe_requiredParameter(
&ArgParser::argOiMinWidth, "minimum-width");
(*t)["oi-min-height"] = oe_requiredParameter(
&ArgParser::argOiMinHeight, "minimum-height");
(*t)["oi-min-area"] = oe_requiredParameter(
&ArgParser::argOiMinArea, "minimum-area");
(*t)["ii-min-bytes"] = oe_requiredParameter(
&ArgParser::argIiMinBytes, "minimum-bytes");
(*t)["overlay"] = oe_bare(&ArgParser::argOverlay);
(*t)["underlay"] = oe_bare(&ArgParser::argUnderlay);
(*t)["replace-input"] = oe_bare(&ArgParser::argReplaceInput);
(*t)["is-encrypted"] = oe_bare(&ArgParser::argIsEncrypted);
(*t)["requires-password"] = oe_bare(&ArgParser::argRequiresPassword);
t = &this->encrypt40_option_table;
(*t)["--"] = oe_bare(&ArgParser::argEndEncrypt);
// The above 40-bit options are also 128-bit and 256-bit options,
// so copy what we have so far to 128. Then continue separately
// with 128. We later copy 128 to 256.
this->encrypt128_option_table = this->encrypt40_option_table;
(*t)["print"] = oe_requiredChoices(&ArgParser::arg40Print, yn);
(*t)["modify"] = oe_requiredChoices(&ArgParser::arg40Modify, yn);
(*t)["extract"] = oe_requiredChoices(&ArgParser::arg40Extract, yn);
(*t)["annotate"] = oe_requiredChoices(&ArgParser::arg40Annotate, yn);
t = &this->encrypt128_option_table;
(*t)["accessibility"] = oe_requiredChoices(
&ArgParser::arg128Accessibility, yn);
(*t)["extract"] = oe_requiredChoices(&ArgParser::arg128Extract, yn);
char const* print128_choices[] = {"full", "low", "none", 0};
(*t)["print"] = oe_requiredChoices(
&ArgParser::arg128Print, print128_choices);
(*t)["assemble"] = oe_requiredChoices(&ArgParser::arg128Assemble, yn);
(*t)["annotate"] = oe_requiredChoices(&ArgParser::arg128Annotate, yn);
(*t)["form"] = oe_requiredChoices(&ArgParser::arg128Form, yn);
(*t)["modify-other"] = oe_requiredChoices(&ArgParser::arg128ModOther, yn);
char const* modify128_choices[] =
{"all", "annotate", "form", "assembly", "none", 0};
(*t)["modify"] = oe_requiredChoices(
&ArgParser::arg128Modify, modify128_choices);
(*t)["cleartext-metadata"] = oe_bare(&ArgParser::arg128ClearTextMetadata);
// The above 128-bit options are also 256-bit options, so copy
// what we have so far. Then continue separately with 128 and 256.
this->encrypt256_option_table = this->encrypt128_option_table;
(*t)["use-aes"] = oe_requiredChoices(&ArgParser::arg128UseAes, yn);
(*t)["force-V4"] = oe_bare(&ArgParser::arg128ForceV4);
t = &this->encrypt256_option_table;
(*t)["force-R5"] = oe_bare(&ArgParser::arg256ForceR5);
(*t)["allow-insecure"] = oe_bare(&ArgParser::argAllowInsecure);
t = &this->under_overlay_option_table;
(*t)[""] = oe_positional(&ArgParser::argUOpositional);
(*t)["to"] = oe_requiredParameter(
&ArgParser::argUOto, "page-range");
(*t)["from"] = oe_requiredParameter(
&ArgParser::argUOfrom, "page-range");
(*t)["repeat"] = oe_requiredParameter(
&ArgParser::argUOrepeat, "page-range");
(*t)["password"] = oe_requiredParameter(
&ArgParser::argUOpassword, "password");
(*t)["--"] = oe_bare(&ArgParser::argEndUnderOverlay);
t = &this->add_attachment_option_table;
(*t)[""] = oe_positional(&ArgParser::argAApositional);
(*t)["key"] = oe_requiredParameter(
&ArgParser::argAAKey, "attachment-key");
(*t)["filename"] = oe_requiredParameter(
&ArgParser::argAAFilename, "filename");
(*t)["creationdate"] = oe_requiredParameter(
&ArgParser::argAACreationDate, "creation-date");
(*t)["moddate"] = oe_requiredParameter(
&ArgParser::argAAModDate, "modification-date");
(*t)["mimetype"] = oe_requiredParameter(
&ArgParser::argAAMimeType, "mime/type");
(*t)["description"] = oe_requiredParameter(
&ArgParser::argAADescription, "description");
(*t)["replace"] = oe_bare(&ArgParser::argAAReplace);
(*t)["--"] = oe_bare(&ArgParser::argEndAddAttachment);
t = &this->copy_attachments_option_table;
(*t)[""] = oe_positional(&ArgParser::argCApositional);
(*t)["prefix"] = oe_requiredParameter(
&ArgParser::argCAprefix, "prefix");
(*t)["password"] = oe_requiredParameter(
&ArgParser::argCApassword, "password");
(*t)["--"] = oe_bare(&ArgParser::argEndCopyAttachments);
}
void
ArgParser::argPositional(char* arg)
{
if (o.infilename == 0)
{
o.infilename = arg;
}
else if (o.outfilename == 0)
{
o.outfilename = arg;
}
else
{
usage(std::string("unknown argument ") + arg);
}
}
void
ArgParser::argVersion()
{
if (expected_version != QPDF::QPDFVersion())
{
std::cerr << "***\n"
<< "WARNING: qpdf CLI from version " << expected_version
<< " is using library version " << QPDF::QPDFVersion()
<< ".\n"
<< "This probably means you have multiple versions of qpdf installed\n"
<< "and don't have your library path configured correctly.\n"
<< "***"
<< std::endl;
}
std::cout
<< whoami << " version " << QPDF::QPDFVersion() << std::endl
<< "Run " << whoami << " --copyright to see copyright and license information."
<< std::endl;
}
void
ArgParser::argCopyright()
{
// Make sure the output looks right on an 80-column display.
// 1 2 3 4 5 6 7 8
// 12345678901234567890123456789012345678901234567890123456789012345678901234567890
std::cout
<< whoami << " version " << QPDF::QPDFVersion() << std::endl
<< std::endl
<< "Copyright (c) 2005-2021 Jay Berkenbilt"
<< std::endl
<< "QPDF is licensed under the Apache License, Version 2.0 (the \"License\");"
<< std::endl
<< "not use this file except in compliance with the License."
<< std::endl
<< "You may obtain a copy of the License at"
<< std::endl
<< std::endl
<< " http://www.apache.org/licenses/LICENSE-2.0"
<< std::endl
<< std::endl
<< "Unless required by applicable law or agreed to in writing, software"
<< std::endl
<< "distributed under the License is distributed on an \"AS IS\" BASIS,"
<< std::endl
<< "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied."
<< std::endl
<< "See the License for the specific language governing permissions and"
<< std::endl
<< "limitations under the License."
<< std::endl
<< std::endl
<< "Versions of qpdf prior to version 7 were released under the terms"
<< std::endl
<< "of version 2.0 of the Artistic License. At your option, you may"
<< std::endl
<< "continue to consider qpdf to be licensed under those terms. Please"
<< std::endl
<< "see the manual for additional information."
<< std::endl;
}
void
ArgParser::argHelp()
{
std::cout
// 12345678901234567890123456789012345678901234567890123456789012345678901234567890
<< "Usage: qpdf [ options ] { infilename | --empty } [ outfilename ]\n"
<< "\n"
<< "An option summary appears below. Please see the documentation for details.\n"
<< "\n"
<< "If @filename appears anywhere in the command-line, each line of filename\n"
<< "will be interpreted as an argument. No interpolation is done. Line\n"
<< "terminators are stripped. @- can be specified to read from standard input.\n"
<< "\n"
<< "The output file can be - to indicate writing to standard output, or it can\n"
<< "be --replace-input to cause qpdf to replace the input file with the output.\n"
<< "\n"
<< "Note that when contradictory options are provided, whichever options are\n"
<< "provided last take precedence.\n"
<< "\n"
<< "\n"
<< "Basic Options\n"
<< "-------------\n"
<< "\n"
<< "--version show version of qpdf\n"
<< "--copyright show qpdf's copyright and license information\n"
<< "--help show command-line argument help\n"
<< "--show-crypto show supported crypto providers; default is first\n"
<< "--completion-bash output a bash complete command you can eval\n"
<< "--completion-zsh output a zsh complete command you can eval\n"
<< "--password=password specify a password for accessing encrypted files\n"
<< "--password-file=file get the password the first line \"file\"; use \"-\"\n"
<< " to read the password from stdin (without prompt or\n"
<< " disabling echo, so use with caution)\n"
<< "--is-encrypted silently exit 0 if the file is encrypted or 2\n"
<< " if not; useful for shell scripts\n"
<< "--requires-password silently exit 0 if a password (other than as\n"
<< " supplied) is required, 2 if the file is not\n"
<< " encrypted, or 3 if the file is encrypted\n"
<< " but requires no password or the supplied password\n"
<< " is correct; useful for shell scripts\n"
<< "--verbose provide additional informational output\n"
<< "--progress give progress indicators while writing output\n"
<< "--no-warn suppress warnings\n"
<< "--warning-exit-0 exit with code 0 instead of 3 if there are warnings\n"
<< "--linearize generated a linearized (web optimized) file\n"
<< "--replace-input use in place of specifying an output file; qpdf will\n"
<< " replace the input file with the output\n"
<< "--copy-encryption=file copy encryption parameters from specified file\n"
<< "--encryption-file-password=password\n"
<< " password used to open the file from which encryption\n"
<< " parameters are being copied\n"
<< "--encrypt options -- generate an encrypted file\n"
<< "--decrypt remove any encryption on the file\n"
<< "--password-is-hex-key treat primary password option as a hex-encoded key\n"
<< "--suppress-password-recovery\n"
<< " do not attempt recovering from password string\n"
<< " encoding errors\n"
<< "--password-mode=mode control qpdf's encoding of passwords\n"
<< "--pages options -- select specific pages from one or more files\n"
<< "--collate=n causes files specified in --pages to be collated\n"
<< " in groups of n pages (default 1) rather than\n"
<< " concatenated\n"
<< "--flatten-rotation move page rotation from /Rotate key to content\n"
<< "--rotate=[+|-]angle[:page-range]\n"
<< " rotate each specified page 0, 90, 180, or 270\n"
<< " degrees; rotate all pages if no page range is given\n"
<< "--split-pages=[n] write each output page to a separate file\n"
<< "--overlay options -- overlay pages from another file\n"
<< "--underlay options -- underlay pages from another file\n"
<< "\n"
<< "Note that you can use the @filename or @- syntax for any argument at any\n"
<< "point in the command. This provides a good way to specify a password without\n"
<< "having to explicitly put it on the command line. @filename or @- must be a\n"
<< "word by itself. Syntax such as --arg=@filename doesn't work.\n"
<< "\n"
<< "If none of --copy-encryption, --encrypt or --decrypt are given, qpdf will\n"
<< "preserve any encryption data associated with a file.\n"
<< "\n"
<< "Note that when copying encryption parameters from another file, all\n"
<< "parameters will be copied, including both user and owner passwords, even\n"
<< "if the user password is used to open the other file. This works even if\n"
<< "the owner password is not known.\n"
<< "\n"
<< "The --password-is-hex-key option overrides the normal computation of\n"
<< "encryption keys. It only applies to the password used to open the main\n"
<< "file. This option is not ordinarily useful but can be helpful for forensic\n"
<< "or investigatory purposes. See manual for further discussion.\n"
<< "\n"
<< "The --rotate flag can be used to specify pages to rotate pages either\n"
<< "0, 90, 180, or 270 degrees. The page range is specified in the same\n"
<< "format as with the --pages option, described below. Repeat the option\n"
<< "to rotate multiple groups of pages. If the angle is preceded by + or -,\n"
<< "it is added to or subtracted from the original rotation. Otherwise, the\n"
<< "rotation angle is set explicitly to the given value. You almost always\n"
<< "want to use + or - unless you are certain about the internals of the PDF\n"
<< "you are working with.\n"
<< "\n"
<< "If --split-pages is specified, each page is written to a separate output\n"
<< "file. File names are generated as follows:\n"
<< "* If the string %d appears in the output file name, it is replaced with a\n"
<< " zero-padded page range starting from 1\n"
<< "* Otherwise, if the output file name ends in .pdf (case insensitive), a\n"
<< " zero-padded page range, preceded by a dash, is inserted before the file\n"
<< " extension\n"
<< "* Otherwise, the file name is appended with a zero-padded page range\n"
<< " preceded by a dash.\n"
<< "Page ranges are single page numbers for single-page groups or first-last\n"
<< "for multipage groups.\n"
<< "\n"
<< "\n"
<< "Encryption Options\n"
<< "------------------\n"
<< "\n"
<< " --encrypt user-password owner-password key-length flags --\n"
<< "\n"
<< "Note that -- terminates parsing of encryption flags.\n"
<< "\n"
<< "Either or both of the user password and the owner password may be\n"
<< "empty strings.\n"
<< "\n"
<< "key-length may be 40, 128, or 256\n"
<< "\n"
<< "Additional flags are dependent upon key length.\n"
<< "\n"
<< " If 40:\n"
<< "\n"
<< " --print=[yn] allow printing\n"
<< " --modify=[yn] allow document modification\n"
<< " --extract=[yn] allow text/graphic extraction\n"
<< " --annotate=[yn] allow comments and form fill-in and signing\n"
<< "\n"
<< " If 128:\n"
<< "\n"
<< " --accessibility=[yn] allow accessibility to visually impaired\n"
<< " --extract=[yn] allow other text/graphic extraction\n"
<< " --print=print-opt control printing access\n"
<< " --assemble=[yn] allow document assembly\n"
<< " --annotate=[yn] allow commenting/filling form fields\n"
<< " --form=[yn] allow filling form fields\n"
<< " --modify-other=[yn] allow other modifications\n"
<< " --modify=modify-opt control modify access (old way)\n"
<< " --cleartext-metadata prevents encryption of metadata\n"
<< " --use-aes=[yn] indicates whether to use AES encryption\n"
<< " --force-V4 forces use of V=4 encryption handler\n"
<< "\n"
<< " If 256, options are the same as 128 with these exceptions:\n"
<< " --force-V4 this option is not available with 256-bit keys\n"
<< " --use-aes this option is always on with 256-bit keys\n"
<< " --force-R5 forces use of deprecated R=5 encryption\n"
<< " --allow-insecure allow the owner password to be empty when the\n"
<< " user password is not empty\n"
<< "\n"
<< " print-opt may be:\n"
<< "\n"
<< " full allow full printing\n"
<< " low allow only low-resolution printing\n"
<< " none disallow printing\n"
<< "\n"
<< " modify-opt may be:\n"
<< "\n"
<< " all allow full document modification\n"
<< " annotate allow comment authoring and form operations\n"
<< " form allow form field fill-in and signing\n"
<< " assembly allow document assembly only\n"
<< " none allow no modifications\n"
<< "\n"
<< "The default for each permission option is to be fully permissive. Please\n"
<< "refer to the manual for more details on the modify options.\n"
<< "\n"
<< "Specifying cleartext-metadata forces the PDF version to at least 1.5.\n"
<< "Specifying use of AES forces the PDF version to at least 1.6. These\n"
<< "options are both off by default.\n"
<< "\n"
<< "The --force-V4 flag forces the V=4 encryption handler introduced in PDF 1.5\n"
<< "to be used even if not otherwise needed. This option is primarily useful\n"
<< "for testing qpdf and has no other practical use.\n"
<< "\n"
<< "\n"
<< "Password Modes\n"
<< "--------------\n"
<< "\n"
<< "The --password-mode controls how qpdf interprets passwords supplied\n"
<< "on the command-line. qpdf's default behavior is correct in almost all\n"
<< "cases, but you can fine-tune with this option.\n"
<< "\n"
<< " bytes: use the password literally as supplied\n"
<< " hex-bytes: interpret the password as a hex-encoded byte string\n"
<< " unicode: interpret the password as a UTF-8 encoded string\n"
<< " auto: attempt to infer the encoding and adjust as needed\n"
<< "\n"
<< "This is a complex topic. See the manual for a complete discussion.\n"
<< "\n"
<< "\n"
<< "Page Selection Options\n"
<< "----------------------\n"
<< "\n"
<< "These options allow pages to be selected from one or more PDF files.\n"
<< "Whatever file is given as the primary input file is used as the\n"
<< "starting point, but its pages are replaced with pages as specified.\n"
<< "\n"
<< "--keep-files-open=[yn]\n"
<< "--keep-files-open-threshold=count\n"
<< "--pages file [ --password=password ] [ page-range ] ... --\n"
<< "\n"
<< "For each file that pages should be taken from, specify the file, a\n"
<< "password needed to open the file (if any), and a page range. The\n"
<< "password needs to be given only once per file. If any of the input\n"
<< "files are the same as the primary input file or the file used to copy\n"
<< "encryption parameters (if specified), you do not need to repeat the\n"
<< "password here. The same file can be repeated multiple times. The\n"
<< "filename \".\" may be used to refer to the current input file. All\n"
<< "non-page data (info, outlines, page numbers, etc. are taken from the\n"
<< "primary input file. To discard this, use --empty as the primary\n"
<< "input.\n"
<< "\n"
<< "By default, when more than 200 distinct files are specified, qpdf will\n"
<< "close each file when not being referenced. With 200 files or fewer, all\n"
<< "files will be kept open at the same time. This behavior can be overridden\n"
<< "by specifying --keep-files-open=[yn]. Closing and opening files can have\n"
<< "very high overhead on certain file systems, especially networked file\n"
<< "systems. The threshold of 200 can be modified with\n"
<< "--keep-files-open-threshold\n"
<< "\n"
<< "The page range is a set of numbers separated by commas, ranges of\n"
<< "numbers separated dashes, or combinations of those. The character\n"
<< "\"z\" represents the last page. A number preceded by an \"r\" indicates\n"
<< "to count from the end, so \"r3-r1\" would be the last three pages of the\n"
<< "document. Pages can appear in any order. Ranges can appear with a\n"
<< "high number followed by a low number, which causes the pages to appear in\n"
<< "reverse. Numbers may be repeated. A page range may be appended with :odd\n"
<< "to indicate odd pages in the selected range or :even to indicate even\n"
<< "pages.\n"
<< "\n"
<< "If the page range is omitted, the range of 1-z is assumed. qpdf decides\n"
<< "that the page range is omitted if the range argument is either -- or a\n"
<< "valid file name and not a valid range.\n"
<< "\n"
<< "The usual behavior of --pages is to add all pages from the first file,\n"
<< "then all pages from the second file, and so on. If the --collate option\n"
<< "is specified, then pages are collated instead. In other words, qpdf takes\n"
<< "the first page from the first file, the first page from the second file,\n"
<< "and so on until it runs out of files; then it takes the second page from\n"
<< "each file, etc. When a file runs out of pages, it is skipped until all\n"
<< "specified pages are taken from all files.\n"
<< "\n"
<< "See the manual for examples and a discussion of additional subtleties.\n"
<< "\n"
<< "\n"
<< "Overlay and Underlay Options\n"
<< "----------------------------\n"
<< "\n"
<< "These options allow pages from another file to be overlaid or underlaid\n"
<< "on the primary output. Overlaid pages are drawn on top of the destination\n"
<< "page and may obscure the page. Underlaid pages are drawn below the\n"
<< "destination page.\n"
<< "\n"
<< "{--overlay | --underlay } file\n"
" [ --password=password ]\n"
" [ --to=page-range ]\n"
" [ --from=[page-range] ]\n"
" [ --repeat=page-range ]\n"
" --\n"
<< "\n"
<< "For overlay and underlay, a file and optional password are specified, along\n"
<< "with a series of optional page ranges. The default behavior is that each\n"
<< "page of the overlay or underlay file is imposed on the corresponding page\n"
<< "of the primary output until it runs out of pages, and any extra pages are\n"
<< "ignored. The page range options all take page ranges in the same form as\n"
<< "the --pages option. They have the following meanings:\n"
<< "\n"
<< " --to: the pages in the primary output to which overlay/underlay is\n"
<< " applied\n"
<< " --from: the pages from the overlay/underlay file that are used\n"
<< " --repeat: pages from the overlay/underlay that are repeated after\n"
<< " any \"from\" pages have been exhausted\n"
<< "\n"
<< "\n"
<< "Embedded Files/Attachments Options\n"
<< "----------------------------------\n"
<< "\n"
<< "These options can be used to work with embedded files, also known as\n"
<< "attachments.\n"
<< "\n"
<< "--list-attachments show key and stream number for embedded files;\n"
<< " combine with --verbose for more detailed information\n"
<< "--show-attachment=key write the contents of the specified attachment to\n"
<< " standard output as binary data\n"
<< "--add-attachment file options --\n"
<< " add or replace an attachment\n"
<< "--remove-attachment=key remove the specified attachment; repeatable\n"
<< "--copy-attachments-from file options --\n"
<< " copy attachments from another file\n"
<< "\n"
<< "The \"key\" option is the unique name under which the attachment is registered\n"
<< "within the PDF file. You can get this using the --list-attachments option. This\n"
<< "is usually the same as the filename, but it doesn't have to be.\n"
<< "\n"
<< "Options for adding attachments:\n"
<< "\n"
<< " file path to the file to attach\n"
<< " --key=key the name of this in the embedded files table;\n"
<< " defaults to the last path element of file\n"
<< " --filename=name the file name of the attachment; this is what is\n"
<< " usually displayed to the user; defaults to the\n"
<< " last path element of file\n"
<< " --creationdate=date creation date in PDF format; defaults to the\n"
<< " current time\n"
<< " --moddate=date modification date in PDF format; defaults to the\n"
<< " current time\n"
<< " --mimetype=type/subtype mime type of attachment (e.g. application/pdf)\n"
<< " --description=\"text\" attachment description\n"
<< " --replace replace any existing attachment with the same key\n"
<< "\n"
<< "Options for copying attachments:\n"
<< "\n"
<< " file file whose attachments should be copied\n"
<< " --password=password password to open the other file, if needed\n"
<< " --prefix=prefix a prefix to insert in front of each key;\n"
<< " required if needed to ensure each attachment\n"
<< " has a unique key\n"
<< "\n"
<< "Date format: D:yyyymmddhhmmss<z> where <z> is either Z for UTC or a timezone\n"
<< "offset in the form -hh'mm' or +hh'mm'.\n"
<< "Examples: D:20210207161528-05'00', D:20210207211528Z\n"
<< "\n"
<< "\n"
<< "Advanced Parsing Options\n"
<< "------------------------\n"
<< "\n"
<< "These options control aspects of how qpdf reads PDF files. Mostly these are\n"
<< "of use to people who are working with damaged files. There is little reason\n"
<< "to use these options unless you are trying to solve specific problems.\n"
<< "\n"
<< "--suppress-recovery prevents qpdf from attempting to recover damaged files\n"
<< "--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n"
<< "\n"
<< "\n"
<< "Advanced Transformation Options\n"
<< "-------------------------------\n"
<< "\n"
<< "These transformation options control fine points of how qpdf creates\n"
<< "the output file. Mostly these are of use only to people who are very\n"
<< "familiar with the PDF file format or who are PDF developers.\n"
<< "\n"
<< "--stream-data=option controls transformation of stream data (below)\n"
<< "--compress-streams=[yn] controls whether to compress streams on output\n"
<< "--decode-level=option controls how to filter streams from the input\n"
<< "--recompress-flate recompress streams already compressed with Flate\n"
<< "--compression-level=n set zlib compression level; most effective with\n"
<< " --recompress-flate --object-streams=generate\n"
<< "--normalize-content=[yn] enables or disables normalization of content streams\n"
<< "--object-streams=mode controls handing of object streams\n"
<< "--preserve-unreferenced preserve unreferenced objects\n"
<< "--remove-unreferenced-resources={auto,yes,no}\n"
<< " whether to remove unreferenced page resources\n"
<< "--preserve-unreferenced-resources\n"
<< " synonym for --remove-unreferenced-resources=no\n"
<< "--newline-before-endstream always put a newline before endstream\n"
<< "--coalesce-contents force all pages' content to be a single stream\n"
<< "--flatten-annotations=option\n"
<< " incorporate rendering of annotations into page\n"
<< " contents including those for interactive form\n"
<< " fields; may also want --generate-appearances\n"
<< "--generate-appearances generate appearance streams for form fields\n"
<< "--optimize-images compress images with DCT (JPEG) when advantageous\n"
<< "--oi-min-width=w do not optimize images whose width is below w;\n"
<< " default is 128. Use 0 to mean no minimum\n"
<< "--oi-min-height=h do not optimize images whose height is below h\n"
<< " default is 128. Use 0 to mean no minimum\n"
<< "--oi-min-area=a do not optimize images whose pixel count is below a\n"
<< " default is 16,384. Use 0 to mean no minimum\n"
<< "--externalize-inline-images convert inline images to regular images; by\n"
<< " default, images of at least 1,024 bytes are\n"
<< " externalized\n"
<< "--ii-min-bytes=bytes specify minimum size of inline images to be\n"
<< " converted to regular images\n"
<< "--keep-inline-images exclude inline images from image optimization\n"
<< "--remove-page-labels remove any page labels present in the output file\n"
<< "--qdf turns on \"QDF mode\" (below)\n"
<< "--linearize-pass1=file write intermediate pass of linearized file\n"
<< " for debugging\n"
<< "--min-version=version sets the minimum PDF version of the output file\n"
<< "--force-version=version forces this to be the PDF version of the output file\n"
<< "\n"
<< "Options for --flatten-annotations are all, print, or screen. If the option\n"
<< "is print, only annotations marked as print are included. If the option is\n"
<< "screen, options marked as \"no view\" are excluded. Otherwise, annotations\n"
<< "are flattened regardless of the presence of print or NoView flags. It is\n"
<< "common for PDF files to have a flag set that appearance streams need to be\n"
<< "regenerated. This happens when someone changes a form value with software\n"
<< "that does not know how to render the new value. qpdf will not flatten form\n"
<< "fields in files like this. If you get this warning, you have two choices:\n"
<< "either use qpdf's --generate-appearances flag to tell qpdf to go ahead and\n"
<< "regenerate appearances, or use some other tool to generate the appearances.\n"
<< "qpdf does a pretty good job with most forms when only ASCII and \"Windows\n"
<< "ANSI\" characters are used in form field values, but if your form fields\n"
<< "contain other characters, rich text, or are other than left justified, you\n"
<< "will get better results first saving with other software.\n"
<< "\n"
<< "Version numbers may be expressed as major.minor.extension-level, so 1.7.3\n"
<< "means PDF version 1.7 at extension level 3.\n"
<< "\n"
<< "Values for stream data options:\n"
<< "\n"
<< " compress recompress stream data when possible (default)\n"
<< " preserve leave all stream data as is\n"
<< " uncompress uncompress stream data when possible\n"
<< "\n"
<< "Values for object stream mode:\n"
<< "\n"
<< " preserve preserve original object streams (default)\n"
<< " disable don't write any object streams\n"
<< " generate use object streams wherever possible\n"
<< "\n"
<< "When --compress-streams=n is specified, this overrides the default behavior\n"
<< "of qpdf, which is to attempt compress uncompressed streams. Setting\n"
<< "stream data mode to uncompress or preserve has the same effect.\n"
<< "\n"
<< "The --decode-level parameter may be set to one of the following values:\n"
<< " none do not decode streams\n"
<< " generalized decode streams compressed with generalized filters\n"
<< " including LZW, Flate, and the ASCII encoding filters.\n"
<< " specialized additionally decode streams with non-lossy specialized\n"
<< " filters including RunLength\n"
<< " all additionally decode streams with lossy filters\n"
<< " including DCT (JPEG)\n"
<< "\n"
<< "In qdf mode, by default, content normalization is turned on, and the\n"
<< "stream data mode is set to uncompress. QDF mode does not support\n"
<< "linearized files. The --linearize flag disables qdf mode.\n"
<< "\n"
<< "Setting the minimum PDF version of the output file may raise the version\n"
<< "but will never lower it. Forcing the PDF version of the output file may\n"
<< "set the PDF version to a lower value than actually allowed by the file's\n"
<< "contents. You should only do this if you have no other possible way to\n"
<< "open the file or if you know that the file definitely doesn't include\n"
<< "features not supported later versions.\n"
<< "\n"
<< "Testing, Inspection, and Debugging Options\n"
<< "------------------------------------------\n"
<< "\n"
<< "These options can be useful for digging into PDF files or for use in\n"
<< "automated test suites for software that uses the qpdf library.\n"
<< "\n"
<< "--deterministic-id generate deterministic /ID\n"
<< "--static-id generate static /ID: FOR TESTING ONLY!\n"
<< "--static-aes-iv use a static initialization vector for AES-CBC\n"
<< " This is option is not secure! FOR TESTING ONLY!\n"
<< "--no-original-object-ids suppress original object ID comments in qdf mode\n"
<< "--show-encryption quickly show encryption parameters\n"
<< "--show-encryption-key when showing encryption, reveal the actual key\n"
<< "--check-linearization check file integrity and linearization status\n"
<< "--show-linearization check and show all linearization data\n"
<< "--show-xref show the contents of the cross-reference table\n"
<< "--show-object=trailer|obj[,gen]\n"
<< " show the contents of the given object\n"
<< " --raw-stream-data show raw stream data instead of object contents\n"
<< " --filtered-stream-data show filtered stream data instead of object contents\n"
<< "--show-npages print the number of pages in the file\n"
<< "--show-pages shows the object/generation number for each page\n"
<< " --with-images also shows the object IDs for images on each page\n"
<< "--check check file structure + encryption, linearization\n"
<< "--json generate a json representation of the file\n"
<< "--json-help describe the format of the json representation\n"
<< "--json-key=key repeatable; prune json structure to include only\n"
<< " specified keys. If absent, all keys are shown\n"
<< "--json-object=trailer|[obj,gen]\n"
<< " repeatable; include only specified objects in the\n"
<< " \"objects\" section of the json. If absent, all\n"
<< " objects are shown\n"
<< "\n"
<< "The json representation generated by qpdf is designed to facilitate\n"
<< "processing of qpdf from other programming languages that have a hard\n"
<< "time calling C++ APIs. Run qpdf --json-help for details on the format.\n"
<< "The manual has more in-depth information about the json representation\n"
<< "and certain compatibility guarantees that qpdf provides.\n"
<< "\n"
<< "The --raw-stream-data and --filtered-stream-data options are ignored\n"
<< "unless --show-object is given. Either of these options will cause the\n"
<< "stream data to be written to standard output.\n"
<< "\n"
<< "If --filtered-stream-data is given and --normalize-content=y is also\n"
<< "given, qpdf will attempt to normalize the stream data as if it is a\n"
<< "page content stream. This attempt will be made even if it is not a\n"
<< "page content stream, in which case it will produce unusable results.\n"
<< "\n"
<< "Ordinarily, qpdf exits with a status of 0 on success or a status of 2\n"
<< "if any errors occurred. If there were warnings but not errors, qpdf\n"
<< "exits with a status of 3. If warnings would have been issued but --no-warn\n"
<< "was given, an exit status of 3 is still used. If you want qpdf to exit\n"
<< "with status 0 when there are warnings, use the --warning-exit-0 flag.\n"
<< "When --no-warn and --warning-exit-0 are used together, the effect is for\n"
<< "qpdf to completely ignore warnings. qpdf does not use exit status 1,\n"
<< "since that is used by the shell if it can't execute qpdf.\n";
}
void
ArgParser::completionCommon(bool zsh)
{
std::string progname = argv[0];
std::string executable;
std::string appdir;
std::string appimage;
if (QUtil::get_env("QPDF_EXECUTABLE", &executable))
{
progname = executable;
}
else if (QUtil::get_env("APPDIR", &appdir) &&
QUtil::get_env("APPIMAGE", &appimage))
{
// Detect if we're in an AppImage and adjust
if ((appdir.length() < strlen(argv[0])) &&
(strncmp(appdir.c_str(), argv[0], appdir.length()) == 0))
{
progname = appimage;
}
}
if (zsh)
{
std::cout << "autoload -U +X bashcompinit && bashcompinit && ";
}
std::cout << "complete -o bashdefault -o default";
if (! zsh)
{
std::cout << " -o nospace";
}
std::cout << " -C " << progname << " " << whoami << std::endl;
// Put output before error so calling from zsh works properly
std::string path = progname;
size_t slash = path.find('/');
if ((slash != 0) && (slash != std::string::npos))
{
std::cerr << "WARNING: qpdf completion enabled"
<< " using relative path to qpdf" << std::endl;
}
}
void
ArgParser::argCompletionBash()
{
completionCommon(false);
}
void
ArgParser::argCompletionZsh()
{
completionCommon(true);
}
void
ArgParser::argJsonHelp()
{
// Make sure the output looks right on an 80-column display.
// 1 2 3 4 5 6 7 8
// 12345678901234567890123456789012345678901234567890123456789012345678901234567890
std::cout
<< "The json block below contains the same structure with the same keys as the"
<< std::endl
<< "json generated by qpdf. In the block below, the values are descriptions of"
<< std::endl
<< "the meanings of those entries. The specific contract guaranteed by qpdf in"
<< std::endl
<< "its json representation is explained in more detail in the manual. You can"
<< std::endl
<< "specify a subset of top-level keys when you invoke qpdf, but the \"version\""
<< std::endl
<< "and \"parameters\" keys will always be present. Note that the \"encrypt\""
<< std::endl
<< "key's values will be populated for non-encrypted files. Some values will"
<< std::endl
<< "be null, and others will have values that apply to unencrypted files."
<< std::endl
<< json_schema().unparse()
<< std::endl;
}
void
ArgParser::argShowCrypto()
{
auto crypto = QPDFCryptoProvider::getRegisteredImpls();
std::string default_crypto = QPDFCryptoProvider::getDefaultProvider();
std::cout << default_crypto << std::endl;
for (auto const& iter: crypto)
{
if (iter != default_crypto)
{
std::cout << iter << std::endl;
}
}
}
void
ArgParser::argPassword(char* parameter)
{
o.password = parameter;
}
void
ArgParser::argPasswordFile(char* parameter)
{
std::list<std::string> lines;
if (strcmp(parameter, "-") == 0)
{
QTC::TC("qpdf", "qpdf password stdin");
lines = QUtil::read_lines_from_file(std::cin);
}
else
{
QTC::TC("qpdf", "qpdf password file");
lines = QUtil::read_lines_from_file(parameter);
}
if (lines.size() >= 1)
{
// Make sure the memory for this stays in scope.
o.password_alloc = std::shared_ptr<char>(
QUtil::copy_string(lines.front().c_str()),
std::default_delete<char[]>());
o.password = o.password_alloc.get();
if (lines.size() > 1)
{
std::cerr << whoami << ": WARNING: all but the first line of"
<< " the password file are ignored" << std::endl;
}
}
}
void
ArgParser::argEmpty()
{
o.infilename = "";
}
void
ArgParser::argLinearize()
{
o.linearize = true;
}
void
ArgParser::argEncrypt()
{
++cur_arg;
if (cur_arg + 3 > argc)
{
if (this->bash_completion)
{
if (cur_arg == argc)
{
this->completions.insert("user-password");
}
else if (cur_arg + 1 == argc)
{
this->completions.insert("owner-password");
}
else if (cur_arg + 2 == argc)
{
this->completions.insert("40");
this->completions.insert("128");
this->completions.insert("256");
}
return;
}
else
{
usage("insufficient arguments to --encrypt");
}
}
o.user_password = argv[cur_arg++];
o.owner_password = argv[cur_arg++];
std::string len_str = argv[cur_arg];
if (len_str == "40")
{
o.keylen = 40;
this->option_table = &(this->encrypt40_option_table);
}
else if (len_str == "128")
{
o.keylen = 128;
this->option_table = &(this->encrypt128_option_table);
}
else if (len_str == "256")
{
o.keylen = 256;
o.use_aes = true;
this->option_table = &(this->encrypt256_option_table);
}
else
{
usage("encryption key length must be 40, 128, or 256");
}
}
void
ArgParser::argDecrypt()
{
o.decrypt = true;
o.encrypt = false;
o.copy_encryption = false;
}
void
ArgParser::argPasswordIsHexKey()
{
o.password_is_hex_key = true;
}
void
ArgParser::argSuppressPasswordRecovery()
{
o.suppress_password_recovery = true;
}
void
ArgParser::argPasswordMode(char* parameter)
{
if (strcmp(parameter, "bytes") == 0)
{
o.password_mode = pm_bytes;
}
else if (strcmp(parameter, "hex-bytes") == 0)
{
o.password_mode = pm_hex_bytes;
}
else if (strcmp(parameter, "unicode") == 0)
{
o.password_mode = pm_unicode;
}
else if (strcmp(parameter, "auto") == 0)
{
o.password_mode = pm_auto;
}
else
{
usage("invalid password-mode option");
}
}
void
ArgParser::argAllowInsecure()
{
o.allow_insecure = true;
}
void
ArgParser::argCopyEncryption(char* parameter)
{
o.encryption_file = parameter;
o.copy_encryption = true;
o.encrypt = false;
o.decrypt = false;
}
void
ArgParser::argEncryptionFilePassword(char* parameter)
{
o.encryption_file_password = parameter;
}
void
ArgParser::argCollate(char* parameter)
{
auto n = ((parameter == 0) ? 1 :
QUtil::string_to_uint(parameter));
o.collate = QIntC::to_size(n);
}
void
ArgParser::argPages()
{
++cur_arg;
if (! o.page_specs.empty())
{
usage("the --pages may only be specified one time");
}
o.page_specs = parsePagesOptions();
if (o.page_specs.empty())
{
usage("--pages: no page specifications given");
}
}
void
ArgParser::argUnderlay()
{
parseUnderOverlayOptions(&o.underlay);
}
void
ArgParser::argOverlay()
{
parseUnderOverlayOptions(&o.overlay);
}
void
ArgParser::argRotate(char* parameter)
{
parseRotationParameter(parameter);
}
void
ArgParser::argFlattenRotation()
{
o.flatten_rotation = true;
}
void
ArgParser::argListAttachments()
{
o.list_attachments = true;
o.require_outfile = false;
}
void
ArgParser::argShowAttachment(char* parameter)
{
o.attachment_to_show = parameter;
o.require_outfile = false;
}
void
ArgParser::argRemoveAttachment(char* parameter)
{
o.attachments_to_remove.push_back(parameter);
}
void
ArgParser::argAddAttachment()
{
this->option_table = &(this->add_attachment_option_table);
o.attachments_to_add.push_back(AddAttachment());
}
void
ArgParser::argCopyAttachments()
{
this->option_table = &(this->copy_attachments_option_table);
o.attachments_to_copy.push_back(CopyAttachmentFrom());
}
void
ArgParser::argStreamData(char* parameter)
{
o.stream_data_set = true;
if (strcmp(parameter, "compress") == 0)
{
o.stream_data_mode = qpdf_s_compress;
}
else if (strcmp(parameter, "preserve") == 0)
{
o.stream_data_mode = qpdf_s_preserve;
}
else if (strcmp(parameter, "uncompress") == 0)
{
o.stream_data_mode = qpdf_s_uncompress;
}
else
{
// If this happens, it means streamDataChoices in
// ArgParser::initOptionTable is wrong.
usage("invalid stream-data option");
}
}
void
ArgParser::argCompressStreams(char* parameter)
{
o.compress_streams_set = true;
o.compress_streams = (strcmp(parameter, "y") == 0);
}
void
ArgParser::argRecompressFlate()
{
o.recompress_flate_set = true;
o.recompress_flate = true;
}
void
ArgParser::argCompressionLevel(char* parameter)
{
o.compression_level = QUtil::string_to_int(parameter);
}
void
ArgParser::argDecodeLevel(char* parameter)
{
o.decode_level_set = true;
if (strcmp(parameter, "none") == 0)
{
o.decode_level = qpdf_dl_none;
}
else if (strcmp(parameter, "generalized") == 0)
{
o.decode_level = qpdf_dl_generalized;
}
else if (strcmp(parameter, "specialized") == 0)
{
o.decode_level = qpdf_dl_specialized;
}
else if (strcmp(parameter, "all") == 0)
{
o.decode_level = qpdf_dl_all;
}
else
{
// If this happens, it means decodeLevelChoices in
// ArgParser::initOptionTable is wrong.
usage("invalid option");
}
}
void
ArgParser::argNormalizeContent(char* parameter)
{
o.normalize_set = true;
o.normalize = (strcmp(parameter, "y") == 0);
}
void
ArgParser::argSuppressRecovery()
{
o.suppress_recovery = true;
}
void
ArgParser::argObjectStreams(char* parameter)
{
o.object_stream_set = true;
if (strcmp(parameter, "disable") == 0)
{
o.object_stream_mode = qpdf_o_disable;
}
else if (strcmp(parameter, "preserve") == 0)
{
o.object_stream_mode = qpdf_o_preserve;
}
else if (strcmp(parameter, "generate") == 0)
{
o.object_stream_mode = qpdf_o_generate;
}
else
{
// If this happens, it means objectStreamsChoices in
// ArgParser::initOptionTable is wrong.
usage("invalid object stream mode");
}
}
void
ArgParser::argIgnoreXrefStreams()
{
o.ignore_xref_streams = true;
}
void
ArgParser::argQdf()
{
o.qdf_mode = true;
}
void
ArgParser::argPreserveUnreferenced()
{
o.preserve_unreferenced_objects = true;
}
void
ArgParser::argPreserveUnreferencedResources()
{
o.remove_unreferenced_page_resources = re_no;
}
void
ArgParser::argRemoveUnreferencedResources(char* parameter)
{
if (strcmp(parameter, "auto") == 0)
{
o.remove_unreferenced_page_resources = re_auto;
}
else if (strcmp(parameter, "yes") == 0)
{
o.remove_unreferenced_page_resources = re_yes;
}
else if (strcmp(parameter, "no") == 0)
{
o.remove_unreferenced_page_resources = re_no;
}
else
{
// If this happens, it means remove_unref_choices in
// ArgParser::initOptionTable is wrong.
usage("invalid value for --remove-unreferenced-page-resources");
}
}
void
ArgParser::argKeepFilesOpen(char* parameter)
{
o.keep_files_open_set = true;
o.keep_files_open = (strcmp(parameter, "y") == 0);
}
void
ArgParser::argKeepFilesOpenThreshold(char* parameter)
{
o.keep_files_open_threshold = QUtil::string_to_uint(parameter);
}
void
ArgParser::argNewlineBeforeEndstream()
{
o.newline_before_endstream = true;
}
void
ArgParser::argLinearizePass1(char* parameter)
{
o.linearize_pass1 = parameter;
}
void
ArgParser::argCoalesceContents()
{
o.coalesce_contents = true;
}
void
ArgParser::argFlattenAnnotations(char* parameter)
{
o.flatten_annotations = true;
if (strcmp(parameter, "screen") == 0)
{
o.flatten_annotations_forbidden |= an_no_view;
}
else if (strcmp(parameter, "print") == 0)
{
o.flatten_annotations_required |= an_print;
}
}
void
ArgParser::argGenerateAppearances()
{
o.generate_appearances = true;
}
void
ArgParser::argMinVersion(char* parameter)
{
o.min_version = parameter;
}
void
ArgParser::argForceVersion(char* parameter)
{
o.force_version = parameter;
}
void
ArgParser::argSplitPages(char* parameter)
{
int n = ((parameter == 0) ? 1 :
QUtil::string_to_int(parameter));
o.split_pages = n;
}
void
ArgParser::argVerbose()
{
o.verbose = true;
}
void
ArgParser::argProgress()
{
o.progress = true;
}
void
ArgParser::argNoWarn()
{
o.suppress_warnings = true;
}
void
ArgParser::argWarningExitZero()
{
::EXIT_WARNING = 0;
}
void
ArgParser::argDeterministicId()
{
o.deterministic_id = true;
}
void
ArgParser::argStaticId()
{
o.static_id = true;
}
void
ArgParser::argStaticAesIv()
{
o.static_aes_iv = true;
}
void
ArgParser::argNoOriginalObjectIds()
{
o.suppress_original_object_id = true;
}
void
ArgParser::argShowEncryption()
{
o.show_encryption = true;
o.require_outfile = false;
}
void
ArgParser::argShowEncryptionKey()
{
o.show_encryption_key = true;
}
void
ArgParser::argCheckLinearization()
{
o.check_linearization = true;
o.require_outfile = false;
}
void
ArgParser::argShowLinearization()
{
o.show_linearization = true;
o.require_outfile = false;
}
void
ArgParser::argShowXref()
{
o.show_xref = true;
o.require_outfile = false;
}
void
ArgParser::argShowObject(char* parameter)
{
parse_object_id(parameter, o.show_trailer, o.show_obj, o.show_gen);
o.require_outfile = false;
}
void
ArgParser::argRawStreamData()
{
o.show_raw_stream_data = true;
}
void
ArgParser::argFilteredStreamData()
{
o.show_filtered_stream_data = true;
}
void
ArgParser::argShowNpages()
{
o.show_npages = true;
o.require_outfile = false;
}
void
ArgParser::argShowPages()
{
o.show_pages = true;
o.require_outfile = false;
}
void
ArgParser::argWithImages()
{
o.show_page_images = true;
}
void
ArgParser::argJson()
{
o.json = true;
o.require_outfile = false;
}
void
ArgParser::argJsonKey(char* parameter)
{
o.json_keys.insert(parameter);
}
void
ArgParser::argJsonObject(char* parameter)
{
o.json_objects.insert(parameter);
}
void
ArgParser::argCheck()
{
o.check = true;
o.require_outfile = false;
}
void
ArgParser::argOptimizeImages()
{
o.optimize_images = true;
}
void
ArgParser::argExternalizeInlineImages()
{
o.externalize_inline_images = true;
}
void
ArgParser::argKeepInlineImages()
{
o.keep_inline_images = true;
}
void
ArgParser::argRemovePageLabels()
{
o.remove_page_labels = true;
}
void
ArgParser::argOiMinWidth(char* parameter)
{
o.oi_min_width = QUtil::string_to_uint(parameter);
}
void
ArgParser::argOiMinHeight(char* parameter)
{
o.oi_min_height = QUtil::string_to_uint(parameter);
}
void
ArgParser::argOiMinArea(char* parameter)
{
o.oi_min_area = QUtil::string_to_uint(parameter);
}
void
ArgParser::argIiMinBytes(char* parameter)
{
o.ii_min_bytes = QUtil::string_to_uint(parameter);
}
void
ArgParser::arg40Print(char* parameter)
{
o.r2_print = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg40Modify(char* parameter)
{
o.r2_modify = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg40Extract(char* parameter)
{
o.r2_extract = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg40Annotate(char* parameter)
{
o.r2_annotate = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg128Accessibility(char* parameter)
{
o.r3_accessibility = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg128Extract(char* parameter)
{
o.r3_extract = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg128Print(char* parameter)
{
if (strcmp(parameter, "full") == 0)
{
o.r3_print = qpdf_r3p_full;
}
else if (strcmp(parameter, "low") == 0)
{
o.r3_print = qpdf_r3p_low;
}
else if (strcmp(parameter, "none") == 0)
{
o.r3_print = qpdf_r3p_none;
}
else
{
usage("invalid print option");
}
}
void
ArgParser::arg128Modify(char* parameter)
{
if (strcmp(parameter, "all") == 0)
{
o.r3_assemble = true;
o.r3_annotate_and_form = true;
o.r3_form_filling = true;
o.r3_modify_other = true;
}
else if (strcmp(parameter, "annotate") == 0)
{
o.r3_assemble = true;
o.r3_annotate_and_form = true;
o.r3_form_filling = true;
o.r3_modify_other = false;
}
else if (strcmp(parameter, "form") == 0)
{
o.r3_assemble = true;
o.r3_annotate_and_form = false;
o.r3_form_filling = true;
o.r3_modify_other = false;
}
else if (strcmp(parameter, "assembly") == 0)
{
o.r3_assemble = true;
o.r3_annotate_and_form = false;
o.r3_form_filling = false;
o.r3_modify_other = false;
}
else if (strcmp(parameter, "none") == 0)
{
o.r3_assemble = false;
o.r3_annotate_and_form = false;
o.r3_form_filling = false;
o.r3_modify_other = false;
}
else
{
usage("invalid modify option");
}
}
void
ArgParser::arg128ClearTextMetadata()
{
o.cleartext_metadata = true;
}
void
ArgParser::arg128Assemble(char* parameter)
{
o.r3_assemble = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg128Annotate(char* parameter)
{
o.r3_annotate_and_form = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg128Form(char* parameter)
{
o.r3_form_filling = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg128ModOther(char* parameter)
{
o.r3_modify_other = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg128UseAes(char* parameter)
{
o.use_aes = (strcmp(parameter, "y") == 0);
}
void
ArgParser::arg128ForceV4()
{
o.force_V4 = true;
}
void
ArgParser::arg256ForceR5()
{
o.force_R5 = true;
}
void
ArgParser::argEndEncrypt()
{
o.encrypt = true;
o.decrypt = false;
o.copy_encryption = false;
this->option_table = &(this->main_option_table);
}
void
ArgParser::argUOpositional(char* arg)
{
if (o.under_overlay->filename)
{
usage(o.under_overlay->which + " file already specified");
}
else
{
o.under_overlay->filename = arg;
}
}
void
ArgParser::argUOto(char* parameter)
{
parseNumrange(parameter, 0);
o.under_overlay->to_nr = parameter;
}
void
ArgParser::argUOfrom(char* parameter)
{
if (strlen(parameter))
{
parseNumrange(parameter, 0);
}
o.under_overlay->from_nr = parameter;
}
void
ArgParser::argUOrepeat(char* parameter)
{
if (strlen(parameter))
{
parseNumrange(parameter, 0);
}
o.under_overlay->repeat_nr = parameter;
}
void
ArgParser::argUOpassword(char* parameter)
{
o.under_overlay->password = parameter;
}
void
ArgParser::argEndUnderOverlay()
{
this->option_table = &(this->main_option_table);
if (0 == o.under_overlay->filename)
{
usage(o.under_overlay->which + " file not specified");
}
o.under_overlay = 0;
}
void
ArgParser::argReplaceInput()
{
o.replace_input = true;
}
void
ArgParser::argIsEncrypted()
{
o.check_is_encrypted = true;
o.require_outfile = false;
}
void
ArgParser::argRequiresPassword()
{
o.check_requires_password = true;
o.require_outfile = false;
}
void
ArgParser::argAApositional(char* arg)
{
o.attachments_to_add.back().path = arg;
}
void
ArgParser::argAAKey(char* parameter)
{
o.attachments_to_add.back().key = parameter;
}
void
ArgParser::argAAFilename(char* parameter)
{
o.attachments_to_add.back().filename = parameter;
}
void
ArgParser::argAACreationDate(char* parameter)
{
if (! QUtil::pdf_time_to_qpdf_time(parameter))
{
usage(std::string(parameter) + " is not a valid PDF timestamp");
}
o.attachments_to_add.back().creationdate = parameter;
}
void
ArgParser::argAAModDate(char* parameter)
{
if (! QUtil::pdf_time_to_qpdf_time(parameter))
{
usage(std::string(parameter) + " is not a valid PDF timestamp");
}
o.attachments_to_add.back().moddate = parameter;
}
void
ArgParser::argAAMimeType(char* parameter)
{
if (strchr(parameter, '/') == nullptr)
{
usage("mime type should be specified as type/subtype");
}
o.attachments_to_add.back().mimetype = parameter;
}
void
ArgParser::argAADescription(char* parameter)
{
o.attachments_to_add.back().description = parameter;
}
void
ArgParser::argAAReplace()
{
o.attachments_to_add.back().replace = true;
}
void
ArgParser::argEndAddAttachment()
{
static std::string now = QUtil::qpdf_time_to_pdf_time(
QUtil::get_current_qpdf_time());
this->option_table = &(this->main_option_table);
auto& cur = o.attachments_to_add.back();
if (cur.path.empty())
{
usage("add attachment: no path specified");
}
std::string last_element = QUtil::path_basename(cur.path);
if (last_element.empty())
{
usage("path for --add-attachment may not be empty");
}
if (cur.filename.empty())
{
cur.filename = last_element;
}
if (cur.key.empty())
{
cur.key = last_element;
}
if (cur.creationdate.empty())
{
cur.creationdate = now;
}
if (cur.moddate.empty())
{
cur.moddate = now;
}
}
void
ArgParser::argCApositional(char* arg)
{
o.attachments_to_copy.back().path = arg;
}
void
ArgParser::argCAprefix(char* parameter)
{
o.attachments_to_copy.back().prefix = parameter;
}
void
ArgParser::argCApassword(char* parameter)
{
o.attachments_to_copy.back().password = parameter;
}
void
ArgParser::argEndCopyAttachments()
{
this->option_table = &(this->main_option_table);
if (o.attachments_to_copy.back().path.empty())
{
usage("copy attachments: no path specified");
}
}
void
ArgParser::handleArgFileArguments()
{
// Support reading arguments from files. Create a new argv. Ensure
// that argv itself as well as all its contents are automatically
// deleted by using PointerHolder objects to back the pointers in
// argv.
new_argv.push_back(PointerHolder<char>(true, QUtil::copy_string(argv[0])));
for (int i = 1; i < argc; ++i)
{
char* argfile = 0;
if ((strlen(argv[i]) > 1) && (argv[i][0] == '@'))
{
argfile = 1 + argv[i];
if (strcmp(argfile, "-") != 0)
{
if (! file_exists(argfile))
{
// The file's not there; treating as regular option
argfile = nullptr;
}
}
}
if (argfile)
{
readArgsFromFile(1+argv[i]);
}
else
{
new_argv.push_back(
PointerHolder<char>(true, QUtil::copy_string(argv[i])));
}
}
argv_ph = PointerHolder<char*>(true, new char*[1+new_argv.size()]);
argv = argv_ph.getPointer();
for (size_t i = 0; i < new_argv.size(); ++i)
{
argv[i] = new_argv.at(i).getPointer();
}
argc = QIntC::to_int(new_argv.size());
argv[argc] = 0;
}
void
ArgParser::handleBashArguments()
{
// Do a minimal job of parsing bash_line into arguments. This
// doesn't do everything the shell does (e.g. $(...), variable
// expansion, arithmetic, globs, etc.), but it should be good
// enough for purposes of handling completion. As we build up the
// new argv, we can't use this->new_argv because this code has to
// interoperate with @file arguments, so memory for both ways of
// fabricating argv has to be protected.
bool last_was_backslash = false;
enum { st_top, st_squote, st_dquote } state = st_top;
std::string arg;
for (std::string::iterator iter = bash_line.begin();
iter != bash_line.end(); ++iter)
{
char ch = (*iter);
if (last_was_backslash)
{
arg.append(1, ch);
last_was_backslash = false;
}
else if (ch == '\\')
{
last_was_backslash = true;
}
else
{
bool append = false;
switch (state)
{
case st_top:
if (QUtil::is_space(ch))
{
if (! arg.empty())
{
bash_argv.push_back(
PointerHolder<char>(
true, QUtil::copy_string(arg.c_str())));
arg.clear();
}
}
else if (ch == '"')
{
state = st_dquote;
}
else if (ch == '\'')
{
state = st_squote;
}
else
{
append = true;
}
break;
case st_squote:
if (ch == '\'')
{
state = st_top;
}
else
{
append = true;
}
break;
case st_dquote:
if (ch == '"')
{
state = st_top;
}
else
{
append = true;
}
break;
}
if (append)
{
arg.append(1, ch);
}
}
}
if (bash_argv.empty())
{
// This can't happen if properly invoked by bash, but ensure
// we have a valid argv[0] regardless.
bash_argv.push_back(
PointerHolder<char>(
true, QUtil::copy_string(argv[0])));
}
// Explicitly discard any non-space-terminated word. The "current
// word" is handled specially.
bash_argv_ph = PointerHolder<char*>(true, new char*[1+bash_argv.size()]);
argv = bash_argv_ph.getPointer();
for (size_t i = 0; i < bash_argv.size(); ++i)
{
argv[i] = bash_argv.at(i).getPointer();
}
argc = QIntC::to_int(bash_argv.size());
argv[argc] = 0;
}
void usageExit(std::string const& msg)
{
std::cerr
<< std::endl
<< whoami << ": " << msg << std::endl
<< std::endl
<< "Usage: " << whoami << " [options] infile outfile" << std::endl
<< "For detailed help, run " << whoami << " --help" << std::endl
<< std::endl;
exit(EXIT_ERROR);
}
void
ArgParser::usage(std::string const& message)
{
if (this->bash_completion)
{
// This will cause bash to fall back to regular file completion.
exit(0);
}
else
{
usageExit(message);
}
}
static std::string show_bool(bool v)
{
return v ? "allowed" : "not allowed";
}
static std::string show_encryption_method(QPDF::encryption_method_e method)
{
std::string result = "unknown";
switch (method)
{
case QPDF::e_none:
result = "none";
break;
case QPDF::e_unknown:
result = "unknown";
break;
case QPDF::e_rc4:
result = "RC4";
break;
case QPDF::e_aes:
result = "AESv2";
break;
case QPDF::e_aesv3:
result = "AESv3";
break;
// no default so gcc will warn for missing case
}
return result;
}
static void show_encryption(QPDF& pdf, Options& o)
{
// Extract /P from /Encrypt
int R = 0;
int P = 0;
int V = 0;
QPDF::encryption_method_e stream_method = QPDF::e_unknown;
QPDF::encryption_method_e string_method = QPDF::e_unknown;
QPDF::encryption_method_e file_method = QPDF::e_unknown;
if (! pdf.isEncrypted(R, P, V,
stream_method, string_method, file_method))
{
std::cout << "File is not encrypted" << std::endl;
}
else
{
std::cout << "R = " << R << std::endl;
std::cout << "P = " << P << std::endl;
std::string user_password = pdf.getTrimmedUserPassword();
std::string encryption_key = pdf.getEncryptionKey();
std::cout << "User password = " << user_password << std::endl;
if (o.show_encryption_key)
{
std::cout << "Encryption key = "
<< QUtil::hex_encode(encryption_key) << std::endl;
}
if (pdf.ownerPasswordMatched())
{
std::cout << "Supplied password is owner password" << std::endl;
}
if (pdf.userPasswordMatched())
{
std::cout << "Supplied password is user password" << std::endl;
}
std::cout << "extract for accessibility: "
<< show_bool(pdf.allowAccessibility()) << std::endl
<< "extract for any purpose: "
<< show_bool(pdf.allowExtractAll()) << std::endl
<< "print low resolution: "
<< show_bool(pdf.allowPrintLowRes()) << std::endl
<< "print high resolution: "
<< show_bool(pdf.allowPrintHighRes()) << std::endl
<< "modify document assembly: "
<< show_bool(pdf.allowModifyAssembly()) << std::endl
<< "modify forms: "
<< show_bool(pdf.allowModifyForm()) << std::endl
<< "modify annotations: "
<< show_bool(pdf.allowModifyAnnotation()) << std::endl
<< "modify other: "
<< show_bool(pdf.allowModifyOther()) << std::endl
<< "modify anything: "
<< show_bool(pdf.allowModifyAll()) << std::endl;
if (V >= 4)
{
std::cout << "stream encryption method: "
<< show_encryption_method(stream_method) << std::endl
<< "string encryption method: "
<< show_encryption_method(string_method) << std::endl
<< "file encryption method: "
<< show_encryption_method(file_method) << std::endl;
}
}
}
std::vector<int>
ArgParser::parseNumrange(char const* range, int max, bool throw_error)
{
try
{
return QUtil::parse_numrange(range, max);
}
catch (std::runtime_error& e)
{
if (throw_error)
{
throw(e);
}
else
{
usage(e.what());
}
}
return std::vector<int>();
}
std::vector<PageSpec>
ArgParser::parsePagesOptions()
{
auto check_unclosed = [this](char const* arg, int n) {
if ((strlen(arg) > 0) && (arg[0] == '-'))
{
// A common error is to forget to close --pages with --,
// so catch this as special case
QTC::TC("qpdf", "check unclosed --pages", n);
usage("the --pages option must be terminated with -- by itself");
}
};
std::vector<PageSpec> result;
while (1)
{
if ((cur_arg < argc) && (strcmp(argv[cur_arg], "--") == 0))
{
break;
}
if (cur_arg + 1 >= argc)
{
usage("insufficient arguments to --pages");
}
char const* file = argv[cur_arg++];
char const* password = 0;
char const* range = argv[cur_arg++];
if (! file_exists(file))
{
check_unclosed(file, 0);
}
if (strncmp(range, "--password=", 11) == 0)
{
// Oh, that's the password, not the range
if (cur_arg + 1 >= argc)
{
usage("insufficient arguments to --pages");
}
password = range + 11;
range = argv[cur_arg++];
}
// See if the user omitted the range entirely, in which case
// we assume "1-z".
bool range_omitted = false;
if (strcmp(range, "--") == 0)
{
// The filename or password was the last argument
QTC::TC("qpdf", "qpdf pages range omitted at end");
range_omitted = true;
}
else
{
try
{
parseNumrange(range, 0, true);
}
catch (std::runtime_error& e1)
{
// The range is invalid. Let's see if it's a file.
range_omitted = true;
if (strcmp(range, ".") == 0)
{
// "." means the input file.
QTC::TC("qpdf", "qpdf pages range omitted with .");
}
else if (file_exists(range))
{
QTC::TC("qpdf", "qpdf pages range omitted in middle");
// Yup, it's a file.
}
else
{
check_unclosed(range, 1);
// Give the range error
usage(e1.what());
}
}
}
if (range_omitted)
{
--cur_arg;
range = "1-z";
}
result.push_back(PageSpec(file, password, range));
}
return result;
}
void
ArgParser::parseUnderOverlayOptions(UnderOverlay* uo)
{
o.under_overlay = uo;
this->option_table = &(this->under_overlay_option_table);
}
QPDFPageData::QPDFPageData(std::string const& filename,
QPDF* qpdf,
char const* range) :
filename(filename),
qpdf(qpdf),
orig_pages(qpdf->getAllPages())
{
try
{
this->selected_pages =
QUtil::parse_numrange(range,
QIntC::to_int(this->orig_pages.size()));
}
catch (std::runtime_error& e)
{
usageExit("parsing numeric range for " + filename + ": " + e.what());
}
}
QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) :
filename(other.filename),
qpdf(other.qpdf),
orig_pages(other.orig_pages)
{
this->selected_pages.push_back(page);
}
static void parse_version(std::string const& full_version_string,
std::string& version, int& extension_level)
{
PointerHolder<char> vp(true, QUtil::copy_string(full_version_string));
char* v = vp.getPointer();
char* p1 = strchr(v, '.');
char* p2 = (p1 ? strchr(1 + p1, '.') : 0);
if (p2 && *(p2 + 1))
{
*p2++ = '\0';
extension_level = QUtil::string_to_int(p2);
}
version = v;
}
void
ArgParser::readArgsFromFile(char const* filename)
{
std::list<std::string> lines;
if (strcmp(filename, "-") == 0)
{
QTC::TC("qpdf", "qpdf read args from stdin");
lines = QUtil::read_lines_from_file(std::cin);
}
else
{
QTC::TC("qpdf", "qpdf read args from file");
lines = QUtil::read_lines_from_file(filename);
}
for (std::list<std::string>::iterator iter = lines.begin();
iter != lines.end(); ++iter)
{
new_argv.push_back(
PointerHolder<char>(true, QUtil::copy_string((*iter).c_str())));
}
}
void
ArgParser::handleHelpArgs()
{
// Handle special-case informational options that are only
// available as the sole option.
if (argc != 2)
{
return;
}
char* arg = argv[1];
if (*arg != '-')
{
return;
}
++arg;
if (*arg == '-')
{
++arg;
}
if (! *arg)
{
return;
}
if (this->help_option_table.count(arg))
{
(this->*(this->help_option_table[arg].bare_arg_handler))();
exit(0);
}
}
void
ArgParser::parseRotationParameter(std::string const& parameter)
{
std::string angle_str;
std::string range;
size_t colon = parameter.find(':');
int relative = 0;
if (colon != std::string::npos)
{
if (colon > 0)
{
angle_str = parameter.substr(0, colon);
}
if (colon + 1 < parameter.length())
{
range = parameter.substr(colon + 1);
}
}
else
{
angle_str = parameter;
}
if (angle_str.length() > 0)
{
char first = angle_str.at(0);
if ((first == '+') || (first == '-'))
{
relative = ((first == '+') ? 1 : -1);
angle_str = angle_str.substr(1);
}
else if (! QUtil::is_digit(angle_str.at(0)))
{
angle_str = "";
}
}
if (range.empty())
{
range = "1-z";
}
bool range_valid = false;
try
{
parseNumrange(range.c_str(), 0, true);
range_valid = true;
}
catch (std::runtime_error const&)
{
// ignore
}
if (range_valid &&
((angle_str == "0") ||(angle_str == "90") ||
(angle_str == "180") || (angle_str == "270")))
{
int angle = QUtil::string_to_int(angle_str.c_str());
if (relative == -1)
{
angle = -angle;
}
o.rotations[range] = RotationSpec(angle, (relative != 0));
}
else
{
usage("invalid parameter to rotate: " + parameter);
}
}
void
ArgParser::checkCompletion()
{
// See if we're being invoked from bash completion.
std::string bash_point_env;
// On Windows with mingw, there have been times when there appears
// to be no way to distinguish between an empty environment
// variable and an unset variable. There are also conditions under
// which bash doesn't set COMP_LINE. Therefore, enter this logic
// if either COMP_LINE or COMP_POINT are set. They will both be
// set together under ordinary circumstances.
bool got_line = QUtil::get_env("COMP_LINE", &bash_line);
bool got_point = QUtil::get_env("COMP_POINT", &bash_point_env);
if (got_line || got_point)
{
size_t p = QUtil::string_to_uint(bash_point_env.c_str());
if (p < bash_line.length())
{
// Truncate the line. We ignore everything at or after the
// cursor for completion purposes.
bash_line = bash_line.substr(0, p);
}
if (p > bash_line.length())
{
p = bash_line.length();
}
// Set bash_cur and bash_prev based on bash_line rather than
// relying on argv. This enables us to use bashcompinit to get
// completion in zsh too since bashcompinit sets COMP_LINE and
// COMP_POINT but doesn't invoke the command with options like
// bash does.
// p is equal to length of the string. Walk backwards looking
// for the first separator. bash_cur is everything after the
// last separator, possibly empty.
char sep(0);
while (p > 0)
{
--p;
char ch = bash_line.at(p);
if ((ch == ' ') || (ch == '=') || (ch == ':'))
{
sep = ch;
break;
}
}
if (1+p <= bash_line.length())
{
bash_cur = bash_line.substr(1+p, std::string::npos);
}
if ((sep == ':') || (sep == '='))
{
// Bash sets prev to the non-space separator if any.
// Actually, if there are multiple separators in a row,
// they are all included in prev, but that detail is not
// important to us and not worth coding.
bash_prev = bash_line.substr(p, 1);
}
else
{
// Go back to the last separator and set prev based on
// that.
size_t p1 = p;
while (p1 > 0)
{
--p1;
char ch = bash_line.at(p1);
if ((ch == ' ') || (ch == ':') || (ch == '='))
{
bash_prev = bash_line.substr(p1 + 1, p - p1 - 1);
break;
}
}
}
if (bash_prev.empty())
{
bash_prev = bash_line.substr(0, p);
}
if (argc == 1)
{
// This is probably zsh using bashcompinit. There are a
// few differences in the expected output.
zsh_completion = true;
}
handleBashArguments();
bash_completion = true;
}
}
void
ArgParser::parseOptions()
{
checkCompletion();
if (! this->bash_completion)
{
handleHelpArgs();
}
handleArgFileArguments();
for (cur_arg = 1; cur_arg < argc; ++cur_arg)
{
char* arg = argv[cur_arg];
if (strcmp(arg, "--") == 0)
{
// Special case for -- option, which is used to break out
// of subparsers.
OptionEntry& oe = (*this->option_table)["--"];
if (oe.bare_arg_handler)
{
(this->*(oe.bare_arg_handler))();
}
}
else if ((arg[0] == '-') && (strcmp(arg, "-") != 0))
{
++arg;
if (arg[0] == '-')
{
// Be lax about -arg vs --arg
++arg;
}
char* parameter = 0;
if (strlen(arg) > 0)
{
// Prevent --=something from being treated as an empty
// arg since the empty string in the option table is
// for positional arguments.
parameter = const_cast<char*>(strchr(1 + arg, '='));
}
if (parameter)
{
*parameter++ = 0;
}
std::string arg_s(arg);
if (arg_s.empty() ||
(arg_s.at(0) == '-') ||
(0 == this->option_table->count(arg_s)))
{
usage(std::string("unknown option --") + arg);
}
OptionEntry& oe = (*this->option_table)[arg_s];
if ((oe.parameter_needed && (0 == parameter)) ||
((! oe.choices.empty() &&
((0 == parameter) ||
(0 == oe.choices.count(parameter))))))
{
std::string message =
"--" + arg_s + " must be given as --" + arg_s + "=";
if (! oe.choices.empty())
{
QTC::TC("qpdf", "qpdf required choices");
message += "{";
for (std::set<std::string>::iterator iter =
oe.choices.begin();
iter != oe.choices.end(); ++iter)
{
if (iter != oe.choices.begin())
{
message += ",";
}
message += *iter;
}
message += "}";
}
else if (! oe.parameter_name.empty())
{
QTC::TC("qpdf", "qpdf required parameter");
message += oe.parameter_name;
}
else
{
// should not be possible
message += "option";
}
usage(message);
}
if (oe.bare_arg_handler)
{
(this->*(oe.bare_arg_handler))();
}
else if (oe.param_arg_handler)
{
(this->*(oe.param_arg_handler))(parameter);
}
}
else if (0 != this->option_table->count(""))
{
// The empty string maps to the positional argument
// handler.
OptionEntry& oe = (*this->option_table)[""];
if (oe.param_arg_handler)
{
(this->*(oe.param_arg_handler))(arg);
}
}
else
{
usage(std::string("unknown argument ") + arg);
}
}
if (this->bash_completion)
{
handleCompletion();
}
else
{
doFinalChecks();
}
}
void
ArgParser::doFinalChecks()
{
if (this->option_table != &(this->main_option_table))
{
usage("missing -- at end of options");
}
if (o.replace_input)
{
if (o.outfilename)
{
usage("--replace-input may not be used when"
" an output file is specified");
}
else if (o.split_pages)
{
usage("--split-pages may not be used with --replace-input");
}
}
if (o.infilename == 0)
{
usage("an input file name is required");
}
else if (o.require_outfile && (o.outfilename == 0) && (! o.replace_input))
{
usage("an output file name is required; use - for standard output");
}
else if ((! o.require_outfile) &&
((o.outfilename != 0) || o.replace_input))
{
usage("no output file may be given for this option");
}
if (o.optimize_images && (! o.keep_inline_images))
{
o.externalize_inline_images = true;
}
if (o.check_requires_password && o.check_is_encrypted)
{
usage("--requires-password and --is-encrypted may not be given"
" together");
}
if (o.encrypt && (! o.allow_insecure) &&
(o.owner_password.empty() &&
(! o.user_password.empty()) &&
(o.keylen == 256)))
{
// Note that empty owner passwords for R < 5 are copied from
// the user password, so this lack of security is not an issue
// for those files. Also we are consider only the ability to
// open the file without a password to be insecure. We are not
// concerned about whether the viewer enforces security
// settings when the user and owner password match.
usage("A PDF with a non-empty user password and an empty owner"
" password encrypted with a 256-bit key is insecure as it"
" can be opened without a password. If you really want to"
" do this, you must also give the --allow-insecure option"
" before the -- that follows --encrypt.");
}
if (o.require_outfile && o.outfilename &&
(strcmp(o.outfilename, "-") == 0))
{
if (o.split_pages)
{
usage("--split-pages may not be used when"
" writing to standard output");
}
if (o.verbose)
{
usage("--verbose may not be used when"
" writing to standard output");
}
if (o.progress)
{
usage("--progress may not be used when"
" writing to standard output");
}
}
if ((! o.split_pages) && QUtil::same_file(o.infilename, o.outfilename))
{
QTC::TC("qpdf", "qpdf same file error");
usage("input file and output file are the same;"
" use --replace-input to intentionally overwrite the input file");
}
}
void
ArgParser::addChoicesToCompletions(std::string const& option,
std::string const& extra_prefix)
{
if (this->option_table->count(option) != 0)
{
OptionEntry& oe = (*this->option_table)[option];
for (std::set<std::string>::iterator iter = oe.choices.begin();
iter != oe.choices.end(); ++iter)
{
completions.insert(extra_prefix + *iter);
}
}
}
void
ArgParser::addOptionsToCompletions()
{
for (std::map<std::string, OptionEntry>::iterator iter =
this->option_table->begin();
iter != this->option_table->end(); ++iter)
{
std::string const& arg = (*iter).first;
if (arg == "--")
{
continue;
}
OptionEntry& oe = (*iter).second;
std::string base = "--" + arg;
if (oe.param_arg_handler)
{
if (zsh_completion)
{
// zsh doesn't treat = as a word separator, so add all
// the options so we don't get a space after the =.
addChoicesToCompletions(arg, base + "=");
}
completions.insert(base + "=");
}
if (! oe.parameter_needed)
{
completions.insert(base);
}
}
}
void
ArgParser::handleCompletion()
{
std::string extra_prefix;
if (this->completions.empty())
{
// Detect --option=... Bash treats the = as a word separator.
std::string choice_option;
if (bash_cur.empty() && (bash_prev.length() > 2) &&
(bash_prev.at(0) == '-') &&
(bash_prev.at(1) == '-') &&
(bash_line.at(bash_line.length() - 1) == '='))
{
choice_option = bash_prev.substr(2, std::string::npos);
}
else if ((bash_prev == "=") &&
(bash_line.length() > (bash_cur.length() + 1)))
{
// We're sitting at --option=x. Find previous option.
size_t end_mark = bash_line.length() - bash_cur.length() - 1;
char before_cur = bash_line.at(end_mark);
if (before_cur == '=')
{
size_t space = bash_line.find_last_of(' ', end_mark);
if (space != std::string::npos)
{
std::string candidate =
bash_line.substr(space + 1, end_mark - space - 1);
if ((candidate.length() > 2) &&
(candidate.at(0) == '-') &&
(candidate.at(1) == '-'))
{
choice_option =
candidate.substr(2, std::string::npos);
}
}
}
}
if (! choice_option.empty())
{
if (zsh_completion)
{
// zsh wants --option=choice rather than just choice
extra_prefix = "--" + choice_option + "=";
}
addChoicesToCompletions(choice_option, extra_prefix);
}
else if ((! bash_cur.empty()) && (bash_cur.at(0) == '-'))
{
addOptionsToCompletions();
if (this->argc == 1)
{
// Help options are valid only by themselves.
for (std::map<std::string, OptionEntry>::iterator iter =
this->help_option_table.begin();
iter != this->help_option_table.end(); ++iter)
{
this->completions.insert("--" + (*iter).first);
}
}
}
}
std::string prefix = extra_prefix + bash_cur;
for (std::set<std::string>::iterator iter = completions.begin();
iter != completions.end(); ++iter)
{
if (prefix.empty() ||
((*iter).substr(0, prefix.length()) == prefix))
{
std::cout << *iter << std::endl;
}
}
exit(0);
}
static void set_qpdf_options(QPDF& pdf, Options& o)
{
if (o.ignore_xref_streams)
{
pdf.setIgnoreXRefStreams(true);
}
if (o.suppress_recovery)
{
pdf.setAttemptRecovery(false);
}
if (o.password_is_hex_key)
{
pdf.setPasswordIsHexKey(true);
}
if (o.suppress_warnings)
{
pdf.setSuppressWarnings(true);
}
}
static void do_check(QPDF& pdf, Options& o, int& exit_code)
{
// Code below may set okay to false but not to true.
// We assume okay until we prove otherwise but may
// continue to perform additional checks after finding
// errors.
bool okay = true;
bool warnings = false;
std::cout << "checking " << o.infilename << std::endl;
try
{
int extension_level = pdf.getExtensionLevel();
std::cout << "PDF Version: " << pdf.getPDFVersion();
if (extension_level > 0)
{
std::cout << " extension level "
<< pdf.getExtensionLevel();
}
std::cout << std::endl;
show_encryption(pdf, o);
if (pdf.isLinearized())
{
std::cout << "File is linearized\n";
// any errors or warnings are reported by
// checkLinearization(). We treat all issues reported here
// as warnings.
if (! pdf.checkLinearization())
{
warnings = true;
}
}
else
{
std::cout << "File is not linearized\n";
}
// Write the file no nowhere, uncompressing
// streams. This causes full file traversal and
// decoding of all streams we can decode.
QPDFWriter w(pdf);
Pl_Discard discard;
w.setOutputPipeline(&discard);
w.setDecodeLevel(qpdf_dl_all);
w.write();
// Parse all content streams
QPDFPageDocumentHelper dh(pdf);
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
DiscardContents discard_contents;
int pageno = 0;
for (std::vector<QPDFPageObjectHelper>::iterator iter =
pages.begin();
iter != pages.end(); ++iter)
{
QPDFPageObjectHelper& page(*iter);
++pageno;
try
{
page.parseContents(&discard_contents);
}
catch (QPDFExc& e)
{
okay = false;
std::cerr << "ERROR: page " << pageno << ": "
<< e.what() << std::endl;
}
}
}
catch (std::exception& e)
{
std::cerr << "ERROR: " << e.what() << std::endl;
okay = false;
}
if (okay)
{
if ((! pdf.getWarnings().empty()) || warnings)
{
exit_code = EXIT_WARNING;
}
else
{
std::cout << "No syntax or stream encoding errors"
<< " found; the file may still contain"
<< std::endl
<< "errors that qpdf cannot detect"
<< std::endl;
}
}
else
{
exit_code = EXIT_ERROR;
}
}
static void do_show_obj(QPDF& pdf, Options& o, int& exit_code)
{
QPDFObjectHandle obj;
if (o.show_trailer)
{
obj = pdf.getTrailer();
}
else
{
obj = pdf.getObjectByID(o.show_obj, o.show_gen);
}
if (obj.isStream())
{
if (o.show_raw_stream_data || o.show_filtered_stream_data)
{
bool filter = o.show_filtered_stream_data;
if (filter &&
(! obj.pipeStreamData(0, 0, qpdf_dl_all)))
{
QTC::TC("qpdf", "qpdf unable to filter");
std::cerr << "Unable to filter stream data."
<< std::endl;
exit_code = EXIT_ERROR;
}
else
{
QUtil::binary_stdout();
Pl_StdioFile out("stdout", stdout);
obj.pipeStreamData(
&out,
(filter && o.normalize) ? qpdf_ef_normalize : 0,
filter ? qpdf_dl_all : qpdf_dl_none);
}
}
else
{
std::cout
<< "Object is stream. Dictionary:" << std::endl
<< obj.getDict().unparseResolved() << std::endl;
}
}
else
{
std::cout << obj.unparseResolved() << std::endl;
}
}
static void do_show_pages(QPDF& pdf, Options& o)
{
QPDFPageDocumentHelper dh(pdf);
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
int pageno = 0;
for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
iter != pages.end(); ++iter)
{
QPDFPageObjectHelper& ph(*iter);
QPDFObjectHandle page = ph.getObjectHandle();
++pageno;
std::cout << "page " << pageno << ": "
<< page.getObjectID() << " "
<< page.getGeneration() << " R" << std::endl;
if (o.show_page_images)
{
std::map<std::string, QPDFObjectHandle> images = ph.getImages();
if (! images.empty())
{
std::cout << " images:" << std::endl;
for (auto const& iter2: images)
{
std::string const& name = iter2.first;
QPDFObjectHandle image = iter2.second;
QPDFObjectHandle dict = image.getDict();
int width =
dict.getKey("/Width").getIntValueAsInt();
int height =
dict.getKey("/Height").getIntValueAsInt();
std::cout << " " << name << ": "
<< image.unparse()
<< ", " << width << " x " << height
<< std::endl;
}
}
}
std::cout << " content:" << std::endl;
std::vector<QPDFObjectHandle> content =
ph.getPageContents();
for (auto& iter2: content)
{
std::cout << " " << iter2.unparse() << std::endl;
}
}
}
static void do_list_attachments(QPDF& pdf, Options& o)
{
QPDFEmbeddedFileDocumentHelper efdh(pdf);
if (efdh.hasEmbeddedFiles())
{
for (auto const& i: efdh.getEmbeddedFiles())
{
std::string const& key = i.first;
auto efoh = i.second;
std::cout << key << " -> "
<< efoh->getEmbeddedFileStream().getObjGen()
<< std::endl;
if (o.verbose)
{
auto desc = efoh->getDescription();
if (! desc.empty())
{
std::cout << " description: " << desc << std::endl;
}
std::cout << " preferred name: " << efoh->getFilename()
<< std::endl;
std::cout << " all names:" << std::endl;
for (auto const& i2: efoh->getFilenames())
{
std::cout << " " << i2.first << " -> " << i2.second
<< std::endl;
}
std::cout << " all data streams:" << std::endl;
for (auto i2: efoh->getEmbeddedFileStreams().ditems())
{
std::cout << " " << i2.first << " -> "
<< i2.second.getObjGen()
<< std::endl;
}
}
}
}
else
{
std::cout << o.infilename << " has no embedded files" << std::endl;
}
}
static void do_show_attachment(QPDF& pdf, Options& o, int& exit_code)
{
QPDFEmbeddedFileDocumentHelper efdh(pdf);
auto fs = efdh.getEmbeddedFile(o.attachment_to_show);
if (! fs)
{
std::cerr << whoami << ": attachment " << o.attachment_to_show
<< " not found" << std::endl;
exit_code = EXIT_ERROR;
return;
}
auto efs = fs->getEmbeddedFileStream();
QUtil::binary_stdout();
Pl_StdioFile out("stdout", stdout);
efs.pipeStreamData(&out, 0, qpdf_dl_all);
}
static std::set<QPDFObjGen>
get_wanted_json_objects(Options& o)
{
std::set<QPDFObjGen> wanted_og;
for (auto const& iter: o.json_objects)
{
bool trailer;
int obj = 0;
int gen = 0;
parse_object_id(iter, trailer, obj, gen);
if (obj)
{
wanted_og.insert(QPDFObjGen(obj, gen));
}
}
return wanted_og;
}
static void do_json_objects(QPDF& pdf, Options& o, JSON& j)
{
// Add all objects. Do this first before other code below modifies
// things by doing stuff like calling
// pushInheritedAttributesToPage.
bool all_objects = o.json_objects.empty();
std::set<QPDFObjGen> wanted_og = get_wanted_json_objects(o);
JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary());
if (all_objects || o.json_objects.count("trailer"))
{
j_objects.addDictionaryMember(
"trailer", pdf.getTrailer().getJSON(true));
}
std::vector<QPDFObjectHandle> objects = pdf.getAllObjects();
for (std::vector<QPDFObjectHandle>::iterator iter = objects.begin();
iter != objects.end(); ++iter)
{
if (all_objects || wanted_og.count((*iter).getObjGen()))
{
j_objects.addDictionaryMember(
(*iter).unparse(), (*iter).getJSON(true));
}
}
}
static void do_json_objectinfo(QPDF& pdf, Options& o, JSON& j)
{
// Do this first before other code below modifies things by doing
// stuff like calling pushInheritedAttributesToPage.
bool all_objects = o.json_objects.empty();
std::set<QPDFObjGen> wanted_og = get_wanted_json_objects(o);
JSON j_objectinfo = j.addDictionaryMember(
"objectinfo", JSON::makeDictionary());
for (auto& obj: pdf.getAllObjects())
{
if (all_objects || wanted_og.count(obj.getObjGen()))
{
auto j_details = j_objectinfo.addDictionaryMember(
obj.unparse(), JSON::makeDictionary());
auto j_stream = j_details.addDictionaryMember(
"stream", JSON::makeDictionary());
bool is_stream = obj.isStream();
j_stream.addDictionaryMember(
"is", JSON::makeBool(is_stream));
j_stream.addDictionaryMember(
"length",
(is_stream
? obj.getDict().getKey("/Length").getJSON(true)
: JSON::makeNull()));
j_stream.addDictionaryMember(
"filter",
(is_stream
? obj.getDict().getKey("/Filter").getJSON(true)
: JSON::makeNull()));
}
}
}
static void do_json_pages(QPDF& pdf, Options& o, JSON& j)
{
JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray());
QPDFPageDocumentHelper pdh(pdf);
QPDFPageLabelDocumentHelper pldh(pdf);
QPDFOutlineDocumentHelper odh(pdf);
pdh.pushInheritedAttributesToPage();
std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
int pageno = 0;
for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
iter != pages.end(); ++iter, ++pageno)
{
JSON j_page = j_pages.addArrayElement(JSON::makeDictionary());
QPDFPageObjectHelper& ph(*iter);
QPDFObjectHandle page = ph.getObjectHandle();
j_page.addDictionaryMember("object", page.getJSON());
JSON j_images = j_page.addDictionaryMember(
"images", JSON::makeArray());
std::map<std::string, QPDFObjectHandle> images = ph.getImages();
for (auto const& iter2: images)
{
JSON j_image = j_images.addArrayElement(JSON::makeDictionary());
j_image.addDictionaryMember(
"name", JSON::makeString(iter2.first));
QPDFObjectHandle image = iter2.second;
QPDFObjectHandle dict = image.getDict();
j_image.addDictionaryMember("object", image.getJSON());
j_image.addDictionaryMember(
"width", dict.getKey("/Width").getJSON());
j_image.addDictionaryMember(
"height", dict.getKey("/Height").getJSON());
j_image.addDictionaryMember(
"colorspace", dict.getKey("/ColorSpace").getJSON());
j_image.addDictionaryMember(
"bitspercomponent", dict.getKey("/BitsPerComponent").getJSON());
QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray();
j_image.addDictionaryMember(
"filter", filters.getJSON());
QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms");
QPDFObjectHandle dp_array;
if (decode_parms.isArray())
{
dp_array = decode_parms;
}
else
{
dp_array = QPDFObjectHandle::newArray();
for (int i = 0; i < filters.getArrayNItems(); ++i)
{
dp_array.appendItem(decode_parms);
}
}
j_image.addDictionaryMember("decodeparms", dp_array.getJSON());
j_image.addDictionaryMember(
"filterable",
JSON::makeBool(
image.pipeStreamData(0, 0, o.decode_level, true)));
}
j_page.addDictionaryMember("images", j_images);
JSON j_contents = j_page.addDictionaryMember(
"contents", JSON::makeArray());
std::vector<QPDFObjectHandle> content = ph.getPageContents();
for (auto& iter2: content)
{
j_contents.addArrayElement(iter2.getJSON());
}
j_page.addDictionaryMember(
"label", pldh.getLabelForPage(pageno).getJSON());
JSON j_outlines = j_page.addDictionaryMember(
"outlines", JSON::makeArray());
std::vector<QPDFOutlineObjectHelper> outlines =
odh.getOutlinesForPage(page.getObjGen());
for (std::vector<QPDFOutlineObjectHelper>::iterator oiter =
outlines.begin();
oiter != outlines.end(); ++oiter)
{
JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary());
j_outline.addDictionaryMember(
"object", (*oiter).getObjectHandle().getJSON());
j_outline.addDictionaryMember(
"title", JSON::makeString((*oiter).getTitle()));
j_outline.addDictionaryMember(
"dest", (*oiter).getDest().getJSON(true));
}
j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno));
}
}
static void do_json_page_labels(QPDF& pdf, Options& o, JSON& j)
{
JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray());
QPDFPageLabelDocumentHelper pldh(pdf);
QPDFPageDocumentHelper pdh(pdf);
std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
if (pldh.hasPageLabels())
{
std::vector<QPDFObjectHandle> labels;
pldh.getLabelsForPageRange(
0, QIntC::to_int(pages.size()) - 1, 0, labels);
for (std::vector<QPDFObjectHandle>::iterator iter = labels.begin();
iter != labels.end(); ++iter)
{
std::vector<QPDFObjectHandle>::iterator next = iter;
++next;
if (next == labels.end())
{
// This can't happen, so ignore it. This could only
// happen if getLabelsForPageRange somehow returned an
// odd number of items.
break;
}
JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
j_label.addDictionaryMember("index", (*iter).getJSON());
++iter;
j_label.addDictionaryMember("label", (*iter).getJSON());
}
}
}
static void add_outlines_to_json(
std::vector<QPDFOutlineObjectHelper> outlines, JSON& j,
std::map<QPDFObjGen, int>& page_numbers)
{
for (std::vector<QPDFOutlineObjectHelper>::iterator iter = outlines.begin();
iter != outlines.end(); ++iter)
{
QPDFOutlineObjectHelper& ol = *iter;
JSON jo = j.addArrayElement(JSON::makeDictionary());
jo.addDictionaryMember("object", ol.getObjectHandle().getJSON());
jo.addDictionaryMember("title", JSON::makeString(ol.getTitle()));
jo.addDictionaryMember("dest", ol.getDest().getJSON(true));
jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0));
QPDFObjectHandle page = ol.getDestPage();
JSON j_destpage = JSON::makeNull();
if (page.isIndirect())
{
QPDFObjGen og = page.getObjGen();
if (page_numbers.count(og))
{
j_destpage = JSON::makeInt(page_numbers[og]);
}
}
jo.addDictionaryMember("destpageposfrom1", j_destpage);
JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray());
add_outlines_to_json(ol.getKids(), j_kids, page_numbers);
}
}
static void do_json_outlines(QPDF& pdf, Options& o, JSON& j)
{
std::map<QPDFObjGen, int> page_numbers;
QPDFPageDocumentHelper dh(pdf);
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
int n = 0;
for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
iter != pages.end(); ++iter)
{
QPDFObjectHandle oh = (*iter).getObjectHandle();
page_numbers[oh.getObjGen()] = ++n;
}
JSON j_outlines = j.addDictionaryMember(
"outlines", JSON::makeArray());
QPDFOutlineDocumentHelper odh(pdf);
add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines, page_numbers);
}
static void do_json_acroform(QPDF& pdf, Options& o, JSON& j)
{
JSON j_acroform = j.addDictionaryMember(
"acroform", JSON::makeDictionary());
QPDFAcroFormDocumentHelper afdh(pdf);
j_acroform.addDictionaryMember(
"hasacroform",
JSON::makeBool(afdh.hasAcroForm()));
j_acroform.addDictionaryMember(
"needappearances",
JSON::makeBool(afdh.getNeedAppearances()));
JSON j_fields = j_acroform.addDictionaryMember(
"fields", JSON::makeArray());
QPDFPageDocumentHelper pdh(pdf);
std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
int pagepos1 = 0;
for (std::vector<QPDFPageObjectHelper>::iterator page_iter =
pages.begin();
page_iter != pages.end(); ++page_iter)
{
++pagepos1;
std::vector<QPDFAnnotationObjectHelper> annotations =
afdh.getWidgetAnnotationsForPage(*page_iter);
for (std::vector<QPDFAnnotationObjectHelper>::iterator annot_iter =
annotations.begin();
annot_iter != annotations.end(); ++annot_iter)
{
QPDFAnnotationObjectHelper& aoh = *annot_iter;
QPDFFormFieldObjectHelper ffh =
afdh.getFieldForAnnotation(aoh);
JSON j_field = j_fields.addArrayElement(
JSON::makeDictionary());
j_field.addDictionaryMember(
"object",
ffh.getObjectHandle().getJSON());
j_field.addDictionaryMember(
"parent",
ffh.getObjectHandle().getKey("/Parent").getJSON());
j_field.addDictionaryMember(
"pageposfrom1",
JSON::makeInt(pagepos1));
j_field.addDictionaryMember(
"fieldtype",
JSON::makeString(ffh.getFieldType()));
j_field.addDictionaryMember(
"fieldflags",
JSON::makeInt(ffh.getFlags()));
j_field.addDictionaryMember(
"fullname",
JSON::makeString(ffh.getFullyQualifiedName()));
j_field.addDictionaryMember(
"partialname",
JSON::makeString(ffh.getPartialName()));
j_field.addDictionaryMember(
"alternativename",
JSON::makeString(ffh.getAlternativeName()));
j_field.addDictionaryMember(
"mappingname",
JSON::makeString(ffh.getMappingName()));
j_field.addDictionaryMember(
"value",
ffh.getValue().getJSON());
j_field.addDictionaryMember(
"defaultvalue",
ffh.getDefaultValue().getJSON());
j_field.addDictionaryMember(
"quadding",
JSON::makeInt(ffh.getQuadding()));
j_field.addDictionaryMember(
"ischeckbox",
JSON::makeBool(ffh.isCheckbox()));
j_field.addDictionaryMember(
"isradiobutton",
JSON::makeBool(ffh.isRadioButton()));
j_field.addDictionaryMember(
"ischoice",
JSON::makeBool(ffh.isChoice()));
j_field.addDictionaryMember(
"istext",
JSON::makeBool(ffh.isText()));
JSON j_choices = j_field.addDictionaryMember(
"choices", JSON::makeArray());
std::vector<std::string> choices = ffh.getChoices();
for (std::vector<std::string>::iterator iter = choices.begin();
iter != choices.end(); ++iter)
{
j_choices.addArrayElement(JSON::makeString(*iter));
}
JSON j_annot = j_field.addDictionaryMember(
"annotation", JSON::makeDictionary());
j_annot.addDictionaryMember(
"object",
aoh.getObjectHandle().getJSON());
j_annot.addDictionaryMember(
"appearancestate",
JSON::makeString(aoh.getAppearanceState()));
j_annot.addDictionaryMember(
"annotationflags",
JSON::makeInt(aoh.getFlags()));
}
}
}
static void do_json_encrypt(QPDF& pdf, Options& o, JSON& j)
{
int R = 0;
int P = 0;
int V = 0;
QPDF::encryption_method_e stream_method = QPDF::e_none;
QPDF::encryption_method_e string_method = QPDF::e_none;
QPDF::encryption_method_e file_method = QPDF::e_none;
bool is_encrypted = pdf.isEncrypted(
R, P, V, stream_method, string_method, file_method);
JSON j_encrypt = j.addDictionaryMember(
"encrypt", JSON::makeDictionary());
j_encrypt.addDictionaryMember(
"encrypted",
JSON::makeBool(is_encrypted));
j_encrypt.addDictionaryMember(
"userpasswordmatched",
JSON::makeBool(is_encrypted && pdf.userPasswordMatched()));
j_encrypt.addDictionaryMember(
"ownerpasswordmatched",
JSON::makeBool(is_encrypted && pdf.ownerPasswordMatched()));
JSON j_capabilities = j_encrypt.addDictionaryMember(
"capabilities", JSON::makeDictionary());
j_capabilities.addDictionaryMember(
"accessibility",
JSON::makeBool(pdf.allowAccessibility()));
j_capabilities.addDictionaryMember(
"extract",
JSON::makeBool(pdf.allowExtractAll()));
j_capabilities.addDictionaryMember(
"printlow",
JSON::makeBool(pdf.allowPrintLowRes()));
j_capabilities.addDictionaryMember(
"printhigh",
JSON::makeBool(pdf.allowPrintHighRes()));
j_capabilities.addDictionaryMember(
"modifyassembly",
JSON::makeBool(pdf.allowModifyAssembly()));
j_capabilities.addDictionaryMember(
"modifyforms",
JSON::makeBool(pdf.allowModifyForm()));
j_capabilities.addDictionaryMember(
"moddifyannotations",
JSON::makeBool(pdf.allowModifyAnnotation()));
j_capabilities.addDictionaryMember(
"modifyother",
JSON::makeBool(pdf.allowModifyOther()));
j_capabilities.addDictionaryMember(
"modify",
JSON::makeBool(pdf.allowModifyAll()));
JSON j_parameters = j_encrypt.addDictionaryMember(
"parameters", JSON::makeDictionary());
j_parameters.addDictionaryMember("R", JSON::makeInt(R));
j_parameters.addDictionaryMember("V", JSON::makeInt(V));
j_parameters.addDictionaryMember("P", JSON::makeInt(P));
int bits = 0;
JSON key = JSON::makeNull();
if (is_encrypted)
{
std::string encryption_key = pdf.getEncryptionKey();
bits = QIntC::to_int(encryption_key.length() * 8);
if (o.show_encryption_key)
{
key = JSON::makeString(QUtil::hex_encode(encryption_key));
}
}
j_parameters.addDictionaryMember("bits", JSON::makeInt(bits));
j_parameters.addDictionaryMember("key", key);
auto fix_method = [is_encrypted](QPDF::encryption_method_e& m) {
if (is_encrypted && m == QPDF::e_none)
{
m = QPDF::e_rc4;
}
};
fix_method(stream_method);
fix_method(string_method);
fix_method(file_method);
std::string s_stream_method = show_encryption_method(stream_method);
std::string s_string_method = show_encryption_method(string_method);
std::string s_file_method = show_encryption_method(file_method);
std::string s_overall_method;
if ((stream_method == string_method) &&
(stream_method == file_method))
{
s_overall_method = s_stream_method;
}
else
{
s_overall_method = "mixed";
}
j_parameters.addDictionaryMember(
"method", JSON::makeString(s_overall_method));
j_parameters.addDictionaryMember(
"streammethod", JSON::makeString(s_stream_method));
j_parameters.addDictionaryMember(
"stringmethod", JSON::makeString(s_string_method));
j_parameters.addDictionaryMember(
"filemethod", JSON::makeString(s_file_method));
}
static void do_json_attachments(QPDF& pdf, Options& o, JSON& j)
{
JSON j_attachments = j.addDictionaryMember(
"attachments", JSON::makeDictionary());
QPDFEmbeddedFileDocumentHelper efdh(pdf);
for (auto const& iter: efdh.getEmbeddedFiles())
{
std::string const& key = iter.first;
auto fsoh = iter.second;
auto j_details = j_attachments.addDictionaryMember(
key, JSON::makeDictionary());
j_details.addDictionaryMember(
"filespec",
JSON::makeString(fsoh->getObjectHandle().unparse()));
j_details.addDictionaryMember(
"preferredname", JSON::makeString(fsoh->getFilename()));
j_details.addDictionaryMember(
"preferredcontents",
JSON::makeString(fsoh->getEmbeddedFileStream().unparse()));
}
}
static void do_json(QPDF& pdf, Options& o)
{
JSON j = JSON::makeDictionary();
// This version is updated every time a non-backward-compatible
// change is made to the JSON format. Clients of the JSON are to
// ignore unrecognized keys, so we only update the version of a
// key disappears or if its value changes meaning.
j.addDictionaryMember("version", JSON::makeInt(1));
JSON j_params = j.addDictionaryMember(
"parameters", JSON::makeDictionary());
std::string decode_level_str;
switch (o.decode_level)
{
case qpdf_dl_none:
decode_level_str = "none";
break;
case qpdf_dl_generalized:
decode_level_str = "generalized";
break;
case qpdf_dl_specialized:
decode_level_str = "specialized";
break;
case qpdf_dl_all:
decode_level_str = "all";
break;
}
j_params.addDictionaryMember(
"decodelevel", JSON::makeString(decode_level_str));
bool all_keys = o.json_keys.empty();
// The list of selectable top-level keys id duplicated in three
// places: json_schema, do_json, and initOptionTable.
if (all_keys || o.json_keys.count("objects"))
{
do_json_objects(pdf, o, j);
}
if (all_keys || o.json_keys.count("objectinfo"))
{
do_json_objectinfo(pdf, o, j);
}
if (all_keys || o.json_keys.count("pages"))
{
do_json_pages(pdf, o, j);
}
if (all_keys || o.json_keys.count("pagelabels"))
{
do_json_page_labels(pdf, o, j);
}
if (all_keys || o.json_keys.count("outlines"))
{
do_json_outlines(pdf, o, j);
}
if (all_keys || o.json_keys.count("acroform"))
{
do_json_acroform(pdf, o, j);
}
if (all_keys || o.json_keys.count("encrypt"))
{
do_json_encrypt(pdf, o, j);
}
if (all_keys || o.json_keys.count("attachments"))
{
do_json_attachments(pdf, o, j);
}
// Check against schema
JSON schema = json_schema(&o.json_keys);
std::list<std::string> errors;
if (! j.checkSchema(schema, errors))
{
std::cerr
<< whoami << " didn't create JSON that complies with its own\n\
rules. Please report this as a bug at\n\
https://github.com/qpdf/qpdf/issues/new\n\
ideally with the file that caused the error and the output below. Thanks!\n\
\n";
for (std::list<std::string>::iterator iter = errors.begin();
iter != errors.end(); ++iter)
{
std::cerr << (*iter) << std::endl;
}
}
std::cout << j.unparse() << std::endl;
}
static void do_inspection(QPDF& pdf, Options& o)
{
int exit_code = 0;
if (o.check)
{
do_check(pdf, o, exit_code);
}
if (o.json)
{
do_json(pdf, o);
}
if (o.show_npages)
{
QTC::TC("qpdf", "qpdf npages");
std::cout << pdf.getRoot().getKey("/Pages").
getKey("/Count").getIntValue() << std::endl;
}
if (o.show_encryption)
{
show_encryption(pdf, o);
}
if (o.check_linearization)
{
if (pdf.checkLinearization())
{
std::cout << o.infilename << ": no linearization errors"
<< std::endl;
}
else if (exit_code != EXIT_ERROR)
{
exit_code = EXIT_WARNING;
}
}
if (o.show_linearization)
{
if (pdf.isLinearized())
{
pdf.showLinearizationData();
}
else
{
std::cout << o.infilename << " is not linearized"
<< std::endl;
}
}
if (o.show_xref)
{
pdf.showXRefTable();
}
if ((o.show_obj > 0) || o.show_trailer)
{
do_show_obj(pdf, o, exit_code);
}
if (o.show_pages)
{
do_show_pages(pdf, o);
}
if (o.list_attachments)
{
do_list_attachments(pdf, o);
}
if (! o.attachment_to_show.empty())
{
do_show_attachment(pdf, o, exit_code);
}
if ((! pdf.getWarnings().empty()) && (exit_code != EXIT_ERROR))
{
std::cerr << whoami
<< ": operation succeeded with warnings" << std::endl;
exit_code = EXIT_WARNING;
}
if (exit_code)
{
exit(exit_code);
}
}
class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider
{
public:
ImageOptimizer(Options& o, QPDFObjectHandle& image);
virtual ~ImageOptimizer()
{
}
virtual void provideStreamData(int objid, int generation,
Pipeline* pipeline);
PointerHolder<Pipeline> makePipeline(
std::string const& description, Pipeline* next);
bool evaluate(std::string const& description);
private:
Options& o;
QPDFObjectHandle image;
};
ImageOptimizer::ImageOptimizer(Options& o, QPDFObjectHandle& image) :
o(o),
image(image)
{
}
PointerHolder<Pipeline>
ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
{
PointerHolder<Pipeline> result;
QPDFObjectHandle dict = image.getDict();
QPDFObjectHandle w_obj = dict.getKey("/Width");
QPDFObjectHandle h_obj = dict.getKey("/Height");
QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace");
if (! (w_obj.isNumber() && h_obj.isNumber()))
{
if (o.verbose && (! description.empty()))
{
std::cout << whoami << ": " << description
<< ": not optimizing because image dictionary"
<< " is missing required keys" << std::endl;
}
return result;
}
QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent");
if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8)))
{
QTC::TC("qpdf", "qpdf image optimize bits per component");
if (o.verbose && (! description.empty()))
{
std::cout << whoami << ": " << description
<< ": not optimizing because image has other than"
<< " 8 bits per component" << std::endl;
}
return result;
}
// Files have been seen in the wild whose width and height are
// floating point, which is goofy, but we can deal with it.
JDIMENSION w = 0;
if (w_obj.isInteger())
{
w = w_obj.getUIntValueAsUInt();
}
else
{
w = static_cast<JDIMENSION>(w_obj.getNumericValue());
}
JDIMENSION h = 0;
if (h_obj.isInteger())
{
h = h_obj.getUIntValueAsUInt();
}
else
{
h = static_cast<JDIMENSION>(h_obj.getNumericValue());
}
std::string colorspace = (colorspace_obj.isName() ?
colorspace_obj.getName() :
std::string());
int components = 0;
J_COLOR_SPACE cs = JCS_UNKNOWN;
if (colorspace == "/DeviceRGB")
{
components = 3;
cs = JCS_RGB;
}
else if (colorspace == "/DeviceGray")
{
components = 1;
cs = JCS_GRAYSCALE;
}
else if (colorspace == "/DeviceCMYK")
{
components = 4;
cs = JCS_CMYK;
}
else
{
QTC::TC("qpdf", "qpdf image optimize colorspace");
if (o.verbose && (! description.empty()))
{
std::cout << whoami << ": " << description
<< ": not optimizing because qpdf can't optimize"
<< " images with this colorspace" << std::endl;
}
return result;
}
if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) ||
((o.oi_min_height > 0) && (h <= o.oi_min_height)) ||
((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area)))
{
QTC::TC("qpdf", "qpdf image optimize too small");
if (o.verbose && (! description.empty()))
{
std::cout << whoami << ": " << description
<< ": not optimizing because image"
<< " is smaller than requested minimum dimensions"
<< std::endl;
}
return result;
}
result = new Pl_DCT("jpg", next, w, h, components, cs);
return result;
}
bool
ImageOptimizer::evaluate(std::string const& description)
{
if (! image.pipeStreamData(0, 0, qpdf_dl_specialized, true))
{
QTC::TC("qpdf", "qpdf image optimize no pipeline");
if (o.verbose)
{
std::cout << whoami << ": " << description
<< ": not optimizing because unable to decode data"
<< " or data already uses DCT"
<< std::endl;
}
return false;
}
Pl_Discard d;
Pl_Count c("count", &d);
PointerHolder<Pipeline> p = makePipeline(description, &c);
if (p.getPointer() == 0)
{
// message issued by makePipeline
return false;
}
if (! image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized))
{
return false;
}
long long orig_length = image.getDict().getKey("/Length").getIntValue();
if (c.getCount() >= orig_length)
{
QTC::TC("qpdf", "qpdf image optimize no shrink");
if (o.verbose)
{
std::cout << whoami << ": " << description
<< ": not optimizing because DCT compression does not"
<< " reduce image size" << std::endl;
}
return false;
}
if (o.verbose)
{
std::cout << whoami << ": " << description
<< ": optimizing image reduces size from "
<< orig_length << " to " << c.getCount()
<< std::endl;
}
return true;
}
void
ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline)
{
PointerHolder<Pipeline> p = makePipeline("", pipeline);
if (p.getPointer() == 0)
{
// Should not be possible
image.warnIfPossible("unable to create pipeline after previous"
" success; image data will be lost");
pipeline->finish();
return;
}
image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized,
false, false);
}
template <typename T>
static PointerHolder<QPDF> do_process_once(
void (QPDF::*fn)(T, char const*),
T item, char const* password,
Options& o, bool empty)
{
PointerHolder<QPDF> pdf = new QPDF;
set_qpdf_options(*pdf, o);
if (empty)
{
pdf->emptyPDF();
}
else
{
((*pdf).*fn)(item, password);
}
return pdf;
}
template <typename T>
static PointerHolder<QPDF> do_process(
void (QPDF::*fn)(T, char const*),
T item, char const* password,
Options& o, bool empty)
{
// If a password has been specified but doesn't work, try other
// passwords that are equivalent in different character encodings.
// This makes it possible to open PDF files that were encrypted
// using incorrect string encodings. For example, if someone used
// a password encoded in PDF Doc encoding or Windows code page
// 1252 for an AES-encrypted file or a UTF-8-encoded password on
// an RC4-encrypted file, or if the password was properly encoded
// by the password given here was incorrectly encoded, there's a
// good chance we'd succeed here.
std::string ptemp;
if (password && (! o.password_is_hex_key))
{
if (o.password_mode == pm_hex_bytes)
{
// Special case: handle --password-mode=hex-bytes for input
// password as well as output password
QTC::TC("qpdf", "qpdf input password hex-bytes");
ptemp = QUtil::hex_decode(password);
password = ptemp.c_str();
}
}
if ((password == 0) || empty || o.password_is_hex_key ||
o.suppress_password_recovery)
{
// There is no password, or we're not doing recovery, so just
// do the normal processing with the supplied password.
return do_process_once(fn, item, password, o, empty);
}
// Get a list of otherwise encoded strings. Keep in scope for this
// method.
std::vector<std::string> passwords_str =
QUtil::possible_repaired_encodings(password);
// Represent to char const*, as required by the QPDF class.
std::vector<char const*> passwords;
for (std::vector<std::string>::iterator iter = passwords_str.begin();
iter != passwords_str.end(); ++iter)
{
passwords.push_back((*iter).c_str());
}
// We always try the supplied password first because it is the
// first string returned by possible_repaired_encodings. If there
// is more than one option, go ahead and put the supplied password
// at the end so that it's that decoding attempt whose exception
// is thrown.
if (passwords.size() > 1)
{
passwords.push_back(password);
}
// Try each password. If one works, return the resulting object.
// If they all fail, throw the exception thrown by the final
// attempt, which, like the first attempt, will be with the
// supplied password.
bool warned = false;
for (std::vector<char const*>::iterator iter = passwords.begin();
iter != passwords.end(); ++iter)
{
try
{
return do_process_once(fn, item, *iter, o, empty);
}
catch (QPDFExc& e)
{
std::vector<char const*>::iterator next = iter;
++next;
if (next == passwords.end())
{
throw e;
}
}
if ((! warned) && o.verbose)
{
warned = true;
std::cout << whoami << ": supplied password didn't work;"
<< " trying other passwords based on interpreting"
<< " password with different string encodings"
<< std::endl;
}
}
// Should not be reachable
throw std::logic_error("do_process returned");
}
static PointerHolder<QPDF> process_file(char const* filename,
char const* password,
Options& o)
{
return do_process(&QPDF::processFile, filename, password, o,
strcmp(filename, "") == 0);
}
static PointerHolder<QPDF> process_input_source(
PointerHolder<InputSource> is, char const* password, Options& o)
{
return do_process(&QPDF::processInputSource, is, password, o, false);
}
static void validate_under_overlay(QPDF& pdf, UnderOverlay* uo, Options& o)
{
if (0 == uo->filename)
{
return;
}
QPDFPageDocumentHelper main_pdh(pdf);
int main_npages = QIntC::to_int(main_pdh.getAllPages().size());
uo->pdf = process_file(uo->filename, uo->password, o);
QPDFPageDocumentHelper uo_pdh(*(uo->pdf));
int uo_npages = QIntC::to_int(uo_pdh.getAllPages().size());
try
{
uo->to_pagenos = QUtil::parse_numrange(uo->to_nr, main_npages);
}
catch (std::runtime_error& e)
{
usageExit("parsing numeric range for " + uo->which +
" \"to\" pages: " + e.what());
}
try
{
if (0 == strlen(uo->from_nr))
{
QTC::TC("qpdf", "qpdf from_nr from repeat_nr");
uo->from_nr = uo->repeat_nr;
}
uo->from_pagenos = QUtil::parse_numrange(uo->from_nr, uo_npages);
if (strlen(uo->repeat_nr))
{
uo->repeat_pagenos =
QUtil::parse_numrange(uo->repeat_nr, uo_npages);
}
}
catch (std::runtime_error& e)
{
usageExit("parsing numeric range for " + uo->which + " file " +
uo->filename + ": " + e.what());
}
}
static void get_uo_pagenos(UnderOverlay& uo,
std::map<int, std::vector<int> >& pagenos)
{
size_t idx = 0;
size_t from_size = uo.from_pagenos.size();
size_t repeat_size = uo.repeat_pagenos.size();
for (std::vector<int>::iterator iter = uo.to_pagenos.begin();
iter != uo.to_pagenos.end(); ++iter, ++idx)
{
if (idx < from_size)
{
pagenos[*iter].push_back(uo.from_pagenos.at(idx));
}
else if (repeat_size)
{
pagenos[*iter].push_back(
uo.repeat_pagenos.at((idx - from_size) % repeat_size));
}
}
}
static QPDFAcroFormDocumentHelper* get_afdh_for_qpdf(
std::map<unsigned long long,
PointerHolder<QPDFAcroFormDocumentHelper>>& afdh_map,
QPDF* q)
{
auto uid = q->getUniqueId();
if (! afdh_map.count(uid))
{
afdh_map[uid] = new QPDFAcroFormDocumentHelper(*q);
}
return afdh_map[uid].getPointer();
}
static void do_under_overlay_for_page(
QPDF& pdf,
Options& o,
UnderOverlay& uo,
std::map<int, std::vector<int> >& pagenos,
size_t page_idx,
std::map<int, QPDFObjectHandle>& fo,
std::vector<QPDFPageObjectHelper>& pages,
QPDFPageObjectHelper& dest_page,
bool before)
{
int pageno = 1 + QIntC::to_int(page_idx);
if (! pagenos.count(pageno))
{
return;
}
std::map<unsigned long long,
PointerHolder<QPDFAcroFormDocumentHelper>> afdh;
auto make_afdh = [&](QPDFPageObjectHelper& ph) {
QPDF* q = ph.getObjectHandle().getOwningQPDF();
return get_afdh_for_qpdf(afdh, q);
};
auto dest_afdh = make_afdh(dest_page);
std::string content;
int min_suffix = 1;
QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true);
if (! resources.isDictionary())
{
QTC::TC("qpdf", "qpdf overlay page with no resources");
resources = QPDFObjectHandle::newDictionary();
dest_page.getObjectHandle().replaceKey("/Resources", resources);
}
for (std::vector<int>::iterator iter = pagenos[pageno].begin();
iter != pagenos[pageno].end(); ++iter)
{
int from_pageno = *iter;
if (o.verbose)
{
std::cout << " " << uo.which << " " << from_pageno << std::endl;
}
auto from_page = pages.at(QIntC::to_size(from_pageno - 1));
if (0 == fo.count(from_pageno))
{
fo[from_pageno] =
pdf.copyForeignObject(
from_page.getFormXObjectForPage());
}
// If the same page is overlaid or underlaid multiple times,
// we'll generate multiple names for it, but that's harmless
// and also a pretty goofy case that's not worth coding
// around.
std::string name = resources.getUniqueResourceName("/Fx", min_suffix);
QPDFMatrix cm;
std::string new_content = dest_page.placeFormXObject(
fo[from_pageno], name,
dest_page.getTrimBox().getArrayAsRectangle(), cm);
dest_page.copyAnnotations(
from_page, cm, dest_afdh, make_afdh(from_page));
if (! new_content.empty())
{
resources.mergeResources(
QPDFObjectHandle::parse("<< /XObject << >> >>"));
auto xobject = resources.getKey("/XObject");
if (xobject.isDictionary())
{
xobject.replaceKey(name, fo[from_pageno]);
}
++min_suffix;
content += new_content;
}
}
if (! content.empty())
{
if (before)
{
dest_page.addPageContents(
QPDFObjectHandle::newStream(&pdf, content), true);
}
else
{
dest_page.addPageContents(
QPDFObjectHandle::newStream(&pdf, "q\n"), true);
dest_page.addPageContents(
QPDFObjectHandle::newStream(&pdf, "\nQ\n" + content), false);
}
}
}
static void handle_under_overlay(QPDF& pdf, Options& o)
{
validate_under_overlay(pdf, &o.underlay, o);
validate_under_overlay(pdf, &o.overlay, o);
if ((0 == o.underlay.pdf.getPointer()) &&
(0 == o.overlay.pdf.getPointer()))
{
return;
}
std::map<int, std::vector<int> > underlay_pagenos;
get_uo_pagenos(o.underlay, underlay_pagenos);
std::map<int, std::vector<int> > overlay_pagenos;
get_uo_pagenos(o.overlay, overlay_pagenos);
std::map<int, QPDFObjectHandle> underlay_fo;
std::map<int, QPDFObjectHandle> overlay_fo;
std::vector<QPDFPageObjectHelper> upages;
if (o.underlay.pdf.getPointer())
{
upages = QPDFPageDocumentHelper(*(o.underlay.pdf)).getAllPages();
}
std::vector<QPDFPageObjectHelper> opages;
if (o.overlay.pdf.getPointer())
{
opages = QPDFPageDocumentHelper(*(o.overlay.pdf)).getAllPages();
}
QPDFPageDocumentHelper main_pdh(pdf);
std::vector<QPDFPageObjectHelper> main_pages = main_pdh.getAllPages();
size_t main_npages = main_pages.size();
if (o.verbose)
{
std::cout << whoami << ": processing underlay/overlay" << std::endl;
}
for (size_t i = 0; i < main_npages; ++i)
{
if (o.verbose)
{
std::cout << " page " << 1+i << std::endl;
}
do_under_overlay_for_page(pdf, o, o.underlay, underlay_pagenos, i,
underlay_fo, upages, main_pages.at(i),
true);
do_under_overlay_for_page(pdf, o, o.overlay, overlay_pagenos, i,
overlay_fo, opages, main_pages.at(i),
false);
}
}
static void maybe_set_pagemode(QPDF& pdf, std::string const& pagemode)
{
auto root = pdf.getRoot();
if (root.getKey("/PageMode").isNull())
{
root.replaceKey("/PageMode", QPDFObjectHandle::newName(pagemode));
}
}
static void add_attachments(QPDF& pdf, Options& o, int& exit_code)
{
maybe_set_pagemode(pdf, "/UseAttachments");
QPDFEmbeddedFileDocumentHelper efdh(pdf);
for (auto const& to_add: o.attachments_to_add)
{
if ((! to_add.replace) && efdh.getEmbeddedFile(to_add.key))
{
std::cerr << whoami << ": " << pdf.getFilename()
<< " already has an attachment with key = "
<< to_add.key << "; use --replace to replace"
<< " or --key to specify a different key"
<< std::endl;
exit_code = EXIT_ERROR;
continue;
}
auto fs = QPDFFileSpecObjectHelper::createFileSpec(
pdf, to_add.filename, to_add.path);
if (! to_add.description.empty())
{
fs.setDescription(to_add.description);
}
auto efs = QPDFEFStreamObjectHelper(fs.getEmbeddedFileStream());
efs.setCreationDate(to_add.creationdate)
.setModDate(to_add.moddate);
if (! to_add.mimetype.empty())
{
efs.setSubtype(to_add.mimetype);
}
efdh.replaceEmbeddedFile(to_add.key, fs);
if (o.verbose)
{
std::cout << whoami << ": attached " << to_add.path
<< " as " << to_add.filename
<< " with key " << to_add.key << std::endl;
}
}
}
static void copy_attachments(QPDF& pdf, Options& o, int& exit_code)
{
maybe_set_pagemode(pdf, "/UseAttachments");
QPDFEmbeddedFileDocumentHelper efdh(pdf);
for (auto const& to_copy: o.attachments_to_copy)
{
auto other = process_file(
to_copy.path.c_str(), to_copy.password.c_str(), o);
QPDFEmbeddedFileDocumentHelper other_efdh(*other);
auto other_attachments = other_efdh.getEmbeddedFiles();
for (auto const& iter: other_attachments)
{
if (o.verbose)
{
std::cout << whoami << ": copying attachments from "
<< to_copy.path << std::endl;
}
std::string new_key = to_copy.prefix + iter.first;
if (efdh.getEmbeddedFile(new_key))
{
exit_code = EXIT_ERROR;
std::cerr << whoami << to_copy.path << " and "
<< pdf.getFilename()
<< " both have attachments with key " << new_key
<< "; use --prefix with --copy-attachments-from"
<< " or manually copy individual attachments"
<< std::endl;
}
else
{
auto new_fs_oh = pdf.copyForeignObject(
iter.second->getObjectHandle());
efdh.replaceEmbeddedFile(
new_key, QPDFFileSpecObjectHelper(new_fs_oh));
if (o.verbose)
{
std::cout << " " << iter.first << " -> " << new_key
<< std::endl;
}
}
}
if ((other->anyWarnings()) && (exit_code == 0))
{
exit_code = EXIT_WARNING;
}
}
}
static void handle_transformations(QPDF& pdf, Options& o, int& exit_code)
{
QPDFPageDocumentHelper dh(pdf);
PointerHolder<QPDFAcroFormDocumentHelper> afdh;
auto make_afdh = [&]() {
if (! afdh.getPointer())
{
afdh = new QPDFAcroFormDocumentHelper(pdf);
}
};
if (o.externalize_inline_images)
{
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
iter != pages.end(); ++iter)
{
QPDFPageObjectHelper& ph(*iter);
ph.externalizeInlineImages(o.ii_min_bytes);
}
}
if (o.optimize_images)
{
int pageno = 0;
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
iter != pages.end(); ++iter)
{
++pageno;
QPDFPageObjectHelper& ph(*iter);
QPDFObjectHandle page = ph.getObjectHandle();
std::map<std::string, QPDFObjectHandle> images = ph.getImages();
for (auto& iter2: images)
{
std::string name = iter2.first;
QPDFObjectHandle& image = iter2.second;
ImageOptimizer* io = new ImageOptimizer(o, image);
PointerHolder<QPDFObjectHandle::StreamDataProvider> sdp(io);
if (io->evaluate("image " + name + " on page " +
QUtil::int_to_string(pageno)))
{
QPDFObjectHandle new_image =
QPDFObjectHandle::newStream(&pdf);
new_image.replaceDict(image.getDict().shallowCopy());
new_image.replaceStreamData(
sdp,
QPDFObjectHandle::newName("/DCTDecode"),
QPDFObjectHandle::newNull());
ph.getAttribute("/Resources", true).
getKey("/XObject").replaceKey(
name, new_image);
}
}
}
}
if (o.generate_appearances)
{
make_afdh();
afdh->generateAppearancesIfNeeded();
}
if (o.flatten_annotations)
{
dh.flattenAnnotations(o.flatten_annotations_required,
o.flatten_annotations_forbidden);
}
if (o.coalesce_contents)
{
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
iter != pages.end(); ++iter)
{
(*iter).coalesceContentStreams();
}
}
if (o.flatten_rotation)
{
make_afdh();
for (auto& page: dh.getAllPages())
{
page.flattenRotation(afdh.getPointer());
}
}
if (o.remove_page_labels)
{
pdf.getRoot().removeKey("/PageLabels");
}
if (! o.attachments_to_remove.empty())
{
QPDFEmbeddedFileDocumentHelper efdh(pdf);
for (auto const& key: o.attachments_to_remove)
{
if (efdh.removeEmbeddedFile(key))
{
if (o.verbose)
{
std::cout << whoami <<
": removed attachment " << key << std::endl;
}
}
else
{
std::cerr << whoami <<
": attachment " << key << " not found" << std::endl;
exit_code = EXIT_ERROR;
}
}
}
if (! o.attachments_to_add.empty())
{
add_attachments(pdf, o, exit_code);
}
if (! o.attachments_to_copy.empty())
{
copy_attachments(pdf, o, exit_code);
}
}
static bool should_remove_unreferenced_resources(QPDF& pdf, Options& o)
{
if (o.remove_unreferenced_page_resources == re_no)
{
return false;
}
else if (o.remove_unreferenced_page_resources == re_yes)
{
return true;
}
// Unreferenced resources are common in files where resources
// dictionaries are shared across pages. As a heuristic, we look
// in the file for shared resources dictionaries or shared XObject
// subkeys of resources dictionaries either on pages or on form
// XObjects in pages. If we find any, then there is a higher
// likelihood that the expensive process of finding unreferenced
// resources is worth it.
// Return true as soon as we find any shared resources.
std::set<QPDFObjGen> resources_seen; // shared resources detection
std::set<QPDFObjGen> nodes_seen; // loop detection
if (o.verbose)
{
std::cout << whoami << ": " << pdf.getFilename()
<< ": checking for shared resources" << std::endl;
}
std::list<QPDFObjectHandle> queue;
queue.push_back(pdf.getRoot().getKey("/Pages"));
while (! queue.empty())
{
QPDFObjectHandle node = *queue.begin();
queue.pop_front();
QPDFObjGen og = node.getObjGen();
if (nodes_seen.count(og))
{
continue;
}
nodes_seen.insert(og);
QPDFObjectHandle dict = node.isStream() ? node.getDict() : node;
QPDFObjectHandle kids = dict.getKey("/Kids");
if (kids.isArray())
{
// This is a non-leaf node.
if (dict.hasKey("/Resources"))
{
QTC::TC("qpdf", "qpdf found resources in non-leaf");
if (o.verbose)
{
std::cout << " found resources in non-leaf page node "
<< og.getObj() << " " << og.getGen()
<< std::endl;
}
return true;
}
int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i)
{
queue.push_back(kids.getArrayItem(i));
}
}
else
{
// This is a leaf node or a form XObject.
QPDFObjectHandle resources = dict.getKey("/Resources");
if (resources.isIndirect())
{
QPDFObjGen resources_og = resources.getObjGen();
if (resources_seen.count(resources_og))
{
QTC::TC("qpdf", "qpdf found shared resources in leaf");
if (o.verbose)
{
std::cout << " found shared resources in leaf node "
<< og.getObj() << " " << og.getGen()
<< ": "
<< resources_og.getObj() << " "
<< resources_og.getGen()
<< std::endl;
}
return true;
}
resources_seen.insert(resources_og);
}
QPDFObjectHandle xobject = (resources.isDictionary() ?
resources.getKey("/XObject") :
QPDFObjectHandle::newNull());
if (xobject.isIndirect())
{
QPDFObjGen xobject_og = xobject.getObjGen();
if (resources_seen.count(xobject_og))
{
QTC::TC("qpdf", "qpdf found shared xobject in leaf");
if (o.verbose)
{
std::cout << " found shared xobject in leaf node "
<< og.getObj() << " " << og.getGen()
<< ": "
<< xobject_og.getObj() << " "
<< xobject_og.getGen()
<< std::endl;
}
return true;
}
resources_seen.insert(xobject_og);
}
if (xobject.isDictionary())
{
for (auto const& k: xobject.getKeys())
{
QPDFObjectHandle xobj = xobject.getKey(k);
if (xobj.isStream() &&
xobj.getDict().getKey("/Type").isName() &&
("/XObject" ==
xobj.getDict().getKey("/Type").getName()) &&
xobj.getDict().getKey("/Subtype").isName() &&
("/Form" ==
xobj.getDict().getKey("/Subtype").getName()))
{
queue.push_back(xobj);
}
}
}
}
}
if (o.verbose)
{
std::cout << whoami << ": no shared resources found" << std::endl;
}
return false;
}
static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page)
{
QPDFObjectHandle result = page;
if (page.getOwningQPDF() != &pdf)
{
// Calling copyForeignObject on an object we already copied
// will give us the already existing copy.
result = pdf.copyForeignObject(page);
}
return result;
}
static QPDFObjectHandle added_page(QPDF& pdf, QPDFPageObjectHelper page)
{
return added_page(pdf, page.getObjectHandle());
}
static void handle_page_specs(
QPDF& pdf, Options& o, bool& warnings,
std::vector<PointerHolder<QPDF>>& page_heap)
{
// Parse all page specifications and translate them into lists of
// actual pages.
// Handle "." as a shortcut for the input file
for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
iter != o.page_specs.end(); ++iter)
{
PageSpec& page_spec = *iter;
if (page_spec.filename == ".")
{
page_spec.filename = o.infilename;
}
}
if (! o.keep_files_open_set)
{
// Count the number of distinct files to determine whether we
// should keep files open or not. Rather than trying to code
// some portable heuristic based on OS limits, just hard-code
// this at a given number and allow users to override.
std::set<std::string> filenames;
for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
iter != o.page_specs.end(); ++iter)
{
PageSpec& page_spec = *iter;
filenames.insert(page_spec.filename);
}
if (filenames.size() > o.keep_files_open_threshold)
{
QTC::TC("qpdf", "qpdf disable keep files open");
if (o.verbose)
{
std::cout << whoami << ": selecting --keep-open-files=n"
<< std::endl;
}
o.keep_files_open = false;
}
else
{
if (o.verbose)
{
std::cout << whoami << ": selecting --keep-open-files=y"
<< std::endl;
}
o.keep_files_open = true;
QTC::TC("qpdf", "qpdf don't disable keep files open");
}
}
// Create a QPDF object for each file that we may take pages from.
std::map<std::string, QPDF*> page_spec_qpdfs;
std::map<std::string, ClosedFileInputSource*> page_spec_cfis;
page_spec_qpdfs[o.infilename] = &pdf;
std::vector<QPDFPageData> parsed_specs;
std::map<unsigned long long, std::set<QPDFObjGen> > copied_pages;
for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
iter != o.page_specs.end(); ++iter)
{
PageSpec& page_spec = *iter;
if (page_spec_qpdfs.count(page_spec.filename) == 0)
{
// Open the PDF file and store the QPDF object. Throw a
// PointerHolder to the qpdf into a heap so that it
// survives through copying to the output but gets cleaned up
// automatically at the end. Do not canonicalize the file
// name. Using two different paths to refer to the same
// file is a document workaround for duplicating a page.
// If you are using this an example of how to do this with
// the API, you can just create two different QPDF objects
// to the same underlying file with the same path to
// achieve the same affect.
char const* password = page_spec.password;
if (o.encryption_file && (password == 0) &&
(page_spec.filename == o.encryption_file))
{
QTC::TC("qpdf", "qpdf pages encryption password");
password = o.encryption_file_password;
}
if (o.verbose)
{
std::cout << whoami << ": processing "
<< page_spec.filename << std::endl;
}
PointerHolder<InputSource> is;
ClosedFileInputSource* cis = 0;
if (! o.keep_files_open)
{
QTC::TC("qpdf", "qpdf keep files open n");
cis = new ClosedFileInputSource(page_spec.filename.c_str());
is = cis;
cis->stayOpen(true);
}
else
{
QTC::TC("qpdf", "qpdf keep files open y");
FileInputSource* fis = new FileInputSource();
is = fis;
fis->setFilename(page_spec.filename.c_str());
}
PointerHolder<QPDF> qpdf_ph = process_input_source(is, password, o);
page_heap.push_back(qpdf_ph);
page_spec_qpdfs[page_spec.filename] = qpdf_ph.getPointer();
if (cis)
{
cis->stayOpen(false);
page_spec_cfis[page_spec.filename] = cis;
}
}
// Read original pages from the PDF, and parse the page range
// associated with this occurrence of the file.
parsed_specs.push_back(
QPDFPageData(page_spec.filename,
page_spec_qpdfs[page_spec.filename],
page_spec.range));
}
std::map<unsigned long long, bool> remove_unreferenced;
if (o.remove_unreferenced_page_resources != re_no)
{
for (std::map<std::string, QPDF*>::iterator iter =
page_spec_qpdfs.begin();
iter != page_spec_qpdfs.end(); ++iter)
{
std::string const& filename = (*iter).first;
ClosedFileInputSource* cis = 0;
if (page_spec_cfis.count(filename))
{
cis = page_spec_cfis[filename];
cis->stayOpen(true);
}
QPDF& other(*((*iter).second));
auto other_uuid = other.getUniqueId();
if (remove_unreferenced.count(other_uuid) == 0)
{
remove_unreferenced[other_uuid] =
should_remove_unreferenced_resources(other, o);
}
if (cis)
{
cis->stayOpen(false);
}
}
}
// Clear all pages out of the primary QPDF's pages tree but leave
// the objects in place in the file so they can be re-added
// without changing their object numbers. This enables other
// things in the original file, such as outlines, to continue to
// work.
if (o.verbose)
{
std::cout << whoami
<< ": removing unreferenced pages from primary input"
<< std::endl;
}
QPDFPageDocumentHelper dh(pdf);
std::vector<QPDFPageObjectHelper> orig_pages = dh.getAllPages();
for (std::vector<QPDFPageObjectHelper>::iterator iter =
orig_pages.begin();
iter != orig_pages.end(); ++iter)
{
dh.removePage(*iter);
}
if (o.collate && (parsed_specs.size() > 1))
{
// Collate the pages by selecting one page from each spec in
// order. When a spec runs out of pages, stop selecting from
// it.
std::vector<QPDFPageData> new_parsed_specs;
size_t nspecs = parsed_specs.size();
size_t cur_page = 0;
bool got_pages = true;
while (got_pages)
{
got_pages = false;
for (size_t i = 0; i < nspecs; ++i)
{
QPDFPageData& page_data = parsed_specs.at(i);
for (size_t j = 0; j < o.collate; ++j)
{
if (cur_page + j < page_data.selected_pages.size())
{
got_pages = true;
new_parsed_specs.push_back(
QPDFPageData(
page_data,
page_data.selected_pages.at(cur_page + j)));
}
}
}
cur_page += o.collate;
}
parsed_specs = new_parsed_specs;
}
// Add all the pages from all the files in the order specified.
// Keep track of any pages from the original file that we are
// selecting.
std::set<int> selected_from_orig;
std::vector<QPDFObjectHandle> new_labels;
bool any_page_labels = false;
int out_pageno = 0;
std::map<unsigned long long,
PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map;
auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf);
std::set<QPDFObjGen> referenced_fields;
for (std::vector<QPDFPageData>::iterator iter =
parsed_specs.begin();
iter != parsed_specs.end(); ++iter)
{
QPDFPageData& page_data = *iter;
ClosedFileInputSource* cis = 0;
if (page_spec_cfis.count(page_data.filename))
{
cis = page_spec_cfis[page_data.filename];
cis->stayOpen(true);
}
QPDFPageLabelDocumentHelper pldh(*page_data.qpdf);
auto other_afdh = get_afdh_for_qpdf(afdh_map, page_data.qpdf);
if (pldh.hasPageLabels())
{
any_page_labels = true;
}
if (o.verbose)
{
std::cout << whoami << ": adding pages from "
<< page_data.filename << std::endl;
}
for (std::vector<int>::iterator pageno_iter =
page_data.selected_pages.begin();
pageno_iter != page_data.selected_pages.end();
++pageno_iter, ++out_pageno)
{
// Pages are specified from 1 but numbered from 0 in the
// vector
int pageno = *pageno_iter - 1;
pldh.getLabelsForPageRange(pageno, pageno, out_pageno,
new_labels);
QPDFPageObjectHelper to_copy =
page_data.orig_pages.at(QIntC::to_size(pageno));
QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen();
unsigned long long from_uuid = page_data.qpdf->getUniqueId();
if (copied_pages[from_uuid].count(to_copy_og))
{
QTC::TC("qpdf", "qpdf copy same page more than once",
(page_data.qpdf == &pdf) ? 0 : 1);
to_copy = to_copy.shallowCopyPage();
}
else
{
copied_pages[from_uuid].insert(to_copy_og);
if (remove_unreferenced[from_uuid])
{
to_copy.removeUnreferencedResources();
}
}
dh.addPage(to_copy, false);
bool first_copy_from_orig = false;
bool this_file = (page_data.qpdf == &pdf);
if (this_file)
{
// This is a page from the original file. Keep track
// of the fact that we are using it.
first_copy_from_orig = (selected_from_orig.count(pageno) == 0);
selected_from_orig.insert(pageno);
}
auto new_page = added_page(pdf, to_copy);
// Try to avoid gratuitously renaming fields. In the case
// of where we're just extracting a bunch of pages from
// the original file and not copying any page more than
// once, there's no reason to do anything with the fields.
// Since we don't remove fields from the original file
// until all copy operations are completed, any foreign
// pages that conflict with original pages will be
// adjusted. If we copy any page from the original file
// more than once, that page would be in conflict with the
// previous copy of itself.
if (other_afdh->hasAcroForm() &&
((! this_file) || (! first_copy_from_orig)))
{
if (! this_file)
{
QTC::TC("qpdf", "qpdf copy fields not this file");
}
else if (! first_copy_from_orig)
{
QTC::TC("qpdf", "qpdf copy fields non-first from orig");
}
try
{
this_afdh->fixCopiedAnnotations(
new_page, to_copy.getObjectHandle(), *other_afdh,
&referenced_fields);
}
catch (std::exception& e)
{
pdf.warn(
QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(),
"", 0, "Exception caught while fixing copied"
" annotations. This may be a qpdf bug. " +
std::string("Exception: ") + e.what()));
}
}
}
if (page_data.qpdf->anyWarnings())
{
warnings = true;
}
if (cis)
{
cis->stayOpen(false);
}
}
if (any_page_labels)
{
QPDFObjectHandle page_labels =
QPDFObjectHandle::newDictionary();
page_labels.replaceKey(
"/Nums", QPDFObjectHandle::newArray(new_labels));
pdf.getRoot().replaceKey("/PageLabels", page_labels);
}
// Delete page objects for unused page in primary. This prevents
// those objects from being preserved by being referred to from
// other places, such as the outlines dictionary. Also make sure
// we keep form fields from pages we preserved.
for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
{
auto page = orig_pages.at(pageno);
if (selected_from_orig.count(QIntC::to_int(pageno)))
{
for (auto field: this_afdh->getFormFieldsForPage(page))
{
QTC::TC("qpdf", "qpdf pages keeping field from original");
referenced_fields.insert(field.getObjectHandle().getObjGen());
}
}
else
{
pdf.replaceObject(
page.getObjectHandle().getObjGen(),
QPDFObjectHandle::newNull());
}
}
// Remove unreferenced form fields
if (this_afdh->hasAcroForm())
{
auto acroform = pdf.getRoot().getKey("/AcroForm");
auto fields = acroform.getKey("/Fields");
if (fields.isArray())
{
auto new_fields = QPDFObjectHandle::newArray();
if (fields.isIndirect())
{
new_fields = pdf.makeIndirectObject(new_fields);
}
for (auto const& field: fields.aitems())
{
if (referenced_fields.count(field.getObjGen()))
{
new_fields.appendItem(field);
}
}
if (new_fields.getArrayNItems() > 0)
{
QTC::TC("qpdf", "qpdf keep some fields in pages");
acroform.replaceKey("/Fields", new_fields);
}
else
{
QTC::TC("qpdf", "qpdf no more fields in pages");
pdf.getRoot().removeKey("/AcroForm");
}
}
}
}
static void handle_rotations(QPDF& pdf, Options& o)
{
QPDFPageDocumentHelper dh(pdf);
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
int npages = QIntC::to_int(pages.size());
for (std::map<std::string, RotationSpec>::iterator iter =
o.rotations.begin();
iter != o.rotations.end(); ++iter)
{
std::string const& range = (*iter).first;
RotationSpec const& rspec = (*iter).second;
// range has been previously validated
std::vector<int> to_rotate =
QUtil::parse_numrange(range.c_str(), npages);
for (std::vector<int>::iterator i2 = to_rotate.begin();
i2 != to_rotate.end(); ++i2)
{
int pageno = *i2 - 1;
if ((pageno >= 0) && (pageno < npages))
{
pages.at(QIntC::to_size(pageno)).rotatePage(
rspec.angle, rspec.relative);
}
}
}
}
static void maybe_fix_write_password(int R, Options& o, std::string& password)
{
switch (o.password_mode)
{
case pm_bytes:
QTC::TC("qpdf", "qpdf password mode bytes");
break;
case pm_hex_bytes:
QTC::TC("qpdf", "qpdf password mode hex-bytes");
password = QUtil::hex_decode(password);
break;
case pm_unicode:
case pm_auto:
{
bool has_8bit_chars;
bool is_valid_utf8;
bool is_utf16;
QUtil::analyze_encoding(password,
has_8bit_chars,
is_valid_utf8,
is_utf16);
if (! has_8bit_chars)
{
return;
}
if (o.password_mode == pm_unicode)
{
if (! is_valid_utf8)
{
QTC::TC("qpdf", "qpdf password not unicode");
throw std::runtime_error(
"supplied password is not valid UTF-8");
}
if (R < 5)
{
std::string encoded;
if (! QUtil::utf8_to_pdf_doc(password, encoded))
{
QTC::TC("qpdf", "qpdf password not encodable");
throw std::runtime_error(
"supplied password cannot be encoded for"
" 40-bit or 128-bit encryption formats");
}
password = encoded;
}
}
else
{
if ((R < 5) && is_valid_utf8)
{
std::string encoded;
if (QUtil::utf8_to_pdf_doc(password, encoded))
{
QTC::TC("qpdf", "qpdf auto-encode password");
if (o.verbose)
{
std::cout
<< whoami
<< ": automatically converting Unicode"
<< " password to single-byte encoding as"
<< " required for 40-bit or 128-bit"
<< " encryption" << std::endl;
}
password = encoded;
}
else
{
QTC::TC("qpdf", "qpdf bytes fallback warning");
std::cerr
<< whoami << ": WARNING: "
<< "supplied password looks like a Unicode"
<< " password with characters not allowed in"
<< " passwords for 40-bit and 128-bit encryption;"
<< " most readers will not be able to open this"
<< " file with the supplied password."
<< " (Use --password-mode=bytes to suppress this"
<< " warning and use the password anyway.)"
<< std::endl;
}
}
else if ((R >= 5) && (! is_valid_utf8))
{
QTC::TC("qpdf", "qpdf invalid utf-8 in auto");
throw std::runtime_error(
"supplied password is not a valid Unicode password,"
" which is required for 256-bit encryption; to"
" really use this password, rerun with the"
" --password-mode=bytes option");
}
}
}
break;
}
}
static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
int R = 0;
if (o.keylen == 40)
{
R = 2;
}
else if (o.keylen == 128)
{
if (o.force_V4 || o.cleartext_metadata || o.use_aes)
{
R = 4;
}
else
{
R = 3;
}
}
else if (o.keylen == 256)
{
if (o.force_R5)
{
R = 5;
}
else
{
R = 6;
}
}
else
{
throw std::logic_error("bad encryption keylen");
}
if ((R > 3) && (o.r3_accessibility == false))
{
std::cerr << whoami
<< ": -accessibility=n is ignored for modern"
<< " encryption formats" << std::endl;
}
maybe_fix_write_password(R, o, o.user_password);
maybe_fix_write_password(R, o, o.owner_password);
switch (R)
{
case 2:
w.setR2EncryptionParameters(
o.user_password.c_str(), o.owner_password.c_str(),
o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate);
break;
case 3:
w.setR3EncryptionParameters(
o.user_password.c_str(), o.owner_password.c_str(),
o.r3_accessibility, o.r3_extract,
o.r3_assemble, o.r3_annotate_and_form,
o.r3_form_filling, o.r3_modify_other,
o.r3_print);
break;
case 4:
w.setR4EncryptionParameters(
o.user_password.c_str(), o.owner_password.c_str(),
o.r3_accessibility, o.r3_extract,
o.r3_assemble, o.r3_annotate_and_form,
o.r3_form_filling, o.r3_modify_other,
o.r3_print, !o.cleartext_metadata, o.use_aes);
break;
case 5:
w.setR5EncryptionParameters(
o.user_password.c_str(), o.owner_password.c_str(),
o.r3_accessibility, o.r3_extract,
o.r3_assemble, o.r3_annotate_and_form,
o.r3_form_filling, o.r3_modify_other,
o.r3_print, !o.cleartext_metadata);
break;
case 6:
w.setR6EncryptionParameters(
o.user_password.c_str(), o.owner_password.c_str(),
o.r3_accessibility, o.r3_extract,
o.r3_assemble, o.r3_annotate_and_form,
o.r3_form_filling, o.r3_modify_other,
o.r3_print, !o.cleartext_metadata);
break;
default:
throw std::logic_error("bad encryption R value");
break;
}
}
static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
if (o.compression_level >= 0)
{
Pl_Flate::setCompressionLevel(o.compression_level);
}
if (o.qdf_mode)
{
w.setQDFMode(true);
}
if (o.preserve_unreferenced_objects)
{
w.setPreserveUnreferencedObjects(true);
}
if (o.newline_before_endstream)
{
w.setNewlineBeforeEndstream(true);
}
if (o.normalize_set)
{
w.setContentNormalization(o.normalize);
}
if (o.stream_data_set)
{
w.setStreamDataMode(o.stream_data_mode);
}
if (o.compress_streams_set)
{
w.setCompressStreams(o.compress_streams);
}
if (o.recompress_flate_set)
{
w.setRecompressFlate(o.recompress_flate);
}
if (o.decode_level_set)
{
w.setDecodeLevel(o.decode_level);
}
if (o.decrypt)
{
w.setPreserveEncryption(false);
}
if (o.deterministic_id)
{
w.setDeterministicID(true);
}
if (o.static_id)
{
w.setStaticID(true);
}
if (o.static_aes_iv)
{
w.setStaticAesIV(true);
}
if (o.suppress_original_object_id)
{
w.setSuppressOriginalObjectIDs(true);
}
if (o.copy_encryption)
{
PointerHolder<QPDF> encryption_pdf =
process_file(
o.encryption_file, o.encryption_file_password, o);
w.copyEncryptionParameters(*encryption_pdf);
}
if (o.encrypt)
{
set_encryption_options(pdf, o, w);
}
if (o.linearize)
{
w.setLinearization(true);
}
if (! o.linearize_pass1.empty())
{
w.setLinearizationPass1Filename(o.linearize_pass1);
}
if (o.object_stream_set)
{
w.setObjectStreamMode(o.object_stream_mode);
}
if (! o.min_version.empty())
{
std::string version;
int extension_level = 0;
parse_version(o.min_version, version, extension_level);
w.setMinimumPDFVersion(version, extension_level);
}
if (! o.force_version.empty())
{
std::string version;
int extension_level = 0;
parse_version(o.force_version, version, extension_level);
w.forcePDFVersion(version, extension_level);
}
if (o.progress && o.outfilename)
{
w.registerProgressReporter(new ProgressReporter(o.outfilename));
}
}
static void do_split_pages(QPDF& pdf, Options& o, bool& warnings)
{
// Generate output file pattern
std::string before;
std::string after;
size_t len = strlen(o.outfilename);
char* num_spot = strstr(const_cast<char*>(o.outfilename), "%d");
if (num_spot != 0)
{
QTC::TC("qpdf", "qpdf split-pages %d");
before = std::string(o.outfilename,
QIntC::to_size(num_spot - o.outfilename));
after = num_spot + 2;
}
else if ((len >= 4) &&
(QUtil::str_compare_nocase(
o.outfilename + len - 4, ".pdf") == 0))
{
QTC::TC("qpdf", "qpdf split-pages .pdf");
before = std::string(o.outfilename, len - 4) + "-";
after = o.outfilename + len - 4;
}
else
{
QTC::TC("qpdf", "qpdf split-pages other");
before = std::string(o.outfilename) + "-";
}
if (should_remove_unreferenced_resources(pdf, o))
{
QPDFPageDocumentHelper dh(pdf);
dh.removeUnreferencedResources();
}
QPDFPageLabelDocumentHelper pldh(pdf);
QPDFAcroFormDocumentHelper afdh(pdf);
std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
size_t pageno_len = QUtil::uint_to_string(pages.size()).length();
size_t num_pages = pages.size();
for (size_t i = 0; i < num_pages; i += QIntC::to_size(o.split_pages))
{
size_t first = i + 1;
size_t last = i + QIntC::to_size(o.split_pages);
if (last > num_pages)
{
last = num_pages;
}
QPDF outpdf;
outpdf.emptyPDF();
PointerHolder<QPDFAcroFormDocumentHelper> out_afdh;
if (afdh.hasAcroForm())
{
out_afdh = new QPDFAcroFormDocumentHelper(outpdf);
}
if (o.suppress_warnings)
{
outpdf.setSuppressWarnings(true);
}
for (size_t pageno = first; pageno <= last; ++pageno)
{
QPDFObjectHandle page = pages.at(pageno - 1);
outpdf.addPage(page, false);
auto new_page = added_page(outpdf, page);
if (out_afdh.getPointer())
{
QTC::TC("qpdf", "qpdf copy form fields in split_pages");
try
{
out_afdh->fixCopiedAnnotations(new_page, page, afdh);
}
catch (std::exception& e)
{
pdf.warn(
QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(),
"", 0, "Exception caught while fixing copied"
" annotations. This may be a qpdf bug." +
std::string("Exception: ") + e.what()));
}
}
}
if (pldh.hasPageLabels())
{
std::vector<QPDFObjectHandle> labels;
pldh.getLabelsForPageRange(
QIntC::to_longlong(first - 1),
QIntC::to_longlong(last - 1),
0, labels);
QPDFObjectHandle page_labels =
QPDFObjectHandle::newDictionary();
page_labels.replaceKey(
"/Nums", QPDFObjectHandle::newArray(labels));
outpdf.getRoot().replaceKey("/PageLabels", page_labels);
}
std::string page_range =
QUtil::uint_to_string(first, QIntC::to_int(pageno_len));
if (o.split_pages > 1)
{
page_range += "-" +
QUtil::uint_to_string(last, QIntC::to_int(pageno_len));
}
std::string outfile = before + page_range + after;
if (QUtil::same_file(o.infilename, outfile.c_str()))
{
std::cerr << whoami
<< ": split pages would overwrite input file with "
<< outfile << std::endl;
exit(EXIT_ERROR);
}
QPDFWriter w(outpdf, outfile.c_str());
set_writer_options(outpdf, o, w);
w.write();
if (o.verbose)
{
std::cout << whoami << ": wrote file " << outfile << std::endl;
}
if (outpdf.anyWarnings())
{
warnings = true;
}
}
}
static void write_outfile(QPDF& pdf, Options& o)
{
std::string temp_out;
if (o.replace_input)
{
// Append but don't prepend to the path to generate a
// temporary name. This saves us from having to split the path
// by directory and non-directory.
temp_out = std::string(o.infilename) + ".~qpdf-temp#";
// o.outfilename will be restored to 0 before temp_out
// goes out of scope.
o.outfilename = temp_out.c_str();
}
else if (strcmp(o.outfilename, "-") == 0)
{
o.outfilename = 0;
}
{
// Private scope so QPDFWriter will close the output file
QPDFWriter w(pdf, o.outfilename);
set_writer_options(pdf, o, w);
w.write();
}
if (o.verbose && o.outfilename)
{
std::cout << whoami << ": wrote file "
<< o.outfilename << std::endl;
}
if (o.replace_input)
{
o.outfilename = 0;
}
if (o.replace_input)
{
// We must close the input before we can rename files
pdf.closeInputSource();
std::string backup = std::string(o.infilename) + ".~qpdf-orig";
bool warnings = pdf.anyWarnings();
if (! warnings)
{
backup.append(1, '#');
}
QUtil::rename_file(o.infilename, backup.c_str());
QUtil::rename_file(temp_out.c_str(), o.infilename);
if (warnings)
{
std::cerr << whoami
<< ": there are warnings; original file kept in "
<< backup << std::endl;
}
else
{
try
{
QUtil::remove_file(backup.c_str());
}
catch (QPDFSystemError& e)
{
std::cerr
<< whoami
<< ": unable to delete original file ("
<< e.what() << ");"
<< " original file left in " << backup
<< ", but the input was successfully replaced"
<< std::endl;
}
}
}
}
int realmain(int argc, char* argv[])
{
whoami = QUtil::getWhoami(argv[0]);
QUtil::setLineBuf(stdout);
// Remove prefix added by libtool for consistency during testing.
if (strncmp(whoami, "lt-", 3) == 0)
{
whoami += 3;
}
// ArgParser must stay in scope for the duration of qpdf's run as
// it holds dynamic memory used for argv.
Options o;
ArgParser ap(argc, argv, o);
int exit_code = 0;
try
{
ap.parseOptions();
PointerHolder<QPDF> pdf_ph;
try
{
pdf_ph = process_file(o.infilename, o.password, o);
}
catch (QPDFExc& e)
{
if ((e.getErrorCode() == qpdf_e_password) &&
(o.check_is_encrypted || o.check_requires_password))
{
// Allow --is-encrypted and --requires-password to
// work when an incorrect password is supplied.
return 0;
}
throw e;
}
QPDF& pdf = *pdf_ph;
if (o.check_is_encrypted)
{
if (pdf.isEncrypted())
{
return 0;
}
else
{
return EXIT_IS_NOT_ENCRYPTED;
}
}
else if (o.check_requires_password)
{
if (pdf.isEncrypted())
{
return EXIT_CORRECT_PASSWORD;
}
else
{
return EXIT_IS_NOT_ENCRYPTED;
}
}
bool other_warnings = false;
std::vector<PointerHolder<QPDF>> page_heap;
if (! o.page_specs.empty())
{
handle_page_specs(pdf, o, other_warnings, page_heap);
}
if (! o.rotations.empty())
{
handle_rotations(pdf, o);
}
handle_under_overlay(pdf, o);
handle_transformations(pdf, o, exit_code);
if ((o.outfilename == 0) && (! o.replace_input))
{
do_inspection(pdf, o);
}
else if (o.split_pages)
{
do_split_pages(pdf, o, other_warnings);
}
else
{
write_outfile(pdf, o);
}
if ((! pdf.getWarnings().empty()) || other_warnings)
{
if (! o.suppress_warnings)
{
std::cerr << whoami << ": operation succeeded with warnings;"
<< " resulting file may have some problems"
<< std::endl;
}
// Still return with warning code even if warnings were suppressed.
if (exit_code == 0)
{
exit_code = EXIT_WARNING;
}
}
}
catch (std::exception& e)
{
std::cerr << e.what() << std::endl;
return EXIT_ERROR;
}
return exit_code;
}
#ifdef WINDOWS_WMAIN
extern "C"
int wmain(int argc, wchar_t* argv[])
{
return QUtil::call_main_from_wmain(argc, argv, realmain);
}
#else
int main(int argc, char* argv[])
{
return realmain(argc, argv);
}
#endif