mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-09 01:44:16 +00:00
8843e499b8
Also add copyright notice to a few public headers that were missing one.
1712 lines
48 KiB
C++
1712 lines
48 KiB
C++
#include <iostream>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <fcntl.h>
|
|
#include <stdio.h>
|
|
#include <ctype.h>
|
|
|
|
#include <qpdf/QUtil.hh>
|
|
#include <qpdf/QTC.hh>
|
|
#include <qpdf/Pl_StdioFile.hh>
|
|
#include <qpdf/Pl_Discard.hh>
|
|
#include <qpdf/PointerHolder.hh>
|
|
|
|
#include <qpdf/QPDF.hh>
|
|
#include <qpdf/QPDFExc.hh>
|
|
|
|
#include <qpdf/QPDFWriter.hh>
|
|
|
|
static int const EXIT_ERROR = 2;
|
|
static int const EXIT_WARNING = 3;
|
|
|
|
static char const* whoami = 0;
|
|
|
|
struct PageSpec
|
|
{
|
|
PageSpec(std::string const& filename,
|
|
char const* password,
|
|
char const* range) :
|
|
filename(filename),
|
|
password(password),
|
|
range(range)
|
|
{
|
|
}
|
|
|
|
std::string filename;
|
|
char const* password;
|
|
char const* range;
|
|
};
|
|
|
|
struct QPDFPageData
|
|
{
|
|
QPDFPageData(QPDF* qpdf, char const* range);
|
|
|
|
QPDF* qpdf;
|
|
std::vector<QPDFObjectHandle> orig_pages;
|
|
std::vector<int> selected_pages;
|
|
};
|
|
|
|
// Note: let's not be too noisy about documenting the fact that this
|
|
// software purposely fails to enforce the distinction between user
|
|
// and owner passwords. A user password is sufficient to gain full
|
|
// access to the PDF file, so there is nothing this software can do
|
|
// with an owner password that it couldn't do with a user password
|
|
// other than changing the /P value in the encryption dictionary.
|
|
// (Setting this value requires the owner password.) The
|
|
// documentation discusses this as well.
|
|
|
|
static char const* help = "\
|
|
\n\
|
|
Usage: qpdf [ options ] { infilename | --empty } [ outfilename ]\n\
|
|
\n\
|
|
An option summary appears below. Please see the documentation for details.\n\
|
|
\n\
|
|
Note that when contradictory options are provided, whichever options are\n\
|
|
provided last take precedence.\n\
|
|
\n\
|
|
\n\
|
|
Basic Options\n\
|
|
-------------\n\
|
|
\n\
|
|
--password=password specify a password for accessing encrypted files\n\
|
|
--linearize generated a linearized (web optimized) file\n\
|
|
--copy-encryption=file copy encryption parameters from specified file\n\
|
|
--encryption-file-password=password\n\
|
|
password used to open the file from which encryption\n\
|
|
parameters are being copied\n\
|
|
--encrypt options -- generate an encrypted file\n\
|
|
--decrypt remove any encryption on the file\n\
|
|
--pages options -- select specific pages from one or more files\n\
|
|
\n\
|
|
If none of --copy-encryption, --encrypt or --decrypt are given, qpdf will\n\
|
|
preserve any encryption data associated with a file.\n\
|
|
\n\
|
|
Note that when copying encryption parameters from another file, all\n\
|
|
parameters will be copied, including both user and owner passwords, even\n\
|
|
if the user password is used to open the other file. This works even if\n\
|
|
the owner password is not known.\n\
|
|
\n\
|
|
\n\
|
|
Encryption Options\n\
|
|
------------------\n\
|
|
\n\
|
|
--encrypt user-password owner-password key-length flags --\n\
|
|
\n\
|
|
Note that -- terminates parsing of encryption flags.\n\
|
|
\n\
|
|
Either or both of the user password and the owner password may be\n\
|
|
empty strings.\n\
|
|
\n\
|
|
key-length may be 40, 128, or 256\n\
|
|
\n\
|
|
Additional flags are dependent upon key length.\n\
|
|
\n\
|
|
If 40:\n\
|
|
\n\
|
|
--print=[yn] allow printing\n\
|
|
--modify=[yn] allow document modification\n\
|
|
--extract=[yn] allow text/graphic extraction\n\
|
|
--annotate=[yn] allow comments and form fill-in and signing\n\
|
|
\n\
|
|
If 128:\n\
|
|
\n\
|
|
--accessibility=[yn] allow accessibility to visually impaired\n\
|
|
--extract=[yn] allow other text/graphic extraction\n\
|
|
--print=print-opt control printing access\n\
|
|
--modify=modify-opt control modify access\n\
|
|
--cleartext-metadata prevents encryption of metadata\n\
|
|
--use-aes=[yn] indicates whether to use AES encryption\n\
|
|
--force-V4 forces use of V=4 encryption handler\n\
|
|
\n\
|
|
If 256, options are the same as 128 with these exceptions:\n\
|
|
--force-V4 this option is not available with 256-bit keys\n\
|
|
--use-aes this option is always on with 256-bit keys\n\
|
|
--force-R5 forces use of deprecated R=5 encryption\n\
|
|
\n\
|
|
print-opt may be:\n\
|
|
\n\
|
|
full allow full printing\n\
|
|
low allow only low-resolution printing\n\
|
|
none disallow printing\n\
|
|
\n\
|
|
modify-opt may be:\n\
|
|
\n\
|
|
all allow full document modification\n\
|
|
annotate allow comment authoring and form operations\n\
|
|
form allow form field fill-in and signing\n\
|
|
assembly allow document assembly only\n\
|
|
none allow no modifications\n\
|
|
\n\
|
|
The default for each permission option is to be fully permissive.\n\
|
|
\n\
|
|
Specifying cleartext-metadata forces the PDF version to at least 1.5.\n\
|
|
Specifying use of AES forces the PDF version to at least 1.6. These\n\
|
|
options are both off by default.\n\
|
|
\n\
|
|
The --force-V4 flag forces the V=4 encryption handler introduced in PDF 1.5\n\
|
|
to be used even if not otherwise needed. This option is primarily useful\n\
|
|
for testing qpdf and has no other practical use.\n\
|
|
\n\
|
|
\n\
|
|
Page Selection Options\n\
|
|
----------------------\n\
|
|
\n\
|
|
These options allow pages to be selected from one or more PDF files.\n\
|
|
Whatever file is given as the primary input file is used as the\n\
|
|
starting point, but its pages are replaced with pages as specified.\n\
|
|
\n\
|
|
--pages file [ --password=password ] page-range ... --\n\
|
|
\n\
|
|
For each file that pages should be taken from, specify the file, a\n\
|
|
password needed to open the file (if any), and a page range. The\n\
|
|
password needs to be given only once per file. If any of the input\n\
|
|
files are the same as the primary input file or the file used to copy\n\
|
|
encryption parameters (if specified), you do not need to repeat the\n\
|
|
password here. The same file can be repeated multiple times. All\n\
|
|
non-page data (info, outlines, page numbers, etc. are taken from the\n\
|
|
primary input file. To discard this, use --empty as the primary\n\
|
|
input.\n\
|
|
\n\
|
|
The page range is a set of numbers separated by commas, ranges of\n\
|
|
numbers separated dashes, or combinations of those. The character\n\
|
|
\"z\" represents the last page. Pages can appear in any order. Ranges\n\
|
|
can appear with a high number followed by a low number, which causes the\n\
|
|
pages to appear in reverse. Repeating a number will cause an error, but\n\
|
|
the manual discusses a workaround should you really want to include the\n\
|
|
same page twice.\n\
|
|
\n\
|
|
See the manual for examples and a discussion of additional subtleties.\n\
|
|
\n\
|
|
\n\
|
|
Advanced Transformation Options\n\
|
|
-------------------------------\n\
|
|
\n\
|
|
These transformation options control fine points of how qpdf creates\n\
|
|
the output file. Mostly these are of use only to people who are very\n\
|
|
familiar with the PDF file format or who are PDF developers.\n\
|
|
\n\
|
|
--stream-data=option controls transformation of stream data (below)\n\
|
|
--normalize-content=[yn] enables or disables normalization of content streams\n\
|
|
--suppress-recovery prevents qpdf from attempting to recover damaged files\n\
|
|
--object-streams=mode controls handing of object streams\n\
|
|
--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n\
|
|
--qdf turns on \"QDF mode\" (below)\n\
|
|
--min-version=version sets the minimum PDF version of the output file\n\
|
|
--force-version=version forces this to be the PDF version of the output file\n\
|
|
\n\
|
|
Version numbers may be expressed as major.minor.extension-level, so 1.7.3\n\
|
|
means PDF version 1.7 at extension level 3.\n\
|
|
\n\
|
|
Values for stream data options:\n\
|
|
\n\
|
|
compress recompress stream data when possible (default)\n\
|
|
preserve leave all stream data as is\n\
|
|
uncompress uncompress stream data when possible\n\
|
|
\n\
|
|
Values for object stream mode:\n\
|
|
\n\
|
|
preserve preserve original object streams (default)\n\
|
|
disable don't write any object streams\n\
|
|
generate use object streams wherever possible\n\
|
|
\n\
|
|
In qdf mode, by default, content normalization is turned on, and the\n\
|
|
stream data mode is set to uncompress.\n\
|
|
\n\
|
|
Setting the minimum PDF version of the output file may raise the version\n\
|
|
but will never lower it. Forcing the PDF version of the output file may\n\
|
|
set the PDF version to a lower value than actually allowed by the file's\n\
|
|
contents. You should only do this if you have no other possible way to\n\
|
|
open the file or if you know that the file definitely doesn't include\n\
|
|
features not supported later versions.\n\
|
|
\n\
|
|
Testing, Inspection, and Debugging Options\n\
|
|
------------------------------------------\n\
|
|
\n\
|
|
These options can be useful for digging into PDF files or for use in\n\
|
|
automated test suites for software that uses the qpdf library.\n\
|
|
\n\
|
|
--static-id generate static /ID: FOR TESTING ONLY!\n\
|
|
--static-aes-iv use a static initialization vector for AES-CBC\n\
|
|
This is option is not secure! FOR TESTING ONLY!\n\
|
|
--no-original-object-ids suppress original object ID comments in qdf mode\n\
|
|
--show-encryption quickly show encryption parameters\n\
|
|
--check-linearization check file integrity and linearization status\n\
|
|
--show-linearization check and show all linearization data\n\
|
|
--show-xref show the contents of the cross-reference table\n\
|
|
--show-object=obj[,gen] show the contents of the given object\n\
|
|
--raw-stream-data show raw stream data instead of object contents\n\
|
|
--filtered-stream-data show filtered stream data instead of object contents\n\
|
|
--show-pages shows the object/generation number for each page\n\
|
|
--with-images also shows the object IDs for images on each page\n\
|
|
--check check file structure + encryption, linearization\n\
|
|
\n\
|
|
The --raw-stream-data and --filtered-stream-data options are ignored\n\
|
|
unless --show-object is given. Either of these options will cause the\n\
|
|
stream data to be written to standard output.\n\
|
|
\n\
|
|
If --filtered-stream-data is given and --normalize-content=y is also\n\
|
|
given, qpdf will attempt to normalize the stream data as if it is a\n\
|
|
page content stream. This attempt will be made even if it is not a\n\
|
|
page content stream, in which case it will produce unusable results.\n\
|
|
\n\
|
|
Ordinarily, qpdf exits with a status of 0 on success or a status of 2\n\
|
|
if any errors occurred. In --check mode, if there were warnings but not\n\
|
|
errors, qpdf exits with a status of 3.\n\
|
|
\n";
|
|
|
|
void usage(std::string const& msg)
|
|
{
|
|
std::cerr
|
|
<< std::endl
|
|
<< whoami << ": " << msg << std::endl
|
|
<< std::endl
|
|
<< "Usage: " << whoami << " [options] infile outfile" << std::endl
|
|
<< "For detailed help, run " << whoami << " --help" << std::endl
|
|
<< std::endl;
|
|
exit(EXIT_ERROR);
|
|
}
|
|
|
|
static std::string show_bool(bool v)
|
|
{
|
|
return v ? "allowed" : "not allowed";
|
|
}
|
|
|
|
static std::string show_encryption_method(QPDF::encryption_method_e method)
|
|
{
|
|
std::string result = "unknown";
|
|
switch (method)
|
|
{
|
|
case QPDF::e_none:
|
|
result = "none";
|
|
break;
|
|
case QPDF::e_unknown:
|
|
result = "unknown";
|
|
break;
|
|
case QPDF::e_rc4:
|
|
result = "RC4";
|
|
break;
|
|
case QPDF::e_aes:
|
|
result = "AESv2";
|
|
break;
|
|
case QPDF::e_aesv3:
|
|
result = "AESv3";
|
|
break;
|
|
// no default so gcc will warn for missing case
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static void show_encryption(QPDF& pdf)
|
|
{
|
|
// Extract /P from /Encrypt
|
|
int R = 0;
|
|
int P = 0;
|
|
int V = 0;
|
|
QPDF::encryption_method_e stream_method = QPDF::e_unknown;
|
|
QPDF::encryption_method_e string_method = QPDF::e_unknown;
|
|
QPDF::encryption_method_e file_method = QPDF::e_unknown;
|
|
if (! pdf.isEncrypted(R, P, V,
|
|
stream_method, string_method, file_method))
|
|
{
|
|
std::cout << "File is not encrypted" << std::endl;
|
|
}
|
|
else
|
|
{
|
|
std::cout << "R = " << R << std::endl;
|
|
std::cout << "P = " << P << std::endl;
|
|
std::string user_password = pdf.getTrimmedUserPassword();
|
|
std::cout << "User password = " << user_password << std::endl
|
|
<< "extract for accessibility: "
|
|
<< show_bool(pdf.allowAccessibility()) << std::endl
|
|
<< "extract for any purpose: "
|
|
<< show_bool(pdf.allowExtractAll()) << std::endl
|
|
<< "print low resolution: "
|
|
<< show_bool(pdf.allowPrintLowRes()) << std::endl
|
|
<< "print high resolution: "
|
|
<< show_bool(pdf.allowPrintHighRes()) << std::endl
|
|
<< "modify document assembly: "
|
|
<< show_bool(pdf.allowModifyAssembly()) << std::endl
|
|
<< "modify forms: "
|
|
<< show_bool(pdf.allowModifyForm()) << std::endl
|
|
<< "modify annotations: "
|
|
<< show_bool(pdf.allowModifyAnnotation()) << std::endl
|
|
<< "modify other: "
|
|
<< show_bool(pdf.allowModifyOther()) << std::endl
|
|
<< "modify anything: "
|
|
<< show_bool(pdf.allowModifyAll()) << std::endl;
|
|
if (V >= 4)
|
|
{
|
|
std::cout << "stream encryption method: "
|
|
<< show_encryption_method(stream_method) << std::endl
|
|
<< "string encryption method: "
|
|
<< show_encryption_method(string_method) << std::endl
|
|
<< "file encryption method: "
|
|
<< show_encryption_method(file_method) << std::endl;
|
|
}
|
|
}
|
|
}
|
|
|
|
static std::vector<int> parse_numrange(char const* range, int max)
|
|
{
|
|
std::vector<int> result;
|
|
char const* p = range;
|
|
try
|
|
{
|
|
std::vector<int> work;
|
|
static int const comma = -1;
|
|
static int const dash = -2;
|
|
|
|
enum { st_top,
|
|
st_in_number,
|
|
st_after_number } state = st_top;
|
|
bool last_separator_was_dash = false;
|
|
int cur_number = 0;
|
|
while (*p)
|
|
{
|
|
char ch = *p;
|
|
if (isdigit(ch))
|
|
{
|
|
if (! ((state == st_top) || (state == st_in_number)))
|
|
{
|
|
throw std::runtime_error("digit not expected");
|
|
}
|
|
state = st_in_number;
|
|
cur_number *= 10;
|
|
cur_number += (ch - '0');
|
|
}
|
|
else if (ch == 'z')
|
|
{
|
|
// z represents max
|
|
if (! (state == st_top))
|
|
{
|
|
throw std::runtime_error("z not expected");
|
|
}
|
|
state = st_after_number;
|
|
cur_number = max;
|
|
}
|
|
else if ((ch == ',') || (ch == '-'))
|
|
{
|
|
if (! ((state == st_in_number) || (state == st_after_number)))
|
|
{
|
|
throw std::runtime_error("unexpected separator");
|
|
}
|
|
work.push_back(cur_number);
|
|
cur_number = 0;
|
|
if (ch == ',')
|
|
{
|
|
state = st_top;
|
|
last_separator_was_dash = false;
|
|
work.push_back(comma);
|
|
}
|
|
else if (ch == '-')
|
|
{
|
|
if (last_separator_was_dash)
|
|
{
|
|
throw std::runtime_error("unexpected dash");
|
|
}
|
|
state = st_top;
|
|
last_separator_was_dash = true;
|
|
work.push_back(dash);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
throw std::runtime_error("unexpected character");
|
|
}
|
|
++p;
|
|
}
|
|
if ((state == st_in_number) || (state == st_after_number))
|
|
{
|
|
work.push_back(cur_number);
|
|
}
|
|
else
|
|
{
|
|
throw std::runtime_error("number expected");
|
|
}
|
|
|
|
p = 0;
|
|
for (size_t i = 0; i < work.size(); i += 2)
|
|
{
|
|
int num = work[i];
|
|
if ((num < 1) || (num > max))
|
|
{
|
|
throw std::runtime_error(
|
|
"number " + QUtil::int_to_string(num) + " out of range");
|
|
}
|
|
if (i == 0)
|
|
{
|
|
result.push_back(work[i]);
|
|
}
|
|
else
|
|
{
|
|
int separator = work[i-1];
|
|
if (separator == comma)
|
|
{
|
|
result.push_back(num);
|
|
}
|
|
else if (separator == dash)
|
|
{
|
|
int lastnum = result.back();
|
|
if (num > lastnum)
|
|
{
|
|
for (int j = lastnum + 1; j <= num; ++j)
|
|
{
|
|
result.push_back(j);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int j = lastnum - 1; j >= num; --j)
|
|
{
|
|
result.push_back(j);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
throw std::logic_error(
|
|
"INTERNAL ERROR parsing numeric range");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
catch (std::runtime_error e)
|
|
{
|
|
if (p)
|
|
{
|
|
usage("error at * in numeric range " +
|
|
std::string(range, p - range) + "*" + p + ": " + e.what());
|
|
}
|
|
else
|
|
{
|
|
usage("error in numeric range " +
|
|
std::string(range) + ": " + e.what());
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static void
|
|
parse_encrypt_options(
|
|
int argc, char* argv[], int& cur_arg,
|
|
std::string& user_password, std::string& owner_password, int& keylen,
|
|
bool& r2_print, bool& r2_modify, bool& r2_extract, bool& r2_annotate,
|
|
bool& r3_accessibility, bool& r3_extract,
|
|
qpdf_r3_print_e& r3_print, qpdf_r3_modify_e& r3_modify,
|
|
bool& force_V4, bool& cleartext_metadata, bool& use_aes,
|
|
bool& force_R5)
|
|
{
|
|
if (cur_arg + 3 >= argc)
|
|
{
|
|
usage("insufficient arguments to --encrypt");
|
|
}
|
|
user_password = argv[cur_arg++];
|
|
owner_password = argv[cur_arg++];
|
|
std::string len_str = argv[cur_arg++];
|
|
if (len_str == "40")
|
|
{
|
|
keylen = 40;
|
|
}
|
|
else if (len_str == "128")
|
|
{
|
|
keylen = 128;
|
|
}
|
|
else if (len_str == "256")
|
|
{
|
|
keylen = 256;
|
|
use_aes = true;
|
|
}
|
|
else
|
|
{
|
|
usage("encryption key length must be 40, 128, or 256");
|
|
}
|
|
while (1)
|
|
{
|
|
char* arg = argv[cur_arg];
|
|
if (arg == 0)
|
|
{
|
|
usage("insufficient arguments to --encrypt");
|
|
}
|
|
else if (strcmp(arg, "--") == 0)
|
|
{
|
|
return;
|
|
}
|
|
if (arg[0] == '-')
|
|
{
|
|
++arg;
|
|
if (arg[0] == '-')
|
|
{
|
|
++arg;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
usage(std::string("invalid encryption parameter ") + arg);
|
|
}
|
|
++cur_arg;
|
|
char* parameter = strchr(arg, '=');
|
|
if (parameter)
|
|
{
|
|
*parameter++ = 0;
|
|
}
|
|
if (strcmp(arg, "print") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--print must be given as --print=option");
|
|
}
|
|
std::string val = parameter;
|
|
if (keylen == 40)
|
|
{
|
|
if (val == "y")
|
|
{
|
|
r2_print = true;
|
|
}
|
|
else if (val == "n")
|
|
{
|
|
r2_print = false;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid 40-bit -print parameter");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (val == "full")
|
|
{
|
|
r3_print = qpdf_r3p_full;
|
|
}
|
|
else if (val == "low")
|
|
{
|
|
r3_print = qpdf_r3p_low;
|
|
}
|
|
else if (val == "none")
|
|
{
|
|
r3_print = qpdf_r3p_none;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid 128-bit -print parameter");
|
|
}
|
|
}
|
|
}
|
|
else if (strcmp(arg, "modify") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--modify must be given as --modify=option");
|
|
}
|
|
std::string val = parameter;
|
|
if (keylen == 40)
|
|
{
|
|
if (val == "y")
|
|
{
|
|
r2_modify = true;
|
|
}
|
|
else if (val == "n")
|
|
{
|
|
r2_modify = false;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid 40-bit -modify parameter");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (val == "all")
|
|
{
|
|
r3_modify = qpdf_r3m_all;
|
|
}
|
|
else if (val == "annotate")
|
|
{
|
|
r3_modify = qpdf_r3m_annotate;
|
|
}
|
|
else if (val == "form")
|
|
{
|
|
r3_modify = qpdf_r3m_form;
|
|
}
|
|
else if (val == "assembly")
|
|
{
|
|
r3_modify = qpdf_r3m_assembly;
|
|
}
|
|
else if (val == "none")
|
|
{
|
|
r3_modify = qpdf_r3m_none;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid 128-bit -modify parameter");
|
|
}
|
|
}
|
|
}
|
|
else if (strcmp(arg, "extract") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--extract must be given as --extract=option");
|
|
}
|
|
std::string val = parameter;
|
|
bool result = false;
|
|
if (val == "y")
|
|
{
|
|
result = true;
|
|
}
|
|
else if (val == "n")
|
|
{
|
|
result = false;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid -extract parameter");
|
|
}
|
|
if (keylen == 40)
|
|
{
|
|
r2_extract = result;
|
|
}
|
|
else
|
|
{
|
|
r3_extract = result;
|
|
}
|
|
}
|
|
else if (strcmp(arg, "annotate") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--annotate must be given as --annotate=option");
|
|
}
|
|
std::string val = parameter;
|
|
bool result = false;
|
|
if (val == "y")
|
|
{
|
|
result = true;
|
|
}
|
|
else if (val == "n")
|
|
{
|
|
result = false;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid -annotate parameter");
|
|
}
|
|
if (keylen == 40)
|
|
{
|
|
r2_annotate = result;
|
|
}
|
|
else
|
|
{
|
|
usage("-annotate invalid for 128-bit keys");
|
|
}
|
|
}
|
|
else if (strcmp(arg, "accessibility") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--accessibility must be given as"
|
|
" --accessibility=option");
|
|
}
|
|
std::string val = parameter;
|
|
bool result = false;
|
|
if (val == "y")
|
|
{
|
|
result = true;
|
|
}
|
|
else if (val == "n")
|
|
{
|
|
result = false;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid -accessibility parameter");
|
|
}
|
|
if (keylen == 128)
|
|
{
|
|
r3_accessibility = result;
|
|
}
|
|
else
|
|
{
|
|
usage("-accessibility invalid for 40-bit keys");
|
|
}
|
|
}
|
|
else if (strcmp(arg, "cleartext-metadata") == 0)
|
|
{
|
|
if (parameter)
|
|
{
|
|
usage("--cleartext-metadata does not take a parameter");
|
|
}
|
|
if (keylen == 40)
|
|
{
|
|
usage("--cleartext-metadata is invalid for 40-bit keys");
|
|
}
|
|
else
|
|
{
|
|
cleartext_metadata = true;
|
|
}
|
|
}
|
|
else if (strcmp(arg, "force-V4") == 0)
|
|
{
|
|
if (parameter)
|
|
{
|
|
usage("--force-V4 does not take a parameter");
|
|
}
|
|
if (keylen != 128)
|
|
{
|
|
usage("--force-V4 is invalid only for 128-bit keys");
|
|
}
|
|
else
|
|
{
|
|
force_V4 = true;
|
|
}
|
|
}
|
|
else if (strcmp(arg, "force-R5") == 0)
|
|
{
|
|
if (parameter)
|
|
{
|
|
usage("--force-R5 does not take a parameter");
|
|
}
|
|
if (keylen != 256)
|
|
{
|
|
usage("--force-R5 is invalid only for 256-bit keys");
|
|
}
|
|
else
|
|
{
|
|
force_R5 = true;
|
|
}
|
|
}
|
|
else if (strcmp(arg, "use-aes") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--use-aes must be given as --extract=option");
|
|
}
|
|
std::string val = parameter;
|
|
bool result = false;
|
|
if (val == "y")
|
|
{
|
|
result = true;
|
|
}
|
|
else if (val == "n")
|
|
{
|
|
result = false;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid -use-aes parameter");
|
|
}
|
|
if ((keylen == 40) && result)
|
|
{
|
|
usage("use-aes is invalid for 40-bit keys");
|
|
}
|
|
else if ((keylen == 256) && (! result))
|
|
{
|
|
// qpdf would happily create files encrypted with RC4
|
|
// using /V=5, but Adobe reader can't read them.
|
|
usage("use-aes can't be disabled with 256-bit keys");
|
|
}
|
|
else
|
|
{
|
|
use_aes = result;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
usage(std::string("invalid encryption parameter --") + arg);
|
|
}
|
|
}
|
|
}
|
|
|
|
static std::vector<PageSpec>
|
|
parse_pages_options(
|
|
int argc, char* argv[], int& cur_arg)
|
|
{
|
|
std::vector<PageSpec> result;
|
|
while (1)
|
|
{
|
|
if ((cur_arg < argc) && (strcmp(argv[cur_arg], "--") == 0))
|
|
{
|
|
break;
|
|
}
|
|
if (cur_arg + 2 >= argc)
|
|
{
|
|
usage("insufficient arguments to --pages");
|
|
}
|
|
char* file = argv[cur_arg++];
|
|
char* password = 0;
|
|
char* range = argv[cur_arg++];
|
|
if (strncmp(range, "--password=", 11) == 0)
|
|
{
|
|
// Oh, that's the password, not the range
|
|
if (cur_arg + 1 >= argc)
|
|
{
|
|
usage("insufficient arguments to --pages");
|
|
}
|
|
password = range + 11;
|
|
range = argv[cur_arg++];
|
|
}
|
|
|
|
result.push_back(PageSpec(file, password, range));
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static void test_numrange(char const* range)
|
|
{
|
|
if (range == 0)
|
|
{
|
|
std::cout << "null" << std::endl;
|
|
}
|
|
else
|
|
{
|
|
std::vector<int> result = parse_numrange(range, 15);
|
|
std::cout << "numeric range " << range << " ->";
|
|
for (std::vector<int>::iterator iter = result.begin();
|
|
iter != result.end(); ++iter)
|
|
{
|
|
std::cout << " " << *iter;
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
|
|
QPDFPageData::QPDFPageData(QPDF* qpdf, char const* range) :
|
|
qpdf(qpdf),
|
|
orig_pages(qpdf->getAllPages())
|
|
{
|
|
this->selected_pages = parse_numrange(range, (int)this->orig_pages.size());
|
|
}
|
|
|
|
static void parse_version(std::string const& full_version_string,
|
|
std::string& version, int& extension_level)
|
|
{
|
|
PointerHolder<char> vp(true, QUtil::copy_string(full_version_string));
|
|
char* v = vp.getPointer();
|
|
char* p1 = strchr(v, '.');
|
|
char* p2 = (p1 ? strchr(1 + p1, '.') : 0);
|
|
if (p2 && *(p2 + 1))
|
|
{
|
|
*p2++ = '\0';
|
|
extension_level = atoi(p2);
|
|
}
|
|
version = v;
|
|
}
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
whoami = QUtil::getWhoami(argv[0]);
|
|
QUtil::setLineBuf(stdout);
|
|
|
|
// For libtool's sake....
|
|
if (strncmp(whoami, "lt-", 3) == 0)
|
|
{
|
|
whoami += 3;
|
|
}
|
|
|
|
if ((argc == 2) &&
|
|
((strcmp(argv[1], "--version") == 0) ||
|
|
(strcmp(argv[1], "-version") == 0)))
|
|
{
|
|
// make_dist looks for the line of code here that actually
|
|
// prints the version number, so read make_dist if you change
|
|
// anything other than the version number. Don't worry about
|
|
// the numbers. That's just a guide to 80 columns so that the
|
|
// help message looks right on an 80-column display.
|
|
|
|
// 1 2 3 4 5 6 7 8
|
|
// 12345678901234567890123456789012345678901234567890123456789012345678901234567890
|
|
std::cout
|
|
<< whoami << " version " << QPDF::QPDFVersion() << std::endl
|
|
<< "Copyright (c) 2005-2013 Jay Berkenbilt"
|
|
<< std::endl
|
|
<< "This software may be distributed under the terms of version 2 of the"
|
|
<< std::endl
|
|
<< "Artistic License which may be found in the source distribution. It is"
|
|
<< std::endl
|
|
<< "provided \"as is\" without express or implied warranty."
|
|
<< std::endl;
|
|
exit(0);
|
|
}
|
|
|
|
if ((argc == 2) &&
|
|
((strcmp(argv[1], "--help") == 0) ||
|
|
(strcmp(argv[1], "-help") == 0)))
|
|
{
|
|
std::cout << help;
|
|
exit(0);
|
|
}
|
|
|
|
char const* password = 0;
|
|
bool linearize = false;
|
|
bool decrypt = false;
|
|
|
|
bool copy_encryption = false;
|
|
char const* encryption_file = 0;
|
|
char const* encryption_file_password = 0;
|
|
|
|
bool encrypt = false;
|
|
std::string user_password;
|
|
std::string owner_password;
|
|
int keylen = 0;
|
|
bool r2_print = true;
|
|
bool r2_modify = true;
|
|
bool r2_extract = true;
|
|
bool r2_annotate = true;
|
|
bool r3_accessibility = true;
|
|
bool r3_extract = true;
|
|
qpdf_r3_print_e r3_print = qpdf_r3p_full;
|
|
qpdf_r3_modify_e r3_modify = qpdf_r3m_all;
|
|
bool force_V4 = false;
|
|
bool force_R5 = false;
|
|
bool cleartext_metadata = false;
|
|
bool use_aes = false;
|
|
|
|
bool stream_data_set = false;
|
|
qpdf_stream_data_e stream_data_mode = qpdf_s_compress;
|
|
bool normalize_set = false;
|
|
bool normalize = false;
|
|
bool suppress_recovery = false;
|
|
bool object_stream_set = false;
|
|
qpdf_object_stream_e object_stream_mode = qpdf_o_preserve;
|
|
bool ignore_xref_streams = false;
|
|
bool qdf_mode = false;
|
|
std::string min_version;
|
|
std::string force_version;
|
|
|
|
bool static_id = false;
|
|
bool static_aes_iv = false;
|
|
bool suppress_original_object_id = false;
|
|
bool show_encryption = false;
|
|
bool check_linearization = false;
|
|
bool show_linearization = false;
|
|
bool show_xref = false;
|
|
int show_obj = 0;
|
|
int show_gen = 0;
|
|
bool show_raw_stream_data = false;
|
|
bool show_filtered_stream_data = false;
|
|
bool show_pages = false;
|
|
bool show_page_images = false;
|
|
bool check = false;
|
|
|
|
std::vector<PageSpec> page_specs;
|
|
|
|
bool require_outfile = true;
|
|
char const* infilename = 0;
|
|
char const* outfilename = 0;
|
|
|
|
for (int i = 1; i < argc; ++i)
|
|
{
|
|
char const* arg = argv[i];
|
|
if ((arg[0] == '-') && (strcmp(arg, "-") != 0))
|
|
{
|
|
++arg;
|
|
if (arg[0] == '-')
|
|
{
|
|
// Be lax about -arg vs --arg
|
|
++arg;
|
|
}
|
|
char* parameter = (char*)strchr(arg, '=');
|
|
if (parameter)
|
|
{
|
|
*parameter++ = 0;
|
|
}
|
|
|
|
// Arguments that start with space are undocumented and
|
|
// are for use by the test suite.
|
|
if (strcmp(arg, " test-numrange") == 0)
|
|
{
|
|
test_numrange(parameter);
|
|
exit(0);
|
|
}
|
|
else if (strcmp(arg, "password") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--password must be given as --password=pass");
|
|
}
|
|
password = parameter;
|
|
}
|
|
else if (strcmp(arg, "empty") == 0)
|
|
{
|
|
infilename = "";
|
|
}
|
|
else if (strcmp(arg, "linearize") == 0)
|
|
{
|
|
linearize = true;
|
|
}
|
|
else if (strcmp(arg, "encrypt") == 0)
|
|
{
|
|
parse_encrypt_options(
|
|
argc, argv, ++i,
|
|
user_password, owner_password, keylen,
|
|
r2_print, r2_modify, r2_extract, r2_annotate,
|
|
r3_accessibility, r3_extract, r3_print, r3_modify,
|
|
force_V4, cleartext_metadata, use_aes, force_R5);
|
|
encrypt = true;
|
|
decrypt = false;
|
|
copy_encryption = false;
|
|
}
|
|
else if (strcmp(arg, "decrypt") == 0)
|
|
{
|
|
decrypt = true;
|
|
encrypt = false;
|
|
copy_encryption = false;
|
|
}
|
|
else if (strcmp(arg, "copy-encryption") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--copy-encryption must be given as"
|
|
"--copy_encryption=file");
|
|
}
|
|
encryption_file = parameter;
|
|
copy_encryption = true;
|
|
encrypt = false;
|
|
decrypt = false;
|
|
}
|
|
else if (strcmp(arg, "encryption-file-password") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--encryption-file-password must be given as"
|
|
"--encryption-file-password=password");
|
|
}
|
|
encryption_file_password = parameter;
|
|
}
|
|
else if (strcmp(arg, "pages") == 0)
|
|
{
|
|
page_specs = parse_pages_options(argc, argv, ++i);
|
|
if (page_specs.empty())
|
|
{
|
|
usage("--pages: no page specifications given");
|
|
}
|
|
}
|
|
else if (strcmp(arg, "stream-data") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--stream-data must be given as"
|
|
"--stream-data=option");
|
|
}
|
|
stream_data_set = true;
|
|
if (strcmp(parameter, "compress") == 0)
|
|
{
|
|
stream_data_mode = qpdf_s_compress;
|
|
}
|
|
else if (strcmp(parameter, "preserve") == 0)
|
|
{
|
|
stream_data_mode = qpdf_s_preserve;
|
|
}
|
|
else if (strcmp(parameter, "uncompress") == 0)
|
|
{
|
|
stream_data_mode = qpdf_s_uncompress;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid stream-data option");
|
|
}
|
|
}
|
|
else if (strcmp(arg, "normalize-content") == 0)
|
|
{
|
|
if ((parameter == 0) || (*parameter == '\0'))
|
|
{
|
|
usage("--normalize-content must be given as"
|
|
" --normalize-content=[yn]");
|
|
}
|
|
normalize_set = true;
|
|
normalize = (parameter[0] == 'y');
|
|
}
|
|
else if (strcmp(arg, "suppress-recovery") == 0)
|
|
{
|
|
suppress_recovery = true;
|
|
}
|
|
else if (strcmp(arg, "object-streams") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--object-streams must be given as"
|
|
" --object-streams=option");
|
|
}
|
|
object_stream_set = true;
|
|
if (strcmp(parameter, "disable") == 0)
|
|
{
|
|
object_stream_mode = qpdf_o_disable;
|
|
}
|
|
else if (strcmp(parameter, "preserve") == 0)
|
|
{
|
|
object_stream_mode = qpdf_o_preserve;
|
|
}
|
|
else if (strcmp(parameter, "generate") == 0)
|
|
{
|
|
object_stream_mode = qpdf_o_generate;
|
|
}
|
|
else
|
|
{
|
|
usage("invalid object stream mode");
|
|
}
|
|
}
|
|
else if (strcmp(arg, "ignore-xref-streams") == 0)
|
|
{
|
|
ignore_xref_streams = true;
|
|
}
|
|
else if (strcmp(arg, "qdf") == 0)
|
|
{
|
|
qdf_mode = true;
|
|
}
|
|
else if (strcmp(arg, "min-version") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--min-version be given as"
|
|
"--min-version=version");
|
|
}
|
|
min_version = parameter;
|
|
}
|
|
else if (strcmp(arg, "force-version") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--force-version be given as"
|
|
"--force-version=version");
|
|
}
|
|
force_version = parameter;
|
|
}
|
|
else if (strcmp(arg, "static-id") == 0)
|
|
{
|
|
static_id = true;
|
|
}
|
|
else if (strcmp(arg, "static-aes-iv") == 0)
|
|
{
|
|
static_aes_iv = true;
|
|
}
|
|
else if (strcmp(arg, "no-original-object-ids") == 0)
|
|
{
|
|
suppress_original_object_id = true;
|
|
}
|
|
else if (strcmp(arg, "show-encryption") == 0)
|
|
{
|
|
show_encryption = true;
|
|
require_outfile = false;
|
|
}
|
|
else if (strcmp(arg, "check-linearization") == 0)
|
|
{
|
|
check_linearization = true;
|
|
require_outfile = false;
|
|
}
|
|
else if (strcmp(arg, "show-linearization") == 0)
|
|
{
|
|
show_linearization = true;
|
|
require_outfile = false;
|
|
}
|
|
else if (strcmp(arg, "show-xref") == 0)
|
|
{
|
|
show_xref = true;
|
|
require_outfile = false;
|
|
}
|
|
else if (strcmp(arg, "show-object") == 0)
|
|
{
|
|
if (parameter == 0)
|
|
{
|
|
usage("--show-object must be given as"
|
|
" --show-object=obj[,gen]");
|
|
}
|
|
char* obj = parameter;
|
|
char* gen = obj;
|
|
if ((gen = strchr(obj, ',')) != 0)
|
|
{
|
|
*gen++ = 0;
|
|
show_gen = atoi(gen);
|
|
}
|
|
show_obj = atoi(obj);
|
|
require_outfile = false;
|
|
}
|
|
else if (strcmp(arg, "raw-stream-data") == 0)
|
|
{
|
|
show_raw_stream_data = true;
|
|
}
|
|
else if (strcmp(arg, "filtered-stream-data") == 0)
|
|
{
|
|
show_filtered_stream_data = true;
|
|
}
|
|
else if (strcmp(arg, "show-pages") == 0)
|
|
{
|
|
show_pages = true;
|
|
require_outfile = false;
|
|
}
|
|
else if (strcmp(arg, "with-images") == 0)
|
|
{
|
|
show_page_images = true;
|
|
}
|
|
else if (strcmp(arg, "check") == 0)
|
|
{
|
|
check = true;
|
|
require_outfile = false;
|
|
}
|
|
else
|
|
{
|
|
usage(std::string("unknown option --") + arg);
|
|
}
|
|
}
|
|
else if (infilename == 0)
|
|
{
|
|
infilename = arg;
|
|
}
|
|
else if (outfilename == 0)
|
|
{
|
|
outfilename = arg;
|
|
}
|
|
else
|
|
{
|
|
usage(std::string("unknown argument ") + arg);
|
|
}
|
|
}
|
|
|
|
if (infilename == 0)
|
|
{
|
|
usage("an input file name is required");
|
|
}
|
|
else if (require_outfile && (outfilename == 0))
|
|
{
|
|
usage("an output file name is required; use - for standard output");
|
|
}
|
|
else if ((! require_outfile) && (outfilename != 0))
|
|
{
|
|
usage("no output file may be given for this option");
|
|
}
|
|
|
|
try
|
|
{
|
|
QPDF pdf;
|
|
QPDF encryption_pdf;
|
|
if (ignore_xref_streams)
|
|
{
|
|
pdf.setIgnoreXRefStreams(true);
|
|
}
|
|
if (suppress_recovery)
|
|
{
|
|
pdf.setAttemptRecovery(false);
|
|
}
|
|
if (strcmp(infilename, "") == 0)
|
|
{
|
|
pdf.emptyPDF();
|
|
}
|
|
else
|
|
{
|
|
pdf.processFile(infilename, password);
|
|
}
|
|
if (outfilename == 0)
|
|
{
|
|
if (show_encryption)
|
|
{
|
|
::show_encryption(pdf);
|
|
}
|
|
if (check_linearization)
|
|
{
|
|
if (pdf.checkLinearization())
|
|
{
|
|
std::cout << infilename << ": no linearization errors"
|
|
<< std::endl;
|
|
}
|
|
else
|
|
{
|
|
exit(EXIT_ERROR);
|
|
}
|
|
}
|
|
if (show_linearization)
|
|
{
|
|
if (pdf.isLinearized())
|
|
{
|
|
pdf.showLinearizationData();
|
|
}
|
|
else
|
|
{
|
|
std::cout << infilename << " is not linearized"
|
|
<< std::endl;
|
|
}
|
|
}
|
|
if (show_xref)
|
|
{
|
|
pdf.showXRefTable();
|
|
}
|
|
if (show_obj > 0)
|
|
{
|
|
QPDFObjectHandle obj = pdf.getObjectByID(show_obj, show_gen);
|
|
if (obj.isStream())
|
|
{
|
|
if (show_raw_stream_data || show_filtered_stream_data)
|
|
{
|
|
bool filter = show_filtered_stream_data;
|
|
if (filter &&
|
|
(! obj.pipeStreamData(0, true, false, false)))
|
|
{
|
|
QTC::TC("qpdf", "qpdf unable to filter");
|
|
std::cerr << "Unable to filter stream data."
|
|
<< std::endl;
|
|
exit(EXIT_ERROR);
|
|
}
|
|
else
|
|
{
|
|
QUtil::binary_stdout();
|
|
Pl_StdioFile out("stdout", stdout);
|
|
obj.pipeStreamData(&out, filter, normalize, false);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
std::cout
|
|
<< "Object is stream. Dictionary:" << std::endl
|
|
<< obj.getDict().unparseResolved() << std::endl;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
std::cout << obj.unparseResolved() << std::endl;
|
|
}
|
|
}
|
|
if (show_pages)
|
|
{
|
|
if (show_page_images)
|
|
{
|
|
pdf.pushInheritedAttributesToPage();
|
|
}
|
|
std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
|
|
int pageno = 0;
|
|
for (std::vector<QPDFObjectHandle>::iterator iter =
|
|
pages.begin();
|
|
iter != pages.end(); ++iter)
|
|
{
|
|
QPDFObjectHandle& page = *iter;
|
|
++pageno;
|
|
|
|
std::cout << "page " << pageno << ": "
|
|
<< page.getObjectID() << " "
|
|
<< page.getGeneration() << " R" << std::endl;
|
|
if (show_page_images)
|
|
{
|
|
std::map<std::string, QPDFObjectHandle> images =
|
|
page.getPageImages();
|
|
if (! images.empty())
|
|
{
|
|
std::cout << " images:" << std::endl;
|
|
for (std::map<std::string,
|
|
QPDFObjectHandle>::iterator
|
|
iter = images.begin();
|
|
iter != images.end(); ++iter)
|
|
{
|
|
std::string const& name = (*iter).first;
|
|
QPDFObjectHandle image = (*iter).second;
|
|
QPDFObjectHandle dict = image.getDict();
|
|
int width =
|
|
dict.getKey("/Width").getIntValue();
|
|
int height =
|
|
dict.getKey("/Height").getIntValue();
|
|
std::cout << " " << name << ": "
|
|
<< image.unparse()
|
|
<< ", " << width << " x " << height
|
|
<< std::endl;
|
|
}
|
|
}
|
|
}
|
|
|
|
std::cout << " content:" << std::endl;
|
|
std::vector<QPDFObjectHandle> content =
|
|
page.getPageContents();
|
|
for (std::vector<QPDFObjectHandle>::iterator iter =
|
|
content.begin();
|
|
iter != content.end(); ++iter)
|
|
{
|
|
std::cout << " " << (*iter).unparse() << std::endl;
|
|
}
|
|
}
|
|
}
|
|
if (check)
|
|
{
|
|
bool okay = false;
|
|
std::cout << "checking " << infilename << std::endl;
|
|
try
|
|
{
|
|
int extension_level = pdf.getExtensionLevel();
|
|
std::cout << "PDF Version: " << pdf.getPDFVersion();
|
|
if (extension_level > 0)
|
|
{
|
|
std::cout << " extension level "
|
|
<< pdf.getExtensionLevel();
|
|
}
|
|
std::cout << std::endl;
|
|
::show_encryption(pdf);
|
|
if (pdf.isLinearized())
|
|
{
|
|
std::cout << "File is linearized\n";
|
|
okay = pdf.checkLinearization();
|
|
// any errors are reported by checkLinearization().
|
|
}
|
|
else
|
|
{
|
|
std::cout << "File is not linearized\n";
|
|
// Write the file no nowhere, uncompressing
|
|
// streams. This causes full file traversal
|
|
// and decoding of all streams we can decode.
|
|
QPDFWriter w(pdf);
|
|
Pl_Discard discard;
|
|
w.setOutputPipeline(&discard);
|
|
w.setStreamDataMode(qpdf_s_uncompress);
|
|
w.write();
|
|
okay = true;
|
|
}
|
|
}
|
|
catch (std::exception& e)
|
|
{
|
|
std::cout << e.what() << std::endl;
|
|
}
|
|
if (okay)
|
|
{
|
|
if (! pdf.getWarnings().empty())
|
|
{
|
|
exit(EXIT_WARNING);
|
|
}
|
|
else
|
|
{
|
|
std::cout << "No syntax or stream encoding errors"
|
|
<< " found; the file may still contain"
|
|
<< std::endl
|
|
<< "errors that qpdf cannot detect"
|
|
<< std::endl;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
std::vector<PointerHolder<QPDF> > page_heap;
|
|
if (! page_specs.empty())
|
|
{
|
|
// Parse all page specifications and translate them
|
|
// into lists of actual pages.
|
|
|
|
// Create a QPDF object for each file that we may take
|
|
// pages from.
|
|
std::map<std::string, QPDF*> page_spec_qpdfs;
|
|
page_spec_qpdfs[infilename] = &pdf;
|
|
std::vector<QPDFPageData> parsed_specs;
|
|
for (std::vector<PageSpec>::iterator iter = page_specs.begin();
|
|
iter != page_specs.end(); ++iter)
|
|
{
|
|
PageSpec& page_spec = *iter;
|
|
if (page_spec_qpdfs.count(page_spec.filename) == 0)
|
|
{
|
|
// Open the PDF file and store the QPDF
|
|
// object. Throw a PointerHolder to the qpdf
|
|
// into a heap so that it survives through
|
|
// writing the output but gets cleaned up
|
|
// automatically at the end. Do not
|
|
// canonicalize the file name. Using two
|
|
// different paths to refer to the same file
|
|
// is a document workaround for duplicating a
|
|
// page. If you are using this an example of
|
|
// how to do this with the API, you can just
|
|
// create two different QPDF objects to the
|
|
// same underlying file with the same path to
|
|
// achieve the same affect.
|
|
PointerHolder<QPDF> qpdf_ph = new QPDF();
|
|
page_heap.push_back(qpdf_ph);
|
|
QPDF* qpdf = qpdf_ph.getPointer();
|
|
char const* password = page_spec.password;
|
|
if (encryption_file && (password == 0) &&
|
|
(page_spec.filename == encryption_file))
|
|
{
|
|
QTC::TC("qpdf", "qpdf pages encryption password");
|
|
password = encryption_file_password;
|
|
}
|
|
qpdf->processFile(
|
|
page_spec.filename.c_str(), password);
|
|
page_spec_qpdfs[page_spec.filename] = qpdf;
|
|
}
|
|
|
|
// Read original pages from the PDF, and parse the
|
|
// page range associated with this occurrence of
|
|
// the file.
|
|
parsed_specs.push_back(
|
|
QPDFPageData(page_spec_qpdfs[page_spec.filename],
|
|
page_spec.range));
|
|
}
|
|
|
|
// Clear all pages out of the primary QPDF's pages
|
|
// tree but leave the objects in place in the file so
|
|
// they can be re-added without changing their object
|
|
// numbers. This enables other things in the original
|
|
// file, such as outlines, to continue to work.
|
|
std::vector<QPDFObjectHandle> orig_pages = pdf.getAllPages();
|
|
for (std::vector<QPDFObjectHandle>::iterator iter =
|
|
orig_pages.begin();
|
|
iter != orig_pages.end(); ++iter)
|
|
{
|
|
pdf.removePage(*iter);
|
|
}
|
|
|
|
// Add all the pages from all the files in the order
|
|
// specified. Keep track of any pages from the
|
|
// original file that we are selecting.
|
|
std::set<int> selected_from_orig;
|
|
for (std::vector<QPDFPageData>::iterator iter =
|
|
parsed_specs.begin();
|
|
iter != parsed_specs.end(); ++iter)
|
|
{
|
|
QPDFPageData& page_data = *iter;
|
|
for (std::vector<int>::iterator pageno_iter =
|
|
page_data.selected_pages.begin();
|
|
pageno_iter != page_data.selected_pages.end();
|
|
++pageno_iter)
|
|
{
|
|
// Pages are specified from 1 but numbered
|
|
// from 0 in the vector
|
|
int pageno = *pageno_iter - 1;
|
|
pdf.addPage(page_data.orig_pages[pageno], false);
|
|
if (page_data.qpdf == &pdf)
|
|
{
|
|
// This is a page from the original file.
|
|
// Keep track of the fact that we are
|
|
// using it.
|
|
selected_from_orig.insert(pageno);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Delete page objects for unused page in primary.
|
|
// This prevents those objects from being preserved by
|
|
// being referred to from other places, such as the
|
|
// outlines dictionary.
|
|
for (int pageno = 0; pageno < (int)orig_pages.size(); ++pageno)
|
|
{
|
|
if (selected_from_orig.count(pageno) == 0)
|
|
{
|
|
pdf.replaceObject(orig_pages[pageno].getObjectID(),
|
|
orig_pages[pageno].getGeneration(),
|
|
QPDFObjectHandle::newNull());
|
|
}
|
|
}
|
|
}
|
|
|
|
if (strcmp(outfilename, "-") == 0)
|
|
{
|
|
outfilename = 0;
|
|
}
|
|
QPDFWriter w(pdf, outfilename);
|
|
if (qdf_mode)
|
|
{
|
|
w.setQDFMode(true);
|
|
}
|
|
if (normalize_set)
|
|
{
|
|
w.setContentNormalization(normalize);
|
|
}
|
|
if (stream_data_set)
|
|
{
|
|
w.setStreamDataMode(stream_data_mode);
|
|
}
|
|
if (decrypt)
|
|
{
|
|
w.setPreserveEncryption(false);
|
|
}
|
|
if (static_id)
|
|
{
|
|
w.setStaticID(true);
|
|
}
|
|
if (static_aes_iv)
|
|
{
|
|
w.setStaticAesIV(true);
|
|
}
|
|
if (suppress_original_object_id)
|
|
{
|
|
w.setSuppressOriginalObjectIDs(true);
|
|
}
|
|
if (copy_encryption)
|
|
{
|
|
encryption_pdf.processFile(
|
|
encryption_file, encryption_file_password);
|
|
w.copyEncryptionParameters(encryption_pdf);
|
|
}
|
|
if (encrypt)
|
|
{
|
|
if (keylen == 40)
|
|
{
|
|
w.setR2EncryptionParameters(
|
|
user_password.c_str(), owner_password.c_str(),
|
|
r2_print, r2_modify, r2_extract, r2_annotate);
|
|
}
|
|
else if (keylen == 128)
|
|
{
|
|
if (force_V4 || cleartext_metadata || use_aes)
|
|
{
|
|
w.setR4EncryptionParameters(
|
|
user_password.c_str(), owner_password.c_str(),
|
|
r3_accessibility, r3_extract, r3_print, r3_modify,
|
|
!cleartext_metadata, use_aes);
|
|
}
|
|
else
|
|
{
|
|
w.setR3EncryptionParameters(
|
|
user_password.c_str(), owner_password.c_str(),
|
|
r3_accessibility, r3_extract, r3_print, r3_modify);
|
|
}
|
|
}
|
|
else if (keylen == 256)
|
|
{
|
|
if (force_R5)
|
|
{
|
|
w.setR5EncryptionParameters(
|
|
user_password.c_str(), owner_password.c_str(),
|
|
r3_accessibility, r3_extract, r3_print, r3_modify,
|
|
!cleartext_metadata);
|
|
}
|
|
else
|
|
{
|
|
w.setR6EncryptionParameters(
|
|
user_password.c_str(), owner_password.c_str(),
|
|
r3_accessibility, r3_extract, r3_print, r3_modify,
|
|
!cleartext_metadata);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
throw std::logic_error("bad encryption keylen");
|
|
}
|
|
}
|
|
if (linearize)
|
|
{
|
|
w.setLinearization(true);
|
|
}
|
|
if (object_stream_set)
|
|
{
|
|
w.setObjectStreamMode(object_stream_mode);
|
|
}
|
|
if (! min_version.empty())
|
|
{
|
|
std::string version;
|
|
int extension_level = 0;
|
|
parse_version(min_version, version, extension_level);
|
|
w.setMinimumPDFVersion(version, extension_level);
|
|
}
|
|
if (! force_version.empty())
|
|
{
|
|
std::string version;
|
|
int extension_level = 0;
|
|
parse_version(force_version, version, extension_level);
|
|
w.forcePDFVersion(version, extension_level);
|
|
}
|
|
w.write();
|
|
}
|
|
if (! pdf.getWarnings().empty())
|
|
{
|
|
std::cerr << whoami << ": operation succeeded with warnings;"
|
|
<< " resulting file may have some problems" << std::endl;
|
|
exit(EXIT_WARNING);
|
|
}
|
|
}
|
|
catch (std::exception& e)
|
|
{
|
|
std::cerr << e.what() << std::endl;
|
|
exit(EXIT_ERROR);
|
|
}
|
|
|
|
return 0;
|
|
}
|