diff --git a/Makefile b/Makefile index 64a97707..b0a08bcd 100644 --- a/Makefile +++ b/Makefile @@ -127,7 +127,9 @@ check: $(TEST_TARGETS) .PHONY: spell # npm install -g cspell; add exceptions to cSpell.json spell: - cspell **/*.hh include/qpdf/*.h **/*.cc manual/* ChangeLog README* TODO + cspell **/*.hh include/qpdf/*.h **/*.cc \ + manual/*.rst manual/*.in manual/_ext/*.py \ + ChangeLog README* TODO # Install targets are in the make directory in the rules-specific make # fragments. diff --git a/TODO b/TODO index 329dc2a9..ff16ab2c 100644 --- a/TODO +++ b/TODO @@ -34,16 +34,14 @@ Documentation * Consider which parts might be good candidates for moving to the wiki. -* See #530 -- add an appendix explaining PDF encryption in general - plus how it's handled by qpdf. Or maybe this should go on the wiki. - Document-level work =================== * Ideas here may by superseded by #593. * QPDFPageCopier -- object for moving pages around within files or - between files and performing various transformations + between files and performing various transformations. Reread/rewrite + _page-selection in the manual if needed. 
* Handle all the stuff of pages and split-pages * Do n-up, booklet, collation diff --git a/cSpell.json b/cSpell.json index ccf3d56c..144f394e 100644 --- a/cSpell.json +++ b/cSpell.json @@ -54,6 +54,7 @@ "cerr", "cfis", "cflags", + "ciphertext", "classname", "clearsign", "cleartext", @@ -149,6 +150,7 @@ "hosoda", "htcondor", "htdocs", + "idempotency", "ifdefs", "ifeq", "ifstream", diff --git a/generate_auto_job b/generate_auto_job index 556b374c..d573f596 100755 --- a/generate_auto_job +++ b/generate_auto_job @@ -19,10 +19,16 @@ def warn(*args, **kwargs): class Main: - SOURCES = [whoami, 'job.yml', 'manual/cli.rst'] + SOURCES = [ + whoami, + 'manual/_ext/qpdf.py', + 'job.yml', + 'manual/cli.rst', + ] DESTS = { 'decl': 'libqpdf/qpdf/auto_job_decl.hh', 'init': 'libqpdf/qpdf/auto_job_init.hh', + 'help': 'libqpdf/qpdf/auto_job_help.hh', } SUMS = 'job.sums' @@ -100,14 +106,22 @@ class Main: short_text = None long_text = None - print('this->ap.addHelpFooter("For detailed help, visit' - ' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f) + # Generate a bunch of short static functions rather than a big + # member function for help. Some compilers have problems with + # very large member functions in classes in anonymous + # namespaces. 
+ + help_files = 0 + help_lines = 0 + + self.all_topics = set(self.options_without_help) + self.referenced_topics = set() def set_indent(x): nonlocal indent indent = ' ' * len(x) - def append_long_text(line): + def append_long_text(line, topic): nonlocal indent, long_text if line == '\n': long_text += '\n' @@ -115,13 +129,23 @@ class Main: long_text += line[len(indent):] else: long_text = long_text.strip() - if long_text != '': - long_text += '\n' + if long_text == '': + raise Exception(f'missing long text for {topic}') + long_text += '\n' + for i in re.finditer(r'--help=([^\.\s]+)', long_text): + self.referenced_topics.add(i.group(1)) return True return False lineno = 0 for line in df.readlines(): + if help_lines == 0: + if help_files > 0: + print('}', file=f) + help_files += 1 + help_lines += 1 + print(f'static void add_help_{help_files}(QPDFArgParser& ap)\n' + '{', file=f) lineno += 1 if state == st_top: m = re.match(r'^(\s*\.\. )help-topic (\S+): (.*)$', line) @@ -132,8 +156,9 @@ class Main: long_text = '' state = st_topic continue - m = re.match(r'^(\s*\.\. )qpdf:option:: (([^=\s]+)(=(\S+))?)$', - line) + m = re.match( + r'^(\s*\.\. 
)qpdf:option:: (([^=\s]+)([= ](.+))?)$', + line) if m: if topic is None: raise Exception('option seen before topic') @@ -150,9 +175,11 @@ class Main: state = st_option continue elif state == st_topic: - if append_long_text(line): - print(f'this->ap.addHelpTopic("{topic}", "{short_text}",' + if append_long_text(line, topic): + self.all_topics.add(topic) + print(f'ap.addHelpTopic("{topic}", "{short_text}",' f' R"({long_text})");', file=f) + help_lines += 1 state = st_top elif state == st_option: if line == '\n' or line.startswith(indent): @@ -162,12 +189,36 @@ class Main: short_text = m.group(2) state = st_option_help else: + raise Exception('option without help text') state = st_top elif state == st_option_help: - if append_long_text(line): - print(f'this->ap.addOptionHelp("{option}", "{topic}",' + if append_long_text(line, option): + if option in self.options_without_help: + self.options_without_help.remove(option) + else: + raise Exception( + f'help for unknown option {option},' + f' lineno={lineno}') + print(f'ap.addOptionHelp("{option}", "{topic}",' f' "{short_text}", R"({long_text})");', file=f) + help_lines += 1 state = st_top + if help_lines == 20: + help_lines = 0 + print('}', file=f) + print('static void add_help(QPDFArgParser& ap)\n{', file=f) + for i in range(help_files): + print(f' add_help_{i+1}(ap);', file=f) + print('ap.addHelpFooter("For detailed help, visit' + ' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f) + print('}\n', file=f) + for i in self.referenced_topics: + if i not in self.all_topics: + raise Exception(f'help text referenced --help={i}') + for i in self.options_without_help: + raise Exception( + 'Options without help: ' + + ', '.join(self.options_without_help)) def generate(self): warn(f'{whoami}: regenerating auto job files') @@ -175,12 +226,19 @@ class Main: with open('job.yml', 'r') as f: data = yaml.safe_load(f.read()) self.validate(data) + self.options_without_help = set( + ['--completion-bash', '--completion-zsh', 
'--help'] + ) with open(self.DESTS['decl'], 'w') as f: print(BANNER, file=f) self.generate_decl(data, f) with open(self.DESTS['init'], 'w') as f: print(BANNER, file=f) self.generate_init(data, f) + with open(self.DESTS['help'], 'w') as f: + with open('manual/cli.rst', 'r') as df: + print(BANNER, file=f) + self.generate_doc(df, f) # Update hashes last to ensure that this will be rerun in the # event of a failure. @@ -275,24 +333,29 @@ class Main: print('this->ap.addPositional(' f'p(&ArgParser::{prefix}Positional));', file=f) for i in o.get('bare', []): + self.options_without_help.add(f'--{i}') identifier = self.to_identifier(i, prefix, False) print(f'this->ap.addBare("{i}", ' f'b(&ArgParser::{identifier}));', file=f) for i in o.get('optional_parameter', []): + self.options_without_help.add(f'--{i}') identifier = self.to_identifier(i, prefix, False) print(f'this->ap.addOptionalParameter("{i}", ' f'p(&ArgParser::{identifier}));', file=f) for k, v in o.get('required_parameter', {}).items(): + self.options_without_help.add(f'--{k}') identifier = self.to_identifier(k, prefix, False) print(f'this->ap.addRequiredParameter("{k}", ' f'p(&ArgParser::{identifier})' f', "{v}");', file=f) for k, v in o.get('required_choices', {}).items(): + self.options_without_help.add(f'--{k}') identifier = self.to_identifier(k, prefix, False) print(f'this->ap.addChoices("{k}", ' f'p(&ArgParser::{identifier})' f', true, {v}_choices);', file=f) for k, v in o.get('optional_choices', {}).items(): + self.options_without_help.add(f'--{k}') identifier = self.to_identifier(k, prefix, False) print(f'this->ap.addChoices("{k}", ' f'p(&ArgParser::{identifier})' @@ -312,8 +375,6 @@ class Main: for j in ft['options']: print('this->ap.copyFromOtherTable' f'("{j}", "{other_table}");', file=f) - with open('manual/cli.rst', 'r') as df: - self.generate_doc(df, f) if __name__ == '__main__': diff --git a/job.sums b/job.sums index b70547b8..0e168ab3 100644 --- a/job.sums +++ b/job.sums @@ -1,6 +1,8 @@ # Generated 
by generate_auto_job -generate_auto_job 1f42fc554778d95210d11c44e858214b4854ead907d1c9ea84fe37f993ea1a23 +generate_auto_job 466aa9211549cebeb3fedc6413108981aeeddd89936621095f5f5223cee9880b job.yml 25c85cba1ae01dac9cd0f9cb7b734e7e3e531c0023ea2b892dc0d40bda1c1146 libqpdf/qpdf/auto_job_decl.hh 97395ecbe590b23ae04d6cce2080dbd0e998917ff5eeaa5c6aafa91041d3cd6a -libqpdf/qpdf/auto_job_init.hh 2afffb5002ff28a3909f709709f65d77bf2289dd72d5ea3d1598a36664a49c73 -manual/cli.rst f0109cca3366a9da4b0a05e3cce996ece2d776321a3f689aeaa2d6af599eee88 +libqpdf/qpdf/auto_job_help.hh fa7ff1d1f6289881ac3a485107d15240c4992c59cff506be425354557108d184 +libqpdf/qpdf/auto_job_init.hh 465bf46769559ceb77110d1b9d3293ba9b3595850b49848c31aeabd10aadb4ad +manual/_ext/qpdf.py 855fe12de5af7a10bb24be6ecc4d5dff4c84ac58cf388a13be6bbb394346a67d +manual/cli.rst c26e877d2065ac917edffdd6a037d2191b64d7c25beb4e8df1acc174b20b3ff4 diff --git a/libqpdf/QPDFArgParser.cc b/libqpdf/QPDFArgParser.cc index 69a97a5c..8d7c978c 100644 --- a/libqpdf/QPDFArgParser.cc +++ b/libqpdf/QPDFArgParser.cc @@ -967,21 +967,20 @@ void QPDFArgParser::getAllHelp(std::ostringstream& msg) { getTopHelp(msg); - auto show = [this, &msg](std::map& topics, - std::string const& label) { + auto show = [this, &msg](std::map& topics) { for (auto const& i: topics) { auto const& topic = i.first; msg << std::endl - << "== " << label << " " << topic + << "== " << topic << " (" << i.second.short_text << ") ==" << std::endl << std::endl; getTopicHelp(topic, i.second, msg); } }; - show(this->m->help_topics, "topic"); - show(this->m->option_help, "option"); + show(this->m->help_topics); + show(this->m->option_help); msg << std::endl << "====" << std::endl; } diff --git a/libqpdf/QPDFJob_argv.cc b/libqpdf/QPDFJob_argv.cc index 9b678257..da0bcd4a 100644 --- a/libqpdf/QPDFJob_argv.cc +++ b/libqpdf/QPDFJob_argv.cc @@ -48,6 +48,8 @@ ArgParser::ArgParser(QPDFArgParser& ap, QPDFJob& o) : initOptionTables(); } +#include + void ArgParser::initOptionTables() { @@ 
-55,6 +57,8 @@ ArgParser::initOptionTables() # include this->ap.addFinalCheck( QPDFArgParser::bindBare(&ArgParser::doFinalChecks, this)); + // add_help is defined in auto_job_help.hh + add_help(this->ap); } void @@ -127,513 +131,6 @@ ArgParser::argCopyright() << std::endl; } -#if 0 -void -ArgParser::argHelp() -{ - // QXXXQ - std::cout - // 12345678901234567890123456789012345678901234567890123456789012345678901234567890 - << "Usage: qpdf [options] {infile | --empty} [page_selection_options] outfile\n" - << "\n" - << "An option summary appears below. Please see the documentation for details.\n" - << "\n" - << "If @filename appears anywhere in the command-line, each line of filename\n" - << "will be interpreted as an argument. No interpolation is done. Line\n" - << "terminators are stripped, but leading and trailing whitespace is\n" - << "intentionally preserved. @- can be specified to read from standard input.\n" - << "\n" - << "The output file can be - to indicate writing to standard output, or it can\n" - << "be --replace-input to cause qpdf to replace the input file with the output.\n" - << "\n" - << "Note that when contradictory options are provided, whichever options are\n" - << "provided last take precedence.\n" - << "\n" - << "\n" - << "Basic Options\n" - << "-------------\n" - << "\n" - << "--version show version of qpdf\n" - << "--copyright show qpdf's copyright and license information\n" - << "--help show command-line argument help\n" - << "--show-crypto show supported crypto providers; default is first\n" - << "--completion-bash output a bash complete command you can eval\n" - << "--completion-zsh output a zsh complete command you can eval\n" - << "--password=password specify a password for accessing encrypted files\n" - << "--password-file=file get the password the first line \"file\"; use \"-\"\n" - << " to read the password from stdin (without prompt or\n" - << " disabling echo, so use with caution)\n" - << "--is-encrypted silently exit 0 if the file is 
encrypted or 2\n" - << " if not; useful for shell scripts\n" - << "--requires-password silently exit 0 if a password (other than as\n" - << " supplied) is required, 2 if the file is not\n" - << " encrypted, or 3 if the file is encrypted\n" - << " but requires no password or the supplied password\n" - << " is correct; useful for shell scripts\n" - << "--verbose provide additional informational output\n" - << "--progress give progress indicators while writing output\n" - << "--no-warn suppress warnings\n" - << "--warning-exit-0 exit with code 0 instead of 3 if there are warnings\n" - << "--linearize generated a linearized (web optimized) file\n" - << "--replace-input use in place of specifying an output file; qpdf will\n" - << " replace the input file with the output\n" - << "--copy-encryption=file copy encryption parameters from specified file\n" - << "--encryption-file-password=password\n" - << " password used to open the file from which encryption\n" - << " parameters are being copied\n" - << "--allow-weak-crypto allow creation of files using weak cryptographic\n" - << " algorithms\n" - << "--encrypt options -- generate an encrypted file\n" - << "--decrypt remove any encryption on the file\n" - << "--password-is-hex-key treat primary password option as a hex-encoded key\n" - << "--suppress-password-recovery\n" - << " do not attempt recovering from password string\n" - << " encoding errors\n" - << "--password-mode=mode control qpdf's encoding of passwords\n" - << "--pages options -- select specific pages from one or more files\n" - << "--collate=n causes files specified in --pages to be collated\n" - << " in groups of n pages (default 1) rather than\n" - << " concatenated\n" - << "--flatten-rotation move page rotation from /Rotate key to content\n" - << "--rotate=[+|-]angle[:page-range]\n" - << " rotate each specified page 0, 90, 180, or 270\n" - << " degrees; rotate all pages if no page range is given\n" - << "--split-pages=[n] write each output page to a separate 
file\n" - << "--overlay options -- overlay pages from another file\n" - << "--underlay options -- underlay pages from another file\n" - << "\n" - << "Note that you can use the @filename or @- syntax for any argument at any\n" - << "point in the command. This provides a good way to specify a password without\n" - << "having to explicitly put it on the command line. @filename or @- must be a\n" - << "word by itself. Syntax such as --arg=@filename doesn't work.\n" - << "\n" - << "If none of --copy-encryption, --encrypt or --decrypt are given, qpdf will\n" - << "preserve any encryption data associated with a file.\n" - << "\n" - << "Note that when copying encryption parameters from another file, all\n" - << "parameters will be copied, including both user and owner passwords, even\n" - << "if the user password is used to open the other file. This works even if\n" - << "the owner password is not known.\n" - << "\n" - << "The --password-is-hex-key option overrides the normal computation of\n" - << "encryption keys. It only applies to the password used to open the main\n" - << "file. This option is not ordinarily useful but can be helpful for forensic\n" - << "or investigatory purposes. See manual for further discussion.\n" - << "\n" - << "The --rotate flag can be used to specify pages to rotate pages either\n" - << "0, 90, 180, or 270 degrees. The page range is specified in the same\n" - << "format as with the --pages option, described below. Repeat the option\n" - << "to rotate multiple groups of pages. If the angle is preceded by + or -,\n" - << "it is added to or subtracted from the original rotation. Otherwise, the\n" - << "rotation angle is set explicitly to the given value. You almost always\n" - << "want to use + or - unless you are certain about the internals of the PDF\n" - << "you are working with.\n" - << "\n" - << "If --split-pages is specified, each page is written to a separate output\n" - << "file. 
File names are generated as follows:\n" - << "* If the string %d appears in the output file name, it is replaced with a\n" - << " zero-padded page range starting from 1\n" - << "* Otherwise, if the output file name ends in .pdf (case insensitive), a\n" - << " zero-padded page range, preceded by a dash, is inserted before the file\n" - << " extension\n" - << "* Otherwise, the file name is appended with a zero-padded page range\n" - << " preceded by a dash.\n" - << "Page ranges are single page numbers for single-page groups or first-last\n" - << "for multipage groups.\n" - << "\n" - << "\n" - << "Encryption Options\n" - << "------------------\n" - << "\n" - << " --encrypt user-password owner-password key-length flags --\n" - << "\n" - << "Note that -- terminates parsing of encryption flags.\n" - << "\n" - << "Either or both of the user password and the owner password may be\n" - << "empty strings.\n" - << "\n" - << "key-length may be 40, 128, or 256\n" - << "\n" - << "Additional flags are dependent upon key length.\n" - << "\n" - << " If 40:\n" - << "\n" - << " --print=[yn] allow printing\n" - << " --modify=[yn] allow document modification\n" - << " --extract=[yn] allow text/graphic extraction\n" - << " --annotate=[yn] allow comments and form fill-in and signing\n" - << "\n" - << " If 128:\n" - << "\n" - << " --accessibility=[yn] allow accessibility to visually impaired\n" - << " --extract=[yn] allow other text/graphic extraction\n" - << " --print=print-opt control printing access\n" - << " --assemble=[yn] allow document assembly\n" - << " --annotate=[yn] allow commenting/filling form fields\n" - << " --form=[yn] allow filling form fields\n" - << " --modify-other=[yn] allow other modifications\n" - << " --modify=modify-opt control modify access (old way)\n" - << " --cleartext-metadata prevents encryption of metadata\n" - << " --use-aes=[yn] indicates whether to use AES encryption\n" - << " --force-V4 forces use of V=4 encryption handler\n" - << "\n" - << " If 256, 
options are the same as 128 with these exceptions:\n" - << " --force-V4 this option is not available with 256-bit keys\n" - << " --use-aes this option is always on with 256-bit keys\n" - << " --force-R5 forces use of deprecated R=5 encryption\n" - << " --allow-insecure allow the owner password to be empty when the\n" - << " user password is not empty\n" - << "\n" - << " print-opt may be:\n" - << "\n" - << " full allow full printing\n" - << " low allow only low-resolution printing\n" - << " none disallow printing\n" - << "\n" - << " modify-opt may be:\n" - << "\n" - << " all allow full document modification\n" - << " annotate allow comment authoring and form operations\n" - << " form allow form field fill-in and signing\n" - << " assembly allow document assembly only\n" - << " none allow no modifications\n" - << "\n" - << "The default for each permission option is to be fully permissive. Please\n" - << "refer to the manual for more details on the modify options.\n" - << "\n" - << "Specifying cleartext-metadata forces the PDF version to at least 1.5.\n" - << "Specifying use of AES forces the PDF version to at least 1.6. These\n" - << "options are both off by default.\n" - << "\n" - << "The --force-V4 flag forces the V=4 encryption handler introduced in PDF 1.5\n" - << "to be used even if not otherwise needed. This option is primarily useful\n" - << "for testing qpdf and has no other practical use.\n" - << "\n" - << "A warning will be issued if you attempt to encrypt a file with a format that\n" - << "uses a weak cryptographic algorithm such as RC4. To suppress the warning,\n" - << "specify the option --allow-weak-crypto. This option is outside of encryption\n" - << "options (e.g. --allow-week-crypto --encrypt u o 128 --)\n" - << "\n" - << "\n" - << "Password Modes\n" - << "--------------\n" - << "\n" - << "The --password-mode controls how qpdf interprets passwords supplied\n" - << "on the command-line. 
qpdf's default behavior is correct in almost all\n" - << "cases, but you can fine-tune with this option.\n" - << "\n" - << " bytes: use the password literally as supplied\n" - << " hex-bytes: interpret the password as a hex-encoded byte string\n" - << " unicode: interpret the password as a UTF-8 encoded string\n" - << " auto: attempt to infer the encoding and adjust as needed\n" - << "\n" - << "This is a complex topic. See the manual for a complete discussion.\n" - << "\n" - << "\n" - << "Page Selection Options\n" - << "----------------------\n" - << "\n" - << "These options allow pages to be selected from one or more PDF files.\n" - << "Whatever file is given as the primary input file is used as the\n" - << "starting point, but its pages are replaced with pages as specified.\n" - << "\n" - << "--keep-files-open=[yn]\n" - << "--keep-files-open-threshold=count\n" - << "--pages file [ --password=password ] [ page-range ] ... --\n" - << "\n" - << "For each file that pages should be taken from, specify the file, a\n" - << "password needed to open the file (if any), and a page range. The\n" - << "password needs to be given only once per file. If any of the input\n" - << "files are the same as the primary input file or the file used to copy\n" - << "encryption parameters (if specified), you do not need to repeat the\n" - << "password here. The same file can be repeated multiple times. The\n" - << "filename \".\" may be used to refer to the current input file. All\n" - << "non-page data (info, outlines, page numbers, etc. are taken from the\n" - << "primary input file. To discard this, use --empty as the primary\n" - << "input.\n" - << "\n" - << "By default, when more than 200 distinct files are specified, qpdf will\n" - << "close each file when not being referenced. With 200 files or fewer, all\n" - << "files will be kept open at the same time. This behavior can be overridden\n" - << "by specifying --keep-files-open=[yn]. 
Closing and opening files can have\n" - << "very high overhead on certain file systems, especially networked file\n" - << "systems. The threshold of 200 can be modified with\n" - << "--keep-files-open-threshold\n" - << "\n" - << "The page range is a set of numbers separated by commas, ranges of\n" - << "numbers separated dashes, or combinations of those. The character\n" - << "\"z\" represents the last page. A number preceded by an \"r\" indicates\n" - << "to count from the end, so \"r3-r1\" would be the last three pages of the\n" - << "document. Pages can appear in any order. Ranges can appear with a\n" - << "high number followed by a low number, which causes the pages to appear in\n" - << "reverse. Numbers may be repeated. A page range may be appended with :odd\n" - << "to indicate odd pages in the selected range or :even to indicate even\n" - << "pages.\n" - << "\n" - << "If the page range is omitted, the range of 1-z is assumed. qpdf decides\n" - << "that the page range is omitted if the range argument is either -- or a\n" - << "valid file name and not a valid range.\n" - << "\n" - << "The usual behavior of --pages is to add all pages from the first file,\n" - << "then all pages from the second file, and so on. If the --collate option\n" - << "is specified, then pages are collated instead. In other words, qpdf takes\n" - << "the first page from the first file, the first page from the second file,\n" - << "and so on until it runs out of files; then it takes the second page from\n" - << "each file, etc. When a file runs out of pages, it is skipped until all\n" - << "specified pages are taken from all files.\n" - << "\n" - << "See the manual for examples and a discussion of additional subtleties.\n" - << "\n" - << "\n" - << "Overlay and Underlay Options\n" - << "----------------------------\n" - << "\n" - << "These options allow pages from another file to be overlaid or underlaid\n" - << "on the primary output. 
Overlaid pages are drawn on top of the destination\n" - << "page and may obscure the page. Underlaid pages are drawn below the\n" - << "destination page.\n" - << "\n" - << "{--overlay | --underlay } file\n" - " [ --password=password ]\n" - " [ --to=page-range ]\n" - " [ --from=[page-range] ]\n" - " [ --repeat=page-range ]\n" - " --\n" - << "\n" - << "For overlay and underlay, a file and optional password are specified, along\n" - << "with a series of optional page ranges. The default behavior is that each\n" - << "page of the overlay or underlay file is imposed on the corresponding page\n" - << "of the primary output until it runs out of pages, and any extra pages are\n" - << "ignored. The page range options all take page ranges in the same form as\n" - << "the --pages option. They have the following meanings:\n" - << "\n" - << " --to: the pages in the primary output to which overlay/underlay is\n" - << " applied\n" - << " --from: the pages from the overlay/underlay file that are used\n" - << " --repeat: pages from the overlay/underlay that are repeated after\n" - << " any \"from\" pages have been exhausted\n" - << "\n" - << "\n" - << "Embedded Files/Attachments Options\n" - << "----------------------------------\n" - << "\n" - << "These options can be used to work with embedded files, also known as\n" - << "attachments.\n" - << "\n" - << "--list-attachments show key and stream number for embedded files;\n" - << " combine with --verbose for more detailed information\n" - << "--show-attachment=key write the contents of the specified attachment to\n" - << " standard output as binary data\n" - << "--add-attachment file options --\n" - << " add or replace an attachment\n" - << "--remove-attachment=key remove the specified attachment; repeatable\n" - << "--copy-attachments-from file options --\n" - << " copy attachments from another file\n" - << "\n" - << "The \"key\" option is the unique name under which the attachment is registered\n" - << "within the PDF file. 
You can get this using the --list-attachments option. This\n" - << "is usually the same as the filename, but it doesn't have to be.\n" - << "\n" - << "Options for adding attachments:\n" - << "\n" - << " file path to the file to attach\n" - << " --key=key the name of this in the embedded files table;\n" - << " defaults to the last path element of file\n" - << " --filename=name the file name of the attachment; this is what is\n" - << " usually displayed to the user; defaults to the\n" - << " last path element of file\n" - << " --creationdate=date creation date in PDF format; defaults to the\n" - << " current time\n" - << " --moddate=date modification date in PDF format; defaults to the\n" - << " current time\n" - << " --mimetype=type/subtype mime type of attachment (e.g. application/pdf)\n" - << " --description=\"text\" attachment description\n" - << " --replace replace any existing attachment with the same key\n" - << "\n" - << "Options for copying attachments:\n" - << "\n" - << " file file whose attachments should be copied\n" - << " --password=password password to open the other file, if needed\n" - << " --prefix=prefix a prefix to insert in front of each key;\n" - << " required if needed to ensure each attachment\n" - << " has a unique key\n" - << "\n" - << "Date format: D:yyyymmddhhmmss where is either Z for UTC or a timezone\n" - << "offset in the form -hh'mm' or +hh'mm'.\n" - << "Examples: D:20210207161528-05'00', D:20210207211528Z\n" - << "\n" - << "\n" - << "Advanced Parsing Options\n" - << "------------------------\n" - << "\n" - << "These options control aspects of how qpdf reads PDF files. Mostly these are\n" - << "of use to people who are working with damaged files. 
There is little reason\n" - << "to use these options unless you are trying to solve specific problems.\n" - << "\n" - << "--suppress-recovery prevents qpdf from attempting to recover damaged files\n" - << "--ignore-xref-streams tells qpdf to ignore any cross-reference streams\n" - << "\n" - << "\n" - << "Advanced Transformation Options\n" - << "-------------------------------\n" - << "\n" - << "These transformation options control fine points of how qpdf creates\n" - << "the output file. Mostly these are of use only to people who are very\n" - << "familiar with the PDF file format or who are PDF developers.\n" - << "\n" - << "--stream-data=option controls transformation of stream data (below)\n" - << "--compress-streams=[yn] controls whether to compress streams on output\n" - << "--decode-level=option controls how to filter streams from the input\n" - << "--recompress-flate recompress streams already compressed with Flate\n" - << "--compression-level=n set zlib compression level; most effective with\n" - << " --recompress-flate --object-streams=generate\n" - << "--normalize-content=[yn] enables or disables normalization of content streams\n" - << "--object-streams=mode controls handing of object streams\n" - << "--preserve-unreferenced preserve unreferenced objects\n" - << "--remove-unreferenced-resources={auto,yes,no}\n" - << " whether to remove unreferenced page resources\n" - << "--preserve-unreferenced-resources\n" - << " synonym for --remove-unreferenced-resources=no\n" - << "--newline-before-endstream always put a newline before endstream\n" - << "--coalesce-contents force all pages' content to be a single stream\n" - << "--flatten-annotations=option\n" - << " incorporate rendering of annotations into page\n" - << " contents including those for interactive form\n" - << " fields; may also want --generate-appearances\n" - << "--generate-appearances generate appearance streams for form fields\n" - << "--optimize-images compress images with DCT (JPEG) when 
advantageous\n" - << "--oi-min-width=w do not optimize images whose width is below w;\n" - << " default is 128. Use 0 to mean no minimum\n" - << "--oi-min-height=h do not optimize images whose height is below h\n" - << " default is 128. Use 0 to mean no minimum\n" - << "--oi-min-area=a do not optimize images whose pixel count is below a\n" - << " default is 16,384. Use 0 to mean no minimum\n" - << "--externalize-inline-images convert inline images to regular images; by\n" - << " default, images of at least 1,024 bytes are\n" - << " externalized\n" - << "--ii-min-bytes=bytes specify minimum size of inline images to be\n" - << " converted to regular images\n" - << "--keep-inline-images exclude inline images from image optimization\n" - << "--remove-page-labels remove any page labels present in the output file\n" - << "--qdf turns on \"QDF mode\" (below)\n" - << "--linearize-pass1=file write intermediate pass of linearized file\n" - << " for debugging\n" - << "--min-version=version sets the minimum PDF version of the output file\n" - << "--force-version=version forces this to be the PDF version of the output file\n" - << "\n" - << "Options for --flatten-annotations are all, print, or screen. If the option\n" - << "is print, only annotations marked as print are included. If the option is\n" - << "screen, options marked as \"no view\" are excluded. Otherwise, annotations\n" - << "are flattened regardless of the presence of print or NoView flags. It is\n" - << "common for PDF files to have a flag set that appearance streams need to be\n" - << "regenerated. This happens when someone changes a form value with software\n" - << "that does not know how to render the new value. qpdf will not flatten form\n" - << "fields in files like this. 
If you get this warning, you have two choices:\n" - << "either use qpdf's --generate-appearances flag to tell qpdf to go ahead and\n" - << "regenerate appearances, or use some other tool to generate the appearances.\n" - << "qpdf does a pretty good job with most forms when only ASCII and \"Windows\n" - << "ANSI\" characters are used in form field values, but if your form fields\n" - << "contain other characters, rich text, or are other than left justified, you\n" - << "will get better results first saving with other software.\n" - << "\n" - << "Version numbers may be expressed as major.minor.extension-level, so 1.7.3\n" - << "means PDF version 1.7 at extension level 3.\n" - << "\n" - << "Values for stream data options:\n" - << "\n" - << " compress recompress stream data when possible (default)\n" - << " preserve leave all stream data as is\n" - << " uncompress uncompress stream data when possible\n" - << "\n" - << "Values for object stream mode:\n" - << "\n" - << " preserve preserve original object streams (default)\n" - << " disable don't write any object streams\n" - << " generate use object streams wherever possible\n" - << "\n" - << "When --compress-streams=n is specified, this overrides the default behavior\n" - << "of qpdf, which is to attempt compress uncompressed streams. Setting\n" - << "stream data mode to uncompress or preserve has the same effect.\n" - << "\n" - << "The --decode-level parameter may be set to one of the following values:\n" - << " none do not decode streams\n" - << " generalized decode streams compressed with generalized filters\n" - << " including LZW, Flate, and the ASCII encoding filters.\n" - << " specialized additionally decode streams with non-lossy specialized\n" - << " filters including RunLength\n" - << " all additionally decode streams with lossy filters\n" - << " including DCT (JPEG)\n" - << "\n" - << "In qdf mode, by default, content normalization is turned on, and the\n" - << "stream data mode is set to uncompress. 
QDF mode does not support\n" - << "linearized files. The --linearize flag disables qdf mode.\n" - << "\n" - << "Setting the minimum PDF version of the output file may raise the version\n" - << "but will never lower it. Forcing the PDF version of the output file may\n" - << "set the PDF version to a lower value than actually allowed by the file's\n" - << "contents. You should only do this if you have no other possible way to\n" - << "open the file or if you know that the file definitely doesn't include\n" - << "features not supported later versions.\n" - << "\n" - << "Testing, Inspection, and Debugging Options\n" - << "------------------------------------------\n" - << "\n" - << "These options can be useful for digging into PDF files or for use in\n" - << "automated test suites for software that uses the qpdf library.\n" - << "\n" - << "--deterministic-id generate deterministic /ID\n" - << "--static-id generate static /ID: FOR TESTING ONLY!\n" - << "--static-aes-iv use a static initialization vector for AES-CBC\n" - << " This is option is not secure! 
FOR TESTING ONLY!\n" - << "--no-original-object-ids suppress original object ID comments in qdf mode\n" - << "--show-encryption quickly show encryption parameters\n" - << "--show-encryption-key when showing encryption, reveal the actual key\n" - << "--check-linearization check file integrity and linearization status\n" - << "--show-linearization check and show all linearization data\n" - << "--show-xref show the contents of the cross-reference table\n" - << "--show-object=trailer|obj[,gen]\n" - << " show the contents of the given object\n" - << " --raw-stream-data show raw stream data instead of object contents\n" - << " --filtered-stream-data show filtered stream data instead of object contents\n" - << "--show-npages print the number of pages in the file\n" - << "--show-pages shows the object/generation number for each page\n" - << " --with-images also shows the object IDs for images on each page\n" - << "--check check file structure + encryption, linearization\n" - << "--json generate a json representation of the file\n" - << "--json-help describe the format of the json representation\n" - << "--json-key=key repeatable; prune json structure to include only\n" - << " specified keys. If absent, all keys are shown\n" - << "--json-object=trailer|[obj,gen]\n" - << " repeatable; include only specified objects in the\n" - << " \"objects\" section of the json. If absent, all\n" - << " objects are shown\n" - << "\n" - << "The json representation generated by qpdf is designed to facilitate\n" - << "processing of qpdf from other programming languages that have a hard\n" - << "time calling C++ APIs. Run qpdf --json-help for details on the format.\n" - << "The manual has more in-depth information about the json representation\n" - << "and certain compatibility guarantees that qpdf provides.\n" - << "\n" - << "The --raw-stream-data and --filtered-stream-data options are ignored\n" - << "unless --show-object is given. 
Either of these options will cause the\n" - << "stream data to be written to standard output.\n" - << "\n" - << "If --filtered-stream-data is given and --normalize-content=y is also\n" - << "given, qpdf will attempt to normalize the stream data as if it is a\n" - << "page content stream. This attempt will be made even if it is not a\n" - << "page content stream, in which case it will produce unusable results.\n" - << "\n" - << "Ordinarily, qpdf exits with a status of 0 on success or a status of 2\n" - << "if any errors occurred. If there were warnings but not errors, qpdf\n" - << "exits with a status of 3. If warnings would have been issued but --no-warn\n" - << "was given, an exit status of 3 is still used. If you want qpdf to exit\n" - << "with status 0 when there are warnings, use the --warning-exit-0 flag.\n" - << "When --no-warn and --warning-exit-0 are used together, the effect is for\n" - << "qpdf to completely ignore warnings. qpdf does not use exit status 1,\n" - << "since that is used by the shell if it can't execute qpdf.\n"; -} -#endif - void ArgParser::argJsonHelp() { diff --git a/libqpdf/qpdf/auto_job_help.hh b/libqpdf/qpdf/auto_job_help.hh new file mode 100644 index 00000000..81e8325b --- /dev/null +++ b/libqpdf/qpdf/auto_job_help.hh @@ -0,0 +1,822 @@ +// +// This file is automatically generated by generate_auto_job. +// Edits will be automatically overwritten if the build is +// run in maintainer mode. +// +static void add_help_1(QPDFArgParser& ap) +{ +ap.addHelpTopic("usage", "basic invocation", R"(Read a PDF file, apply transformations or modifications, and write +a new PDF file. + +Usage: qpdf infile [options] [outfile] + OR qpdf help-option + +- infile, options, and outfile may be in any order as long as infile + precedes outfile. 
+- Use --empty in place of an input file for a zero-page, empty input +- Use --replace-input in place of an output file to overwrite the + input file with the output +- outfile may be - to write to stdout; reading from stdin is not supported +- @filename is an argument file; each line is treated as a separate + command-line argument +- @- may be used to read arguments from stdin +- Later options may override earlier options if contradictory +)"); +ap.addOptionHelp("--empty", "usage", "empty input file", R"(Use in place of infile for an empty input. Especially useful +with --pages. +)"); +ap.addOptionHelp("--replace-input", "usage", "replace input with output", R"(Use in place of outfile to overwrite the input file with the output. +)"); +ap.addHelpTopic("exit-status", "meanings of qpdf's exit codes", R"(Meaning of exit codes: + +0: no errors or warnings +1: not used by qpdf but may be used by the shell if unable to invoke qpdf +2: errors detected +3: warnings detected, unless --warning-exit-0 is given +)"); +ap.addOptionHelp("--warning-exit-0", "exit-status", "exit 0 even with warnings", R"(Use exit status 0 instead of 3 when warnings are present. When +combined with --no-warn, warnings are completely ignored. +)"); +ap.addHelpTopic("completion", "shell completion", R"(Shell completion is supported with bash and zsh. Use +eval $(qpdf --completion-bash) or eval $(qpdf --completion-zsh) +to enable. The QPDF_EXECUTABLE environment variable overrides the +path to qpdf that these commands output. +)"); +ap.addOptionHelp("--completion-bash", "completion", "enable bash completion", R"(Output a command that enables bash completion +)"); +ap.addOptionHelp("--completion-zsh", "completion", "enable zsh completion", R"(Output a command that enables zsh completion +)"); +ap.addHelpTopic("help", "information about qpdf", R"(Help options provide some information about qpdf itself. Help +options are only valid as the first and only command-line argument. 
+)"); +ap.addOptionHelp("--help", "help", "provide help", R"(Display help information. Run qpdf --help for information about +how to get help on various topics. +)"); +ap.addOptionHelp("--version", "help", "show qpdf version", R"(Display the version of qpdf. +)"); +ap.addOptionHelp("--copyright", "help", "show copyright information", R"(Display copyright and license information. +)"); +ap.addOptionHelp("--show-crypto", "help", "show available crypto providers", R"(Show a list of available crypto providers, one per line. The +default provider is shown first. +)"); +ap.addHelpTopic("general", "general options", R"(General options control qpdf's behavior in ways that are not +directly related to the operation it is performing. +)"); +ap.addOptionHelp("--password", "general", "specify password", R"(--password=password + +Specify a password for an encrypted, password-protected file. +Not needed for encrypted files with no password. +)"); +ap.addOptionHelp("--password-file", "general", "read password from a file", R"(--password-file=filename + +The first line of the specified file is used as the password. +This is used in place of the --password option. +)"); +ap.addOptionHelp("--verbose", "general", "print additional information", R"(Output additional information about various things qpdf is +doing, including information about files created and operations +performed. +)"); +ap.addOptionHelp("--progress", "general", "show progress when writing", R"(Indicate progress when writing files. +)"); +ap.addOptionHelp("--no-warn", "general", "suppress printing warning messages", R"(Suppress printing warning messages. If warnings were +encountered, qpdf still exits with exit status 3. +Use --warning-exit-0 with --no-warn to completely ignore +warnings. 
+)"); +} +static void add_help_2(QPDFArgParser& ap) +{ +ap.addOptionHelp("--deterministic-id", "general", "generate ID deterministically", R"(Generate a secure, random document ID only using static +information, such as the page contents. Does not use the file's +name or attributes or the current time. +)"); +ap.addOptionHelp("--allow-weak-crypto", "general", "allow insecure cryptographic algorithms", R"(Allow creation of files with weak cryptographic algorithms. This +option is necessary to create 40-bit files or 128-bit files that +use RC4 encryption. +)"); +ap.addOptionHelp("--keep-files-open", "general", "manage keeping multiple files open", R"(--keep-files-open=[yn] + +When qpdf needs to work with many files, as when merging large +numbers of files, explicitly indicate whether files should be +kept open. The default behavior is to determine this based on +the number of files. +)"); +ap.addOptionHelp("--keep-files-open-threshold", "general", "set threshold for --keep-files-open", R"(--keep-files-open-threshold=count + +Set the threshold used by --keep-files-open, overriding the +default value of 200. +)"); +ap.addHelpTopic("advanced-control", "tweak qpdf's behavior", R"(Advanced control options control qpdf's behavior in ways that would +normally never be needed by a user but that may be useful to +developers or people investigating problems with specific files. +)"); +ap.addOptionHelp("--password-is-hex-key", "advanced-control", "provide hex-encoded encryption key", R"(Provide the underlying file encryption key as a hex-encoded +string rather than supplying a password. This is an expert +option. +)"); +ap.addOptionHelp("--suppress-password-recovery", "advanced-control", "don't try different password encodings", R"(Suppress qpdf's behavior of attempting different encodings of a +password that contains non-ASCII Unicode characters if the first +attempt doesn't succeed. 
+)"); +ap.addOptionHelp("--password-mode", "advanced-control", "tweak how qpdf encodes passwords", R"(--password-mode={mode} + +Fine-tune how qpdf controls encoding of Unicode passwords. Valid +options are auto, bytes, hex-bytes, and unicode. +)"); +ap.addOptionHelp("--suppress-recovery", "advanced-control", "suppress error recovery", R"(Avoid attempting to recover when errors are found in a file's +cross reference table or stream lengths. +)"); +ap.addOptionHelp("--ignore-xref-streams", "advanced-control", "use xref tables rather than streams", R"(Ignore any cross-reference streams in the file, falling back to +cross-reference tables or triggering document recovery. +)"); +ap.addHelpTopic("transformation", "make structural PDF changes", R"(The options below tell qpdf to apply transformations that change +the structure without changing the content. +)"); +ap.addOptionHelp("--linearize", "transformation", "linearize (web-optimize) output", R"(Create linearized (web-optimized) output files. +)"); +ap.addOptionHelp("--encrypt", "transformation", "start encryption options", R"(--encrypt user owner key-length [ options ] -- + +Run qpdf --help=encryption for details. +)"); +ap.addOptionHelp("--decrypt", "transformation", "remove encryption from input file", R"(Create an unencrypted output file even if the input file was +encrypted. Normally qpdf preserves whatever encryption was +present on the input file. This option overrides that behavior. +)"); +ap.addOptionHelp("--copy-encryption", "transformation", "copy another file's encryption details", R"(--copy-encryption=file + +Copy encryption details from the specified file instead of +preserving the input file's encryption. Use --encryption-file-password +to specify the encryption file's password. 
+)"); +ap.addOptionHelp("--encryption-file-password", "transformation", "supply password for --copy-encryption", R"(--encryption-file-password=password + +If the file named in --copy-encryption requires a password, use +this option to specify the password. +)"); +ap.addOptionHelp("--qdf", "transformation", "enable viewing PDF code in a text editor", R"(Create a PDF file suitable for viewing in a text editor and even +editing. This is to edit the PDF code, not the page contents. +All streams that can be uncompressed are uncompressed, and +content streams are normalized, among other changes. The +companion tool "fix-qdf" can be used to repair hand-edited QDF +files. QDF is a feature specific to the qpdf tool. There is a +chapter about it in the manual. +)"); +ap.addOptionHelp("--no-original-object-ids", "transformation", "omit original object ID in qdf", R"(Omit comments in a QDF file indicating the object ID an object +had in the original file. +)"); +ap.addOptionHelp("--compress-streams", "transformation", "compress uncompressed streams", R"(--compress-streams=[yn] + +Setting --compress-streams=n prevents qpdf from compressing +uncompressed streams. This can be useful if you are leaving some +streams uncompressed intentionally. +)"); +} +static void add_help_3(QPDFArgParser& ap) +{ +ap.addOptionHelp("--decode-level", "transformation", "control which streams to uncompress", R"(--decode-level=option + +When uncompressing streams, control which types of compression +schemes should be uncompressed: +- none: don't uncompress anything +- generalized: uncompress streams compressed with a + general-purpose compression algorithm. This is the default. +- specialized: in addition to generalized, also uncompress + streams compressed with a special-purpose but non-lossy + compression scheme +- all: in addition to specialized, uncompress streams compressed + with lossy compression schemes like JPEG (DCT) +qpdf does not know how to uncompress all compression schemes. 
+)"); +ap.addOptionHelp("--stream-data", "transformation", "control stream compression", R"(--stream-data=option + +This option controls how streams are compressed in the output. +It is less granular than the newer options, --compress-streams +and --decode-level. + +Options: +- compress: same as --compress-streams=y --decode-level=generalized +- preserve: same as --compress-streams=n --decode-level=none +- uncompress: same as --compress-streams=n --decode-level=generalized +)"); +ap.addOptionHelp("--recompress-flate", "transformation", "uncompress and recompress flate", R"(The default generalized compression scheme used by PDF is flate, +which is the same as used by zip and gzip. Usually qpdf just +leaves these alone. This option tells qpdf to uncompress and +recompress streams compressed with flate. This can be useful +when combined with --compression-level. +)"); +ap.addOptionHelp("--compression-level", "transformation", "set compression level for flate", R"(--compression-level=level + +Set a compression level from 1 (least, fastest) to 9 (most, +slowest) when compressing files with flate (used in zip and +gzip), which is the default compression for most PDF files. +You need --recompress-flate with this option if you want to +change already compressed streams. +)"); +ap.addOptionHelp("--normalize-content", "transformation", "fix newlines in content streams", R"(--normalize-content=[yn] + +Normalize newlines to UNIX-style newlines in PDF content +streams, which is useful for viewing them in a programmer's text +editor across multiple platforms. This is also turned on by +--qdf. +)"); +ap.addOptionHelp("--object-streams", "transformation", "control use of object streams", R"(--object-streams=mode + +Control what qpdf does regarding object streams. 
Options: +- preserve: preserve original object streams, if any (the default) +- disable: create output files with no object streams +- generate: create object streams, and compress objects when possible +)"); +ap.addOptionHelp("--preserve-unreferenced", "transformation", "preserve unreferenced objects", R"(Preserve all objects from the input even if not referenced. +)"); +ap.addOptionHelp("--remove-unreferenced-resources", "transformation", "remove unreferenced page resources", R"(--remove-unreferenced-resources=option + +Remove from a page's resource dictionary any resources that are +not referenced in the page's contents. Options: "auto" +(default), "yes", "no". +)"); +ap.addOptionHelp("--preserve-unreferenced-resources", "transformation", "use --remove-unreferenced-resources=no", R"(Synonym for --remove-unreferenced-resources=no. Use that instead. +)"); +ap.addOptionHelp("--newline-before-endstream", "transformation", "force a newline before endstream", R"(Force an extra newline before endstream. Using this option enables +qpdf to preserve PDF/A when rewriting such files. +)"); +ap.addOptionHelp("--coalesce-contents", "transformation", "combine content streams", R"(If a page has an array of content streams, concatenate them into +a single content stream. +)"); +ap.addOptionHelp("--externalize-inline-images", "transformation", "convert inline to regular images", R"(Convert inline images to regular images. +)"); +ap.addOptionHelp("--ii-min-bytes", "transformation", "set minimum size for --externalize-inline-images", R"(--ii-min-bytes=size-in-bytes + +Don't externalize inline images smaller than this size. The +default is 1,024. Use 0 for no minimum. +)"); +ap.addOptionHelp("--min-version", "transformation", "set minimum PDF version", R"(--min-version=version + +Force the PDF version of the output to be at least the +specified version. 
+)"); +ap.addOptionHelp("--force-version", "transformation", "set output PDF version", R"(--force-version=version + +Force the output PDF file's PDF version header to be the specified +value, even if the file uses features that may not be available +in that version. +)"); +ap.addHelpTopic("page-ranges", "page range syntax", R"(A full description of the page range syntax, with examples, can be +found in the manual. Summary: + +- a,b,c pages a, b, and c +- a-b pages a through b inclusive; if a > b, this counts down +- r<n> where <n> represents a number is the <n>th page from the end +- z the last page, same as r1 + +You can append :even or :odd to select every other page from the +resulting set of pages, where :odd starts with the first page and +:even starts with the second page. These are odd and even pages +from the resulting set, not based on the original page numbers. +)"); +ap.addHelpTopic("modification", "change parts of the PDF", R"(Modification options make systematic changes to certain parts of +the PDF, causing the PDF to render differently from the original. +)"); +ap.addOptionHelp("--pages", "modification", "begin page selection", R"(--pages file [ --password=password ] [ page-range ] [ ... ] -- + +Run qpdf --help=page-selection for details. +)"); +ap.addOptionHelp("--collate", "modification", "collate with --pages", R"(--collate=n + +Collate rather than concatenate pages specified with --pages. +With a numeric argument, collate in groups of n. The default +is 1. Run qpdf --help=page-selection for additional details. +)"); +} +static void add_help_4(QPDFArgParser& ap) +{ +ap.addOptionHelp("--split-pages", "modification", "write pages to separate files", R"(--split-pages=[n] + +This option causes qpdf to create separate output files for each +page or group of pages rather than a single output file. 
+ +File names are generated from the specified output file as follows: + +- If the string %d appears in the output file name, it is replaced with a + zero-padded page range starting from 1 +- Otherwise, if the output file name ends in .pdf (case insensitive), a + zero-padded page range, preceded by a dash, is inserted before the file + extension +- Otherwise, the file name is appended with a zero-padded page range + preceded by a dash. + +Page ranges are single page numbers for single-page groups or first-last +for multi-page groups. +)"); +ap.addOptionHelp("--overlay", "modification", "begin overlay options", R"(--overlay file [ options ] -- + +Overlay pages from another file on the output. +Run qpdf --help=overlay-underlay for details. +)"); +ap.addOptionHelp("--underlay", "modification", "begin underlay options", R"(--underlay file [ options ] -- + +Underlay pages from another file on the output. +Run qpdf --help=overlay-underlay for details. +)"); +ap.addOptionHelp("--flatten-rotation", "modification", "remove rotation from page dictionary", R"(Rotate a page using content commands instead of page-level +metadata. This can be useful if a broken PDF viewer fails to +properly consider page rotation metadata. +)"); +ap.addOptionHelp("--flatten-annotations", "modification", "push annotations into content", R"(--flatten-annotations=option + +Push page annotations into the content streams. This may be +necessary in some cases when printing or splitting files. +Options: "all", "print", "screen". +)"); +ap.addOptionHelp("--rotate", "modification", "rotate pages", R"(--rotate=[+|-]angle[:page-range] + +Rotate specified pages by multiples of 90 degrees specifying +either absolute or relative angles. "angle" may be 0, 90, 180, +or 270. You almost always want to use +angle or -angle rather +than just angle, as discussed in the manual. Run +qpdf --help=page-ranges for help with page ranges. 
+)"); +ap.addOptionHelp("--generate-appearances", "modification", "generate appearances for form fields", R"(PDF form fields consist of values and appearances, which may be +inconsistent with each other if a form field value has been +modified without updating its appearance. This option tells qpdf +to generate new appearance streams. There are some limitations, +which are discussed in the manual. +)"); +ap.addOptionHelp("--optimize-images", "modification", "use efficient compression for images", R"(Attempt to use DCT (JPEG) compression for images that fall +within certain constraints as long as doing so decreases the +size in bytes of the image. See also help for the following +options: + --oi-min-width + --oi-min-height + --oi-min-area + --keep-inline-images + +The --verbose flag is useful with this option. +)"); +ap.addOptionHelp("--oi-min-width", "modification", "minimum width for --optimize-images", R"(--oi-min-width=width + +Don't optimize images whose width is below the specified value. +)"); +ap.addOptionHelp("--oi-min-height", "modification", "minimum height for --optimize-images", R"(--oi-min-height=height + +Don't optimize images whose height is below the specified value. +)"); +ap.addOptionHelp("--oi-min-area", "modification", "minimum area for --optimize-images", R"(--oi-min-area=area-in-pixels + +Don't optimize images whose area in pixels is below the specified value. +)"); +ap.addOptionHelp("--keep-inline-images", "modification", "exclude inline images from optimization", R"(Prevent inline images from being considered by --optimize-images. +)"); +ap.addOptionHelp("--remove-page-labels", "modification", "remove page labels (numbers)", R"(Exclude page labels (explicit page numbers) from the output file. +)"); +ap.addHelpTopic("encryption", "create encrypted files", R"(Create encrypted files. Usage: + +--encrypt user-password owner-password key-length [ options ] -- + +Either or both of user-password and owner-password may be empty +strings. 
key-length may be 40, 128, or 256. Encryption options are +terminated by "--" by itself. + +40-bit encryption is insecure, as is 128-bit encryption without +AES. Use 256-bit encryption unless you have a specific reason to +use an insecure format, such as testing or compatibility with very +old viewers. You must use the --allow-weak-crypto flag to create +encrypted files that use insecure cryptographic algorithms. The +--allow-weak-crypto flag appears outside of --encrypt ... -- +(before --encrypt or after --). + +Available options vary by key length. Not all readers respect all +restrictions. Different PDF readers respond differently to various +combinations of options. Sometimes a PDF viewer may show you +restrictions that differ from what you selected. This is probably +not a bug in qpdf. + +Options for 40-bit only: + --annotate=[yn] restrict comments, filling forms, and signing + --extract=[yn] restrict text/graphic extraction + --modify=[yn] restrict document modification + --print=[yn] restrict printing + +Options for 128-bit or 256-bit: + --accessibility=[yn] restrict accessibility (usually ignored) + --annotate=[yn] restrict commenting/filling form fields + --assemble=[yn] restrict document assembly + --extract=[yn] restrict text/graphic extraction + --form=[yn] restrict filling form fields + --modify-other=[yn] restrict other modifications + --modify=modify-opt control modify access by level + --print=print-opt control printing access + --cleartext-metadata prevent encryption of metadata + +For 128-bit only: + --use-aes=[yn] indicates whether to use AES encryption + --force-V4 forces use of V=4 encryption handler + +For 256-bit only: + --force-R5 forces use of deprecated R=5 encryption + --allow-insecure allow user password with empty owner password + +Values for print-opt: + none disallow printing + low allow only low-resolution printing + full allow full printing + +Values for modify-opt: + none allow no modifications + assembly allow document assembly only + 
form assembly + filling in form fields and signing + annotate form + commenting and modifying forms + all allow full document modification +)"); +ap.addOptionHelp("--accessibility", "encryption", "restrict document accessibility", R"(--accessibility=[yn] + +This option is ignored except with very old encryption formats. +The current PDF specification does not allow restriction of +document accessibility. This option is not available with 40-bit +encryption. +)"); +ap.addOptionHelp("--annotate", "encryption", "restrict document annotation", R"(--annotate=[yn] + +Enable/disable modifying annotations including making comments +and filling in form fields. For 128-bit and 256-bit encryption, +this also enables editing, creating, and deleting form fields +unless --modify-other=n or --modify=none is also specified. +)"); +ap.addOptionHelp("--assemble", "encryption", "restrict document assembly", R"(--assemble=[yn] + +Enable/disable document assembly (rotation and reordering of +pages). This option is not available with 40-bit encryption. +)"); +ap.addOptionHelp("--extract", "encryption", "restrict text/graphic extraction", R"(--extract=[yn] + +Enable/disable text/graphic extraction for purposes other than +accessibility. +)"); +ap.addOptionHelp("--form", "encryption", "restrict form filling", R"(--form=[yn] + +Enable/disable whether filling form fields is allowed even if +modification of annotations is disabled. This option is not +available with 40-bit encryption. +)"); +} +static void add_help_5(QPDFArgParser& ap) +{ +ap.addOptionHelp("--modify-other", "encryption", "restrict other modifications", R"(--modify-other=[yn] + +Enable/disable modifications not controlled by --assemble, +--annotate, or --form. --modify-other=n is implied by any of the +other --modify options. This option is not available with 40-bit +encryption. 
+)"); +ap.addOptionHelp("--modify", "encryption", "restrict document modification", R"(--modify=modify-opt + +For 40-bit files, modify-opt may only be y or n and controls all +aspects of document modification. + +For 128-bit and 256-bit encryption, modify-opt values allow +enabling and disabling levels of restriction in a manner similar +to how some PDF creation tools do it. modify-opt values map to +other combinations of options as follows: + +all: allow full modification (the default) +annotate: --modify-other=n +form: --modify-other=n --annotate=n +assembly: --modify-other=n --annotate=n --form=n +none: --modify-other=n --annotate=n --form=n --assemble=n +)"); +ap.addOptionHelp("--print", "encryption", "restrict printing", R"(--print=print-opt + +Control what kind of printing is allowed. For 40-bit encryption, +print-opt may only be y or n and enables or disables all +printing. For 128-bit and 256-bit encryption, print-opt may have +the following values: + +none: disallow printing +low: allow low-resolution printing only +full: allow full printing (the default) +)"); +ap.addOptionHelp("--cleartext-metadata", "encryption", "don't encrypt metadata", R"(If specified, don't encrypt document metadata even when +encrypting the rest of the document. This option is not +available with 40-bit encryption. +)"); +ap.addOptionHelp("--use-aes", "encryption", "use AES with 128-bit encryption", R"(--use-aes=[yn] + +Enables/disables use of the more secure AES encryption with +128-bit encryption. Specifying --use-aes=y forces the PDF +version to be at least 1.6. This option is only available with +128-bit encryption. The default is "n" for compatibility +reasons. Use 256-bit encryption instead. +)"); +ap.addOptionHelp("--allow-insecure", "encryption", "allow empty owner passwords", R"(Allow creation of PDF files with empty owner passwords and +non-empty user passwords when using 256-bit encryption. 
+)"); +ap.addOptionHelp("--force-V4", "encryption", "force V=4 in encryption dictionary", R"(This option is for testing and is never needed in practice since +qpdf does this automatically when needed. +)"); +ap.addOptionHelp("--force-R5", "encryption", "use unsupported R=5 encryption", R"(Use an undocumented, unsupported, deprecated encryption +algorithm that existed only in Acrobat version IX. This option +should not be used except for compatibility testing. +)"); +ap.addHelpTopic("page-selection", "select pages from one or more files", R"(Use the --pages option to select pages from multiple files. Usage: + +qpdf in.pdf --pages input-file [ --password=password ] [ page-range ] \ + [ ... ] -- out.pdf + +Between --pages and the -- that terminates the pages option, repeat +the following: + +filename [ --password=password ] [ page-range ] + +Document-level information, such as outlines, tags, etc., is taken +from in.pdf and is preserved in out.pdf. You can use --empty in place +of an input file to start from an empty file and just copy pages +equally from all files. You can use "." as a shorthand for the +primary input file (if not --empty). In the above example, "." +would refer to in.pdf. + +Use --password=password to specify the password for a +password-protected input file. If the same input file is used more +than once, you only need to supply the password the first time. If +the page range is omitted, all pages are selected. + +Run qpdf --help=page-ranges for help with page ranges. + +Use --collate=n to cause pages to be collated in groups of n pages +(default 1) instead of concatenating the input. + +Examples: + +- Start with in.pdf and append all pages from a.pdf and the even + pages from b.pdf, and write the output to out.pdf. Document-level + information from in.pdf is retained. Note the use of "." to refer + to in.pdf. + + qpdf in.pdf --pages . 
a.pdf b.pdf:even -- out.pdf + +- Take all the pages from a.pdf, all the pages from b.pdf in + reverse, and only pages 3 and 6 from c.pdf and write the result + to out.pdf. Use password "x" to open b.pdf: + + qpdf --empty --pages a.pdf b.pdf --password=x z-1 c.pdf 3,6 + +More examples are in the manual. +)"); +ap.addHelpTopic("overlay-underlay", "overlay/underlay pages from other files", R"(These options allow pages from another file to be overlaid or +underlaid on the primary output. Overlaid pages are drawn on top of +the destination page and may obscure the page. Underlaid pages are +drawn below the destination page. Usage: + +{--overlay | --underlay } file + [ --password=password ] + [ --to=page-range ] + [ --from=[page-range] ] + [ --repeat=page-range ] + -- + +Note the use of "--" by itself to terminate overlay/underlay options. + +For overlay and underlay, a file and optional password are specified, along +with a series of optional page ranges. The default behavior is that each +page of the overlay or underlay file is imposed on the corresponding page +of the primary output until it runs out of pages, and any extra pages are +ignored. You can also give a page range with --repeat to cause +those pages to be repeated after the original pages are exhausted. + +Run qpdf --help=page-ranges for help with page ranges. +)"); +ap.addOptionHelp("--to", "overlay-underlay", "destination pages for underlay/overlay", R"(--to=page-range + +Specify the range of pages in the primary output to apply +overlay/underlay to. See qpdf --help=page-ranges for help with +the page range syntax. +)"); +ap.addOptionHelp("--from", "overlay-underlay", "source pages for underlay/overlay", R"(--from=[page-range] + +Specify pages from the overlay/underlay file that are applied to +the destination pages. See qpdf --help=page-ranges for help +with the page range syntax. The page range may be omitted +if --repeat is used. 
+)"); +ap.addOptionHelp("--repeat", "overlay-underlay", "overlay/underlay pages to repeat", R"(--repeat=page-range + +Specify pages from the overlay/underlay that are repeated after +"from" pages have been exhausted. See qpdf --help=page-ranges +for help with the page range syntax. +)"); +ap.addHelpTopic("attachments", "work with embedded files", R"(It is possible to list, add, or delete embedded files (also known +as attachments) and to copy attachments from other files. See help +on individual options for details. Run qpdf --help=add-attachment +for additional details about adding attachments. +)"); +ap.addOptionHelp("--list-attachments", "attachments", "list embedded files", R"(Show the key and stream number for each embedded file. Combine +with --verbose for more detailed information. +)"); +ap.addOptionHelp("--show-attachment", "attachments", "export an embedded file", R"(--show-attachment=key + +Write the contents of the specified attachment to standard +output as binary data. Get the key with --list-attachments. +)"); +ap.addOptionHelp("--add-attachment", "attachments", "start add attachment options", R"(--add-attachment file options -- + +The --add-attachment flag and its options may be repeated to add +multiple attachments. Run qpdf --help=add-attachment for details. +)"); +ap.addOptionHelp("--remove-attachment", "attachments", "remove an embedded file", R"(--remove-attachment=key + +Remove an embedded file using its key. Get the key with +--list-attachments. +)"); +ap.addOptionHelp("--copy-attachments-from", "attachments", "start copy attachment options", R"(--copy-attachments-from file options -- + +The --copy-attachments-from flag and its options may be repeated +to copy attachments from multiple files. Run +qpdf --help=copy-attachments for details. 
+)"); +} +static void add_help_6(QPDFArgParser& ap) +{ +ap.addHelpTopic("pdf-dates", "PDF date format", R"(When a date is required, the date should conform to the PDF date +format specification, which is "D:yyyymmddhhmmssz" where "z" is +either literally upper case "Z" for UTC or a timezone offset in +the form "-hh'mm'" or "+hh'mm'". Negative timezone offsets indicate +time before UTC. Positive offsets indicate how far after. For +example, US Eastern Standard Time (America/New_York) is "-05'00'", +and Indian Standard Time (Asia/Calcutta) is "+05'30'". + +Examples: +- D:20210207161528-05'00' February 7, 2021 at 4:15:28 p.m. +- D:20210207211528Z February 7, 2021 at 21:15:28 UTC +)"); +ap.addHelpTopic("add-attachment", "attach (embed) files", R"(The options listed below appear between --add-attachment and its +terminating "--". +)"); +ap.addOptionHelp("--key", "add-attachment", "specify attachment key", R"(--key=key + +Specify the key to use for the attachment in the embedded files +table. It defaults to the last element of the attached file's +filename. +)"); +ap.addOptionHelp("--filename", "add-attachment", "set attachment's displayed filename", R"(--filename=name + +Specify the filename to be used for the attachment. This is what +is usually displayed to the user and is the name most graphical +PDF viewers will use when saving a file. It defaults to the last +element of the attached file's filename. +)"); +ap.addOptionHelp("--creationdate", "add-attachment", "set attachment's creation date", R"(--creationdate=date + +Specify the attachment's creation date in PDF format; defaults +to the current time. Run qpdf --help=pdf-dates for information +about the date format. +)"); +ap.addOptionHelp("--moddate", "add-attachment", "set attachment's modification date", R"(--moddate=date + +Specify the attachment's modification date in PDF format; +defaults to the current time. Run qpdf --help=pdf-dates for +information about the date format. 
+)"); +ap.addOptionHelp("--mimetype", "add-attachment", "attachment mime type (e.g. application/pdf)", R"(--mimetype=type/subtype + +Specify the mime type for the attachment, such as text/plain, +application/pdf, image/png, etc. +)"); +ap.addOptionHelp("--description", "add-attachment", "set attachment's description", R"(--description="text" + +Supply descriptive text for the attachment, displayed by some +PDF viewers. +)"); +ap.addOptionHelp("--replace", "add-attachment", "replace attachment with same key", R"(Indicate that any existing attachment with the same key should +be replaced by the new attachment. Otherwise, qpdf gives an +error if an attachment with that key is already present. +)"); +ap.addHelpTopic("copy-attachments", "copy attachments from another file", R"(The options listed below appear between --copy-attachments-from and +its terminating "--". + +To copy attachments from a password-protected file, use +the --password option after the file name. +)"); +ap.addOptionHelp("--prefix", "copy-attachments", "key prefix for copying attachments", R"(--prefix=prefix + +Prepend a prefix to each key; may be needed if there are +duplicate attachment keys. This affects the key only, not the +file name. +)"); +ap.addOptionHelp("--is-encrypted", "copy-attachments", "silently test whether a file is encrypted", R"(Silently exit with a code indicating the file's encryption status: + +0: the file is encrypted +1: not used +2: the file is not encrypted + +This can be used with password-protected files even if you don't +know the password. 
+)"); +ap.addOptionHelp("--requires-password", "copy-attachments", "silently test a file's password", R"(Silently exit with a code indicating the file's password status: + +0: a password, other than as supplied, is required +1: not used +2: the file is not encrypted +3: the file is encrypted, and correct password (if any) has been supplied +)"); +ap.addOptionHelp("--check", "copy-attachments", "partially check whether PDF is valid", R"(Check the structure of the PDF file as well as a number of other +aspects of the file, and write information about the file to +standard output. Note that qpdf does not perform any validation +of the actual PDF page content or semantic correctness of the +PDF file. It merely checks that the PDF file is syntactically +valid. +)"); +ap.addOptionHelp("--show-encryption", "copy-attachments", "information about encrypted files", R"(Show document encryption parameters. Also show the document's +user password if the owner password is given and the file was +encrypted using older encryption formats that allow user +password recovery. +)"); +ap.addOptionHelp("--show-encryption-key", "copy-attachments", "show key with --show-encryption", R"(When used with --show-encryption, causes the underlying +encryption key to be displayed. +)"); +ap.addOptionHelp("--check-linearization", "copy-attachments", "check linearization tables", R"(Check to see whether a file is linearized and, if so, whether +the linearization hint tables are correct. +)"); +ap.addOptionHelp("--show-linearization", "copy-attachments", "show linearization hint tables", R"(Check and display all data in the linearization hint tables. +)"); +ap.addOptionHelp("--show-xref", "copy-attachments", "show cross reference data", R"(Show the contents of the cross-reference table or stream (object +locations in the file) in a human-readable form. This is +especially useful for files with cross-reference streams, which +are stored in a binary format. 
+)"); +} +static void add_help_7(QPDFArgParser& ap) +{ +ap.addOptionHelp("--show-object", "copy-attachments", "show contents of an object", R"(--show-object=trailer|obj[,gen] + +Show the contents of the given object. This is especially useful +for inspecting objects that are inside of object streams (also +known as "compressed objects"). +)"); +ap.addOptionHelp("--raw-stream-data", "copy-attachments", "show raw stream data", R"(When used with --show-object, if the object is a stream, write +the raw (compressed) binary stream data to standard output +instead of the object's contents. See also +--filtered-stream-data. +)"); +ap.addOptionHelp("--filtered-stream-data", "copy-attachments", "show filtered stream data", R"(When used with --show-object, if the object is a stream, write +the filtered (uncompressed, potentially binary) stream data to +standard output instead of the object's contents. See also +--raw-stream-data. +)"); +ap.addOptionHelp("--show-npages", "copy-attachments", "show number of pages", R"(Print the number of pages in the input file on a line by itself. +Useful for scripts. +)"); +ap.addOptionHelp("--show-pages", "copy-attachments", "display page dictionary information", R"(Show the object and generation number for each page dictionary +object and for each content stream associated with the page. +)"); +ap.addOptionHelp("--with-images", "copy-attachments", "include image details with --show-pages", R"(When used with --show-pages, also shows the object and +generation numbers for the image objects on each page. +)"); +ap.addHelpTopic("json", "JSON output for PDF information", R"(Show information about the PDF file in JSON format. Please see the +JSON chapter in the qpdf manual for details. +)"); +ap.addOptionHelp("--json", "json", "show file in json format", R"(Generate a JSON representation of the file. This is described in +depth in the JSON section of the manual. 
+)"); +ap.addOptionHelp("--json-help", "json", "show format of json output", R"(Describe the format of the JSON output. +)"); +ap.addOptionHelp("--json-key", "json", "restrict which keys are in json output", R"(--json-key=key + +This option is repeatable. If given, only the specified +top-level keys will be included in the JSON output. Otherwise, +all keys will be included. +)"); +ap.addOptionHelp("--json-object", "json", "restrict which objects are in JSON", R"(--json-object=trailer|obj[,gen] + +This option is repeatable. If given, only specified objects will +be shown in the "objects" key of the JSON output. Otherwise, all +objects will be shown. +)"); +ap.addHelpTopic("testing", "options for testing or debugging", R"(The options below are useful when writing automated test code that +includes files created by qpdf or when testing qpdf itself. +)"); +ap.addOptionHelp("--static-id", "testing", "use a fixed document ID", R"(Use a fixed value for the document ID. This is intended for +testing only. Never use it for production files. See also +qpdf --help=--deterministic-id. +)"); +ap.addOptionHelp("--static-aes-iv", "testing", "use a fixed AES vector", R"(Use a static initialization vector for AES-CBC. This is intended +for testing only so that output files can be reproducible. Never +use it for production files. This option is not secure since it +significantly weakens the encryption. +)"); +ap.addOptionHelp("--linearize-pass1", "testing", "save pass 1 of linearization", R"(--linearize-pass1=file + +Write the first pass of linearization to the named file. The +resulting file is not a valid PDF file. This option is useful only +for debugging qpdf. 
+)"); +} +static void add_help(QPDFArgParser& ap) +{ + add_help_1(ap); + add_help_2(ap); + add_help_3(ap); + add_help_4(ap); + add_help_5(ap); + add_help_6(ap); + add_help_7(ap); +ap.addHelpFooter("For detailed help, visit the qpdf manual: https://qpdf.readthedocs.io\n"); +} + diff --git a/libqpdf/qpdf/auto_job_init.hh b/libqpdf/qpdf/auto_job_init.hh index b19b2cc9..3d7cdd7b 100644 --- a/libqpdf/qpdf/auto_job_init.hh +++ b/libqpdf/qpdf/auto_job_init.hh @@ -162,4 +162,3 @@ this->ap.copyFromOtherTable("annotate", "128-bit encryption"); this->ap.copyFromOtherTable("form", "128-bit encryption"); this->ap.copyFromOtherTable("modify-other", "128-bit encryption"); this->ap.copyFromOtherTable("modify", "128-bit encryption"); -this->ap.addHelpFooter("For detailed help, visit the qpdf manual: https://qpdf.readthedocs.io\n"); diff --git a/libtests/qtest/arg_parser/help-all.out b/libtests/qtest/arg_parser/help-all.out index 432d4afb..3f1d0d16 100644 --- a/libtests/qtest/arg_parser/help-all.out +++ b/libtests/qtest/arg_parser/help-all.out @@ -6,7 +6,7 @@ Topics: baaa: Baaa Options quack: Quack Options -== topic baaa (Baaa Options) == +== baaa (Baaa Options) == Ewe can do sheepish things. For example, ewe can add more ram to your computer. @@ -15,15 +15,15 @@ Related options: --ewe: just for ewe --ram: curly horns -== topic quack (Quack Options) == +== quack (Quack Options) == Just put stuff after quack to get a count at the end. -== option --ewe (just for ewe) == +== --ewe (just for ewe) == You are not a ewe. 
-== option --ram (curly horns) == +== --ram (curly horns) == curly horns diff --git a/manual/.gitignore b/manual/.gitignore new file mode 100644 index 00000000..bee8a64b --- /dev/null +++ b/manual/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/manual/_ext/qpdf.py b/manual/_ext/qpdf.py new file mode 100644 index 00000000..032aa86c --- /dev/null +++ b/manual/_ext/qpdf.py @@ -0,0 +1,125 @@ +from collections import defaultdict +from operator import itemgetter +import re + +from sphinx import addnodes +from sphinx.directives import ObjectDescription +from sphinx.domains import Domain, Index +from sphinx.roles import XRefRole +from sphinx.util.nodes import make_refnode + +# Reference: +# https://www.sphinx-doc.org/en/master/development/tutorials/todo.html +# https://www.sphinx-doc.org/en/master/development/tutorials/recipe.html + + +class OptionDirective(ObjectDescription): + has_content = True + + def handle_signature(self, sig, signode): + signode += addnodes.desc_name(text=sig) + return sig + + def add_target_and_index(self, name_cls, sig, signode): + m = re.match(r'^--([^= ]+)', sig) + if not m: + raise Exception('option must start with --') + option_name = m.group(1) + signode['ids'].append(f'option-{option_name}') + qpdf = self.env.get_domain('qpdf') + qpdf.add_option(sig, option_name) + + +class OptionIndex(Index): + name = 'options' + localname = 'qpdf Command-line Options' + shortname = 'Options' + + def generate(self, docnames=None): + content = defaultdict(list) + options = self.domain.get_objects() + options = sorted(options, key=itemgetter(0)) + + # name, subtype, docname, anchor, extra, qualifier, description + for name, display_name, typ, docname, anchor, _ in options: + m = re.match(r'^(--([^= ]+))', display_name) + if not m: + raise Exception( + 'OptionIndex.generate: display name not as expected') + content[m.group(2)[0].lower()].append( + (m.group(1), 0, docname, anchor, '', '', typ)) + + content = sorted(content.items()) + return content, True + + 
+class QpdfDomain(Domain): + name = 'qpdf' + label = 'qpdf documentation domain' + roles = { + 'ref': XRefRole() + } + directives = { + 'option': OptionDirective, + } + indices = { + OptionIndex, + } + initial_data = { + 'options': [], # object list + } + + def get_full_qualified_name(self, node): + return '{}.{}'.format('option', node.arguments[0]) + + def get_objects(self): + for obj in self.data['options']: + yield(obj) + + def resolve_xref(self, env, from_doc_name, builder, typ, target, node, + contnode): + match = [(docname, anchor) + for name, sig, typ, docname, anchor, priority + in self.get_objects() if name == f'option.{target[2:]}'] + + if len(match) > 0: + to_doc_name = match[0][0] + match_target = match[0][1] + return make_refnode(builder, from_doc_name, to_doc_name, + match_target, contnode, match_target) + else: + raise Exception(f'invalid option xref ({target})') + + def add_option(self, signature, option_name): + if self.env.docname != 'cli': + raise Exception( + 'qpdf:option directives don\'t work outside of cli.rst') + + name = f'option.{option_name}' + anchor = f'option-{option_name}' + + # name, display_name, type, docname, anchor, priority + self.data['options'].append( + (name, signature, '', self.env.docname, anchor, 0)) + + def purge_options(self, docname): + self.data['options'] = list([ + x for x in self.data['options'] + if x[3] != docname + ]) + + +def purge_options(app, env, docname): + option = env.get_domain('qpdf') + option.purge_options(docname) + + +def setup(app): + app.add_domain(QpdfDomain) + app.connect('env-purge-doc', purge_options) + + return { + 'version': '0.1', + 'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/manual/build.mk b/manual/build.mk index bca1253d..45cb03fb 100644 --- a/manual/build.mk +++ b/manual/build.mk @@ -17,7 +17,7 @@ ifeq ($(BUILD_PDF),1) TARGETS_manual += $(PDF_TARGET) endif -MANUAL_DEPS = $(wildcard manual/*.rst) manual/conf.py +MANUAL_DEPS = $(wildcard manual/*.rst) 
manual/conf.py manual/_ext/qpdf.py # Prevent targets that run $(SPHINX) from running in parallel by using # order-only dependencies (the dependencies listed after the |) to diff --git a/manual/cli.rst b/manual/cli.rst index 2f5df5a7..ebeaed81 100644 --- a/manual/cli.rst +++ b/manual/cli.rst @@ -1,6 +1,25 @@ +.. NOTES + + This file contains text that is used for help file generation. + Lines that start with the magic comment ".. help-topic x: y" + introduce a help topic called "x" with short text "y". The contents + of the comment are the long text. + + The ".. qpdf:option:: option" directive introduces a command-line + option. The next ".. help: short_text" comment's contents are the + long text of the help. Search for ".. help-topic" and "qpdf:option" + for additional help. Command line arguments can be referenced using + :qpdf:ref:`--option`. They also appear in an index. + + In this text, :samp:`...` and ``...`` are used somewhat + interchangeably. :samp: should be used when there is replaceable + text enclosed in curly braces. Otherwise, either is fine. Ideally + there should be a stricter editorial convention, but they render + the same, so I have not gone to the trouble of making it consistent. + .. _using: -Running QPDF +Running qpdf ============ This chapter describes how to run the qpdf program from the command @@ -11,1115 +30,956 @@ line. Basic Invocation ---------------- -When running qpdf, the basic invocation is as follows: +.. help-topic usage: basic invocation + + Read a PDF file, apply transformations or modifications, and write + a new PDF file. + + Usage: qpdf infile [options] [outfile] + OR qpdf help-option + + - infile, options, and outfile may be in any order as long as infile + precedes outfile. 
+ - Use --empty in place of an input file for a zero-page, empty input + - Use --replace-input in place of an output file to overwrite the + input file with the output + - outfile may be - to write to stdout; reading from stdin is not supported + - @filename is an argument file; each line is treated as a separate + command-line argument + - @- may be used to read arguments from stdin + - Later options may override earlier options if contradictory :: - qpdf [ options ] { infilename | --empty } outfilename + Usage: qpdf infile [ options ] [ outfile ] -This converts PDF file :samp:`infilename` to PDF file -:samp:`outfilename`. The output file is functionally -identical to the input file but may have been structurally reorganized. -Also, orphaned objects will be removed from the file. Many -transformations are available as controlled by the options below. In -place of :samp:`infilename`, the parameter -:samp:`--empty` may be specified. This causes qpdf to -use a dummy input file that contains zero pages. The only normal use -case for using :samp:`--empty` would be if you were -going to add pages from another source, as discussed in :ref:`page-selection`. +The :command:`qpdf` command reads the PDF file :samp:`{infile}`, +applies various transformations or modifications to the file in +memory, and writes the results to :samp:`{outfile}`. When run with no +arguments, the output file is functionally identical to the input file +but may be structurally reorganized, and orphaned objects are removed +from the file. Many options are available for applying transformations +or modifications to the file. -If :samp:`@filename` appears as a word anywhere in the -command-line, it will be read line by line, and each line will be -treated as a command-line argument. Leading and trailing whitespace is -intentionally not removed from lines, which makes it possible to handle -arguments that start or end with spaces. The :samp:`@-` -option allows arguments to be read from standard input. 
This allows qpdf -to be invoked with an arbitrary number of arbitrarily long arguments. It -is also very useful for avoiding having to pass passwords on the command -line. Note that the :samp:`@filename` can't appear in -the middle of an argument, so constructs such as -:samp:`--arg=@option` will not work. You would have to -include the argument and its options together in the arguments file. +:samp:`{infile}` can be a regular file, or it can be +:qpdf:ref:`--empty` to start with an empty PDF file. :samp:`{outfile}` +can be a regular file, ``-`` to represent standard output, or +:qpdf:ref:`--replace-input` to indicate that the input file should be +overwritten. The output file does not have to be seekable, even when +generating linearized files. The input file *does* have to be +seekable. You can't read from standard input or a pipe. You can also +use :qpdf:ref:`--split-pages` to create separate output files for each +page (or group of pages) instead of a single output file. +Password-protected files may be opened by specifying a password with +:qpdf:ref:`--password`. These and many other options are discussed in +the remaining sections of this chapter. -:samp:`outfilename` does not have to be seekable, even -when generating linearized files. Specifying ":samp:`-`" -as :samp:`outfilename` means to write to standard -output. If you want to overwrite the input file with the output, use the -option :samp:`--replace-input` and omit the output file -name. You can't specify the same file as both the input and the output. -If you do this, qpdf will tell you about the -:samp:`--replace-input` option. +All options other than help options (see :ref:`help-options`) require +an input file. If inspection options (see :ref:`inspection-options`) +are given, an output file must not be given. Otherwise, an output file +is required. -Most options require an output file, but some testing or inspection -commands do not. These are specifically noted. 
+If :samp:`@filename` appears as a word anywhere in the command-line, +it will be read line by line, and each line will be treated as a +command-line argument. Leading and trailing whitespace is +intentionally not removed from lines, which makes it possible to +handle arguments that start or end with spaces. The :samp:`@-` option +allows arguments to be read from standard input. This allows qpdf to +be invoked with an arbitrary number of arbitrarily long arguments. It +is also very useful for avoiding having to pass passwords on the +command line, though see also :qpdf:ref:`--password-file`. Note that +the :samp:`@filename` can't appear in the middle of an argument, so +constructs such as :samp:`--arg=@filename` will not work. Instead, you +would have to include the argument and its parameter (e.g., +:samp:`--arg=parameter`) as a line in the :file:`filename` file and +just pass :samp:`@filename` on the command line. + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --empty + + .. help: empty input file + + Use in place of infile for an empty input. Especially useful + with --pages. + + This option may be given in place of :samp:`{infile}`. This causes + qpdf to use a dummy input file that contains zero pages. This + option is useful in conjunction with :qpdf:ref:`--pages`. See + :ref:`page-selection` for details. + +.. qpdf:option:: --replace-input + + .. help: replace input with output + + Use in place of outfile to overwrite the input file with the output. + + This option may be given in place of :samp:`{outfile}`. This causes + qpdf to replace the input file with the output. It does this by + writing to :file:`{infilename}.~qpdf-temp#` and, when done, + overwriting the input file with the temporary file. If there were + any warnings, the original input is saved as + :file:`{infilename}.~qpdf-orig`. If there are errors, the input + file is left untouched. .. 
_exit-status: Exit Status -~~~~~~~~~~~ +----------- -The exit status of :command:`qpdf` may be interpreted as -follows: +.. help-topic exit-status: meanings of qpdf's exit codes + + Meaning of exit codes: + + 0: no errors or warnings + 1: not used by qpdf but may be used by the shell if unable to invoke qpdf + 2: errors detected + 3: warnings detected, unless --warning-exit-0 is given + +The exit status of :command:`qpdf` may be interpreted as follows: - ``0``: no errors or warnings were found. The file may still have - problems qpdf can't detect. If - :samp:`--warning-exit-0` was specified, exit status 0 - is used even if there are warnings. + problems qpdf can't detect. If :qpdf:ref:`--warning-exit-0` was + specified, exit status ``0`` is used even if there are warnings. + +- ``1``: :command:`qpdf` does not exit with status ``1`` since the + shell uses this exit code if it is unable to invoke the command. - ``2``: errors were found. qpdf was not able to fully process the file. -- ``3``: qpdf encountered problems that it was able to recover from. In - some cases, the resulting file may still be damaged. Note that qpdf - still exits with status ``3`` if it finds warnings even when - :samp:`--no-warn` is specified. With - :samp:`--warning-exit-0`, warnings without errors - exit with status 0 instead of 3. +- ``3``: qpdf encountered problems that it was able to recover from. + In some cases, the resulting file may still be damaged. Note that + qpdf still exits with status ``3`` if it finds warnings even when + :qpdf:ref:`--no-warn` is specified. With + :qpdf:ref:`--warning-exit-0`, warnings without errors exit with + status ``0`` instead of ``3``. -Note that :command:`qpdf` never exists with status ``1``. -If you get an exit status of ``1``, it was something else, like the -shell not being able to find or execute :command:`qpdf`. +The :qpdf:ref:`--is-encrypted` and :qpdf:ref:`--requires-password` +options use different exit codes. See their help for details. 
+ +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --warning-exit-0 + + .. help: exit 0 even with warnings + + Use exit status 0 instead of 3 when warnings are present. When + combined with --no-warn, warnings are completely ignored. + + If there were warnings only and no errors, exit with exit code + ``0`` instead of ``3``. When combined with :qpdf:ref:`--no-warn`, + the effect is for :command:`qpdf` to completely ignore warnings. .. _shell-completion: Shell Completion ---------------- -Starting in qpdf version 8.3.0, qpdf provides its own completion support -for zsh and bash. You can enable bash completion with :command:`eval -$(qpdf --completion-bash)` and zsh completion with -:command:`eval $(qpdf --completion-zsh)`. If -:command:`qpdf` is not in your path, you should invoke it -above with an absolute path. If you invoke it with a relative path, it -will warn you, and the completion won't work if you're in a different -directory. +.. help-topic completion: shell completion -qpdf will use ``argv[0]`` to figure out where its executable is. This -may produce unwanted results in some cases, especially if you are trying -to use completion with copy of qpdf that is built from source. You can -specify a full path to the qpdf you want to use for completion in the -``QPDF_EXECUTABLE`` environment variable. + Shell completion is supported with bash and zsh. Use + eval $(qpdf --completion-bash) or eval $(qpdf --completion-zsh) + to enable. The QPDF_EXECUTABLE environment variable overrides the + path to qpdf that these commands output. -.. _basic-options: +:command:`qpdf` provides its own completion support for zsh and bash. +You can enable bash completion with :command:`eval $(qpdf +--completion-bash)` and zsh completion with :command:`eval $(qpdf +--completion-zsh)`. If :command:`qpdf` is not in your path, you should +invoke it above with an absolute path. 
If you invoke it with a +relative path, it will warn you, and the completion won't work if +you're in a different directory. -Basic Options -------------- +:command:`qpdf` will use ``argv[0]`` to figure out where its +executable is. This may produce unwanted results in some cases, +especially if you are trying to use completion with a copy of qpdf that +is run directly out of the source tree or that is invoked with a +wrapper script. You can specify a full path to the qpdf you want to +use for completion in the ``QPDF_EXECUTABLE`` environment variable. -The following options are the most common ones and perform commonly -needed transformations. +Related Options +~~~~~~~~~~~~~~~ -:samp:`--help` - Display command-line invocation help. +.. qpdf:option:: --completion-bash -:samp:`--version` - Display the current version of qpdf. + .. help: enable bash completion -:samp:`--copyright` - Show detailed copyright information. + Output a command that enables bash completion -:samp:`--show-crypto` - Show a list of available crypto providers, each on a line by itself. - The default provider is always listed first. See :ref:`crypto` for more information about crypto - providers. - -:samp:`--completion-bash` Output a completion command you can eval to enable shell completion from bash. -:samp:`--completion-zsh` +.. qpdf:option:: --completion-zsh + + .. help: enable zsh completion + + Output a command that enables zsh completion + Output a completion command you can eval to enable shell completion from zsh. -:samp:`--password={password}` - Specifies a password for accessing encrypted files. To read the - password from a file or standard input, you can use - :samp:`--password-file`, added in qpdf 10.2. Note - that you can also use :samp:`@filename` or - :samp:`@-` as described above to put the password in - a file or pass it via standard input, but you would do so by - specifying the entire - :samp:`--password={password}` - option in the file. 
Syntax such as - :samp:`--password=@filename` won't work since - :samp:`@filename` is not recognized in the middle of - an argument. +.. _help-options: + +Help/Information +---------------- + +.. help-topic help: information about qpdf + + Help options provide some information about qpdf itself. Help + options are only valid as the first and only command-line argument. + +Help options provide some information about qpdf itself. Help options +are only valid as the first and only command-line argument. + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --help + + .. help: provide help + + Display help information. Run qpdf --help for information about + how to get help on various topics. + + Display command-line invocation help. + +.. qpdf:option:: --version + + .. help: show qpdf version + + Display the version of qpdf. + + Display the version of qpdf. The version number displayed is the + one that is compiled into the qpdf library. If you don't see the + version number you expect, you may have more than one version of + :command:`qpdf` installed and may not have your library path set up + correctly. + +.. qpdf:option:: --copyright + + .. help: show copyright information + + Display copyright and license information. + + Display copyright and license information. + +.. qpdf:option:: --show-crypto + + .. help: show available crypto providers + + Show a list of available crypto providers, one per line. The + default provider is shown first. + + Show a list of available crypto providers, each on a line by + itself. The default provider is always listed first. See + :ref:`crypto` for more information about crypto providers. + +.. _general-options: + +General Options +--------------- + +.. help-topic general: general options + + General options control qpdf's behavior in ways that are not + directly related to the operation it is performing. + +This section describes general options that control :command:`qpdf`'s +behavior. 
They are not necessarily related to the specific operation +that is being performed and may be used whether or not an output file +is being created. + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --password=password + + .. help: specify password + + Specify a password for an encrypted, password-protected file. + Not needed for encrypted files with no password. + + Specifies a password for accessing encrypted, password-protected + files. To read the password from a file or standard input, you can + use :qpdf:ref:`--password-file`. You can also use :samp:`@filename` + or :samp:`@-` (see :ref:`invocation`) to put the password in a file + or pass it via standard input, but you would do so by specifying + the entire :samp:`--password={password}` option in the file. Syntax + such as :samp:`--password=@filename` won't work since + :samp:`@filename` is not recognized in the middle of an argument. + + Prior to 8.4.0, in the case of passwords that contain characters that + fall outside of 7-bit US-ASCII, qpdf left the burden of supplying + properly encoded encryption and decryption passwords to the user. + Starting in qpdf 8.4.0, qpdf does this automatically in most cases. + For an in-depth discussion, please see :ref:`unicode-passwords`. + Previous versions of this manual described workarounds using the + :command:`iconv` command. Such workarounds are no longer required or + recommended with qpdf 8.4.0. However, for backward compatibility, qpdf + attempts to detect those workarounds and do the right thing in most + cases. + +.. qpdf:option:: --password-file=filename + + .. help: read password from a file + + The first line of the specified file is used as the password. + This is used in place of the --password option. -:samp:`--password-file={filename}` Reads the first line from the specified file and uses it as the - password for accessing encrypted files. - :samp:`{filename}` - may be ``-`` to read the password from standard input. 
Note that, in - this case, the password is echoed and there is no prompt, so use with + password for accessing encrypted files. :samp:`{filename}` may be + ``-`` to read the password from standard input, but if you do that + the password is echoed and there is no prompt, so use ``-`` with caution. -:samp:`--is-encrypted` - Silently exit with status 0 if the file is encrypted or status 2 if - the file is not encrypted. This is useful for shell scripts. Other - options are ignored if this is given. This option is mutually - exclusive with :samp:`--requires-password`. Both this - option and :samp:`--requires-password` exit with - status 2 for non-encrypted files. +.. qpdf:option:: --verbose -:samp:`--requires-password` - Silently exit with status 0 if a password (other than as supplied) is - required. Exit with status 2 if the file is not encrypted. Exit with - status 3 if the file is encrypted but requires no password or the - correct password has been supplied. This is useful for shell scripts. - Note that any supplied password is used when opening the file. When - used with a :samp:`--password` option, this option - can be used to check the correctness of the password. In that case, - an exit status of 3 means the file works with the supplied password. - This option is mutually exclusive with - :samp:`--is-encrypted`. Both this option and - :samp:`--is-encrypted` exit with status 2 for - non-encrypted files. + .. help: print additional information -:samp:`--verbose` - Increase verbosity of output. For now, this just prints some - indication of any file that it creates. + Output additional information about various things qpdf is + doing, including information about files created and operations + performed. -:samp:`--progress` - Indicate progress while writing files. + Increase verbosity of output. This includes information about files + created, image optimization, and several other operations.
In some + cases, it also displays additional information when inspection + options (see :ref:`inspection-options`) are used. -:samp:`--no-warn` - Suppress writing of warnings to stderr. If warnings were detected and - suppressed, :command:`qpdf` will still exit with exit - code 3. See also :samp:`--warning-exit-0`. +.. qpdf:option:: --progress -:samp:`--warning-exit-0` - If warnings are found but no errors, exit with exit code 0 instead 3. - When combined with :samp:`--no-warn`, the effect is - for :command:`qpdf` to completely ignore warnings. + .. help: show progress when writing -:samp:`--linearize` - Causes generation of a linearized (web-optimized) output file. + Indicate progress when writing files. -:samp:`--replace-input` - If specified, the output file name should be omitted. This option - tells qpdf to replace the input file with the output. It does this by - writing to - :file:`{infilename}.~qpdf-temp#` - and, when done, overwriting the input file with the temporary file. - If there were any warnings, the original input is saved as - :file:`{infilename}.~qpdf-orig`. + Indicate progress while writing output files. Progress indication + does not start until writing starts, so if complicated + transformations are being applied before the write progress begins, + there may be a delay before progress indicators are seen. -:samp:`--copy-encryption=file` - Encrypt the file using the same encryption parameters, including user - and owner password, as the specified file. Use - :samp:`--encryption-file-password` to specify a - password if one is needed to open this file. Note that copying the - encryption parameters from a file also copies the first half of - ``/ID`` from the file since this is part of the encryption - parameters. +.. qpdf:option:: --no-warn -:samp:`--encryption-file-password=password` - If the file specified with :samp:`--copy-encryption` - requires a password, specify the password using this option. 
Note - that only one of the user or owner password is required. Both - passwords will be preserved since QPDF does not distinguish between - the two passwords. It is possible to preserve encryption parameters, - including the owner password, from a file even if you don't know the - file's owner password. + .. help: suppress printing warning messages + + Suppress printing warning messages. If warnings were + encountered, qpdf still exits with exit status 3. + Use --warning-exit-0 with --no-warn to completely ignore + warnings. + + Suppress writing of warnings to stderr. If warnings were detected + and suppressed, :command:`qpdf` will still exit with exit code 3. + To completely ignore warnings, also specify + :qpdf:ref:`--warning-exit-0`. Use with caution as qpdf is not + always successful in recovering from situations that cause warnings + to be issued. + +.. qpdf:option:: --deterministic-id + + .. help: generate ID deterministically + + Generate a secure, random document ID only using static + information, such as the page contents. Does not use the file's + name or attributes or the current time. + + Generate a secure, random document ID using deterministic + values. This prevents use of timestamp and output file name + information in the ID generation. Instead, at some slight + additional runtime cost, the ID field is generated to include a + digest of the significant parts of the content of the output PDF + file. This means that a given qpdf operation should generate the + same ID each time it is run, which can be useful when caching + results or for generation of some test data. Use of this flag is + not compatible with creation of encrypted files. + + Note that there is *no guarantee* that different versions of qpdf + will generate the same deterministic ID given the same generation + code and input. While care is taken to avoid gratuitous changes, + new versions of qpdf may include changes that affect the output. + This option can be useful for testing.
See also + :qpdf:ref:`--static-id`. + +.. qpdf:option:: --allow-weak-crypto + + .. help: allow insecure cryptographic algorithms + + Allow creation of files with weak cryptographic algorithms. This + option is necessary to create 40-bit files or 128-bit files that + use RC4 encryption. -:samp:`--allow-weak-crypto` Starting with version 10.4, qpdf issues warnings when requested to create files using RC4 encryption. This option suppresses those warnings. In future versions of qpdf, qpdf will refuse to create - files with weak cryptography when this flag is not given. See :ref:`weak-crypto` for additional details. + files with weak cryptography when this flag is not given. See + :ref:`weak-crypto` for additional details. -:samp:`--encrypt options --` - Causes generation an encrypted output file. Please see :ref:`encryption-options` for details on how to specify - encryption parameters. +.. qpdf:option:: --keep-files-open=[yn] -:samp:`--decrypt` - Removes any encryption on the file. A password must be supplied if - the file is password protected. + .. help: manage keeping multiple files open + + When qpdf needs to work with many files, as when merging large + numbers of files, explicitly indicate whether files should be + kept open. The default behavior is to determine this based on + the number of files. + + This option controls whether qpdf keeps individual files open while + merging. By default, qpdf keeps files open when merging unless more + than 200 files are specified, in which case files are opened as + needed and closed when finished. The behavior of repeatedly opening + and closing files may impose a large performance penalty with some + file systems, especially networked file systems.
If you know that + you have a large enough open file limit and are suffering from + performance problems, or if you have an open file limit smaller + than 200, you can use this option to override the default behavior + by specifying :samp:`--keep-files-open=y` to force :command:`qpdf` + to keep files open or :samp:`--keep-files-open=n` to force it to + only open files as needed. See also + :qpdf:ref:`--keep-files-open-threshold`. + + Historical note: prior to version 8.1.0, qpdf always kept all files + open, but this meant that the number of files that could be merged + was limited by the operating system's open file limit. Version + 8.1.0 opened files as they were referenced and closed them after + each read, but this caused a major performance impact. Version + 8.2.0 optimized the performance but did so in a way that, for local + file systems, there was a small but unavoidable performance hit, + but for networked file systems, the performance impact could be + very high. The current behavior was introduced in qpdf version + 8.2.1. + +.. qpdf:option:: --keep-files-open-threshold=count + + .. help: set threshold for --keep-files-open + + Set the threshold used by --keep-files-open, overriding the + default value of 200. + + If specified, overrides the default value of 200 used as the + threshold for qpdf deciding whether or not to keep files open. See + :qpdf:ref:`--keep-files-open` for details. + +.. _advanced-control-options: + +Advanced Control Options +------------------------ + +.. help-topic advanced-control: tweak qpdf's behavior + + Advanced control options control qpdf's behavior in ways that would + normally never be needed by a user but that may be useful to + developers or people investigating problems with specific files. + +Advanced control options control qpdf's behavior in ways that would +normally never be needed by a user but that may be useful to +developers or people investigating problems with specific files. 
+ +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --password-is-hex-key + + .. help: provide hex-encoded encryption key + + Provide the underlying file encryption key as a hex-encoded + string rather than supplying a password. This is an expert + option. -:samp:`--password-is-hex-key` Overrides the usual computation/retrieval of the PDF file's encryption key from user/owner password with an explicit specification of the encryption key. When this option is specified, - the argument to the :samp:`--password` option is - interpreted as a hexadecimal-encoded key value. This only applies to - the password used to open the main input file. It does not apply to - other files opened by :samp:`--pages` or other - options or to files being written. + the argument to the :qpdf:ref:`--password` option is interpreted as + a hexadecimal-encoded key value. This only applies to the password + used to open the main input file. It does not apply to other files + opened by :qpdf:ref:`--pages` or other options or to files being + written. Most users will never have a need for this option, and no standard viewers support this mode of operation, but it can be useful for forensic or investigatory purposes. For example, if a PDF file is encrypted with an unknown password, a brute-force attack using the - key directly is sometimes more efficient than one using the password. - Also, if a file is heavily damaged, it may be possible to derive the - encryption key and recover parts of the file using it directly. To - expose the encryption key used by an encrypted file that you can open - normally, use the :samp:`--show-encryption-key` - option. + key directly is sometimes more efficient than one using the + password. Also, if a file is heavily damaged, it may be possible to + derive the encryption key and recover parts of the file using it + directly. To expose the encryption key used by an encrypted file + that you can open normally, use the + :qpdf:ref:`--show-encryption-key` option.
+ +.. qpdf:option:: --suppress-password-recovery + + .. help: don't try different password encodings + + Suppress qpdf's behavior of attempting different encodings of a + password that contains non-ASCII Unicode characters if the first + attempt doesn't succeed. -:samp:`--suppress-password-recovery` Ordinarily, qpdf attempts to automatically compensate for passwords specified in the wrong character encoding. This option suppresses that behavior. Under normal conditions, there are no reasons to use - this option. See :ref:`unicode-passwords` for a - discussion + this option. See :ref:`unicode-passwords` for a discussion + +.. qpdf:option:: --password-mode={mode} + + .. help: tweak how qpdf encodes passwords + + Fine-tune how qpdf controls encoding of Unicode passwords. Valid + options are auto, bytes, hex-bytes, and unicode. -:samp:`--password-mode={mode}` This option can be used to fine-tune how qpdf interprets Unicode (non-ASCII) password strings passed on the command line. With the - exception of the :samp:`hex-bytes` mode, these only - apply to passwords provided when encrypting files. The - :samp:`hex-bytes` mode also applies to passwords - specified for reading files. For additional discussion of the - supported password modes and when you might want to use them, see - :ref:`unicode-passwords`. The following modes - are supported: + exception of the :samp:`hex-bytes` mode, these only apply to + passwords provided when encrypting files. The :samp:`hex-bytes` + mode also applies to passwords specified for reading files. For + additional discussion of the supported password modes and when you + might want to use them, see :ref:`unicode-passwords`. 
The following + modes are supported: - - :samp:`auto`: Automatically determine whether the - specified password is a properly encoded Unicode (UTF-8) string, - and transcode it as required by the PDF spec based on the type + - :samp:`auto`: Automatically determine whether the specified + password is a properly encoded Unicode (UTF-8) string, and + transcode it as required by the PDF spec based on the type encryption being applied. On Windows starting with version 8.4.0, and on almost all other modern platforms, incoming passwords will be properly encoded in UTF-8, so this is almost always what you want. - - :samp:`unicode`: Tells qpdf that the incoming - password is UTF-8, overriding whatever its automatic detection - determines. The only difference between this mode and - :samp:`auto` is that qpdf will fail with an error - message if the password is not valid UTF-8 instead of falling back - to :samp:`bytes` mode with a warning. + - :samp:`unicode`: Tells qpdf that the incoming password is UTF-8, + overriding whatever its automatic detection determines. The only + difference between this mode and :samp:`auto` is that qpdf will + fail with an error message if the password is not valid UTF-8 + instead of falling back to :samp:`bytes` mode with a warning. - - :samp:`bytes`: Interpret the password as a literal - byte string. For non-Windows platforms, this is what versions of - qpdf prior to 8.4.0 did. For Windows platforms, there is no way to - specify strings of binary data on the command line directly, but - you can use the :samp:`@filename` option to do it, - in which case this option forces qpdf to respect the string of - bytes as provided. This option will allow you to encrypt PDF files - with passwords that will not be usable by other readers. + - :samp:`bytes`: Interpret the password as a literal byte string. + For non-Windows platforms, this is what versions of qpdf prior to + 8.4.0 did. 
For Windows platforms, there is no way to specify + strings of binary data on the command line directly, but you can + use a :samp:`@filename` option or :qpdf:ref:`--password-file` to + do it, in which case this option forces qpdf to respect the + string of bytes as provided. Note that this option may cause you + to encrypt PDF files with passwords that will not be usable by + other readers. - - :samp:`hex-bytes`: Interpret the password as a - hex-encoded string. This provides a way to pass binary data as a - password on all platforms including Windows. As with - :samp:`bytes`, this option may allow creation of - files that can't be opened by other readers. This mode affects - qpdf's interpretation of passwords specified for decrypting files - as well as for encrypting them. It makes it possible to specify - strings that are encoded in some manner other than the system's - default encoding. + - :samp:`hex-bytes`: Interpret the password as a hex-encoded + string. This provides a way to pass binary data as a password on + all platforms including Windows. As with :samp:`bytes`, this + option may allow creation of files that can't be opened by other + readers. This mode affects qpdf's interpretation of passwords + specified for decrypting files as well as for encrypting them. It + makes it possible to specify strings that are encoded in some + manner other than the system's default encoding. -:samp:`--rotate=[+|-]angle[:page-range]` - Apply rotation to specified pages. The - :samp:`page-range` portion of the option value has - the same format as page ranges in :ref:`page-selection`. If the page range is omitted, the - rotation is applied to all pages. The :samp:`angle` - portion of the parameter may be either 0, 90, 180, or 270. If - preceded by :samp:`+` or :samp:`-`, - the angle is added to or subtracted from the specified pages' - original rotations. This is almost always what you want. 
Otherwise - the pages' rotations are set to the exact value, which may cause the - appearances of the pages to be inconsistent, especially for scans. - For example, the command :command:`qpdf in.pdf out.pdf - --rotate=+90:2,4,6 --rotate=180:7-8` would rotate pages - 2, 4, and 6 90 degrees clockwise from their original rotation and - force the rotation of pages 7 through 8 to 180 degrees regardless of - their original rotation, and the command :command:`qpdf in.pdf - out.pdf --rotate=+180` would rotate all pages by 180 - degrees. +.. qpdf:option:: --suppress-recovery -:samp:`--keep-files-open={[yn]}` - This option controls whether qpdf keeps individual files open while - merging. Prior to version 8.1.0, qpdf always kept all files open, but - this meant that the number of files that could be merged was limited - by the operating system's open file limit. Version 8.1.0 opened files - as they were referenced and closed them after each read, but this - caused a major performance impact. Version 8.2.0 optimized the - performance but did so in a way that, for local file systems, there - was a small but unavoidable performance hit, but for networked file - systems, the performance impact could be very high. Starting with - version 8.2.1, the default behavior is that files are kept open if no - more than 200 files are specified, but this default behavior can be - explicitly overridden with the - :samp:`--keep-files-open` flag. If you are merging - more than 200 files but less than the operating system's max open - files limit, you may want to use - :samp:`--keep-files-open=y`, especially if working - over a networked file system. If you are using a local file system - where the overhead is low and you might sometimes merge more than the - OS limit's number of files from a script and are not worried about a - few seconds additional processing time, you may want to specify - :samp:`--keep-files-open=n`. 
The threshold for - switching may be changed from the default 200 with the - :samp:`--keep-files-open-threshold` option. + .. help: suppress error recovery -:samp:`--keep-files-open-threshold={count}` - If specified, overrides the default value of 200 used as the - threshold for qpdf deciding whether or not to keep files open. See - :samp:`--keep-files-open` for details. + Avoid attempting to recover when errors are found in a file's + cross reference table or stream lengths. -:samp:`--pages options --` - Select specific pages from one or more input files. See :ref:`page-selection` for details on how to do - page selection (splitting and merging). + Prevents qpdf from attempting to reconstruct a file's cross + reference table when there are errors reading objects from the + file. Recovery is triggered by a variety of situations. While + usually successful, it uses heuristics that don't work on all + files. If this option is given, :command:`qpdf` fails on the first + error it encounters. -:samp:`--collate={n}` - When specified, collate rather than concatenate pages from files - specified with :samp:`--pages`. With a numeric - argument, collate in groups of :samp:`{n}`. - The default is 1. See :ref:`page-selection` for additional details. +.. qpdf:option:: --ignore-xref-streams -:samp:`--flatten-rotation` - For each page that is rotated using the ``/Rotate`` key in the page's - dictionary, remove the ``/Rotate`` key and implement the identical - rotation semantics by modifying the page's contents. This option can - be useful to prepare files for buggy PDF applications that don't - properly handle rotated pages. + .. help: use xref tables rather than streams -:samp:`--split-pages=[n]` - Write each group of :samp:`n` pages to a separate - output file. If :samp:`n` is not specified, create - single pages. 
Output file names are generated as follows: + Ignore any cross-reference streams in the file, falling back to + cross-reference tables or triggering document recovery. - - If the string ``%d`` appears in the output file name, it is - replaced with a range of zero-padded page numbers starting from 1. + Tells qpdf to ignore any cross-reference streams, falling back to + any embedded cross-reference tables or triggering document + recovery. Ordinarily, qpdf reads cross-reference streams when they + are present in a PDF file. If this option is specified, + qpdf will ignore any cross-reference streams for hybrid PDF files. + The purpose of hybrid files is to make some content available to + viewers that are not aware of cross-reference streams. It is almost + never desirable to ignore them. The only time when you might want + to use this feature is if you are testing creation of hybrid PDF + files and wish to see how a PDF consumer that doesn't understand + object and cross-reference streams would interpret such a file. - - Otherwise, if the output file name ends in - :file:`.pdf` (case insensitive), a zero-padded - page range, preceded by a dash, is inserted before the file - extension. +.. _transformation-options: - - Otherwise, the file name is appended with a zero-padded page range - preceded by a dash. - - Page ranges are a single number in the case of single-page groups or - two numbers separated by a dash otherwise.
For example, if - :file:`infile.pdf` has 12 pages - - - :command:`qpdf --split-pages infile.pdf %d-out` - would generate files :file:`01-out` through - :file:`12-out` - - - :command:`qpdf --split-pages=2 infile.pdf - outfile.pdf` would generate files - :file:`outfile-01-02.pdf` through - :file:`outfile-11-12.pdf` - - - :command:`qpdf --split-pages infile.pdf - something.else` would generate files - :file:`something.else-01` through - :file:`something.else-12` - - Note that outlines, threads, and other global features of the - original PDF file are not preserved. For each page of output, this - option creates an empty PDF and copies a single page from the output - into it. If you require the global data, you will have to run - :command:`qpdf` with the - :samp:`--pages` option once for each file. Using - :samp:`--split-pages` is much faster if you don't - require the global data. - -:samp:`--overlay options --` - Overlay pages from another file onto the output pages. See :ref:`overlay-underlay` for details on - overlay/underlay. - -:samp:`--underlay options --` - Overlay pages from another file onto the output pages. See :ref:`overlay-underlay` for details on - overlay/underlay. - -Password-protected files may be opened by specifying a password. By -default, qpdf will preserve any encryption data associated with a file. -If :samp:`--decrypt` is specified, qpdf will attempt to -remove any encryption information. If :samp:`--encrypt` -is specified, qpdf will replace the document's encryption parameters -with whatever is specified. - -Note that qpdf does not obey encryption restrictions already imposed on -the file. Doing so would be meaningless since qpdf can be used to remove -encryption from the file entirely. This functionality is not intended to -be used for bypassing copyright restrictions or other restrictions -placed on files by their producers. 
- -Prior to 8.4.0, in the case of passwords that contain characters that -fall outside of 7-bit US-ASCII, qpdf left the burden of supplying -properly encoded encryption and decryption passwords to the user. -Starting in qpdf 8.4.0, qpdf does this automatically in most cases. For -an in-depth discussion, please see :ref:`unicode-passwords`. Previous versions of this manual -described workarounds using the :command:`iconv` command. -Such workarounds are no longer required or recommended with qpdf 8.4.0. -However, for backward compatibility, qpdf attempts to detect those -workarounds and do the right thing in most cases. - -.. _encryption-options: - -Encryption Options +PDF Transformation ------------------ -To change the encryption parameters of a file, use the --encrypt flag. -The syntax is +.. help-topic transformation: make structural PDF changes -:: + The options below tell qpdf to apply transformations that change + the structure without changing the content. - --encrypt user-password owner-password key-length [ restrictions ] -- +The options discussed in this section tell qpdf to apply +transformations that change the structure of a PDF file without +changing the content. Examples include creating linearized +(web-optimized) files, adding or removing encryption, restructuring +files for older viewers, and rewriting files for human inspection, +among others. See also :ref:`modification-options`. -Note that ":samp:`--`" terminates parsing of encryption -flags and must be present even if no restrictions are present. +Related Options +~~~~~~~~~~~~~~~ -Either or both of the user password and the owner password may be empty -strings. Starting in qpdf 10.2, qpdf defaults to not allowing creation -of PDF files with a non-empty user password, an empty owner password, -and a 256-bit key since such files can be opened with no password. If -you want to create such files, specify the encryption option -:samp:`--allow-insecure`, as described below. +.. 
qpdf:option:: --linearize -The value for -:samp:`{key-length}` may -be 40, 128, or 256. The restriction flags are dependent upon key length. -When no additional restrictions are given, the default is to be fully -permissive. + .. help: linearize (web-optimize) output -If :samp:`{key-length}` -is 40, the following restriction options are available: + Create linearized (web-optimized) output files. -:samp:`--print=[yn]` - Determines whether or not to allow printing. + Create linearized (web-optimized) output files. Linearized files + are formatted in a way that allows compliant readers to begin + displaying a PDF file before it is fully downloaded. Ordinarily, + the entire file must be present before it can be rendered because + important cross-reference information typically appears at the end + of the file. + +.. qpdf:option:: --encrypt user owner key-length [ options ] -- + + .. help: start encryption options + + Run qpdf --help=encryption for details. + + This flag starts encryption options, used to create encrypted + files. + + Please see :ref:`encryption-options` for details about creating + encrypted files. + +.. qpdf:option:: --decrypt + + .. help: remove encryption from input file + + Create an unencrypted output file even if the input file was + encrypted. Normally qpdf preserves whatever encryption was + present on the input file. This option overrides that behavior. + + Create an output file with no encryption even if the input file is + encrypted. This option overrides the default behavior of preserving + whatever encryption was present on the input file. This + functionality is not intended to be used for bypassing copyright + restrictions or other restrictions placed on files by their + producers. See also :qpdf:ref:`--copy-encryption`. + +.. qpdf:option:: --copy-encryption=file + + .. help: copy another file's encryption details + + Copy encryption details from the specified file instead of + preserving the input file's encryption. 
Use --encryption-file-password + to specify the encryption file's password. + + Copy encryption parameters, including the user password, the owner + password, and all security restrictions, from the specified file + instead of preserving encryption details from the input file. This + works even if only one of the user password or owner password is + known. If the encryption file requires a password, use the + :qpdf:ref:`--encryption-file-password` option to set it. Note that + copying the encryption parameters from a file also copies the first + half of ``/ID`` from the file since this is part of the encryption + parameters. This option can be useful if you need to decrypt a file + to make manual changes to it or to change it outside of qpdf, and + then you want to restore the original encryption on the file + without having to manually specify all the individual settings. See + also :qpdf:ref:`--decrypt`. + +.. qpdf:option:: --encryption-file-password=password + + .. help: supply password for --copy-encryption + + If the file named in --copy-encryption requires a password, use + this option to specify the password. + + If the file specified with :qpdf:ref:`--copy-encryption` + requires a password, specify the password using this option. This + option is necessary because the :qpdf:ref:`--password` option + applies to the input file, not the file from which encryption is + being copied. + +.. qpdf:option:: --qdf + + .. help: enable viewing PDF code in a text editor + + Create a PDF file suitable for viewing in a text editor and even + editing. This is to edit the PDF code, not the page contents. + All streams that can be uncompressed are uncompressed, and + content streams are normalized, among other changes. The + companion tool "fix-qdf" can be used to repair hand-edited QDF + files. QDF is a feature specific to the qpdf tool. There is a + chapter about it in the manual. + + Create a PDF file suitable for viewing and editing in a text + editor.
This is to edit the PDF code, not the page contents. To + edit a QDF file, your text editor must preserve binary data. In a + QDF file, all streams that can be uncompressed are uncompressed, + and content streams are normalized, among other changes. The + companion tool :command:`fix-qdf` can be used to repair hand-edited + QDF files. QDF is a feature specific to the qpdf tool. For + additional information about QDF mode, see :ref:`qdf`. Note that + :qpdf:ref:`--linearize` disables QDF mode. + + QDF mode has full support for object streams, but sometimes it's + easier to locate a specific object if object streams are disabled. + When trying to understand some PDF construct by inspecting an + existing file, it can often be useful to combine :samp:`--qdf` with + :samp:`--object-streams=disable`. + + This flag changes some of the defaults of other options: stream + data is uncompressed, content streams are normalized, and + encryption is removed. These defaults can still be overridden by + specifying the appropriate options with :samp:`--qdf`. + Additionally, in QDF mode, stream lengths are stored as indirect + objects, objects are formatted in a less efficient but more + readable fashion, and the documents are interspersed with comments + that make it easier for the user to find things and also make it + possible for :command:`fix-qdf` to work properly. When editing QDF + files, it is not necessary to maintain the object formatting. + + When normalizing content, if qpdf runs into any lexical errors, it + will print a warning indicating that content may be damaged. If you + want to create QDF files without content normalization, you can run + with :samp:`--qdf --normalize-content=n`. You can also create a + non-QDF file with uncompressed streams using + :samp:`--stream-data=uncompress`. These will both uncompress all + the streams but will not attempt to normalize content.
Please note + that if you are using content normalization or QDF mode for the + purpose of manually inspecting files, you don't have to care about + this. + + See also :qpdf:ref:`--no-original-object-ids`. + +.. qpdf:option:: --no-original-object-ids + + .. help: omit original object ID in qdf + + Omit comments in a QDF file indicating the object ID an object + had in the original file. + + Suppresses inclusion of original object ID comments in QDF files. + This can be useful when generating QDF files for test purposes, + particularly when comparing them to determine whether two PDF files + have identical content. The original object ID comment is there by + default because it makes it easier to trace objects back to the + original file. + +.. qpdf:option:: --compress-streams=[yn] + + .. help: compress uncompressed streams + + Setting --compress-streams=n prevents qpdf from compressing + uncompressed streams. This can be useful if you are leaving some + streams uncompressed intentionally. + + By default, or with :samp:`--compress-streams=y`, qpdf will + compress streams using the flate compression algorithm (used by zip + and gzip) unless those streams are compressed in some other way. + This analysis is made after qpdf attempts to uncompress streams and + is therefore closely related to :qpdf:ref:`--decode-level`. To + suppress this behavior and leave streams uncompressed, use + :samp:`--compress-streams=n`. In QDF mode (see :ref:`qdf` and + :qpdf:ref:`--qdf`), the default is to leave streams uncompressed. + +.. qpdf:option:: --decode-level=option + + .. help: control which streams to uncompress + + When uncompressing streams, control which types of compression + schemes should be uncompressed: + - none: don't uncompress anything + - generalized: uncompress streams compressed with a + general-purpose compression algorithm. This is the default. 
+ - specialized: in addition to generalized, also uncompress + streams compressed with a special-purpose but non-lossy + compression scheme + - all: in addition to specialized, uncompress streams compressed + with lossy compression schemes like JPEG (DCT) + qpdf does not know how to uncompress all compression schemes. -:samp:`--modify=[yn]` - Determines whether or not to allow document modification. - -:samp:`--extract=[yn]` - Determines whether or not to allow text/image extraction. - -:samp:`--annotate=[yn]` - Determines whether or not to allow comments and form fill-in and - signing. - -If :samp:`{key-length}` -is 128, the following restriction options are available: - -:samp:`--accessibility=[yn]` - Determines whether or not to allow accessibility to visually - impaired. The qpdf library disregards this field when AES is used or - when 256-bit encryption is used. You should really never disable - accessibility, but qpdf lets you do it in case you need to configure - a file this way for testing purposes. The PDF spec says that - conforming readers should disregard this permission and always allow - accessibility. - -:samp:`--extract=[yn]` - Determines whether or not to allow text/graphic extraction. - -:samp:`--assemble=[yn]` - Determines whether document assembly (rotation and reordering of - pages) is allowed. - -:samp:`--annotate=[yn]` - Determines whether modifying annotations is allowed. This includes - adding comments and filling in form fields. Also allows editing of - form fields if :samp:`--modify-other=y` is given. - -:samp:`--form=[yn]` - Determines whether filling form fields is allowed. - -:samp:`--modify-other=[yn]` - Allow all document editing except those controlled separately by the - :samp:`--assemble`, - :samp:`--annotate`, and - :samp:`--form` options. - -:samp:`--print={print-opt}` - Controls printing access. 
- :samp:`{print-opt}` - may be one of the following: - - - :samp:`full`: allow full printing - - - :samp:`low`: allow low-resolution printing only - - - :samp:`none`: disallow printing - -:samp:`--modify={modify-opt}` - Controls modify access. This way of controlling modify access has - less granularity than new options added in qpdf 8.4. - :samp:`{modify-opt}` - may be one of the following: - - - :samp:`all`: allow full document modification - - - :samp:`annotate`: allow comment authoring, form - operations, and document assembly - - - :samp:`form`: allow form field fill-in and signing - and document assembly - - - :samp:`assembly`: allow document assembly only - - - :samp:`none`: allow no modifications - - Using the :samp:`--modify` option does not allow you - to create certain combinations of permissions such as allowing form - filling but not allowing document assembly. Starting with qpdf 8.4, - you can either just use the other options to control fields - individually, or you can use something like :samp:`--modify=form - --assembly=n` to fine tune. - -:samp:`--cleartext-metadata` - If specified, any metadata stream in the document will be left - unencrypted even if the rest of the document is encrypted. This also - forces the PDF version to be at least 1.5. - -:samp:`--use-aes=[yn]` - If :samp:`--use-aes=y` is specified, AES encryption - will be used instead of RC4 encryption. This forces the PDF version - to be at least 1.6. - -:samp:`--allow-insecure` - From qpdf 10.2, qpdf defaults to not allowing creation of PDF files - where the user password is non-empty, the owner password is empty, - and a 256-bit key is in use. Files created in this way are insecure - since they can be opened without a password. Users would ordinarily - never want to create such files. If you are using qpdf to - intentionally created strange files for testing (a definite valid use - of qpdf!), this option allows you to create such insecure files. 
- -:samp:`--force-V4` - Use of this option forces the ``/V`` and ``/R`` parameters in the - document's encryption dictionary to be set to the value ``4``. As - qpdf will automatically do this when required, there is no reason to - ever use this option. It exists primarily for use in testing qpdf - itself. This option also forces the PDF version to be at least 1.5. - -If :samp:`{key-length}` -is 256, the minimum PDF version is 1.7 with extension level 8, and the -AES-based encryption format used is the PDF 2.0 encryption method -supported by Acrobat X. the same options are available as with 128 bits -with the following exceptions: - -:samp:`--use-aes` - This option is not available with 256-bit keys. AES is always used - with 256-bit encryption keys. - -:samp:`--force-V4` - This option is not available with 256 keys. - -:samp:`--force-R5` - If specified, qpdf sets the minimum version to 1.7 at extension level - 3 and writes the deprecated encryption format used by Acrobat version - IX. This option should not be used in practice to generate PDF files - that will be in general use, but it can be useful to generate files - if you are trying to test proper support in another application for - PDF files encrypted in this way. - -The default for each permission option is to be fully permissive. - -.. _page-selection: - -Page Selection Options ----------------------- - -Starting with qpdf 3.0, it is possible to split and merge PDF files by -selecting pages from one or more input files. Whatever file is given as -the primary input file is used as the starting point, but its pages are -replaced with pages as specified. - -:: - - --pages input-file [ --password=password ] [ page-range ] [ ... ] -- - -Multiple input files may be specified. Each one is given as the name of -the input file, an optional password (if required to open the file), and -the range of pages. Note that ":samp:`--`" terminates -parsing of page selection flags. 
- -Starting with qpf 8.4, the special input file name -":file:`.`" can be used as a shortcut for the -primary input filename. - -For each file that pages should be taken from, specify the file, a -password needed to open the file (if any), and a page range. The -password needs to be given only once per file. If any of the input files -are the same as the primary input file or the file used to copy -encryption parameters (if specified), you do not need to repeat the -password here. The same file can be repeated multiple times. If a file -that is repeated has a password, the password only has to be given the -first time. All non-page data (info, outlines, page numbers, etc.) are -taken from the primary input file. To discard these, use -:samp:`--empty` as the primary input. - -Starting with qpdf 5.0.0, it is possible to omit the page range. If qpdf -sees a value in the place where it expects a page range and that value -is not a valid range but is a valid file name, qpdf will implicitly use -the range ``1-z``, meaning that it will include all pages in the file. -This makes it possible to easily combine all pages in a set of files -with a command like :command:`qpdf --empty out.pdf --pages \*.pdf ---`. - -The page range is a set of numbers separated by commas, ranges of -numbers separated dashes, or combinations of those. The character "z" -represents the last page. A number preceded by an "r" indicates to count -from the end, so ``r3-r1`` would be the last three pages of the -document. Pages can appear in any order. Ranges can appear with a high -number followed by a low number, which causes the pages to appear in -reverse. Numbers may be repeated in a page range. A page range may be -optionally appended with ``:even`` or ``:odd`` to indicate only the even -or odd pages in the given range. Note that even and odd refer to the -positions within the specified, range, not whether the original number -is even or odd. 
- -Example page ranges: - -- ``1,3,5-9,15-12``: pages 1, 3, 5, 6, 7, 8, 9, 15, 14, 13, and 12 in - that order. - -- ``z-1``: all pages in the document in reverse - -- ``r3-r1``: the last three pages of the document - -- ``r1-r3``: the last three pages of the document in reverse order - -- ``1-20:even``: even pages from 2 to 20 - -- ``5,7-9,12:odd``: pages 5, 8, and, 12, which are the pages in odd - positions from among the original range, which represents pages 5, 7, - 8, 9, and 12. - -Starting in qpdf version 8.3, you can specify the -:samp:`--collate` option. Note that this option is -specified outside of :samp:`--pages ... --`. When -:samp:`--collate` is specified, it changes the meaning -of :samp:`--pages` so that the specified files, as -modified by page ranges, are collated rather than concatenated. For -example, if you add the files :file:`odd.pdf` and -:file:`even.pdf` containing odd and even pages of a -document respectively, you could run :command:`qpdf --collate odd.pdf ---pages odd.pdf even.pdf -- all.pdf` to collate the pages. -This would pick page 1 from odd, page 1 from even, page 2 from odd, page -2 from even, etc. until all pages have been included. Any number of -files and page ranges can be specified. If any file has fewer pages, -that file is just skipped when its pages have all been included. For -example, if you ran :command:`qpdf --collate --empty --pages a.pdf -1-5 b.pdf 6-4 c.pdf r1 -- out.pdf`, you would get the -following pages in this order: - -- a.pdf page 1 - -- b.pdf page 6 - -- c.pdf last page - -- a.pdf page 2 - -- b.pdf page 5 - -- a.pdf page 3 - -- b.pdf page 4 - -- a.pdf page 4 - -- a.pdf page 5 - -Starting in qpdf version 10.2, you may specify a numeric argument to -:samp:`--collate`. With -:samp:`--collate={n}`, -pull groups of :samp:`{n}` pages from each file, -again, stopping when there are no more pages. 
For example, if you ran -:command:`qpdf --collate=2 --empty --pages a.pdf 1-5 b.pdf 6-4 c.pdf -r1 -- out.pdf`, you would get the following pages in this -order: - -- a.pdf page 1 - -- a.pdf page 2 - -- b.pdf page 6 - -- b.pdf page 5 - -- c.pdf last page - -- a.pdf page 3 - -- a.pdf page 4 - -- b.pdf page 4 - -- a.pdf page 5 - -Starting in qpdf version 8.3, when you split and merge files, any page -labels (page numbers) are preserved in the final file. It is expected -that more document features will be preserved by splitting and merging. -In the mean time, semantics of splitting and merging vary across -features. For example, the document's outlines (bookmarks) point to -actual page objects, so if you select some pages and not others, -bookmarks that point to pages that are in the output file will work, and -remaining bookmarks will not work. A future version of -:command:`qpdf` may do a better job at handling these -issues. (Note that the qpdf library already contains all of the APIs -required in order to implement this in your own application if you need -it.) In the mean time, you can always use -:samp:`--empty` as the primary input file to avoid -copying all of that from the first file. For example, to take pages 1 -through 5 from a :file:`infile.pdf` while preserving -all metadata associated with that file, you could use - -:: - - qpdf infile.pdf --pages . 1-5 -- outfile.pdf - -If you wanted pages 1 through 5 from -:file:`infile.pdf` but you wanted the rest of the -metadata to be dropped, you could instead run - -:: - - qpdf --empty --pages infile.pdf 1-5 -- outfile.pdf - -If you wanted to take pages 1 through 5 from -:file:`file1.pdf` and pages 11 through 15 from -:file:`file2.pdf` in reverse, taking document-level -metadata from :file:`file2.pdf`, you would run - -:: - - qpdf file2.pdf --pages file1.pdf 1-5 . 
15-11 -- outfile.pdf - -If, for some reason, you wanted to take the first page of an encrypted -file called :file:`encrypted.pdf` with password -``pass`` and repeat it twice in an output file, and if you wanted to -drop document-level metadata but preserve encryption, you would use - -:: - - qpdf --empty --copy-encryption=encrypted.pdf \ - --encryption-file-password=pass \ - --pages encrypted.pdf --password=pass 1 \ - ./encrypted.pdf --password=pass 1 -- \ - outfile.pdf - -Note that we had to specify the password all three times because giving -a password as :samp:`--encryption-file-password` doesn't -count for page selection, and as far as qpdf is concerned, -:file:`encrypted.pdf` and -:file:`./encrypted.pdf` are separated files. These -are all corner cases that most users should hopefully never have to be -bothered with. - -Prior to version 8.4, it was not possible to specify the same page from -the same file directly more than once, and the workaround of specifying -the same file in more than one way was required. Version 8.4 removes -this limitation, but there is still a valid use case. When you specify -the same page from the same file more than once, qpdf will share objects -between the pages. If you are going to do further manipulation on the -file and need the two instances of the same original page to be deep -copies, then you can specify the file in two different ways. For example -:command:`qpdf in.pdf --pages . 1 ./in.pdf 1 -- out.pdf` -would create a file with two copies of the first page of the input, and -the two copies would share any objects in common. This includes fonts, -images, and anything else the page references. - -.. _overlay-underlay: - -Overlay and Underlay Options ----------------------------- - -Starting with qpdf 8.4, it is possible to overlay or underlay pages from -other files onto the output generated by qpdf. 
Specify overlay or -underlay as follows: - -:: - - { --overlay | --underlay } file [ options ] -- - -Overlay and underlay options are processed late, so they can be combined -with other like merging and will apply to the final output. The -:samp:`--overlay` and :samp:`--underlay` -options work the same way, except underlay pages are drawn underneath -the page to which they are applied, possibly obscured by the original -page, and overlay files are drawn on top of the page to which they are -applied, possibly obscuring the page. You can combine overlay and -underlay. - -The default behavior of overlay and underlay is that pages are taken -from the overlay/underlay file in sequence and applied to corresponding -pages in the output until there are no more output pages. If the overlay -or underlay file runs out of pages, remaining output pages are left -alone. This behavior can be modified by options, which are provided -between the :samp:`--overlay` or -:samp:`--underlay` flag and the -:samp:`--` option. The following options are supported: - -- :samp:`--password=password`: supply a password if the - overlay/underlay file is encrypted. - -- :samp:`--to=page-range`: a range of pages in the same - form at described in :ref:`page-selection` - indicates which pages in the output should have the overlay/underlay - applied. If not specified, overlay/underlay are applied to all pages. - -- :samp:`--from=[page-range]`: a range of pages that - specifies which pages in the overlay/underlay file will be used for - overlay or underlay. If not specified, all pages will be used. This - can be explicitly specified to be empty if - :samp:`--repeat` is used. - -- :samp:`--repeat=page-range`: an optional range of - pages that specifies which pages in the overlay/underlay file will be - repeated after the "from" pages are used up. If you want to repeat a - range of pages starting at the beginning, you can explicitly use - :samp:`--from=`. - -Here are some examples. 
- -- :command:`--overlay o.pdf --to=1-5 --from=1-3 --repeat=4 - --`: overlay the first three pages from file - :file:`o.pdf` onto the first three pages of the - output, then overlay page 4 from :file:`o.pdf` - onto pages 4 and 5 of the output. Leave remaining output pages - untouched. - -- :command:`--underlay footer.pdf --from= --repeat=1,2 - --`: Underlay page 1 of - :file:`footer.pdf` on all odd output pages, and - underlay page 2 of :file:`footer.pdf` on all even - output pages. - -.. _attachments: - -Embedded Files/Attachments Options ----------------------------------- - -Starting with qpdf 10.2, you can work with file attachments in PDF files -from the command line. The following options are available: - -:samp:`--list-attachments` - Show the "key" and stream number for embedded files. With - :samp:`--verbose`, additional information, including - preferred file name, description, dates, and more are also displayed. - The key is usually but not always equal to the file name, and is - needed by some of the other options. - -:samp:`--show-attachment={key}` - Write the contents of the specified attachment to standard output as - binary data. The key should match one of the keys shown by - :samp:`--list-attachments`. If specified multiple - times, only the last attachment will be shown. - -:samp:`--add-attachment {file} {options} --` - Add or replace an attachment with the contents of - :samp:`{file}`. This may be specified more - than once. The following additional options may appear before the - ``--`` that ends this option: - - :samp:`--key={key}` - The key to use to register the attachment in the embedded files - table. Defaults to the last path element of - :samp:`{file}`. - - :samp:`--filename={name}` - The file name to be used for the attachment. This is what is - usually displayed to the user and is the name most graphical PDF - viewers will use when saving a file. It defaults to the last path - element of :samp:`{file}`. 
- - :samp:`--creationdate={date}` - The attachment's creation date in PDF format; defaults to the - current time. The date format is explained below. - - :samp:`--moddate={date}` - The attachment's modification date in PDF format; defaults to the - current time. The date format is explained below. - - :samp:`--mimetype={type/subtype}` - The mime type for the attachment, e.g. ``text/plain`` or - ``application/pdf``. Note that the mimetype appears in a field - called ``/Subtype`` in the PDF but actually includes the full type - and subtype of the mime type. - - :samp:`--description={"text"}` - Descriptive text for the attachment, displayed by some PDF - viewers. - - :samp:`--replace` - Indicates that any existing attachment with the same key should be - replaced by the new attachment. Otherwise, - :command:`qpdf` gives an error if an attachment - with that key is already present. - -:samp:`--remove-attachment={key}` - Remove the specified attachment. This doesn't only remove the - attachment from the embedded files table but also clears out the file - specification. That means that any potential internal links to the - attachment will be broken. This option may be specified multiple - times. Run with :samp:`--verbose` to see status of - the removal. - -:samp:`--copy-attachments-from {file} {options} --` - Copy attachments from another file. This may be specified more than - once. The following additional options may appear before the ``--`` - that ends this option: - - :samp:`--password={password}` - If required, the password needed to open - :samp:`{file}` - - :samp:`--prefix={prefix}` - Only required if the file from which attachments are being copied - has attachments with keys that conflict with attachments already - in the file. In this case, the specified prefix will be prepended - to each key. This affects only the key in the embedded files - table, not the file name. The PDF specification doesn't preclude - multiple attachments having the same file name. 
- -When a date is required, the date should conform to the PDF date format -specification, which is -``D:``\ :samp:`{yyyymmddhhmmss}`, where -:samp:`{}` is either ``Z`` for UTC or a -timezone offset in the form :samp:`{-hh'mm'}` or -:samp:`{+hh'mm'}`. Examples: -``D:20210207161528-05'00'``, ``D:20210207211528Z``. - -.. _advanced-parsing: - -Advanced Parsing Options ------------------------- - -These options control aspects of how qpdf reads PDF files. Mostly these -are of use to people who are working with damaged files. There is little -reason to use these options unless you are trying to solve specific -problems. The following options are available: - -:samp:`--suppress-recovery` - Prevents qpdf from attempting to recover damaged files. - -:samp:`--ignore-xref-streams` - Tells qpdf to ignore any cross-reference streams. - -Ordinarily, qpdf will attempt to recover from certain types of errors in -PDF files. These include errors in the cross-reference table, certain -types of object numbering errors, and certain types of stream length -errors. Sometimes, qpdf may think it has recovered but may not have -actually recovered, so care should be taken when using this option as -some data loss is possible. The -:samp:`--suppress-recovery` option will prevent qpdf -from attempting recovery. In this case, it will fail on the first error -that it encounters. - -Ordinarily, qpdf reads cross-reference streams when they are present in -a PDF file. If :samp:`--ignore-xref-streams` is -specified, qpdf will ignore any cross-reference streams for hybrid PDF -files. The purpose of hybrid files is to make some content available to -viewers that are not aware of cross-reference streams. It is almost -never desirable to ignore them. The only time when you might want to use -this feature is if you are testing creation of hybrid PDF files and wish -to see how a PDF consumer that doesn't understand object and -cross-reference streams would interpret such a file. - -.. 
_advanced-transformation: - -Advanced Transformation Options -------------------------------- - -These transformation options control fine points of how qpdf creates the -output file. Mostly these are of use only to people who are very -familiar with the PDF file format or who are PDF developers. The -following options are available: - -:samp:`--compress-streams={[yn]}` - By default, or with :samp:`--compress-streams=y`, - qpdf will compress any stream with no other filters applied to it - with the ``/FlateDecode`` filter when it writes it. To suppress this - behavior and preserve uncompressed streams as uncompressed, use - :samp:`--compress-streams=n`. - -:samp:`--decode-level={option}` Controls which streams qpdf tries to decode. The default is - :samp:`generalized`. The following options are - available: + :samp:`generalized`. + + The following options are available: - :samp:`none`: do not attempt to decode any streams - - :samp:`generalized`: decode streams filtered with - supported generalized filters: ``/LZWDecode``, ``/FlateDecode``, - ``/ASCII85Decode``, and ``/ASCIIHexDecode``. We define generalized - filters as those to be used for general-purpose compression or - encoding, as opposed to filters specifically designed for image - data. Note that, by default, streams already compressed with - ``/FlateDecode`` are not uncompressed and recompressed unless you - also specify :samp:`--recompress-flate`. + - :samp:`generalized`: decode streams filtered with supported + generalized filters: ``/LZWDecode``, ``/FlateDecode``, + ``/ASCII85Decode``, and ``/ASCIIHexDecode``. We define + generalized filters as those to be used for general-purpose + compression or encoding, as opposed to filters specifically + designed for image data. 
- - :samp:`specialized`: in addition to generalized, - decode streams with supported non-lossy specialized filters; - currently this is just ``/RunLengthDecode`` + - :samp:`specialized`: in addition to generalized, decode streams + with supported non-lossy specialized filters; currently this is + just ``/RunLengthDecode`` - - :samp:`all`: in addition to generalized and - specialized, decode streams with supported lossy filters; - currently this is just ``/DCTDecode`` (JPEG) + - :samp:`all`: in addition to generalized and specialized, decode + streams with supported lossy filters; currently this is just + ``/DCTDecode`` (JPEG) + + There are several filters that :command:`qpdf` does not support. + These are left untouched regardless of the option. Future versions + of qpdf may support additional filters. + + Because the default value is ``generalized``, by default, when a stream + is encoded using non-lossy filters that qpdf understands and is not + already compressed using a good compression scheme, qpdf will + uncompress the stream. If ``--compress-streams=y`` is also in + effect, which is the default (see :qpdf:ref:`--compress-streams`), + the overall effect is that qpdf will recompress streams with + generalized filters using flate compression, effectively + eliminating LZW and ASCII-based filters. This is usually desirable + behavior but can be disabled with ``--decode-level=none``. + + As a special case, streams already compressed with ``/FlateDecode`` + are not uncompressed and recompressed. You can change this behavior + with :qpdf:ref:`--recompress-flate`. + +.. qpdf:option:: --stream-data=option + + .. help: control stream compression + + This option controls how streams are compressed in the output. + It is less granular than the newer options, --compress-streams + and --decode-level. 
+ + Options: + - compress: same as --compress-streams=y --decode-level=generalized + - preserve: same as --compress-streams=n --decode-level=none + - uncompress: same as --compress-streams=n --decode-level=generalized -:samp:`--stream-data={option}` Controls transformation of stream data. This option predates the - :samp:`--compress-streams` and - :samp:`--decode-level` options. Those options can be - used to achieve the same affect with more control. The value of - :samp:`{option}` may - be one of the following: + :qpdf:ref:`--compress-streams` and :qpdf:ref:`--decode-level` + options. Those options can be used to achieve the same effect with + more control. The value of :samp:`{option}` may be one of the + following: - - :samp:`compress`: recompress stream data when - possible (default); equivalent to - :samp:`--compress-streams=y` - :samp:`--decode-level=generalized`. Does not - recompress streams already compressed with ``/FlateDecode`` unless - :samp:`--recompress-flate` is also specified. + - :samp:`compress`: recompress stream data when possible (default); + equivalent to :samp:`--compress-streams=y` + :samp:`--decode-level=generalized`. Does not recompress streams + already compressed with ``/FlateDecode`` unless + :qpdf:ref:`--recompress-flate` is also specified. 
- - :samp:`preserve`: leave all stream data as is; - equivalent to :samp:`--compress-streams=n` - :samp:`--decode-level=none` + - :samp:`preserve`: leave all stream data as is; equivalent to + :samp:`--compress-streams=n` :samp:`--decode-level=none` - - :samp:`uncompress`: uncompress stream data - compressed with generalized filters when possible; equivalent to - :samp:`--compress-streams=n` - :samp:`--decode-level=generalized` + - :samp:`uncompress`: uncompress stream data compressed with + generalized filters when possible; equivalent to + :samp:`--compress-streams=n` :samp:`--decode-level=generalized` -:samp:`--recompress-flate` - By default, streams already compressed with ``/FlateDecode`` are left - alone rather than being uncompressed and recompressed. This option - causes qpdf to uncompress and recompress the streams. There is a - significant performance cost to using this option, but you probably - want to use it if you specify - :samp:`--compression-level`. +.. qpdf:option:: --recompress-flate + + .. help: uncompress and recompress flate + + The default generalized compression scheme used by PDF is flate, + which is the same as used by zip and gzip. Usually qpdf just + leaves these alone. This option tells qpdf to uncompress and + recompress streams compressed with flate. This can be useful + when combined with --compression-level. + + The default generalized compression scheme used by PDF is flate + (``/FlateDecode``), which is the same as used by :command:`zip` and + :command:`gzip`. Usually qpdf just leaves these alone. This option + tells :command:`qpdf` to uncompress and recompress streams + compressed with flate. This can be useful when combined with + :qpdf:ref:`--compression-level`. Using this option may make + :command:`qpdf` much slower when writing output files. + +.. qpdf:option:: --compression-level=level + + .. 
help: set compression level for flate + + Set a compression level from 1 (least, fastest) to 9 (most, + slowest) when compressing files with flate (used in zip and + gzip), which is the default compression for most PDF files. + You need --recompress-flate with this option if you want to + change already compressed streams. -:samp:`--compression-level={level}` When writing new streams that are compressed with ``/FlateDecode``, - use the specified compression level. The value of - :samp:`level` should be a number from 1 to 9 and is - passed directly to zlib, which implements deflate compression. Note - that qpdf doesn't uncompress and recompress streams by default. To - have this option apply to already compressed streams, you should also - specify :samp:`--recompress-flate`. If your goal is - to shrink the size of PDF files, you should also use + use the specified compression level. The value of :samp:`level` + should be a number from 1 to 9 and is passed directly to zlib, + which implements deflate compression. Lower numbers compress less + and are faster; higher numbers compress more and are slower. Note + that :command:`qpdf` doesn't uncompress and recompress streams + compressed with flate by default. To have this option apply to + already compressed streams, you should also specify + :qpdf:ref:`--recompress-flate`. If your goal is to shrink the size + of PDF files, you should also use :samp:`--object-streams=generate`. -:samp:`--normalize-content=[yn]` - Enables or disables normalization of content streams. Content - normalization is enabled by default in QDF mode. Please see :ref:`qdf` for additional discussion of QDF mode. +.. qpdf:option:: --normalize-content=[yn] -:samp:`--object-streams={mode}` - Controls handling of object streams. The value of - :samp:`{mode}` may be - one of the following: + .. 
help: fix newlines in content streams - - :samp:`preserve`: preserve original object streams - (default) + Normalize newlines to UNIX-style newlines in PDF content + streams, which is useful for viewing them in a programmer's text + editor across multiple platforms. This is also turned on by + --qdf. - - :samp:`disable`: don't write any object streams + Enables or disables normalization of newlines in PDF content + streams to UNIX-style newlines, which is useful for viewing files + in a programmer-friendly text editor across multiple platforms. + Content normalization is off by default, but is automatically + enabled by :qpdf:ref:`--qdf` (see also :ref:`qdf`). It is not + recommended to use this option for production use. If qpdf runs + into any lexical errors while normalizing content, it will print a + warning indicating that content may be damaged. - - :samp:`generate`: use object streams wherever - possible +.. qpdf:option:: --object-streams=mode + + .. help: control use of object streams + + Control what qpdf does regarding object streams. Options: + - preserve: preserve original object streams, if any (the default) + - disable: create output files with no object streams + - generate: create object streams, and compress objects when possible + + Controls handling of object streams. The value of :samp:`{mode}` + may be one of the following: + + - :samp:`preserve`: preserve original object streams, if any (the + default) + + - :samp:`disable`: create output files with no object streams + + - :samp:`generate`: create object streams, and compress objects + when possible + + Object streams are PDF streams that contain other objects. Putting + objects in object streams allows the PDF objects themselves to be + compressed, which can result in much smaller PDF files. Combining + this option with :qpdf:ref:`--compression-level` and + :qpdf:ref:`--recompress-flate` can often result in creation of + smaller PDF files. 
+ + Object streams, also known as compressed objects, were introduced + into the PDF specification at version 1.5 around 2003. Some ancient + PDF viewers may not support files with object streams. qpdf can be + used to transform files with object streams to files without object + streams or vice versa. + + In :samp:`preserve` mode, the relationship to objects and the + streams that contain them is preserved from the original file. If + the file has no object streams, qpdf will not add any. In + :samp:`disable` mode, all objects are written as regular, + uncompressed objects. The resulting file should be structurally + readable by older PDF viewers, though there is still a chance that + the file may contain other content that the older reader can't + support. In :samp:`generate` mode, qpdf will create its own object + streams. This will usually result in more compact PDF files. In + this mode, qpdf will also make sure the PDF version number in the + header is at least 1.5. + +.. qpdf:option:: --preserve-unreferenced + + .. help: preserve unreferenced objects + + Preserve all objects from the input even if not referenced. -:samp:`--preserve-unreferenced` Tells qpdf to preserve objects that are not referenced when writing - the file. Ordinarily any object that is not referenced in a traversal - of the document from the trailer dictionary will be discarded. This - may be useful in working with some damaged files or inspecting files - with known unreferenced objects. + the file. Ordinarily any object that is not referenced in a + traversal of the document from the trailer dictionary will be + discarded. This may be useful in working with some damaged files or + inspecting files with known unreferenced objects. 
This flag is ignored for linearized files and has the effect of causing objects in the new file to be written in order by object ID @@ -1128,28 +988,35 @@ following options are available: indirect differently from the original file, and the original file may have gaps in its numbering. - See also :samp:`--preserve-unreferenced-resources`, - which does something completely different. + See also :qpdf:ref:`--preserve-unreferenced-resources`, which does + something completely different. -:samp:`--remove-unreferenced-resources={option}` - The :samp:`{option}` may be ``auto``, - ``yes``, or ``no``. The default is ``auto``. +.. qpdf:option:: --remove-unreferenced-resources=option - Starting with qpdf 8.1, when splitting pages, qpdf is able to attempt - to remove images and fonts that are not used by a page even if they - are referenced in the page's resources dictionary. When shared - resources are in use, this behavior can greatly reduce the file sizes - of split pages, but the analysis is very slow. In versions from 8.1 - through 9.1.1, qpdf did this analysis by default. Starting in qpdf - 10.0.0, if ``auto`` is used, qpdf does a quick analysis of the file - to determine whether the file is likely to have unreferenced objects - on pages, a pattern that frequently occurs when resource dictionaries - are shared across multiple pages and rarely occurs otherwise. If it - discovers this pattern, then it will attempt to remove unreferenced - resources. Usually this means you get the slower splitting speed only - when it's actually going to create smaller files. You can suppress - removal of unreferenced resources altogether by specifying ``no`` or - force it to do the full algorithm by specifying ``yes``. + .. help: remove unreferenced page resources + + Remove from a page's resource dictionary any resources that are + not referenced in the page's contents. Options: "auto" + (default), "yes", "no". + + Options: ``auto`` (the default), ``yes``, or ``no``. 
+ + Starting with qpdf 8.1, when splitting pages, qpdf is able to + attempt to remove images and fonts that are not used by a page even + if they are referenced in the page's resources dictionary. When + shared resources are in use, this behavior can greatly reduce the + file sizes of split pages, but the analysis is very slow. In + versions from 8.1 through 9.1.1, qpdf did this analysis by default. + Starting in qpdf 10.0.0, if ``auto`` is used, qpdf does a quick + analysis of the file to determine whether the file is likely to + have unreferenced objects on pages, a pattern that frequently + occurs when resource dictionaries are shared across multiple pages + and rarely occurs otherwise. If it discovers this pattern, then it + will attempt to remove unreferenced resources. Usually this means + you get the slower splitting speed only when it's actually going to + create smaller files. You can suppress removal of unreferenced + resources altogether by specifying ``no`` or force it to do the + full algorithm by specifying ``yes``. Other than cases in which you don't care about file size and care a lot about runtime, there are few reasons to use this option, @@ -1158,40 +1025,361 @@ following options are available: be removing. If you encounter that case, please report it as bug at https://github.com/qpdf/qpdf/issues/. -:samp:`--preserve-unreferenced-resources` - This is a synonym for - :samp:`--remove-unreferenced-resources=no`. +.. qpdf:option:: --preserve-unreferenced-resources - See also :samp:`--preserve-unreferenced`, which does - something completely different. + .. help: use --remove-unreferenced-resources=no -:samp:`--newline-before-endstream` - Tells qpdf to insert a newline before the ``endstream`` keyword, not - counted in the length, after any stream content even if the last - character of the stream was a newline. This may result in two + Synonym for --remove-unreferenced-resources=no. Use that instead. 
+ + This is a synonym for :samp:`--remove-unreferenced-resources=no`. + See :qpdf:ref:`--remove-unreferenced-resources`. + + See also :qpdf:ref:`--preserve-unreferenced`, which does something + completely different. To reduce confusion, you should use + :samp:`--remove-unreferenced-resources=no` instead. + +.. qpdf:option:: --newline-before-endstream + + .. help: force a newline before endstream + + Force an extra newline before endstream. Using this option enables + qpdf to preserve PDF/A when rewriting such files. + + Tell qpdf to insert a newline before the ``endstream`` keyword, + not counted in the length, after any stream content even if the + last character of the stream was a newline. This may result in two newlines in some cases. This is a requirement of PDF/A. While qpdf - doesn't specifically know how to generate PDF/A-compliant PDFs, this - at least prevents it from removing compliance on already compliant - files. + doesn't specifically know how to generate PDF/A-compliant PDFs, + this at least prevents it from removing compliance on already + compliant files. -:samp:`--linearize-pass1={file}` - Write the first pass of linearization to the named file. The - resulting file is not a valid PDF file. This option is useful only - for debugging ``QPDFWriter``'s linearization code. When qpdf - linearizes files, it writes the file in two passes, using the first - pass to calculate sizes and offsets that are required for hint tables - and the linearization dictionary. Ordinarily, the first pass is - discarded. This option enables it to be captured. +.. qpdf:option:: --coalesce-contents -:samp:`--coalesce-contents` - When a page's contents are split across multiple streams, this option - causes qpdf to combine them into a single stream. Use of this option - is never necessary for ordinary usage, but it can help when working - with some files in some cases.
For example, this can also be combined - with QDF mode or content normalization to make it easier to look at - all of a page's contents at once. + .. help: combine content streams + + If a page has an array of content streams, concatenate them into + a single content stream. + + When a page's contents are split across multiple streams, this + option causes qpdf to combine them into a single stream. Use of + this option is never necessary for ordinary usage, but it can help + when working with some files in some cases. For example, this can + also be combined with QDF mode or content normalization to make it + easier to look at all of a page's contents at once. It is common + for PDF writers to create multiple content streams for a variety of + reasons such as making it easier to modify page contents and + splitting very large content streams so PDF viewers may be able to + use less memory. + +.. qpdf:option:: --externalize-inline-images + + .. help: convert inline to regular images + + Convert inline images to regular images. + + Convert inline images to regular images. By default, images whose + data is at least 1,024 bytes are converted when this option is + selected. Use :qpdf:ref:`--ii-min-bytes` to change the size + threshold. This option is implicitly selected when + :qpdf:ref:`--optimize-images` is selected unless + :qpdf:ref:`--keep-inline-images` is also specified. + +.. qpdf:option:: --ii-min-bytes=size-in-bytes + + .. help: set minimum size for --externalize-inline-images + + Don't externalize inline images smaller than this size. The + default is 1,024. Use 0 for no minimum. + + Avoid converting inline images whose size is below the specified + minimum size to regular images. The default is 1,024 bytes. Use 0 + for no minimum. + +.. qpdf:option:: --min-version=version + + .. help: set minimum PDF version + + Force the PDF version of the output to be at least the + specified version. 
+ + Force the PDF version of the output file to be at least + :samp:`{version}`. In other words, if the input file has a lower + version than the specified version, the specified version will be + used. If the input file has a higher version, the input file's + original version will be used. It is seldom necessary to use this + option since qpdf will automatically increase the version as needed + when adding features that require newer PDF readers. + + The version number may be expressed in the form + :samp:`{major.minor.extension-level}`, in which case the version is + interpreted as :samp:`{major.minor}` at extension level + :samp:`{extension-level}`. For example, version ``1.7.8`` + represents version 1.7 at extension level 8. Note that minimal + syntax checking is done on the command line. :command:`qpdf` does + not check whether the specified version is actually required. + +.. qpdf:option:: --force-version=version + + .. help: set output PDF version + + Force the output PDF file's PDF version header to be the specified + value, even if the file uses features that may not be available + in that version. + + This option forces the PDF version to be the exact version + specified *even when the file may have content that is not + supported in that version*. The version number is interpreted in + the same way as with :qpdf:ref:`--min-version` so that extension + levels can be set. In some cases, forcing the output file's PDF + version to be lower than that of the input file will cause qpdf to + disable certain features of the document. Specifically, 256-bit + keys are disabled if the version is less than 1.7 with extension + level 8 (except R5 is disabled if less than 1.7 with extension + level 3), AES encryption is disabled if the version is less than + 1.6, cleartext metadata and object streams are disabled if less + than 1.5, 128-bit encryption keys are disabled if less than 1.4, + and all encryption is disabled if less than 1.3. 
Even with these + precautions, qpdf won't be able to do things like eliminate use of + newer image compression schemes, transparency groups, or other + features that may have been added in more recent versions of PDF. + + As a general rule, with the exception of big structural things like + the use of object streams or AES encryption, PDF viewers are + supposed to ignore features in files that they don't support from + newer versions. This means that forcing the version to a lower + version may make it possible to open your PDF file with an older + version, though bear in mind that some of the original document's + functionality may be lost. + +.. _page-ranges: + +Page Ranges +----------- + +.. help-topic page-ranges: page range syntax + + A full description of the page range syntax, with examples, can be + found in the manual. Summary: + + - a,b,c pages a, b, and c + - a-b pages a through b inclusive; if a > b, this counts down + - r<n> where <n> represents a number is the <n>th page from the end + - z the last page, same as r1 + + You can append :even or :odd to select every other page from the + resulting set of pages, where :odd starts with the first page and + :even starts with the second page. These are odd and even pages + from the resulting set, not based on the original page numbers. + +Several :command:`qpdf` command-line arguments accept page ranges as +options. This section describes the syntax of a page range. + +- A plain number indicates a page numbered from ``1``, so ``1`` + represents the first page. + +- A number preceded by ``r`` counts from the end, so ``r1`` is the + last page, ``r2`` is the second-to-last page, etc. + +- The letter ``z`` represents the last page and is the same as ``r1``. + +- Page numbers may appear in any order separated by commas. + +- Two page numbers separated by dashes represents the inclusive range + of pages from the first to the second. If the first number is higher + than the second number, it is the range of pages in reverse.
+ +- The range may be appended with ``:odd`` or ``:even`` to select only + pages from the resulting range in odd or even positions. In this + case, odd and even refer to positions in the final range, not + whether the original page number is odd or even. + +Example page ranges: + +- ``1,6,4``: pages 1, 6, and 4 in that order + +- ``3-7``: pages 3 through 7 inclusive in increasing order + +- ``7-3``: pages 7, 6, 5, 4, and 3 in that order + +- ``1-z``: all pages in order + +- ``z-1``: all pages in reverse order + +- ``1,3,5-9,15-12``: pages 1, 3, 5, 6, 7, 8, 9, 15, 14, 13, and 12 in + that order + +- ``r3-r1``: the last three pages of the document + +- ``r1-r3``: the last three pages of the document in reverse order + +- ``1-20:even``: even pages from 2 to 20 + +- ``5,7-9,12``: pages 5, 7, 8, 9, and 12 + +- ``5,7-9,12:odd``: pages 5, 8, and 12, which are the pages in odd + positions from the original set of 5, 7, 8, 9, 12 + +- ``5,7-9,12:even``: pages 7 and 9, which are the pages in even + positions from the original set of 5, 7, 8, 9, 12 + +.. _modification-options: + +PDF Modification +---------------- + +.. help-topic modification: change parts of the PDF + + Modification options make systematic changes to certain parts of + the PDF, causing the PDF to render differently from the original. + +Modification options make systematic changes to certain parts of the +PDF, causing the PDF to render differently from the original. See also +:ref:`transformation-options`. + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --pages file [ --password=password ] [ page-range ] [ ... ] -- + + .. help: begin page selection + + Run qpdf --help=page-selection for details. + + This flag starts page selection options, which are used to select + pages from one or more input files to perform operations such as + splitting, merging, and collating files. + + Please see :ref:`page-selection` for details about selecting pages.
+ + See also :qpdf:ref:`--split-pages`, :qpdf:ref:`--collate`, + :ref:`page-ranges`. + +.. qpdf:option:: --collate=n + + .. help: collate with --pages + + Collate rather than concatenate pages specified with --pages. + With a numeric argument, collate in groups of n. The default + is 1. Run qpdf --help=page-selection for additional details. + + This option causes :command:`qpdf` to collate rather than + concatenate pages specified with :qpdf:ref:`--pages`. With a + numeric argument, collate in groups of :samp:`{n}`. The default + is 1. + + Please see :ref:`page-selection` for additional details. + +.. qpdf:option:: --split-pages=[n] + + .. help: write pages to separate files + + This option causes qpdf to create separate output files for each + page or group of pages rather than a single output file. + + File names are generated from the specified output file as follows: + + - If the string %d appears in the output file name, it is replaced with a + zero-padded page range starting from 1 + - Otherwise, if the output file name ends in .pdf (case insensitive), a + zero-padded page range, preceded by a dash, is inserted before the file + extension + - Otherwise, the file name is appended with a zero-padded page range + preceded by a dash. + + Page ranges are single page numbers for single-page groups or first-last + for multi-page groups. + + Write each group of :samp:`{n}` pages to a separate output file. If + :samp:`{n}` is not specified, create single pages. Output file + names are generated as follows: + + - If the string ``%d`` appears in the output file name, it is + replaced with a range of zero-padded page numbers starting + from 1. + + - Otherwise, if the output file name ends in :file:`.pdf` (case + insensitive), a zero-padded page range, preceded by a dash, is + inserted before the file extension. + + - Otherwise, the file name is appended with a zero-padded page + range preceded by a dash. 
+ + Zero padding is added to all page numbers in file names so that all + the numbers are the same length, which causes the output filenames + to sort lexically in numerical order. + + Page ranges are a single number in the case of single-page groups or + two numbers separated by a dash otherwise. + + Here are some examples. In these examples, :file:`infile.pdf` has + 12 pages. + + - ``qpdf --split-pages infile.pdf %d-out``: output files are + :file:`01-out` through :file:`12-out` with no extension. + + - ``qpdf --split-pages=2 infile.pdf outfile.pdf``: output files are + :file:`outfile-01-02.pdf` through :file:`outfile-11-12.pdf` + + - ``qpdf --split-pages infile.pdf something.else`` would generate + files :file:`something.else-01` through + :file:`something.else-12`. The extension ``.else`` is not treated + in any special way regarding the placement of the number. + + Note that outlines, threads, and other document-level features of + the original PDF file are not preserved. For each page of output, + this option creates an empty PDF and copies a single page from the + input into it. If you require the document-level data, you will + have to run :command:`qpdf` with the :qpdf:ref:`--pages` option + once for each page. Using :qpdf:ref:`--split-pages` is much faster + if you don't require the document-level data. A future version of + qpdf may support preservation of some document-level information. + +.. qpdf:option:: --overlay file [ options ] -- + + .. help: begin overlay options + + Overlay pages from another file on the output. + Run qpdf --help=overlay-underlay for details. + + Overlay pages from another file on the output. + + See :ref:`overlay-underlay` for details. + +.. qpdf:option:: --underlay file [ options ] -- + + .. help: begin underlay options + + Underlay pages from another file on the output. + Run qpdf --help=overlay-underlay for details. + + Underlay pages from another file on the output. + + See :ref:`overlay-underlay` for details. + +..
qpdf:option:: --flatten-rotation + + .. help: remove rotation from page dictionary + + Rotate a page using content commands instead of page-level + metadata. This can be useful if a broken PDF viewer fails to + properly consider page rotation metadata. + + For each page that is rotated using the ``/Rotate`` key in the + page's dictionary, remove the ``/Rotate`` key and implement the + identical rotation semantics by modifying the page's contents. This + option can be useful to prepare files for buggy PDF applications + that don't properly handle rotated pages. There is usually no + reason to use this option unless you are working around a specific + problem. + +.. qpdf:option:: --flatten-annotations=option + + .. help: push annotations into content + + Push page annotations into the content streams. This may be + necessary in some cases when printing or splitting files. + Options: "all", "print", "screen". -:samp:`--flatten-annotations={option}` This option collapses annotations into the pages' contents with special handling for form fields. Ordinarily, an annotation is rendered separately and on top of the page. Combining annotations @@ -1200,48 +1388,113 @@ following options are available: transformations. The library functionality backing this option was added for the benefit of programs that want to create *n-up* page layouts and other similar things that don't work well with - annotations. The :samp:`{option}` parameter - may be any of the following: + annotations.
The :samp:`{option}` parameter may be any of the + following: - - :samp:`all`: include all annotations that are not - marked invisible or hidden + - :samp:`all`: include all annotations that are not marked + invisible or hidden - - :samp:`print`: only include annotations that - indicate that they should appear when the page is printed + - :samp:`print`: only include annotations that indicate that they + should appear when the page is printed - - :samp:`screen`: omit annotations that indicate - they should not appear on the screen + - :samp:`screen`: omit annotations that indicate they should not + appear on the screen - Note that form fields are special because the annotations that are - used to render filled-in form fields may become out of date from the - fields' values if the form is filled in by a program that doesn't - know how to update the appearances. If qpdf detects this case, its - default behavior is not to flatten those annotations because doing so - would cause the value of the form field to be lost. This gives you a - chance to go back and resave the form with a program that knows how - to generate appearances. QPDF itself can generate appearances with - some limitations. See the - :samp:`--generate-appearances` option below. + In a PDF file, interactive form fields have a value and, + independently, a set of instructions, called an appearance, to + render the filled-in field. If a form is filled in by a program + that doesn't know how to update the appearances, they may become + inconsistent with the fields' values. If qpdf detects this case, + its default behavior is not to flatten those annotations because + doing so would cause the value of the form field to be lost. This + gives you a chance to go back and resave the form with a program + that knows how to generate appearances. qpdf itself can generate + appearances with some limitations. See the + :qpdf:ref:`--generate-appearances` option for details. + +.. 
qpdf:option:: --rotate=[+|-]angle[:page-range] + + .. help: rotate pages + + Rotate specified pages by multiples of 90 degrees specifying + either absolute or relative angles. "angle" may be 0, 90, 180, + or 270. You almost always want to use +angle or -angle rather + than just angle, as discussed in the manual. Run + qpdf --help=page-ranges for help with page ranges. + + Rotate the specified range of pages by the specified angle, which + must be a multiple of 90 degrees. + + The value of :samp:`{angle}` may be ``0``, ``90``, ``180``, or ``270``. + + For a description of the syntax of :samp:`{page-range}`, see + :ref:`page-ranges`. If the page range is omitted, the rotation is + applied to all pages. + + If ``+`` is prepended to :samp:`{angle}`, the angle is added, so an + angle of ``+90`` indicates a 90-degree clockwise rotation. If ``-`` + is prepended, the angle is subtracted, so ``-90`` is a 90-degree + counterclockwise rotation and is exactly the same as ``+270``. + + If neither ``+`` nor ``-`` is prepended, the rotation angle is set + exactly. You almost always want ``+`` or ``-`` since, without + inspecting the actual PDF code, it is impossible to know whether a + page that appears to be rotated is rotated "naturally" or has been + rotated by specifying rotation. For example, if a page appears to + contain a portrait-mode image rotated by 90 degrees so that the top + of the image is on the right edge of the page, there is no way to + tell by visual inspection whether the literal top of the image is + the top of the page or whether the literal top of the image is the + right edge and the page is already rotated in the PDF. Specifying a + rotation angle of ``-90`` will produce an image that appears + upright in either case. Use of absolute rotation angles should be + reserved for cases in which you have specific knowledge about the + way the PDF file is constructed.
+ + Examples: + + - ``qpdf in.pdf out.pdf --rotate=+90:2,4,6 --rotate=+180:7-8``: + rotate pages 2, 4, and 6 by 90 degrees clockwise from their + original rotation + + - ``qpdf in.pdf out.pdf --rotate=+180``: rotate all pages by 180 + degrees + + - ``qpdf in.pdf out.pdf --rotate=0``: force each page to be displayed + in its natural orientation, which would undo the effect of any + rotations previously applied in page metadata. + + See also :qpdf:ref:`--flatten-rotation`. + +.. qpdf:option:: --generate-appearances + + .. help: generate appearances for form fields + + PDF form fields consist of values and appearances, which may be + inconsistent with each other if a form field value has been + modified without updating its appearance. This option tells qpdf + to generate new appearance streams. There are some limitations, + which are discussed in the manual. -:samp:`--generate-appearances` If a file contains interactive form fields and indicates that the appearances are out of date with the values of the form, this flag - will regenerate appearances, subject to a few limitations. Note that - there is not usually a reason to do this, but it can be necessary - before using the :samp:`--flatten-annotations` - option. Most of these are not a problem with well-behaved PDF files. - The limitations are as follows: + will regenerate appearances, subject to a few limitations. Note + that there is not usually a reason to do this, but it can be + necessary before using the :qpdf:ref:`--flatten-annotations` + option. Here is a summary of the limitations. - Radio button and checkbox appearances use the pre-set values in - the PDF file. QPDF just makes sure that the correct appearance is - displayed based on the value of the field. This is fine for PDF - files that create their forms properly. Some PDF writers save - appearances for fields when they change, which could cause some - controls to have inconsistent appearances. + the PDF file.
:command:`qpdf` just makes sure that the correct + appearance is displayed based on the value of the field. This is + fine for PDF files that create their forms properly. Some PDF + writers save appearances for fields when they change, which could + cause some controls to have inconsistent appearances. - For text fields and list boxes, any characters that fall outside of US-ASCII or, if detected, "Windows ANSI" or "Mac Roman" encoding, will be replaced by the ``?`` character. + :command:`qpdf` does not know enough about fonts and encodings to + correctly represent characters that fall outside of this range. - Quadding is ignored. Quadding is used to specify whether the contents of a field should be left, center, or right aligned with @@ -1252,326 +1505,1583 @@ following options are available: - There is no support for multi-select fields or signature fields. - If qpdf doesn't do a good enough job with your form, use an external - application to save your filled-in form before processing it with - qpdf. + Appearances generated by :command:`qpdf` should be good enough for + simple forms consisting of ASCII characters where the original file + followed the PDF specification and provided template information + for text field appearances. If :command:`qpdf` doesn't do a good + enough job with your form, use an external application to save your + filled-in form before processing it with :command:`qpdf`. Most PDF + viewers that support filling in of forms will generate appearance + streams. Some of them will even do it for forms filled in with + characters outside the original font's character range by embedding + additional fonts as needed. + +.. qpdf:option:: --optimize-images + + .. help: use efficient compression for images + + Attempt to use DCT (JPEG) compression for images that fall + within certain constraints as long as doing so decreases the + size in bytes of the image. 
See also help for the following + options: + --oi-min-width + --oi-min-height + --oi-min-area + --keep-inline-images + + The --verbose flag is useful with this option. -:samp:`--optimize-images` This flag causes qpdf to recompress all images that are not - compressed with DCT (JPEG) using DCT compression as long as doing so - decreases the size in bytes of the image data and the image does not - fall below minimum specified dimensions. Useful information is - provided when used in combination with - :samp:`--verbose`. See also the - :samp:`--oi-min-width`, - :samp:`--oi-min-height`, and - :samp:`--oi-min-area` options. By default, starting - in qpdf 8.4, inline images are converted to regular images and - optimized as well. Use :samp:`--keep-inline-images` - to prevent inline images from being included. + compressed with DCT (JPEG) using DCT compression as long as doing + so decreases the size in bytes of the image data and the image does + not fall below minimum specified dimensions. Useful information is + provided when used in combination with :qpdf:ref:`--verbose`. See + also the :qpdf:ref:`--oi-min-width`, :qpdf:ref:`--oi-min-height`, + and :qpdf:ref:`--oi-min-area` options. By default, inline images + are converted to regular images and optimized as well. Use + :qpdf:ref:`--keep-inline-images` to prevent inline images from + being included. + +.. qpdf:option:: --oi-min-width=width + + .. help: minimum width for --optimize-images + + Don't optimize images whose width is below the specified value. -:samp:`--oi-min-width={width}` Avoid optimizing images whose width is below the specified amount. If omitted, the default is 128 pixels. Use 0 for no minimum. -:samp:`--oi-min-height={height}` +.. qpdf:option:: --oi-min-height=height + + .. help: minimum height for --optimize-images + + Don't optimize images whose height is below the specified value. + Avoid optimizing images whose height is below the specified amount. If omitted, the default is 128 pixels. 
Use 0 for no minimum. -:samp:`--oi-min-area={area-in-pixels}` - Avoid optimizing images whose pixel count (width × height) is below - the specified amount. If omitted, the default is 16,384 pixels. Use 0 - for no minimum. +.. qpdf:option:: --oi-min-area=area-in-pixels -:samp:`--externalize-inline-images` - Convert inline images to regular images. By default, images whose - data is at least 1,024 bytes are converted when this option is - selected. Use :samp:`--ii-min-bytes` to change the - size threshold. This option is implicitly selected when - :samp:`--optimize-images` is selected. Use - :samp:`--keep-inline-images` to exclude inline images - from image optimization. + .. help: minimum area for --optimize-images -:samp:`--ii-min-bytes={bytes}` - Avoid converting inline images whose size is below the specified - minimum size to regular images. If omitted, the default is 1,024 - bytes. Use 0 for no minimum. + Don't optimize images whose area in pixels is below the specified value. -:samp:`--keep-inline-images` - Prevent inline images from being included in image optimization. This - option has no affect when :samp:`--optimize-images` - is not specified. + Avoid optimizing images whose pixel count + (:samp:`{width}` × :samp:`{height}`) is below the specified amount. + If omitted, the default is 16,384 pixels. Use 0 for no minimum. -:samp:`--remove-page-labels` - Remove page labels from the output file. +.. qpdf:option:: --keep-inline-images -:samp:`--qdf` - Turns on QDF mode. For additional information on QDF, please see :ref:`qdf`. Note that :samp:`--linearize` - disables QDF mode. + .. help: exclude inline images from optimization -:samp:`--min-version={version}` - Forces the PDF version of the output file to be at least - :samp:`{version}`. In other words, if the - input file has a lower version than the specified version, the - specified version will be used. If the input file has a higher - version, the input file's original version will be used. 
It is seldom - necessary to use this option since qpdf will automatically increase - the version as needed when adding features that require newer PDF - readers. + Prevent inline images from being considered by --optimize-images. - The version number may be expressed in the form - :samp:`{major.minor.extension-level}`, in - which case the version is interpreted as - :samp:`{major.minor}` at extension level - :samp:`{extension-level}`. For example, - version ``1.7.8`` represents version 1.7 at extension level 8. Note - that minimal syntax checking is done on the command line. + Prevent inline images from being included in image optimization. + This option has no effect when :qpdf:ref:`--optimize-images` is not + specified. -:samp:`--force-version={version}` - This option forces the PDF version to be the exact version specified - *even when the file may have content that is not supported in that - version*. The version number is interpreted in the same way as with - :samp:`--min-version` so that extension levels can be - set. In some cases, forcing the output file's PDF version to be lower - than that of the input file will cause qpdf to disable certain - features of the document. Specifically, 256-bit keys are disabled if - the version is less than 1.7 with extension level 8 (except R5 is - disabled if less than 1.7 with extension level 3), AES encryption is - disabled if the version is less than 1.6, cleartext metadata and - object streams are disabled if less than 1.5, 128-bit encryption keys - are disabled if less than 1.4, and all encryption is disabled if less - than 1.3. Even with these precautions, qpdf won't be able to do - things like eliminate use of newer image compression schemes, - transparency groups, or other features that may have been added in - more recent versions of PDF. +.. 
qpdf:option:: --remove-page-labels - As a general rule, with the exception of big structural things like - the use of object streams or AES encryption, PDF viewers are supposed - to ignore features in files that they don't support from newer - versions. This means that forcing the version to a lower version may - make it possible to open your PDF file with an older version, though - bear in mind that some of the original document's functionality may - be lost. + .. help: remove page labels (numbers) -By default, when a stream is encoded using non-lossy filters that qpdf -understands and is not already compressed using a good compression -scheme, qpdf will uncompress and recompress streams. Assuming proper -filter implements, this is safe and generally results in smaller files. -This behavior may also be explicitly requested with -:samp:`--stream-data=compress`. + Exclude page labels (explicit page numbers) from the output file. -When :samp:`--normalize-content=y` is specified, qpdf -will attempt to normalize whitespace and newlines in page content -streams. This is generally safe but could, in some cases, cause damage -to the content streams. This option is intended for people who wish to -study PDF content streams or to debug PDF content. You should not use -this for "production" PDF files. + Exclude page labels (explicit page numbers) from the output file. -When normalizing content, if qpdf runs into any lexical errors, it will -print a warning indicating that content may be damaged. The only -situation in which qpdf is known to cause damage during content -normalization is when a page's contents are split across multiple -streams and streams are split in the middle of a lexical token such as a -string, name, or inline image. Note that files that do this are invalid -since the PDF specification states that content streams are not to be -split in the middle of a token. 
If you want to inspect the original -content streams in an uncompressed format, you can always run with -:samp:`--qdf --normalize-content=n` for a QDF file -without content normalization, or alternatively -:samp:`--stream-data=uncompress` for a regular non-QDF -mode file with uncompressed streams. These will both uncompress all the -streams but will not attempt to normalize content. Please note that if -you are using content normalization or QDF mode for the purpose of -manually inspecting files, you don't have to care about this. +.. _encryption-options: -Object streams, also known as compressed objects, were introduced into -the PDF specification at version 1.5, corresponding to Acrobat 6. Some -older PDF viewers may not support files with object streams. qpdf can be -used to transform files with object streams to files without object -streams or vice versa. As mentioned above, there are three object stream -modes: :samp:`preserve`, -:samp:`disable`, and :samp:`generate`. +Encryption +---------- -In :samp:`preserve` mode, the relationship to objects -and the streams that contain them is preserved from the original file. -In :samp:`disable` mode, all objects are written as -regular, uncompressed objects. The resulting file should be readable by -older PDF viewers. (Of course, the content of the files may include -features not supported by older viewers, but at least the structure will -be supported.) In :samp:`generate` mode, qpdf will -create its own object streams. This will usually result in more compact -PDF files, though they may not be readable by older viewers. In this -mode, qpdf will also make sure the PDF version number in the header is -at least 1.5. +.. help-topic encryption: create encrypted files -The :samp:`--qdf` flag turns on QDF mode, which changes -some of the defaults described above. Specifically, in QDF mode, by -default, stream data is uncompressed, content streams are normalized, -and encryption is removed. 
These defaults can still be overridden by -specifying the appropriate options as described above. Additionally, in -QDF mode, stream lengths are stored as indirect objects, objects are -laid out in a less efficient but more readable fashion, and the -documents are interspersed with comments that make it easier for the -user to find things and also make it possible for -:command:`fix-qdf` to work properly. QDF mode is intended -for people, mostly developers, who wish to inspect or modify PDF files -in a text editor. For details, please see :ref:`qdf`. + Create encrypted files. Usage: -.. _testing-options: + --encrypt user-password owner-password key-length [ options ] -- -Testing, Inspection, and Debugging Options ------------------------------------------- + Either or both of user-password and owner-password may be empty + strings. key-length may be 40, 128, or 256. Encryption options are + terminated by "--" by itself. -These options can be useful for digging into PDF files or for use in -automated test suites for software that uses the qpdf library. When any -of the options in this section are specified, no output file should be -given. The following options are available: + 40-bit encryption is insecure, as is 128-bit encryption without + AES. Use 256-bit encryption unless you have a specific reason to + use an insecure format, such as testing or compatibility with very + old viewers. You must use the --allow-weak-crypto flag to create + encrypted files that use insecure cryptographic algorithms. The + --allow-weak-crypto flag appears outside of --encrypt ... -- + (before --encrypt or after --). -:samp:`--deterministic-id` - Causes generation of a deterministic value for /ID. This prevents use - of timestamp and output file name information in the /ID generation. - Instead, at some slight additional runtime cost, the /ID field is - generated to include a digest of the significant parts of the content - of the output PDF file. 
This means that a given qpdf operation should - generate the same /ID each time it is run, which can be useful when - caching results or for generation of some test data. Use of this flag - is not compatible with creation of encrypted files. + Available options vary by key length. Not all readers respect all + restrictions. Different PDF readers respond differently to various + combinations of options. Sometimes a PDF viewer may show you + restrictions that differ from what you selected. This is probably + not a bug in qpdf. -:samp:`--static-id` - Causes generation of a fixed value for /ID. This is intended for - testing only. Never use it for production files. If you are trying to - get the same /ID each time for a given file and you are not - generating encrypted files, consider using the - :samp:`--deterministic-id` option. + Options for 40-bit only: + --annotate=[yn] restrict comments, filling forms, and signing + --extract=[yn] restrict text/graphic extraction + --modify=[yn] restrict document modification + --print=[yn] restrict printing -:samp:`--static-aes-iv` - Causes use of a static initialization vector for AES-CBC. This is - intended for testing only so that output files can be reproducible. - Never use it for production files. This option in particular is not - secure since it significantly weakens the encryption. + Options for 128-bit or 256-bit: + --accessibility=[yn] restrict accessibility (usually ignored) + --annotate=[yn] restrict commenting/filling form fields + --assemble=[yn] restrict document assembly + --extract=[yn] restrict text/graphic extraction + --form=[yn] restrict filling form fields + --modify-other=[yn] restrict other modifications + --modify=modify-opt control modify access by level + --print=print-opt control printing access + --cleartext-metadata prevent encryption of metadata -:samp:`--no-original-object-ids` - Suppresses inclusion of original object ID comments in QDF files. 
- This can be useful when generating QDF files for test purposes, - particularly when comparing them to determine whether two PDF files - have identical content. + For 128-bit only: + --use-aes=[yn] indicates whether to use AES encryption + --force-V4 forces use of V=4 encryption handler -:samp:`--show-encryption` - Shows document encryption parameters. Also shows the document's user - password if the owner password is given. + For 256-bit only: + --force-R5 forces use of deprecated R=5 encryption + --allow-insecure allow user password with empty owner password + + Values for print-opt: + none disallow printing + low allow only low-resolution printing + full allow full printing + + Values for modify-opt: + none allow no modifications + assembly allow document assembly only + form assembly + filling in form fields and signing + annotate form + commenting and modifying forms + all allow full document modification + +This section describes the options used to create encrypted files. For +other options related to encryption, see also :qpdf:ref:`--decrypt` +and :qpdf:ref:`--copy-encryption`. For a more in-depth technical +discussion of how PDF encryption works internally, see +:ref:`pdf-encryption`. + +To create an encrypted file, use + +:: + + --encrypt user-password owner-password key-length [ options ] -- + +Either or both of :samp:`{user-password}` and :samp:`{owner-password}` +may be empty strings. :samp:`{key-length}` may be ``40``, ``128``, or +``256``. Encryption options are terminated by ``--`` by itself. + +40-bit encryption is insecure, as is 128-bit encryption without AES. +Use 256-bit encryption unless you have a specific reason to use an +insecure format, such as testing or compatibility with very old +viewers. You must use the :qpdf:ref:`--allow-weak-crypto` flag to +create encrypted files that use insecure cryptographic algorithms. The +:qpdf:ref:`--allow-weak-crypto` flag appears outside of ``--encrypt +... --`` (before ``--encrypt`` or after ``--``). 
+ +If :samp:`{key-length}` is 256, the minimum PDF version is 1.7 with +extension level 8, and the AES-based encryption format used is the one +described in the PDF 2.0 specification. Using 128-bit encryption +forces the PDF version to be at least 1.4, or if AES is used, 1.6. +Using 40-bit encryption forces the PDF version to be at least 1.3. + +When 256-bit encryption is used, PDF files with empty owner +passwords are insecure. To create such files, you must specify the +:qpdf:ref:`--allow-insecure` option. + +Available options vary by key length. Not all readers respect all +restrictions. The default for each permission option is to be fully +permissive. These restrictions may or may not be enforced by any +particular reader. :command:`qpdf` allows very granular setting of +restrictions. Some readers may not recognize the combination of +options you specify. If you specify certain combinations of +restrictions and find a reader that doesn't seem to honor them as you +expect, it is most likely not a bug in :command:`qpdf`. qpdf itself +does not obey encryption restrictions already imposed on the file. +Doing so would be meaningless since qpdf can be used to remove +encryption from the file entirely. + +Here is a summary of encryption options. Details are provided below. 
+ +Options for 40-bit only + - ``--annotate=[yn]``: restrict comments, filling forms, and signing + + - ``--extract=[yn]``: restrict text/graphic extraction + + - ``--modify=[yn]``: restrict document modification + + - ``--print=[yn]``: restrict printing + +Options for 128-bit or 256-bit + - ``--accessibility=[yn]``: restrict accessibility (usually ignored) + + - ``--annotate=[yn]``: restrict commenting/filling form fields + + - ``--assemble=[yn]``: restrict document assembly + + - ``--extract=[yn]``: restrict text/graphic extraction + + - ``--form=[yn]``: restrict filling form fields + + - ``--modify-other=[yn]``: restrict other modifications + + - ``--modify=modify-opt``: control modify access by level + + - ``--print=print-opt``: control printing access + + - ``--cleartext-metadata``: prevent encryption of metadata + +For 128-bit only + - ``--use-aes=[yn]``: indicates whether to use AES encryption + + - ``--force-V4``: forces use of V=4 encryption handler + +For 256-bit only + - ``--force-R5``: forces use of deprecated ``R=5`` encryption algorithm + + - ``--allow-insecure``: allow user password with empty owner password + +Values for :samp:`{print-opt}` + - ``none``: disallow printing + + - ``low``: allow only low-resolution printing + + - ``full``: allow full printing + +Values for :samp:`{modify-opt}` + - ``none``: allow no modifications + + - ``assembly``: allow document assembly only + + - ``form``: ``assembly`` permissions plus filling in form fields and signing + + - ``annotate``: ``form`` permissions plus commenting and modifying forms + + - ``all``: allow full document modification + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --accessibility=[yn] + + .. help: restrict document accessibility + + This option is ignored except with very old encryption formats. + The current PDF specification does not allow restriction of + document accessibility. This option is not available with 40-bit + encryption. 
+ + Enable/disable extraction of text for accessibility to visually + impaired. The qpdf library disregards this field when AES is used + with 128-bit encryption or when 256-bit encryption is used. You + should never disable accessibility unless you are explicitly doing + so for creating test files. The PDF spec says that conforming + readers should disregard this permission and always allow + accessibility. + + This option is not available with 40-bit encryption. + +.. qpdf:option:: --annotate=[yn] + + .. help: restrict document annotation + + Enable/disable modifying annotations including making comments + and filling in form fields. For 128-bit and 256-bit encryption, + this also enables editing, creating, and deleting form fields + unless --modify-other=n or --modify=none is also specified. + + Enable/disable modifying annotations including making comments and + filling in form fields. For 128-bit and 256-bit encryption, this + also enables editing, creating, and deleting form fields unless + :samp:`--modify-other=n` or :samp:`--modify=none` is also + specified. + +.. qpdf:option:: --assemble=[yn] + + .. help: restrict document assembly + + Enable/disable document assembly (rotation and reordering of + pages). This option is not available with 40-bit encryption. + + Enable/disable document assembly (rotation and reordering of + pages). + + This option is not available with 40-bit encryption. + +.. qpdf:option:: --extract=[yn] + + .. help: restrict text/graphic extraction + + Enable/disable text/graphic extraction for purposes other than + accessibility. + + Enable/disable text/graphic extraction for purposes other than + accessibility. + +.. qpdf:option:: --form=[yn] + + .. help: restrict form filling + + Enable/disable whether filling form fields is allowed even if + modification of annotations is disabled. This option is not + available with 40-bit encryption. 
+ + Enable/disable whether filling form fields is allowed even if + modification of annotations is disabled. + + This option is not available with 40-bit encryption. + +.. qpdf:option:: --modify-other=[yn] + + .. help: restrict other modifications + + Enable/disable modifications not controlled by --assemble, + --annotate, or --form. --modify-other=n is implied by any of the + other --modify options. This option is not available with 40-bit + encryption. + + Enable/disable modifications not controlled by + :qpdf:ref:`--assemble`, :qpdf:ref:`--annotate`, or + :qpdf:ref:`--form`. ``--modify-other=n`` is implied by any of the + other :qpdf:ref:`--modify` options except for ``--modify=all``. + + This option is not available with 40-bit encryption. + +.. qpdf:option:: --modify=modify-opt + + .. help: restrict document modification + + For 40-bit files, modify-opt may only be y or n and controls all + aspects of document modification. + + For 128-bit and 256-bit encryption, modify-opt values allow + enabling and disabling levels of restriction in a manner similar + to how some PDF creation tools do it. modify-opt values map to + other combinations of options as follows: + + all: allow full modification (the default) + annotate: --modify-other=n + form: --modify-other=n --annotate=n + assembly: --modify-other=n --annotate=n --form=n + none: --modify-other=n --annotate=n --form=n --assemble=n + + For 40-bit files, :samp:`{modify-opt}` may only be ``y`` or ``n`` + and controls all aspects of document modification. 
+ + For 128-bit and 256-bit encryption, :samp:`{modify-opt}` values + allow enabling and disabling levels of restriction in a manner + similar to how some PDF creation tools do it: + + - ``none``: allow no modifications + + - ``assembly``: allow document assembly only + + - ``form``: ``assembly`` permissions plus filling in form fields + and signing + + - ``annotate``: ``form`` permissions plus commenting and modifying + forms + + - ``all``: allow full document modification (the default) + + :samp:`{modify-opt}` values map to other combinations of options as + follows: + + - ``none``: same as ``--modify-other=n --annotate=n --form=n --assemble=n`` + + - ``assembly``: same as ``--modify-other=n --annotate=n --form=n`` + + - ``form``: same as ``--modify-other=n --annotate=n`` + + - ``annotate``: same as ``--modify-other=n`` + + - ``all``: the default + + You can combine this option with the options listed above. If you + do, later options override earlier options. + +.. qpdf:option:: --print=print-opt + + .. help: restrict printing + + Control what kind of printing is allowed. For 40-bit encryption, + print-opt may only be y or n and enables or disables all + printing. For 128-bit and 256-bit encryption, print-opt may have + the following values: + + none: disallow printing + low: allow low-resolution printing only + full: allow full printing (the default) + + Control what kind of printing is allowed. For 40-bit encryption, + :samp:`{print-opt}` may be ``y`` or ``n`` and enable or disable all + printing. For 128-bit and 256-bit encryption, :samp:`{print-opt}` + may have the following values: + + - :samp:`none`: disallow printing + + - :samp:`low`: allow low-resolution printing only + + - :samp:`full`: allow full printing (the default) + +.. qpdf:option:: --cleartext-metadata + + .. help: don't encrypt metadata + + If specified, don't encrypt document metadata even when + encrypting the rest of the document. This option is not + available with 40-bit encryption. 
+ + If specified, any metadata stream in the document will be left + unencrypted even if the rest of the document is encrypted. This also + forces the PDF version to be at least 1.5. + + This option is not available with 40-bit encryption. + +.. qpdf:option:: --use-aes=[yn] + + .. help: use AES with 128-bit encryption + + Enables/disables use of the more secure AES encryption with + 128-bit encryption. Specifying --use-aes=y forces the PDF + version to be at least 1.6. This option is only available with + 128-bit encryption. The default is "n" for compatibility + reasons. Use 256-bit encryption instead. + + Enables/disables use of the more secure AES encryption with 128-bit + encryption. Specifying ``--use-aes=y`` forces the PDF version to be + at least 1.6. This option is only available with 128-bit + encryption. The default is ``n`` for compatibility reasons. Use + 256-bit encryption instead. + +.. qpdf:option:: --allow-insecure + + .. help: allow empty owner passwords + + Allow creation of PDF files with empty owner passwords and + non-empty user passwords when using 256-bit encryption. + + Allow creation of PDF files with 256-bit keys where the user + password is non-empty and the owner password is empty. Files + created in this way are insecure since they can be opened without a + password, and restrictions will not be enforced. Users would + ordinarily never want to create such files. If you are using qpdf + to intentionally create strange files for testing (a definite + valid use of qpdf!), this option allows you to create such insecure + files. This option is only available with 256-bit encryption. + + See :ref:`pdf-passwords` for a more technical discussion of this + issue. + +.. qpdf:option:: --force-V4 + + .. help: force V=4 in encryption dictionary + + This option is for testing and is never needed in practice since + qpdf does this automatically when needed. 
+ + Use of this option forces the ``V`` and ``R`` parameters in the + document's encryption dictionary to be set to the value ``4``. As + qpdf will automatically do this when required, there is no reason + to ever use this option. It exists primarily for use in testing + qpdf itself. This option also forces the PDF version to be at least + 1.5. + +.. qpdf:option:: --force-R5 + + .. help: use unsupported R=5 encryption + + Use an undocumented, unsupported, deprecated encryption + algorithm that existed only in Acrobat version IX. This option + should not be used except for compatibility testing. + + Use an undocumented, unsupported, deprecated encryption algorithm + that existed only in Acrobat version IX. This option should not be + used except for compatibility testing. If specified, qpdf sets the + minimum version to 1.7 at extension level 3. + +.. _page-selection: + +Page Selection +-------------- + +.. help-topic page-selection: select pages from one or more files + + Use the --pages option to select pages from multiple files. Usage: + + qpdf in.pdf --pages input-file [ --password=password ] [ page-range ] \ + [ ... ] -- out.pdf + + Between --pages and the -- that terminates pages option, repeat + the following: + + filename [ --password=password ] [ page-range ] + + Document-level information, such as outlines, tags, etc., is taken + from in.pdf and is preserved in out.pdf. You can use --empty in place + of an input file to start from an empty file and just copy pages + equally from all files. You can use "." as a shorthand for the + primary input file (if not --empty). In the above example, "." + would refer to in.pdf. + + Use --password=password to specify the password for a + password-protected input file. If the same input file is used more + than once, you only need to supply the password the first time. If + the page range is omitted, all pages are selected. + + Run qpdf --help=page-ranges for help with page ranges. 
+ + Use --collate=n to cause pages to be collated in groups of n pages + (default 1) instead of concatenating the input. + + Examples: + + - Start with in.pdf and append all pages from a.pdf and the even + pages from b.pdf, and write the output to out.pdf. Document-level + information from in.pdf is retained. Note the use of "." to refer + to in.pdf. + + qpdf in.pdf --pages . a.pdf b.pdf:even -- out.pdf + + - Take all the pages from a.pdf, all the pages from b.pdf in + reverse, and only pages 3 and 6 from c.pdf and write the result + to out.pdf. Use password "x" to open b.pdf: + + qpdf --empty --pages a.pdf b.pdf --password=x z-1 c.pdf 3,6 + + More examples are in the manual. + +:command:`qpdf` allows you to use the :qpdf:ref:`--pages` option to +split and merge PDF files by selecting pages from one or more input +files. + +Usage: :samp:`qpdf {in.pdf} --pages input-file [ --password={password} ] [ {page-range} ] [ ... ] -- {out.pdf}` + +Between ``--pages`` and the ``--`` that terminates pages option, +repeat the following: + +:samp:`{filename} [ --password={password} ] [ {page-range} ]` + +Notes: + - The password argument is needed only for password-protected files. + If you specify the same file more than once, you only need to supply + the password the first time. + + - The page range may be omitted. If omitted, all pages are included. + + - Document-level information, such as outlines, tags, etc., is taken + from the primary input file (in the above example, :file:`in.pdf`) + and is preserved in :file:`out.pdf`. You can use + :qpdf:ref:`--empty` in place of an input file to start from an + empty file and just copy pages equally from all files. + + - You can use ``.`` as a shorthand for the primary input file, if not + empty. + +See :ref:`page-ranges` for help on specifying a page range. + +Use :samp:`--collate={n}` to cause pages to be collated in groups of +:samp:`{n}` pages (default 1) instead of concatenating the input. 
Note +that the :qpdf:ref:`--collate` option appears outside of ``--pages ... --`` +(before ``--pages`` or after ``--``). Pages are pulled from each +document in turn. When a document is out of pages, it is skipped. See +examples below. + +Examples +~~~~~~~~ + +- Start with :file:`in.pdf` and append all pages from :file:`a.pdf` + and the even pages from :file:`b.pdf`, and write the output to + :file:`out.pdf`. Document-level information from :file:`in.pdf` is + retained. Note the use of ``.`` to refer to :file:`in.pdf`. + + :: + + qpdf in.pdf --pages . a.pdf b.pdf:even -- out.pdf + + +- Take all the pages from :file:`a.pdf`, all the pages from + :file:`b.pdf` in reverse, and only pages 3 and 6 from :file:`c.pdf` + and write the result to :file:`out.pdf`. Document-level metadata is + discarded from all input files. The password ``x`` is used to open + :file:`b.pdf`. + + :: + + qpdf --empty --pages a.pdf b.pdf --password=x z-1 c.pdf 3,6 + +- Scan a document with printing on both sides by scanning the fronts + into :file:`odd.pdf` and the backs into :file:`even.pdf`. Collate + the results into :file:`all.pdf`. This takes the first page of + :file:`odd.pdf`, the first page of :file:`even.pdf`, the second page + of :file:`odd.pdf`, the second page of :file:`even.pdf`, etc. + + :: + + qpdf --collate odd.pdf --pages . even.pdf -- all.pdf + OR + qpdf --collate --empty --pages odd.pdf even.pdf -- all.pdf + +- When collating, any number of files and page ranges can be + specified. If any file has fewer pages, that file is just skipped + when its pages have all been included. For example, if you ran + + :: + + qpdf --collate --empty --pages a.pdf 1-5 b.pdf 6-4 c.pdf r1 -- out.pdf + + you would get the following pages in this order: + + - a.pdf page 1 + + - b.pdf page 6 + + - c.pdf last page + + - a.pdf page 2 + + - b.pdf page 5 + + - a.pdf page 3 + + - b.pdf page 4 + + - a.pdf page 4 + + - a.pdf page 5 + +- You can specify a numeric argument to :qpdf:ref:`--collate`. 
With + :samp:`--collate={n}`, pull groups of :samp:`{n}` pages from each + file, as always, stopping when there are no more pages. For example, + if you ran + + :: + + qpdf --collate=2 --empty --pages a.pdf 1-5 b.pdf 6-4 c.pdf r1 -- out.pdf + + you would get the following pages in this order: + + - a.pdf page 1 + + - a.pdf page 2 + + - b.pdf page 6 + + - b.pdf page 5 + + - c.pdf last page + + - a.pdf page 3 + + - a.pdf page 4 + + - b.pdf page 4 + + - a.pdf page 5 + +- Take pages 1 through 5 from :file:`file1.pdf` and pages 11 through + 15 in reverse from :file:`file2.pdf`, taking document-level metadata + from :file:`file2.pdf`. + + :: + + qpdf file2.pdf --pages file1.pdf 1-5 . 15-11 -- outfile.pdf + +- Here's a more contrived example. If, for some reason, you wanted to + take the first page of an encrypted file called + :file:`encrypted.pdf` with password ``pass`` and repeat it twice in + an output file without any shared data between the two copies of + page 1, and if you wanted to drop document-level metadata but + preserve encryption, you could run + + :: + + qpdf --empty --copy-encryption=encrypted.pdf \ + --encryption-file-password=pass \ + --pages encrypted.pdf --password=pass 1 \ + ./encrypted.pdf --password=pass 1 -- \ + outfile.pdf + + Note that we had to specify the password all three times because + giving a password as :qpdf:ref:`--encryption-file-password` doesn't + count for page selection, and as far as qpdf is concerned, + :file:`encrypted.pdf` and :file:`./encrypted.pdf` are separate + files. (This is by design. See :ref:`page-limitations` for a + discussion.) These are all corner cases that most users should + hopefully never have to be bothered with. + +.. _page-limitations: + +Limitations +~~~~~~~~~~~ + +With the exception of page labels (page numbers), :command:`qpdf` +doesn't yet have full support for handling document-level data as it +relates to pages. 
Certain document-level features such as form fields, +outlines (bookmarks), and article tags among others, are copied in +their entirety from the primary input file. Starting with qpdf version +8.3, page labels are preserved from all files unless +:qpdf:ref:`--remove-page-labels` is specified. + +.. If updating this after limitations are removed or reduced, + recheck --split-pages as well. + +It is expected that a future version of :command:`qpdf` will have more +complete and configurable behavior regarding document-level metadata. +In the meantime, semantics of splitting and merging vary across +features. For example, the document's outlines (bookmarks) point to +actual page objects, so if you select some pages and not others, +bookmarks that point to pages that are in the output file will work, +and remaining bookmarks will not work. If you don't want to preserve +the primary file's metadata, use :qpdf:ref:`--empty` as the primary +input file. + +Visit `qpdf issues labeled with "pages" +`__ +or look at the :file:`TODO` file in the qpdf source distribution for +some of the ideas. + +.. NOTE: + + The workaround described in the following paragraph is mentioned in + the documentation in more than one place. Searching for ./ should + help find them. It is also in the test suite. I believe there are + several valid use cases for doing this, and so it is my intention + to leave the behavior of treating different paths to the same file + as separate even if the above limitations are removed. See also + https://github.com/qpdf/qpdf/issues/399 + +Prior to :command:`qpdf` version 8.4, it was not possible to specify +the same page from the same file directly more than once, and a +workaround of specifying the same file in more than one way was +required. Version 8.4 removes this limitation, but when the same page +is copied more than once, all its data is shared between the pages. 
+Sometimes this is fine, but sometimes it may not work correctly, +particularly if there are form fields or you intend to perform other +modifications on one of the pages. A future version of qpdf should +address this more completely. You can work around this by specifying +the same file in two different ways. For example :command:`qpdf +in.pdf --pages . 1 ./in.pdf 1 -- out.pdf` would create a file with two +copies of the first page of the input, and the two copies would not +share any objects in common. This includes fonts, images, and anything +else the page references. + +.. _overlay-underlay: + +Overlay and Underlay +-------------------- + +.. help-topic overlay-underlay: overlay/underlay pages from other files + + These options allow pages from another file to be overlaid or + underlaid on the primary output. Overlaid pages are drawn on top of + the destination page and may obscure the page. Underlaid pages are + drawn below the destination page. Usage: + + {--overlay | --underlay } file + [ --password=password ] + [ --to=page-range ] + [ --from=[page-range] ] + [ --repeat=page-range ] + -- + + Note the use of "--" by itself to terminate overlay/underlay options. + + For overlay and underlay, a file and optional password are specified, along + with a series of optional page ranges. The default behavior is that each + page of the overlay or underlay file is imposed on the corresponding page + of the primary output until it runs out of pages, and any extra pages are + ignored. You can also give a page range with --repeat to cause + those pages to be repeated after the original pages are exhausted. + + Run qpdf --help=page-ranges for help with page ranges. + +You can use :command:`qpdf` to overlay or underlay pages from other +files onto the output generated by qpdf. 
Specify overlay or underlay +as follows: + +:: + + { --overlay | --underlay } file [ options ] -- + +Overlay and underlay options are processed late, so they can be +combined with other options like merging and will apply to the final +output. The ``--overlay`` and ``--underlay`` options work the same +way, except underlay pages are drawn underneath the page to which they +are applied, possibly obscured by the original page, and overlay pages +are drawn on top of the page to which they are applied, possibly +obscuring the page. You can combine overlay and underlay. + +The default behavior of overlay and underlay is that pages are taken +from the overlay/underlay file in sequence and applied to +corresponding pages in the output until there are no more output +pages. If the overlay or underlay file runs out of pages, remaining +output pages are left alone. This behavior can be modified by options, +which are provided between the ``--overlay`` or ``--underlay`` flag +and the ``--`` option. The following options are supported: + +.. qpdf:option:: --to=page-range + + .. help: destination pages for underlay/overlay + + Specify the range of pages in the primary output to apply + overlay/underlay to. See qpdf --help=page-ranges for help with + the page range syntax. + + Specify a page range (see :ref:`page-ranges`) that indicates which + pages in the output should have the overlay/underlay applied. If not + specified, overlay/underlay are applied to all pages. + +.. qpdf:option:: --from=[page-range] + + .. help: source pages for underlay/overlay + + Specify pages from the overlay/underlay file that are applied to + the destination pages. See qpdf --help=page-ranges for help + with the page range syntax. The page range may be omitted + if --repeat is used. + +Specify a page range that indicates which pages in the +overlay/underlay file will be used for overlay or underlay. If not +specified, all pages will be used. 
This can be left empty by omitting +:samp:`{page-range}` if :qpdf:ref:`--repeat` is used. + +.. qpdf:option:: --repeat=page-range + + .. help: overlay/underlay pages to repeat + + Specify pages from the overlay/underlay that are repeated after + "from" pages have been exhausted. See qpdf --help=page-ranges + for help with the page range syntax. + +Specify an optional page range that indicates which pages in the +overlay/underlay file will be repeated after the "from" pages are used +up. If you want to repeat a range of pages starting with the +first page of output, you can explicitly use ``--from=``. + +Examples +~~~~~~~~ + +- Overlay the first three pages from file :file:`o.pdf` onto the first + three pages of the output, then overlay page 4 from :file:`o.pdf` + onto pages 4 and 5 of the output. Leave remaining output pages + untouched. + + :: + + qpdf in.pdf --overlay o.pdf --to=1-5 --from=1-3 --repeat=4 -- out.pdf + + +- Underlay page 1 of :file:`footer.pdf` on all odd output pages, and + underlay page 2 of :file:`footer.pdf` on all even output pages. + + :: + + qpdf in.pdf --underlay footer.pdf --from= --repeat=1,2 -- out.pdf + +- Combine two files and overlay the single page from watermark.pdf on + the result. + + :: + + qpdf --empty --pages a.pdf b.pdf -- \ + --overlay watermark.pdf --from= --repeat=1 -- out.pdf + +.. _attachments: + +Embedded Files/Attachments +-------------------------- + +.. help-topic attachments: work with embedded files + + It is possible to list, add, or delete embedded files (also known + as attachments) and to copy attachments from other files. See help + on individual options for details. Run qpdf --help=add-attachment + for additional details about adding attachments. + +It is possible to list, add, or delete embedded files (also known as +attachments) and to copy attachments from other files. + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --list-attachments + + .. 
help: list embedded files + + Show the key and stream number for each embedded file. Combine + with --verbose for more detailed information. + + Show the *key* and stream number for each embedded file. With + :qpdf:ref:`--verbose`, additional information, including preferred + file name, description, dates, and more, is also displayed. The key + is usually but not always equal to the file name and is needed by + some of the other options. + +.. qpdf:option:: --show-attachment=key + + .. help: export an embedded file + + Write the contents of the specified attachment to standard + output as binary data. Get the key with --list-attachments. + + Write the contents of the specified attachment to standard output + as binary data. The key should match one of the keys shown by + :qpdf:ref:`--list-attachments`. If this option is given more than + once, only the last attachment will be shown. + +.. qpdf:option:: --add-attachment file options -- + + .. help: start add attachment options + + The --add-attachment flag and its options may be repeated to add + multiple attachments. Run qpdf --help=add-attachment for details. + + This flag starts add attachment options, which are used to add + attachments to a file. + + The ``--add-attachment`` flag and its options may be repeated to + add multiple attachments. Please see :ref:`add-attachment` for + additional details. + +.. qpdf:option:: --remove-attachment=key + + .. help: remove an embedded file + + Remove an embedded file using its key. Get the key with + --list-attachments. + + Remove the specified attachment. This doesn't only remove the + attachment from the embedded files table but also clears out the + file specification to ensure that the attachment is actually not + present in the output file. That means that any potential internal + links to the attachment will be broken. This option may be + specified multiple times. Run with :qpdf:ref:`--verbose` to see + status of the removal. 
Use :qpdf:ref:`--list-attachments` to find + the attachment key. This option may be repeated to remove multiple + attachments. + +.. qpdf:option:: --copy-attachments-from file options -- + + .. help: start copy attachment options + + The --copy-attachments-from flag and its options may be repeated + to copy attachments from multiple files. Run + qpdf --help=copy-attachments for details. + + This flag starts copy attachment options, which are used to copy + attachments from other files. + + The ``--copy-attachments-from`` flag and its options may be + repeated to copy attachments from multiple files. Please see + :ref:`copy-attachments` for additional details. + +.. _pdf-dates: + +PDF Date Format +~~~~~~~~~~~~~~~ + +.. help-topic pdf-dates: PDF date format + + When a date is required, the date should conform to the PDF date + format specification, which is "D:yyyymmddhhmmssz" where "z" is + either literally upper case "Z" for UTC or a timezone offset in + the form "-hh'mm'" or "+hh'mm'". Negative timezone offsets indicate + time before UTC. Positive offsets indicate how far after. For + example, US Eastern Standard Time (America/New_York) is "-05'00'", + and Indian Standard Time (Asia/Calcutta) is "+05'30'". + + Examples: + - D:20210207161528-05'00' February 7, 2021 at 4:15:28 p.m. + - D:20210207211528Z February 7, 2021 at 21:15:28 UTC + +When a date is required, the date should conform to the PDF date +format specification, which is :samp:`D:{yyyymmddhhmmssz}` where +:samp:`{z}` is either literally upper case ``Z`` for UTC or a +timezone offset in the form :samp:`{-hh'mm'}` or :samp:`{+hh'mm'}`. +Negative timezone offsets indicate time before UTC. Positive offsets +indicate how far after. For example, US Eastern Standard Time +(America/New_York) is ``-05'00'``, and Indian Standard Time +(Asia/Calcutta) is ``+05'30'``. + +Examples: + - ``D:20210207161528-05'00'``: February 7, 2021 at 4:15:28 p.m. + + - ``D:20210207211528Z``: February 7, 2021 at 21:15:28 UTC + +.. 
_add-attachment: + +Options for Adding Attachments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. help-topic add-attachment: attach (embed) files + + The options listed below appear between --add-attachment and its + terminating "--". + +These options are valid between :qpdf:ref:`--add-attachment` and ``--``. + +.. qpdf:option:: --key=key + + .. help: specify attachment key + + Specify the key to use for the attachment in the embedded files + table. It defaults to the last element of the attached file's + filename. + + Specify the key to use for the attachment in the embedded files + table. It defaults to the last element of the attached file's + filename. + +.. qpdf:option:: --filename=name + + .. help: set attachment's displayed filename + + Specify the filename to be used for the attachment. This is what + is usually displayed to the user and is the name most graphical + PDF viewers will use when saving a file. It defaults to the last + element of the attached file's filename. + + Specify the filename to be used for the attachment. This is what is + usually displayed to the user and is the name most graphical PDF + viewers will use when saving a file. It defaults to the last + element of the attached file's filename. + +.. qpdf:option:: --creationdate=date + + .. help: set attachment's creation date + + Specify the attachment's creation date in PDF format; defaults + to the current time. Run qpdf --help=pdf-dates for information + about the date format. + + Specify the attachment's creation date in PDF format; defaults to + the current time. See :ref:`pdf-dates` for information about the + date format. + +.. qpdf:option:: --moddate=date + + .. help: set attachment's modification date + + Specify the attachment's modification date in PDF format; + defaults to the current time. Run qpdf --help=pdf-dates for + information about the date format. + + Specify the attachment's modification date in PDF format; defaults + to the current time. 
See :ref:`pdf-dates` for information about the + date format. + +.. qpdf:option:: --mimetype=type/subtype + + .. help: attachment mime type (e.g. application/pdf) + + Specify the mime type for the attachment, such as text/plain, + application/pdf, image/png, etc. + + Specify the mime type for the attachment, such as ``text/plain``, + ``application/pdf``, ``image/png``, etc. The qpdf library does not + automatically determine the mime type. In a UNIX-like environment, + the :command:`file` command can often provide this information. In + MacOS, you can use :samp:`file -I {filename}`. In Linux, it's + :samp:`file -i {filename}`. + + Implementation note: the mime type appears in a field called + ``/Subtype`` in the PDF file, but that field actually includes the + full type and subtype of the mime type. This is because ``/Type`` + already means something else in PDF. + +.. qpdf:option:: --description="text" + + .. help: set attachment's description + + Supply descriptive text for the attachment, displayed by some + PDF viewers. + + Supply descriptive text for the attachment, displayed by some PDF + viewers. + +.. qpdf:option:: --replace + + .. help: replace attachment with same key + + Indicate that any existing attachment with the same key should + be replaced by the new attachment. Otherwise, qpdf gives an + error if an attachment with that key is already present. + + Indicate that any existing attachment with the same key should be + replaced by the new attachment. Otherwise, :command:`qpdf` gives an + error if an attachment with that key is already present. + +.. _copy-attachments: + +Options for Copying Attachments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. help-topic copy-attachments: copy attachments from another file + + The options listed below appear between --copy-attachments-from and + its terminating "--". + + To copy attachments from a password-protected file, use + the --password option after the file name. 
+ +Options in this section are valid between +:qpdf:ref:`--copy-attachments-from` and ``--``. + +.. qpdf:option:: --prefix=prefix + + .. help: key prefix for copying attachments + + Prepend a prefix to each key; may be needed if there are + duplicate attachment keys. This affects the key only, not the + file name. + + Only required if the file from which attachments are being copied + has attachments with keys that conflict with attachments already + in the file. In this case, the specified prefix will be prepended + to each key. This affects only the key in the embedded files + table, not the file name. The PDF specification doesn't preclude + multiple attachments having the same file name. + +.. _inspection-options: + +PDF Inspection +-------------- + +These options provide tools for inspecting PDF files. When any of the +options in this section are specified, no output file should be given. + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --is-encrypted + + .. help: silently test whether a file is encrypted + + Silently exit with a code indicating the file's encryption status: + + 0: the file is encrypted + 1: not used + 2: the file is not encrypted + + This can be used with password-protected files even if you don't + know the password. + + Silently exit with a code indicating the file's encryption status: + + - ``0``: the file is encrypted + + - ``1``: not used + + - ``2``: the file is not encrypted + + This option can be used for password-protected files even if you + don't know the password. + + This option is useful for shell scripts. Other options are ignored + if this is given. This option is mutually exclusive with + :qpdf:ref:`--requires-password`. Both this option and + :qpdf:ref:`--requires-password` exit with status ``2`` for + non-encrypted files. + +.. qpdf:option:: --requires-password + + .. 
help: silently test a file's password + + Silently exit with a code indicating the file's password status: + + 0: a password, other than as supplied, is required + 1: not used + 2: the file is not encrypted + 3: the file is encrypted, and correct password (if any) has been supplied + + Silently exit with a code indicating the file's password status: + + - ``0``: a password, other than as supplied, is required + + - ``1``: not used + + - ``2``: the file is not encrypted + + - ``3``: the file is encrypted, and correct password (if any) has + been supplied + + Use with the :qpdf:ref:`--password` option to specify the password + to test. + + The choice of exit status ``0`` to mean that a password is required + is to enable code like + + .. code-block:: bash + + if qpdf --requires-password file.pdf; then + # prompt for password + fi + + If a password is supplied with :qpdf:ref:`--password`, that + password is used to open the file just as with any normal + invocation of :command:`qpdf`. That means that this option, + combined with :qpdf:ref:`--password`, can be used to check the + correctness of the password. In that case, an exit status of ``3`` + means the file works with the supplied password. This option is + mutually exclusive with :qpdf:ref:`--is-encrypted`. Both this + option and :qpdf:ref:`--is-encrypted` exit with status ``2`` for + non-encrypted files. + +.. qpdf:option:: --check + + .. help: partially check whether PDF is valid + + Check the structure of the PDF file as well as a number of other + aspects of the file, and write information about the file to + standard output. Note that qpdf does not perform any validation + of the actual PDF page content or semantic correctness of the + PDF file. It merely checks that the PDF file is syntactically + valid. + + Check the file's structure as well as encryption, linearization, + and encoding of stream data, and write information about the file + to standard output. 
An exit status of ``0`` indicates syntactic + correctness of the PDF file. Note that :samp:`--check` writes + nothing to standard error when everything is valid, so if you are + using this to programmatically validate files in bulk, it is safe + to run without output redirected to :file:`/dev/null` and just + check for a ``0`` exit code. + + A file for which :samp:`--check` reports no errors may still have + errors in stream data content or may contain constructs that don't + conform to the PDF specification, but it should be syntactically + valid. If :samp:`--check` reports any errors, qpdf will exit with a + status of ``2``. There are some recoverable conditions that + :samp:`--check` detects. These are issued as warnings instead of + errors. If qpdf finds no errors but finds warnings, it will exit + with a status of ``3``. When :samp:`--check` is combined with other + options, checks are always performed before any other options are + processed. For erroneous files, :samp:`--check` will cause qpdf to + attempt to recover, after which other options are effectively + operating on the recovered file. Combining :samp:`--check` with + other options in this way can be useful for manually recovering + severely damaged files. + +.. qpdf:option:: --show-encryption + + .. help: information about encrypted files + + Show document encryption parameters. Also show the document's + user password if the owner password is given and the file was + encrypted using older encryption formats that allow user + password recovery. + + This option shows document encryption parameters. It also shows the + document's user password if the owner password is given and the + file was encrypted using older encryption formats that allow user + password recovery. (See :ref:`pdf-encryption` for a technical + discussion of this feature.) The output of ``--show-encryption`` is + included in the output of :qpdf:ref:`--check`. + +.. qpdf:option:: --show-encryption-key + + .. 
help: show key with --show-encryption + + When used with --show-encryption, causes the underlying + encryption key to be displayed. -:samp:`--show-encryption-key` When encryption information is being displayed, as when - :samp:`--check` or - :samp:`--show-encryption` is given, display the + :qpdf:ref:`--check` or :qpdf:ref:`--show-encryption` is given, display the computed or retrieved encryption key as a hexadecimal string. This value is not ordinarily useful to users, but it can be used as the - argument to :samp:`--password` if the - :samp:`--password-is-hex-key` is specified. Note - that, when PDF files are encrypted, passwords and other metadata are - used only to compute an encryption key, and the encryption key is - what is actually used for encryption. This enables retrieval of that - key. + argument to :qpdf:ref:`--password` if the :qpdf:ref:`--password-is-hex-key` + is specified. Note that, when PDF files are encrypted, passwords + and other metadata are used only to compute an encryption key, and + the encryption key is what is actually used for encryption. This + enables retrieval of that key. See :ref:`pdf-encryption` for a + technical discussion. -:samp:`--check-linearization` - Checks file integrity and linearization status. +.. qpdf:option:: --check-linearization -:samp:`--show-linearization` - Checks and displays all data in the linearization hint tables. + .. help: check linearization tables -:samp:`--show-xref` - Shows the contents of the cross-reference table in a human-readable - form. This is especially useful for files with cross-reference - streams which are stored in a binary format. + Check to see whether a file is linearized and, if so, whether + the linearization hint tables are correct. -:samp:`--show-object=trailer|obj[,gen]` - Show the contents of the given object. This is especially useful for - inspecting objects that are inside of object streams (also known as - "compressed objects"). 
+ Check to see whether a file is linearized and, if so, whether the + linearization hint tables are correct. qpdf does not check all + aspects of linearization. A linearized PDF file with linearization + errors that is otherwise correct is almost always readable by a PDF + viewer. As such, "errors" in PDF linearization are treated by + :command:`qpdf` as warnings. -:samp:`--raw-stream-data` - When used along with the :samp:`--show-object` - option, if the object is a stream, shows the raw stream data instead - of object's contents. +.. qpdf:option:: --show-linearization -:samp:`--filtered-stream-data` - When used along with the :samp:`--show-object` - option, if the object is a stream, shows the filtered stream data - instead of object's contents. If the stream is filtered using filters - that qpdf does not support, an error will be issued. + .. help: show linearization hint tables -:samp:`--show-npages` - Prints the number of pages in the input file on a line by itself. + Check and display all data in the linearization hint tables. + + Check and display all data in the linearization hint tables. + +.. qpdf:option:: --show-xref + + .. help: show cross reference data + + Show the contents of the cross-reference table or stream (object + locations in the file) in a human-readable form. This is + especially useful for files with cross-reference streams, which + are stored in a binary format. + + Show the contents of the cross-reference table or stream in a + human-readable form. The cross-reference data gives the offset of + regular objects and the object stream ID and 0-based index within + the object stream for compressed objects. This is especially useful + for files with cross-reference streams, which are stored in a + binary format. If the file is invalid and cross reference table + reconstruction is performed, this option will show the information + in the reconstructed table. + +.. qpdf:option:: --show-object=trailer|obj[,gen] + + .. 
help: show contents of an object + + Show the contents of the given object. This is especially useful + for inspecting objects that are inside of object streams (also + known as "compressed objects"). + + Show the contents of the given object. This is especially useful + for inspecting objects that are inside of object streams (also + known as "compressed objects"). + +.. qpdf:option:: --raw-stream-data + + .. help: show raw stream data + + When used with --show-object, if the object is a stream, write + the raw (compressed) binary stream data to standard output + instead of the object's contents. See also + --filtered-stream-data. + + When used with :qpdf:ref:`--show-object`, if the object is a + stream, write the raw (compressed) binary stream data to standard + output instead of the object's contents. Avoid combining this with + other inspection options to avoid commingling the stream data with + other output. See also :qpdf:ref:`--filtered-stream-data`. + +.. qpdf:option:: --filtered-stream-data + + .. help: show filtered stream data + + When used with --show-object, if the object is a stream, write + the filtered (uncompressed, potentially binary) stream data to + standard output instead of the object's contents. See also + --raw-stream-data. + + When used with :qpdf:ref:`--show-object`, if the object is a stream, + write the filtered (uncompressed, potentially binary) stream data + to standard output instead of the object's contents. If the stream + is filtered using filters that qpdf does not support, an error will + be issued. This option acts as if ``--decode-level=all`` was + specified (see :qpdf:ref:`--decode-level`), so it will uncompress + images compressed with supported lossy compression schemes. Avoid + combining this with other inspection options to avoid commingling + the stream data with other output. + + This option may be combined with :qpdf:ref:`--normalize-content`. 
+ If you do this, qpdf will attempt to run content normalization even + if the stream is not a content stream, which will probably produce + unusable results. + + See also :qpdf:ref:`--raw-stream-data`. + +.. qpdf:option:: --show-npages + + .. help: show number of pages + + Print the number of pages in the input file on a line by itself. + Useful for scripts. + + Print the number of pages in the input file on a line by itself. Since the number of pages appears by itself on a line, this option can be useful for scripting if you need to know the number of pages in a file. -:samp:`--show-pages` - Shows the object and generation number for each page dictionary +.. qpdf:option:: --show-pages + + .. help: display page dictionary information + + Show the object and generation number for each page dictionary + object and for each content stream associated with the page. + + Show the object and generation number for each page dictionary object and for each content stream associated with the page. Having this information makes it more convenient to inspect objects from a - particular page. + particular page. See also :qpdf:ref:`--with-images`. -:samp:`--with-images` - When used along with :samp:`--show-pages`, also shows - the object and generation numbers for the image objects on each page. - (At present, information about images in shared resource dictionaries - are not output by this command. This is discussed in a comment in the - source code.) +.. qpdf:option:: --with-images + + .. help: include image details with --show-pages + + When used with --show-pages, also shows the object and + generation numbers for the image objects on each page. + + When used with :qpdf:ref:`--show-pages`, also shows the object and + generation numbers for the image objects on each page. + +.. _json-options: + +JSON Options +------------ + +.. help-topic json: JSON output for PDF information + + Show information about the PDF file in JSON format. 
Please see the + JSON chapter in the qpdf manual for details. + +It is possible to view information about PDF files in a JSON format. +See :ref:`json` for details about the qpdf JSON format. + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --json + + .. help: show file in json format + + Generate a JSON representation of the file. This is described in + depth in the JSON section of the manual. -:samp:`--json` Generate a JSON representation of the file. This is described in - depth in :ref:`json` + depth in :ref:`json`. + +.. qpdf:option:: --json-help + + .. help: show format of json output + + Describe the format of the JSON output. -:samp:`--json-help` Describe the format of the JSON output. -:samp:`--json-key=key` - This option is repeatable. If specified, only top-level keys - specified will be included in the JSON output. If not specified, all - keys will be shown. +.. qpdf:option:: --json-key=key -:samp:`--json-object=trailer|obj[,gen]` - This option is repeatable. If specified, only specified objects will - be shown in the "``objects``" key of the JSON output. If absent, all + .. help: restrict which keys are in json output + + This option is repeatable. If given, only the specified + top-level keys will be included in the JSON output. Otherwise, + all keys will be included. + + This option is repeatable. If given, only the specified top-level + keys will be included in the JSON output. Otherwise, all keys will + be included. + +.. qpdf:option:: --json-object=trailer|obj[,gen] + + .. help: restrict which objects are in JSON + + This option is repeatable. If given, only specified objects will + be shown in the "objects" key of the JSON output. Otherwise, all + objects will be shown. + + This option is repeatable. If given, only specified objects will + be shown in the "``objects``" key of the JSON output. Otherwise, all objects will be shown. -:samp:`--check` - Checks file structure and well as encryption, linearization, and - encoding of stream data. 
A file for which - :samp:`--check` reports no errors may still have - errors in stream data content but should otherwise be structurally - sound. If :samp:`--check` any errors, qpdf will exit - with a status of 2. There are some recoverable conditions that - :samp:`--check` detects. These are issued as warnings - instead of errors. If qpdf finds no errors but finds warnings, it - will exit with a status of 3 (as of version 2.0.4). When - :samp:`--check` is combined with other options, - checks are always performed before any other options are processed. - For erroneous files, :samp:`--check` will cause qpdf - to attempt to recover, after which other options are effectively - operating on the recovered file. Combining - :samp:`--check` with other options in this way can be - useful for manually recovering severely damaged files. Note that - :samp:`--check` produces no output to standard output - when everything is valid, so if you are using this to - programmatically validate files in bulk, it is safe to run without - output redirected to :file:`/dev/null` and just - check for a 0 exit code. +.. _test-options: -The :samp:`--raw-stream-data` and -:samp:`--filtered-stream-data` options are ignored -unless :samp:`--show-object` is given. Either of these -options will cause the stream data to be written to standard output. In -order to avoid commingling of stream data with other output, it is -recommend that these objects not be combined with other test/inspection -options. +Options for Testing or Debugging +-------------------------------- -If :samp:`--filtered-stream-data` is given and -:samp:`--normalize-content=y` is also given, qpdf will -attempt to normalize the stream data as if it is a page content stream. -This attempt will be made even if it is not a page content stream, in -which case it will produce unusable results. +.. 
help-topic testing: options for testing or debugging + + The options below are useful when writing automated test code that + includes files created by qpdf or when testing qpdf itself. + +The options below are useful when writing automated test code that +includes files created by qpdf or when testing qpdf itself. When +changes are made to qpdf, care is taken to avoid gratuitously changing +the output of PDF files. This is to make it easier to do direct +comparisons in test suites with files created by qpdf. However, there +are no guarantees that the PDF output won't change such as in the +event of a bug fix or feature enhancement to some aspect of the output +that qpdf creates. + +.. _idempotency: + +Idempotency +~~~~~~~~~~~ + +Note about idempotency of byte-for-byte content: there is no +expectation that qpdf is idempotent in the general case. In other +words, there is no expectation that, when qpdf is run on its own +output, it will create *byte-for-byte* identical output, even though +it will create semantically identical files. There are a variety of +reasons for this including document ID generation, which includes a +random element, as well as the interaction of stream length encoding +with dictionary key sorting. + +It is possible to get idempotent behavior by using the +:qpdf:ref:`--static-id` or :qpdf:ref:`--deterministic-id` option with +qpdf and running it *three* times so that you are processing the +output of qpdf on its own previous output. For example, in this +sequence of commands: + +:: + + qpdf any-file.pdf 1.pdf + qpdf --static-id 1.pdf 2.pdf + qpdf --static-id 2.pdf 3.pdf + +the files :file:`2.pdf` and :file:`3.pdf` should be *byte-for-byte* +identical. The qpdf test suite relies on this behavior. See also +:qpdf:ref:`--static-aes-iv`. + +Related Options +~~~~~~~~~~~~~~~ + +.. qpdf:option:: --static-id + + .. help: use a fixed document ID + + Use a fixed value for the document ID. This is intended for + testing only. 
Never use it for production files. See also + qpdf --help=--deterministic-id. + + Use a fixed value for the document ID (``/ID`` in the trailer). + **This is intended for testing only. Never use it for production + files.** If you are trying to get the same ID each time for a given + file and you are not generating encrypted files, consider using the + :qpdf:ref:`--deterministic-id` option. + +.. qpdf:option:: --static-aes-iv + + .. help: use a fixed AES vector + + Use a static initialization vector for AES-CBC. This is intended + for testing only so that output files can be reproducible. Never + use it for production files. This option is not secure since it + significantly weakens the encryption. + + Use a static initialization vector for AES-CBC. This is intended + for testing only so that output files can be reproducible. Never + use it for production files. **This option in particular is not + secure since it significantly weakens the encryption.** When + combined with :qpdf:ref:`--static-id` and using the three-step + process described in :ref:`idempotency`, it is possible to create + byte-for-byte idempotent output with PDF files that use 256-bit + encryption to assist with creating reproducible test suites. + +.. qpdf:option:: --linearize-pass1=file + + .. help: save pass 1 of linearization + + Write the first pass of linearization to the named file. The + resulting file is not a valid PDF file. This option is useful only + for debugging qpdf. + + Write the first pass of linearization to the named file. *The + resulting file is not a valid PDF file.* This option is useful only + for debugging ``QPDFWriter``'s linearization code. When qpdf + linearizes files, it writes the file in two passes, using the first + pass to calculate sizes and offsets that are required for hint + tables and the linearization dictionary. Ordinarily, the first pass + is discarded. 
This option enables it to be captured, allowing + inspection of the file before values calculated in pass 1 are + inserted into the file for pass 2. .. _unicode-passwords: @@ -1623,7 +3133,7 @@ you by interpreting them as UTF-8, you can use :samp:`--password-mode=bytes` to suppress qpdf's automatic behavior. -The :samp:`--password-mode` option, as described earlier +The :qpdf:ref:`--password-mode` option, as described earlier in this chapter, can be used to change qpdf's interpretation of supplied passwords. There are very few reasons to use this option. One would be the unlikely case described in the previous paragraph in which the @@ -1654,7 +3164,7 @@ recovery methods should make qpdf transparently open most encrypted files with the password supplied correctly but in the wrong coding system. There are no real downsides to this behavior, but if you don't want qpdf to do this, you can use the -:samp:`--suppress-password-recovery` option. One reason +:qpdf:ref:`--suppress-password-recovery` option. One reason to do that is to ensure that you know the exact password that was used to encrypt the file. @@ -1670,8 +3180,9 @@ will be addressed in a future version of qpdf. The ``QPDFWriter`` methods that enable encryption on the output file accept passwords as strings of bytes. -Please note that the :samp:`--password-is-hex-key` -option is unrelated to all this. This flag bypasses the normal process -of going from password to encryption string entirely, allowing the raw -encryption key to be specified directly. This is useful for forensic -purposes or for brute-force recovery of files with unknown passwords. +Please note that the :qpdf:ref:`--password-is-hex-key` option is +unrelated to all this. That flag bypasses the normal process of going +from password to encryption string entirely, allowing the raw +encryption key to be specified directly. 
That behavior is useful for +forensic purposes or for brute-force recovery of files with unknown +passwords and has nothing to do with the document's actual passwords. diff --git a/manual/conf.py b/manual/conf.py index 92e4a3de..3f334d8c 100644 --- a/manual/conf.py +++ b/manual/conf.py @@ -7,6 +7,10 @@ # To see the default sample conf.py, run sphinx-quickstart in an empty # directory. Most of the original comments and options were removed. import sphinx_rtd_theme # noQA F401 +import os +import sys + +sys.path.append(os.path.abspath("./_ext")) project = 'QPDF' copyright = '2005-2021, Jay Berkenbilt' @@ -16,6 +20,7 @@ release = '10.5.0' version = release extensions = [ 'sphinx_rtd_theme', + 'qpdf', ] html_theme = 'sphinx_rtd_theme' html_theme_options = { diff --git a/manual/encryption.rst b/manual/encryption.rst new file mode 100644 index 00000000..6ca950f7 --- /dev/null +++ b/manual/encryption.rst @@ -0,0 +1,327 @@ +.. _pdf-encryption: + +PDF Encryption +============== + +This chapter discusses PDF encryption in a general way with an angle +toward how it works in :command:`qpdf`. This chapter is not intended +to replace the PDF specification. Please consult the spec for full +details. + +PDF Encryption Concepts +----------------------- + +Encryption + Encryption is the replacement of *clear text* with encrypted text, + also known as *ciphertext*. The clear text may be retrieved from the + ciphertext if the encryption key is known. + + PDF files consist of an object structure. PDF objects may be of a + variety of types including (among others) numbers, boolean values, + names, arrays, dictionaries, strings, and streams. In a PDF file, + only strings and streams are encrypted. + +Security Handler + Since the inception of PDF, there have been several modifications to + the way files are encrypted. Encryption is handled by a *security + handler*. The *standard security handler* is password-based. 
This is + the only security handler implemented by qpdf, and this material is + all focused on the standard security handler. There are various + flags that control the specific details of encryption with the + standard security handler. These are discussed below. + +Encryption Key + This refers to the actual key used by the encryption and decryption + algorithms. It is distinct from the password. The main encryption + key is generated at random and stored encrypted in the PDF file. The + passwords used to protect a PDF file, if any, are used to protect + the encryption key. This design makes it possible to use different + passwords (e.g., user and owner passwords) to retrieve the + encryption key or even to change the password on a file without + changing the encryption key. qpdf can expose the encryption key when + run with the :qpdf:ref:`--show-encryption-key` option and can accept + a hex-encoded encryption key in place of a password when run with + the :qpdf:ref:`--password-is-hex-key` option. + +Password Protection + Password protection is distinct from encryption. This point is often + misunderstood. A PDF file can be encrypted without being + password-protected. The intent of PDF encryption was that there + would be two passwords: a *user password* and an *owner password*. + Either password can be used to retrieve the encryption key. A + conforming reader is supposed to obey the security restrictions + if the file is opened using the user password but not if the file is + opened with the owner password. :command:`qpdf` makes no distinction + between which password is used to open the file. The distinction + made by conforming readers between the user and owner password is + what makes it common to create encrypted files with no password + protection. This is done by using the empty string as the user + password and some secret string as the owner password. 
When a user + opens the PDF file, the empty string is used to retrieve the + encryption key, making the file usable, but a conforming reader + restricts certain operations from the user. + +What does all this mean? Here are a few things to realize. + +- Since the user password and the owner password are both used to + recover the single encryption key, there is *fundamentally no way* + to prevent an application from disregarding the security + restrictions on a file. Any software that can read the encrypted + file at all has the encryption key. Therefore, the security of the + restrictions placed on PDF files is solely enforced by the software. + Any open source PDF reader could be trivially modified to ignore the + security restrictions on a file. The PDF specification is clear + about this point. This means that PDF restrictions on + non-password-protected files only restrict users who don't know how + to circumvent them. + +- If a file is password-protected, you have to know at least one of + the user or owner password to retrieve the encryption key. However, + in the case of 40-bit encryption, the actual encryption key is only + 5 bytes long and can be easily brute-forced. As such, files + encrypted with 40-bit encryption are not secure regardless of how + strong the password is. With 128-bit encryption, the default + security handler uses RC4 encryption, which is also known to be + insecure. As such, the only way to securely encrypt a PDF file using + the standard security handler (as of the last review of this chapter + in 2022) is to use AES encryption. This is the only supported + algorithm with 256-bit encryption, and it can be selected to be used + with 128-bit encryption as well. However, there is no reason to use + 128-bit encryption with AES. If you are going to use AES, just use + 256-bit encryption instead. 
The security of a 256-bit AES-encrypted + PDF file with a strong password is comparable to using a + general-purpose encryption tool like :command:`gpg` or + :command:`openssl` to encrypt the PDF file with the same password, + but the advantage of using PDF encryption is that no software is + required beyond a regular PDF viewer. + +PDF Encryption Details +---------------------- + +This section describes a few details about PDF encryption. It does not +describe all the details. For that, read the PDF specification. The +details presented here, however, should go a long way toward helping a +casual user/developer understand what's going on with encrypted PDF +files. + +Here are more concepts to understand. + +Algorithm parameters ``V`` and ``R`` + There are two parameters that control the details of encryption + using the standard security handler: ``V`` and ``R``. + + ``V`` is a code specifying the algorithms that are used for + encrypting the file, handling keys, etc. It may have any of the + following values: + + - 1: The original algorithm, which encrypted files using 40-bit keys. + + - 2: An extension of the original algorithm allowing longer keys. + Introduced in PDF 1.4. + + - 3: An unpublished algorithm that permits file encryption key + lengths ranging from 40 to 128 bits. Introduced in PDF 1.4. qpdf + is believed to be able to read files with ``V`` = 3 but does not + write such files. + + - 4: An extension of the algorithm that allows it to be + parameterized by additional rules for handling strings and + streams. Introduced in PDF 1.5. + + - 5: An algorithm that allows specification of separate security + handlers for strings and streams as well as embedded files, and + which supports 256-bit keys. Introduced in PDF 1.7 extension level + 3 and later extended in extension level 8. This is the encryption + system in the PDF 2.0 specification, ISO-32000. + + ``R`` is a code specifying the revision of the standard handler. 
It + is tightly coupled with the value of ``V``. ``R`` may have any of + the following values: + + - 2: ``V`` must be 1 + + - 3: ``V`` must be 2 or 3 + + - 4: ``V`` must be 4 + + - 5: ``V`` must be 5; this extension was never fully specified and + existed for a short time in some versions of Acrobat. + :command:`qpdf` is able to read and write this format, but it + should not be used for any purpose other than testing + compatibility with the format. + + - 6: ``V`` must be 5. This is the only value that is not deprecated + in the PDF 2.0 specification, ISO-32000. + +Encryption Dictionary + Encrypted PDF files have an encryption dictionary. There are several + fields, but these are the important ones for our purposes: + + - ``V`` and ``R`` as described above + + - ``O``, ``U``, ``OE``, ``UE``: values used by the algorithms that + recover the encryption key from the user and owner password. Which + of these are defined and how they are used vary based on the value + of ``R``. + + - ``P``: a bit field that describes which restrictions are in place. + This is discussed below in :ref:`security-restrictions`. + +Encryption Algorithms + PDF files may be encrypted with the obsolete, insecure RC4 algorithm + or the more secure AES algorithm. See also :ref:`weak-crypto` for a + discussion. 40-bit encryption always uses RC4. 128-bit encryption can + use either RC4 (the default for compatibility reasons) or, starting + with PDF 1.6, AES. 256-bit encryption always uses AES. + +.. _security-restrictions: + +PDF Security Restrictions +------------------------- + +PDF security restrictions are described by a bit field whose value is +stored in the ``P`` field in the encryption dictionary. The value of +``P`` is used by the algorithms to recover the encryption key given +the password, which makes the value of ``P`` tamper-resistent. + +``P`` is a 32-bit integer, treated as a signed two's-complement number. +A 1 in any bit position means the permission is granted. 
The PDF +specification numbers the bits from 1 (least significant bit) to 32 +(most significant bit) rather than the more customary 0 to 31. For +consistency with the spec, the remainder of this section uses the +1-based numbering. + +Only bits 3, 4, 5, 6, 9, 10, 11, and 12 are used. All other bits are +set to 1 except bits 1 and 2, which are set to 0. Since bit 32 is +always set to 1, the value of ``P`` is +always a negative number. (:command:`qpdf` recognizes a positive +number on behalf of buggy writers that treat ``P`` as unsigned. Such +files have been seen in the wild.) + +Here are the meanings of the bit positions. All bits not listed must +have the value 1 except bits 1 and 2, which must have the value 0. +However, the values of bits other than those in the list are ignored, +so having incorrect values probably doesn't break anything in most +cases. A value of 1 indicates that the permission is granted. + +- 3: for ``R`` = 2, printing; for ``R`` >= 3, printing at low + resolution + +- 4: modifying the document except as controlled by bits 6, + 9, and 11 + +- 5: extracting text and graphics for purposes other than + accessibility to visually impaired users + +- 6: add or modify annotations, fill in interactive form fields; + if bit 4 is also set, create or modify interactive form fields + +- 9: for ``R`` >= 3, fill in interactive form fields even if bit 6 is + clear + +- 10: not used; formerly granted permission to extract material + for accessibility, but the specification now disallows restriction + of accessibility, and conforming readers are to treat this bit as if + it is set regardless of its value + +- 11: for ``R`` >= 3, assemble document including inserting, rotating, + or deleting pages or creating document outlines or thumbnail images + +- 12: for ``R`` >= 3, allow printing at full resolution + +.. 
_qpdf-P: + +How qpdf handles security restrictions +-------------------------------------- + +This section describes exactly what the qpdf library does with regard +to ``P`` based on the various settings of different security options. + +- Start with all bits set except bits 1 and 2, which are cleared + +- For ``R`` = 2: + + - ``--print=n``: clear bit 3 + + - ``--modify=n``: clear bit 4 + + - ``--extract=n``: clear bit 5 + + - ``--annotate=n``: clear bit 6 + +- For ``R`` >= 3: + + - ``--accessibility=n``: for ``R`` = 3, clear bit 10; otherwise, + ignore so bit 10 is always clear if ``R`` >= 4. qpdf allows + creating files with bit 10 clear so that it can be used to create + test files to ensure that a conforming reader ignores the value of + the bit. You should never intentionally clear accessibility. + + - ``--extract=n``: clear bit 5 + + - ``--print=none``: clear bits 3 and 12 + + - ``--print=low``: clear bit 12 + + - ``--modify=none``: clear bits 4, 6, 9, and 11 + + - ``--modify=assembly``: clear bits 4, 6, and 9 + + - ``--modify=form``: clear bits 4 and 6 + + - ``--modify=annotate``: clear bit 4 + + - ``--assemble=n``: clear bit 11 + + - ``--annotate=n``: clear bit 6 + + - ``--form=n``: clear bit 9 + + - ``--modify-other=n``: clear bit 4 + +Options to :command:`qpdf`, both at the CLI and library level, allow +more granular clearing of permission bits than do most tools, +including Adobe Acrobat. As such, PDF viewers may respond in +surprising ways based on options passed to qpdf. If you observe this, +it is probably not because of a bug in qpdf. + +.. _pdf-passwords: + +User and Owner Passwords +------------------------ + +When you use qpdf to show encryption parameters and you open a file +with the owner password, sometimes qpdf reveals the user password, and +sometimes it doesn't. Here's why. 
+ +For ``V`` < 5, the user password is actually stored in the PDF file +encrypted with a key that is derived from the owner password, and the +main encryption key is encrypted using a key derived from the user +password. When you open a PDF file, the reader first tries to treat +the given password as the user password, using it to recover the +encryption key. If that works, you're in with restrictions (assuming +the reader chooses to enforce them). If it doesn't work, then the +reader treats the password as the owner password, using it to recover +the user password, and then uses the user password to retrieve the +encryption key. This is why creating a file with the same user +password and owner password with ``V`` < 5 results in a file that some +readers will never allow you to open as the owner. Typically when a +reader encounters a file with ``V`` < 5, it will first attempt to +treat the empty string as a user password. If that works, the file is +encrypted but not password-protected. If it doesn't work, then a +password prompt is given. Creating a file with an empty owner password +is like creating a file with the same owner and user password: there +is no way to open the file as an owner. + +For ``V`` >= 5, the main encryption key is independently encrypted +using the user password and the owner password. There is no way to +recover the user password from the owner password. Restrictions are +imposed or not depending on which password was used. In this case, the +password supplied, if any, is tried both as the user password and the +owner password, and whichever works is used. Typically the password is +tried as the owner password first. (This is what the PDF specification +says to do.) As such, specifying a user password and leaving the owner +password blank results in a file that is opened as owner with no +password, effectively rendering the security restrictions useless. 
+This is why :command:`qpdf` requires you to pass +:qpdf:ref:`--allow-insecure` to create a file with an empty owner +password when 256-bit encryption is in use. diff --git a/manual/index.rst b/manual/index.rst index 56413cf6..7f8b1483 100644 --- a/manual/index.rst +++ b/manual/index.rst @@ -30,5 +30,11 @@ documentation, please visit `https://qpdf.readthedocs.io design linearization object-streams + encryption release-notes acknowledgement + +Indices +======= + +* :ref:`qpdf-options` diff --git a/manual/installation.rst b/manual/installation.rst index 0bd13e31..e8773a65 100644 --- a/manual/installation.rst +++ b/manual/installation.rst @@ -208,7 +208,7 @@ files you need to build. Runtime Crypto Provider Selection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -You can use the :samp:`--show-crypto` option to +You can use the :qpdf:ref:`--show-crypto` option to :command:`qpdf` to get a list of available crypto providers. The default provider is always listed first, and the rest are listed in lexical order. Each crypto provider is listed on a line by diff --git a/manual/json.rst b/manual/json.rst index 39bf9445..5567794d 100644 --- a/manual/json.rst +++ b/manual/json.rst @@ -51,7 +51,7 @@ Compatibility Documentation The :command:`qpdf` command can be invoked with the - :samp:`--json-help` option. This will output a JSON + :qpdf:ref:`--json-help` option. This will output a JSON structure that has the same structure as the JSON output that qpdf generates, except that each field in the help output is a description of the corresponding field in the JSON output. The specific @@ -134,7 +134,7 @@ There are a few limitations to be aware of with the JSON structure: encoding. In other words, it's best if you don't try to use the JSON format to extract binary strings from the PDF file, but if you really had to, it could be done. 
Note that qpdf's - :samp:`--show-object` option does not have this + :qpdf:ref:`--show-object` option does not have this limitation and will reveal the string as encoded in the original file. @@ -150,9 +150,9 @@ be aware of: - While qpdf guarantees that keys present in the help will be present in the output, those fields may be null or empty if the information is not known or absent in the file. Also, if you specify - :samp:`--json-keys`, the keys that are not listed + :qpdf:ref:`--json-key`, the keys that are not listed will be excluded entirely except for those that - :samp:`--json-help` says are always present. + :qpdf:ref:`--json-help` says are always present. - In a few places, there are keys with names containing ``pageposfrom1``. The values of these keys are null or an integer. If @@ -168,7 +168,7 @@ be aware of: - The image information included in the ``page`` section of the JSON output includes the key "``filterable``". Note that the value of this - field may depend on the :samp:`--decode-level` that + field may depend on the :qpdf:ref:`--decode-level` that you invoke qpdf with. The JSON output includes a top-level key "``parameters``" that indicates the decode level used for computing whether a stream was filterable. For example, jpeg images will be diff --git a/manual/release-notes.rst b/manual/release-notes.rst index 15089f40..8c2af683 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -90,7 +90,7 @@ For a detailed list of changes, please see the file - Handling of Weak Cryptography Algorithms - From the qpdf CLI, the - :samp:`--allow-weak-crypto` is now required to + :qpdf:ref:`--allow-weak-crypto` is now required to suppress a warning when explicitly creating PDF files using RC4 encryption. While qpdf will always retain the ability to read and write such files, doing so will require explicit @@ -108,7 +108,7 @@ For a detailed list of changes, please see the file (with no resource dictionary). 
- Fix crash that could occur under certain conditions when using - :samp:`--pages` with files that had form + :qpdf:ref:`--pages` with files that had form fields. - Library Enhancements @@ -127,7 +127,7 @@ For a detailed list of changes, please see the file - CLI Enhancements - Improve diagnostics around parsing - :samp:`--pages` command-line options + :qpdf:ref:`--pages` command-line options - Packaging Changes @@ -139,7 +139,7 @@ For a detailed list of changes, please see the file - When generating a file while preserving object streams, unreferenced objects are correctly removed unless - :samp:`--preserve-unreferenced` is specified. + :qpdf:ref:`--preserve-unreferenced` is specified. - Library Enhancements @@ -202,19 +202,19 @@ For a detailed list of changes, please see the file - Operations that work on combining pages are much better about protecting form fields. In particular, - :samp:`--split-pages` and - :samp:`--pages` now preserve interaction form + :qpdf:ref:`--split-pages` and + :qpdf:ref:`--pages` now preserve interaction form functionality by copying the relevant form field information from the original files. Additionally, if you use - :samp:`--pages` to select only some pages from + :qpdf:ref:`--pages` to select only some pages from the original input file, unused form fields are removed, which prevents lots of unused annotations from being retained. - By default, :command:`qpdf` no longer allows creation of encrypted PDF files whose user password is non-empty and owner password is empty when a 256-bit key is in - use. The :samp:`--allow-insecure` option, - specified inside the :samp:`--encrypt` options, + use. The :qpdf:ref:`--allow-insecure` option, + specified inside the :qpdf:ref:`--encrypt` options, allows creation of such files. Behavior changes in the CLI are avoided when possible, but an exception was made here because this is security-related. 
qpdf must always allow creation of @@ -255,7 +255,7 @@ For a detailed list of changes, please see the file removing, and and copying file attachments. See :ref:`attachments` for details. - Page splitting and merging operations, as well as - :samp:`--flatten-rotation`, are better behaved + :qpdf:ref:`--flatten-rotation`, are better behaved with respect to annotations and interactive form fields. In most cases, interactive form field functionality and proper formatting and functionality of annotations is preserved by @@ -284,7 +284,7 @@ For a detailed list of changes, please see the file extraction of attachments. More detailed information can be obtained by following the reference to the file spec object. - - Add numeric option to :samp:`--collate`. If + - Add numeric option to :qpdf:ref:`--collate`. If :samp:`--collate={n}` is given, take pages in groups of :samp:`{n}` from the given files. @@ -367,7 +367,7 @@ For a detailed list of changes, please see the file - Bug Fixes - - The :samp:`--flatten-rotation` option applies + - The :qpdf:ref:`--flatten-rotation` option applies transformations to any annotations that may be on the page. - If a form XObject lacks a resources dictionary, consider any @@ -390,7 +390,7 @@ For a detailed list of changes, please see the file 10.1.0: January 5, 2021 - CLI Enhancements - - Add :samp:`--flatten-rotation` command-line + - Add :qpdf:ref:`--flatten-rotation` command-line option, which causes all pages that are rotated using parameters in the page's dictionary to instead be identically rotated in the page's contents. The change is not user-visible @@ -510,7 +510,7 @@ For a detailed list of changes, please see the file - Bug Fixes - When concatenating content streams, as with - :samp:`--coalesce-contents`, there were cases + :qpdf:ref:`--coalesce-contents`, there were cases in which qpdf would merge two lexical tokens together, creating invalid results. 
A newline is now inserted between merged content streams if one is not already present. @@ -527,7 +527,7 @@ For a detailed list of changes, please see the file already ignored the user's locale for numeric conversion. - Fix several instances in which warnings were not suppressed in - spite of :samp:`--no-warn` and/or errors or + spite of :qpdf:ref:`--no-warn` and/or errors or warnings were written to standard output rather than standard error. @@ -540,10 +540,10 @@ For a detailed list of changes, please see the file - Enhancements - - New option :samp:`--warning-exit-0` causes qpdf + - New option :qpdf:ref:`--warning-exit-0` causes qpdf to exit with a status of ``0`` rather than ``3`` if there are warnings but no errors. Combine with - :samp:`--no-warn` to completely ignore + :qpdf:ref:`--no-warn` to completely ignore warnings. - Performance improvements have been made to @@ -656,17 +656,16 @@ For a detailed list of changes, please see the file :command:`qpdf --json-help` for details. - Add new option - :samp:`--remove-unreferenced-resources` which + :qpdf:ref:`--remove-unreferenced-resources` which takes ``auto``, ``yes``, or ``no`` as arguments. The new ``auto`` mode, which is the default, performs a fast heuristic over a PDF file when splitting pages to determine whether the expensive process of finding and removing unreferenced resources is likely to be of benefit. For most files, this new default will result in a significant performance improvement - for splitting pages. See :ref:`advanced-transformation` for a more detailed - discussion. + for splitting pages. - - The :samp:`--preserve-unreferenced-resources` + - The :qpdf:ref:`--preserve-unreferenced-resources` is now just a synonym for :samp:`--remove-unreferenced-resources=no`. 
@@ -760,8 +759,8 @@ For a detailed list of changes, please see the file - CLI Enhancements - - Added options :samp:`--is-encrypted` and - :samp:`--requires-password` for testing whether + - Added options :qpdf:ref:`--is-encrypted` and + :qpdf:ref:`--requires-password` for testing whether a file is encrypted or requires a password other than the supplied (or empty) password. These communicate via exit status, making them useful for shell scripts. They also work on @@ -770,7 +769,7 @@ For a detailed list of changes, please see the file - Added ``encrypt`` key to JSON options. With the exception of the reconstructed user password for older encryption formats, this provides the same information as - :samp:`--show-encryption` but in a consistent, + :qpdf:ref:`--show-encryption` but in a consistent, parseable format. See output of :command:`qpdf --json-help` for details. @@ -778,7 +777,7 @@ For a detailed list of changes, please see the file - In QDF mode, be sure not to write more than one XRef stream to a file, even when - :samp:`--preserve-unreferenced` is used. + :qpdf:ref:`--preserve-unreferenced` is used. :command:`fix-qdf` assumes that there is only one XRef stream, and that it appears at the end of the file. @@ -824,7 +823,7 @@ For a detailed list of changes, please see the file - CLI Enhancements - - Addition of the :samp:`--show-crypto` option in + - Addition of the :qpdf:ref:`--show-crypto` option in support of selectable crypto providers, as described in :ref:`crypto`. - Allow ``:even`` or ``:odd`` to be appended to numeric ranges @@ -838,7 +837,7 @@ For a detailed list of changes, please see the file - Bug Fix - Fix the name of the temporary file used by - :samp:`--replace-input` so that it doesn't + :qpdf:ref:`--replace-input` so that it doesn't require path splitting and works with paths include directories. 
@@ -891,21 +890,21 @@ For a detailed list of changes, please see the file - CLI Enhancements - - The :samp:`--replace-input` option may be given + - The :qpdf:ref:`--replace-input` option may be given in place of an output file name. This causes qpdf to overwrite the input file with the output. See the description of - :samp:`--replace-input` in :ref:`basic-options` for more details. + :qpdf:ref:`--replace-input` for more details. - - The :samp:`--recompress-flate` instructs + - The :qpdf:ref:`--recompress-flate` instructs :command:`qpdf` to recompress streams that are already compressed with ``/FlateDecode``. Useful with - :samp:`--compression-level`. + :qpdf:ref:`--compression-level`. - The :samp:`--compression-level={level}` sets the zlib compression level used for any streams compressed by ``/FlateDecode``. Most effective when combined with - :samp:`--recompress-flate`. + :qpdf:ref:`--recompress-flate`. - Library Enhancements @@ -998,8 +997,8 @@ For a detailed list of changes, please see the file a file with linearization warnings but not errors, it now properly exits with exit code 3 instead of 2. - - The :samp:`--completion-bash` and - :samp:`--completion-zsh` options now work + - The :qpdf:ref:`--completion-bash` and + :qpdf:ref:`--completion-zsh` options now work properly when qpdf is invoked as an AppImage. - Calling ``QPDFWriter::set*EncryptionParameters`` on a @@ -1063,7 +1062,7 @@ For a detailed list of changes, please see the file qpdf than the library, which may indicate a problem with the installation. - - New option :samp:`--remove-page-labels` will + - New option :qpdf:ref:`--remove-page-labels` will remove page labels before generating output. This used to happen if you ran :command:`qpdf --empty --pages .. --`, but the behavior changed in qpdf 8.3.0. This @@ -1090,7 +1089,7 @@ For a detailed list of changes, please see the file during page splitting operations. 
- Revert change that included preservation of outlines - (bookmarks) in :samp:`--split-pages`. The way + (bookmarks) in :qpdf:ref:`--split-pages`. The way it was implemented in 8.3.0 and 8.4.0 caused a very significant degradation of performance for splitting certain files. A future release of qpdf may re-introduce the behavior in a more @@ -1143,16 +1142,16 @@ For a detailed list of changes, please see the file depth in :ref:`unicode-passwords`. - New options - :samp:`--externalize-inline-images`, - :samp:`--ii-min-bytes`, and - :samp:`--keep-inline-images` control qpdf's + :qpdf:ref:`--externalize-inline-images`, + :qpdf:ref:`--ii-min-bytes`, and + :qpdf:ref:`--keep-inline-images` control qpdf's handling of inline images and possible conversion of them to regular images. By default, - :samp:`--optimize-images` now also applies to - inline images. These options are discussed in :ref:`advanced-transformation`. + :qpdf:ref:`--optimize-images` now also applies to + inline images. - - Add options :samp:`--overlay` and - :samp:`--underlay` for overlaying or + - Add options :qpdf:ref:`--overlay` and + :qpdf:ref:`--underlay` for overlaying or underlaying pages of other files onto output pages. See :ref:`overlay-underlay` for details. @@ -1162,32 +1161,32 @@ For a detailed list of changes, please see the file non-ASCII characters, qpdf will try a number of alternative passwords to try to compensate for possible character encoding errors. This behavior can be suppressed with the - :samp:`--suppress-password-recovery` option. + :qpdf:ref:`--suppress-password-recovery` option. See :ref:`unicode-passwords` for a full discussion. - - Add the :samp:`--password-mode` option to + - Add the :qpdf:ref:`--password-mode` option to fine-tune how qpdf interprets password arguments, especially when they contain non-ASCII characters. See :ref:`unicode-passwords` for more information. 
- - In the :samp:`--pages` option, it is now + - In the :qpdf:ref:`--pages` option, it is now possible to copy the same page more than once from the same file without using the previous workaround of specifying two different paths to the same file. - - In the :samp:`--pages` option, allow use of "." + - In the :qpdf:ref:`--pages` option, allow use of "." as a shortcut for the primary input file. That way, you can do :command:`qpdf in.pdf --pages . 1-2 -- out.pdf` instead of having to repeat :file:`in.pdf` in the command. - When encrypting with 128-bit and 256-bit encryption, new - encryption options :samp:`--assemble`, - :samp:`--annotate`, - :samp:`--form`, and - :samp:`--modify-other` allow more fine-grained + encryption options :qpdf:ref:`--assemble`, + :qpdf:ref:`--annotate`, + :qpdf:ref:`--form`, and + :qpdf:ref:`--modify-other` allow more fine-grained granularity in configuring options. Before, the - :samp:`--modify` option only configured certain + :qpdf:ref:`--modify` option only configured certain predefined groups of permissions. - Bug Fixes and Enhancements @@ -1198,7 +1197,7 @@ For a detailed list of changes, please see the file file's internal structure shared these resource lists across pages and if some but not all of the pages in the output did not reference all the fonts and images. Using the - :samp:`--preserve-unreferenced-resources` + :qpdf:ref:`--preserve-unreferenced-resources` option would work around the incorrect behavior. This bug was the result of a typo in the code and a deficiency in the test suite. The case that triggered the error was known, just not @@ -1326,11 +1325,11 @@ For a detailed list of changes, please see the file - Page numbers (also known as page labels) are now preserved when merging and splitting files with the - :samp:`--pages` and - :samp:`--split-pages` options. + :qpdf:ref:`--pages` and + :qpdf:ref:`--split-pages` options. 
- Bookmarks are partially preserved when splitting pages with the - :samp:`--split-pages` option. Specifically, the + :qpdf:ref:`--split-pages` option. Specifically, the outlines dictionary and some supporting metadata are copied into the split files. The result is that all bookmarks from the original file appear, those that point to pages that are @@ -1340,48 +1339,48 @@ For a detailed list of changes, please see the file operations. - Page collation: add new option - :samp:`--collate`. When specified, the - semantics of :samp:`--pages` change from + :qpdf:ref:`--collate`. When specified, the + semantics of :qpdf:ref:`--pages` change from concatenation to collation. See :ref:`page-selection` for examples and discussion. - Generation of information in JSON format, primarily to facilitate use of qpdf from languages other than C++. Add new - options :samp:`--json`, - :samp:`--json-key`, and - :samp:`--json-object` to generate a JSON + options :qpdf:ref:`--json`, + :qpdf:ref:`--json-key`, and + :qpdf:ref:`--json-object` to generate a JSON representation of the PDF file. Run :command:`qpdf --json-help` to get a description of the JSON format. For more information, see :ref:`json`. - - The :samp:`--generate-appearances` flag will + - The :qpdf:ref:`--generate-appearances` flag will cause qpdf to generate appearances for form fields if the PDF file indicates that form field appearances are out of date. This can happen when PDF forms are filled in by a program that doesn't know how to regenerate the appearances of the filled-in fields. - - The :samp:`--flatten-annotations` flag can be + - The :qpdf:ref:`--flatten-annotations` flag can be used to *flatten* annotations, including form fields. Ordinarily, annotations are drawn separately from the page. Flattening annotations is the process of combining their appearances into the page's contents. You might want to do this if you are going to rotate or combine pages using a tool that doesn't understand about annotations. 
You may also want to use - :samp:`--generate-appearances` when using this + :qpdf:ref:`--generate-appearances` when using this flag since annotations for outdated form fields are not flattened as that would cause loss of information. - - The :samp:`--optimize-images` flag tells qpdf + - The :qpdf:ref:`--optimize-images` flag tells qpdf to recompress every image using DCT (JPEG) compression as long as the image is not already compressed with lossy compression and recompressing the image reduces its size. The - additional options :samp:`--oi-min-width`, - :samp:`--oi-min-height`, and - :samp:`--oi-min-area` prevent recompression of + additional options :qpdf:ref:`--oi-min-width`, + :qpdf:ref:`--oi-min-height`, and + :qpdf:ref:`--oi-min-area` prevent recompression of images whose width, height, or pixel area (width × height) are below a specified threshold. - - The :samp:`--show-object` option can now be + - The :qpdf:ref:`--show-object` option can now be given as :samp:`--show-object=trailer` to show the trailer dictionary. @@ -1531,12 +1530,12 @@ For a detailed list of changes, please see the file :samp:`--keep-files-open={[yn]}` to override default determination of whether to keep files open when merging. Please see the discussion of - :samp:`--keep-files-open` in :ref:`basic-options` for additional details. + :qpdf:ref:`--keep-files-open` for additional details. 8.2.0: August 16, 2018 - Command-line Enhancements - - Add :samp:`--no-warn` option to suppress + - Add :qpdf:ref:`--no-warn` option to suppress issuing warning messages. If there are any conditions that would have caused warnings to be issued, the exit status is still 3. @@ -1556,7 +1555,7 @@ For a detailed list of changes, please see the file - Bug fix: end of line characters were not properly handled inside strings in some cases. - - Bug fix: using :samp:`--progress` on very small + - Bug fix: using :qpdf:ref:`--progress` on very small files could cause an infinite loop. 
- API enhancements @@ -1596,15 +1595,14 @@ For a detailed list of changes, please see the file old behavior should be desired, or if you have a case where page splitting is very slow, the old behavior (and speed) can be enabled by specifying - :samp:`--preserve-unreferenced-resources`. For - additional details, please see :ref:`advanced-transformation`. + :qpdf:ref:`--preserve-unreferenced-resources`. - When merging multiple PDF files, qpdf no longer leaves all the files open. This makes it possible to merge numbers of files that may exceed the operating system's limit for the maximum number of open files. - - The :samp:`--rotate` option's syntax has been + - The :qpdf:ref:`--rotate` option's syntax has been extended to make the page range optional. If you specify :samp:`--rotate={angle}` without specifying a page range, the rotation will be applied @@ -1613,10 +1611,10 @@ For a detailed list of changes, please see the file down. - When merging multiple files, the - :samp:`--verbose` option now prints information + :qpdf:ref:`--verbose` option now prints information about each file as it operates on that file. - - When the :samp:`--progress` option is + - When the :qpdf:ref:`--progress` option is specified, qpdf will print a running indicator of its best guess at how far through the writing process it is. Note that, as with all progress meters, it's an approximation. This option @@ -1672,7 +1670,7 @@ For a detailed list of changes, please see the file it thinks it is through writing its output. Client programs can use this to implement reasonably accurate progress meters. The :command:`qpdf` command line tool uses this to - implement its :samp:`--progress` option. + implement its :qpdf:ref:`--progress` option. 
- New methods ``QPDFObjectHandle::newUnicodeString`` and ``QPDFObject::unparseBinary`` have been added to allow for more @@ -1733,7 +1731,7 @@ For a detailed list of changes, please see the file :samp:`--linearize-pass1={file}` has been added for debugging qpdf's linearization code. - - The option :samp:`--coalesce-contents` can be + - The option :qpdf:ref:`--coalesce-contents` can be used to combine content streams of a page whose contents are an array of streams into a single stream. @@ -1782,8 +1780,7 @@ For a detailed list of changes, please see the file password when opening encrypted files, and will optionally display the encryption key used by a file. This is a non-standard operation, but it can be useful in certain situations. Please see - the discussion of :samp:`--password-is-hex-key` in - :ref:`basic-options` or the comments around + the discussion of :qpdf:ref:`--password-is-hex-key` or the comments around ``QPDF::setPasswordIsHexKey`` in :file:`QPDF.hh` for additional details. @@ -1820,8 +1817,8 @@ For a detailed list of changes, please see the file or RunLength encoding. Library API enhancements and command-line options have been added to control this behavior. See command-line options - :samp:`--compress-streams` and - :samp:`--decode-level` and methods + :qpdf:ref:`--compress-streams` and + :qpdf:ref:`--decode-level` and methods ``QPDFWriter::setCompressStreams`` and ``QPDFWriter::setDecodeLevel``. @@ -1846,27 +1843,27 @@ For a detailed list of changes, please see the file - Command-line arguments can now be read from files or standard input using ``@file`` or ``@-`` syntax. Please see :ref:`invocation`. - - :samp:`--rotate`: request page rotation + - :qpdf:ref:`--rotate`: request page rotation - - :samp:`--newline-before-endstream`: ensure that + - :qpdf:ref:`--newline-before-endstream`: ensure that a newline appears before every ``endstream`` keyword in the file; used to prevent qpdf from breaking PDF/A compliance on already compliant files. 
- - :samp:`--preserve-unreferenced`: preserve + - :qpdf:ref:`--preserve-unreferenced`: preserve unreferenced objects in the input PDF - - :samp:`--split-pages`: break output into chunks + - :qpdf:ref:`--split-pages`: break output into chunks with fixed numbers of pages - - :samp:`--verbose`: print the name of each + - :qpdf:ref:`--verbose`: print the name of each output file that is created - - :samp:`--compress-streams` and - :samp:`--decode-level` replace - :samp:`--stream-data` for improving granularity + - :qpdf:ref:`--compress-streams` and + :qpdf:ref:`--decode-level` replace + :qpdf:ref:`--stream-data` for improving granularity of controlling compression and decompression of stream data. - The :samp:`--stream-data` option will remain + The :qpdf:ref:`--stream-data` option will remain available. - When running :command:`qpdf --check` with other @@ -1877,8 +1874,8 @@ For a detailed list of changes, please see the file reference table, or other similar operations. - Process :command:`--pages` earlier so that other - options like :samp:`--show-pages` or - :samp:`--split-pages` can operate on the file + options like :qpdf:ref:`--show-pages` or + :qpdf:ref:`--split-pages` can operate on the file after page splitting/merging has occurred. - API Changes. All new API calls are documented in their respective @@ -1911,7 +1908,7 @@ For a detailed list of changes, please see the file ``QPDFWriter`` methods. 6.0.0: November 10, 2015 - - Implement :samp:`--deterministic-id` command-line + - Implement :qpdf:ref:`--deterministic-id` command-line option and ``QPDFWriter::setDeterministicID`` as well as C API function ``qpdf_set_deterministic_ID`` for generating a deterministic ID for non-encrypted files. When this option is @@ -2024,12 +2021,12 @@ For a detailed list of changes, please see the file :file:`QPDFObjectHandle.hh` for additional notes. 
- - Add :samp:`--show-npages` command-line option to + - Add :qpdf:ref:`--show-npages` command-line option to the :command:`qpdf` command to show the number of pages in a file. - Allow omission of the page range within - :samp:`--pages` for the + :qpdf:ref:`--pages` for the :command:`qpdf` command. When omitted, the page range is implicitly taken to be all the pages in the file. @@ -2156,8 +2153,9 @@ For a detailed list of changes, please see the file ``QPDFWriter::setMinimumPDFVersion`` and ``QPDFWriter::forcePDFVersion`` that accept an extension level, and extended syntax for specifying forced and minimum versions on - the command line as described in :ref:`advanced-transformation`. Corresponding functions - have been added to the C API as well. + the command line as described in :qpdf:ref:`--force-version` and + :qpdf:ref:`--min-version`. Corresponding functions have been added + to the C API as well. - Minor fixes to prevent qpdf from referencing objects in the file that are not referenced in the file's overall structure. Most @@ -2213,12 +2211,12 @@ For a detailed list of changes, please see the file ``QPDFWriter``. - Removed the method ``decodeStreams``. This method was used by - the :samp:`--check` option of the + the :qpdf:ref:`--check` option of the :command:`qpdf` command-line tool to force all streams in the file to be decoded, but it also suffered from the problem of opening otherwise unreferenced streams and thus could report false positives. The - :samp:`--check` option now causes qpdf to go + :qpdf:ref:`--check` option now causes qpdf to go through all the motions of writing a new file based on the original one, so it will always reference and check exactly those parts of a file that any ordinary viewer would check. @@ -2307,7 +2305,7 @@ For a detailed list of changes, please see the file - Options have been added to the :command:`qpdf` command-line tool for copying encryption parameters from another - file. See :ref:`basic-options`. + file. 
(QXXXQ Link) - New methods have been added to the ``QPDF`` object for adding and removing pages. See :ref:`adding-and-remove-pages`. @@ -2571,7 +2569,7 @@ For a detailed list of changes, please see the file permissions, it does make them available so that applications that use qpdf can enforce permissions. - - The :samp:`--check` option to + - The :qpdf:ref:`--check` option to :command:`qpdf` has been extended to include some additional information. diff --git a/manual/weak-crypto.rst b/manual/weak-crypto.rst index a75a1add..ffbcc31a 100644 --- a/manual/weak-crypto.rst +++ b/manual/weak-crypto.rst @@ -13,12 +13,12 @@ cryptography algorithm, and MD5, which is a weak hashing algorithm. In version 10.4, qpdf generates warnings for some (but not all) cases of writing files with weak cryptography when invoked from the command-line. These warnings can be suppressed using the -:samp:`--allow-weak-crypto` option. +:qpdf:ref:`--allow-weak-crypto` option. It is planned for qpdf version 11 to be stricter, making it an error to write files with insecure cryptography from the command-line tool in most cases without specifying the -:samp:`--allow-weak-crypto` flag and also to require +:qpdf:ref:`--allow-weak-crypto` flag and also to require explicit steps when using the C++ library to enable use of insecure cryptography.