From 1c8d53465ff4d8e732498b39e49595b16d6754af Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 22 Jan 2022 11:25:55 -0500 Subject: [PATCH] Incorporate job schema generation into generate_auto_job --- README-maintainer | 5 +- generate_auto_job | 122 +++++++++++++++++++++- job.sums | 5 +- job.yml | 158 ++++++++++++++++++++++++++++ libqpdf/qpdf/auto_job_schema.hh | 176 ++++++++++++++++++++++++++++++++ 5 files changed, 459 insertions(+), 7 deletions(-) create mode 100644 libqpdf/qpdf/auto_job_schema.hh diff --git a/README-maintainer b/README-maintainer index d5801070..7ea049dc 100644 --- a/README-maintainer +++ b/README-maintainer @@ -128,7 +128,10 @@ Command-line arguments are closely coupled with QPDFJob. To add a new command-line argument, add the option to the appropriate table in job.yml. This will automatically declare a method in the private ArgParser class in QPDFJob_argv.cc which you have to implement. The -implementation should make calls to methods in QPDFJob. +implementation should make calls to methods in QPDFJob. Then, add the +same option to either the no-json section of job.yml if it is to be +excluded from the job json structure, or add it under the json +structure to the place where it should appear in the json structure. The build will fail until the new option is documented in manual/cli.rst. To do that, create documentation for the option by diff --git a/generate_auto_job b/generate_auto_job index 79abc8b9..1706ff04 100755 --- a/generate_auto_job +++ b/generate_auto_job @@ -5,6 +5,7 @@ import argparse import hashlib import re import yaml +import json whoami = os.path.basename(sys.argv[0]) BANNER = f'''// @@ -29,6 +30,7 @@ class Main: 'decl': 'libqpdf/qpdf/auto_job_decl.hh', 'init': 'libqpdf/qpdf/auto_job_init.hh', 'help': 'libqpdf/qpdf/auto_job_help.hh', + 'schema': 'libqpdf/qpdf/auto_job_schema.hh', } SUMS = 'job.sums' @@ -199,6 +201,9 @@ class Main: raise Exception( f'help for unknown option {option},' f' lineno={lineno}') + if option not in self.help_options: + # QXXXQ also need to exclude help table + self.jdata[option[2:]]['help'] = short_text print(f'ap.addOptionHelp("{option}", "{topic}",' f' "{short_text}", R"({long_text})");', file=f) help_lines += 1 @@ -226,9 +231,12 @@ class Main: with open('job.yml', 'r') as f: data = yaml.safe_load(f.read()) self.validate(data) - self.options_without_help = set( + # Add the built-in help options to tables that we populate as + # we read job.yml since we won't encounter these in job.yml + self.help_options = set( ['--completion-bash', '--completion-zsh', '--help'] ) + self.options_without_help = set(self.help_options) self.prepare(data) with open(self.DESTS['decl'], 'w') as f: print(BANNER, file=f) @@ -242,6 +250,11 @@ class Main: with open('manual/cli.rst', 'r') as df: print(BANNER, file=f) self.generate_doc(df, f) + self.generate_schema(data) + with open(self.DESTS['schema'], 'w') as f: + print('static constexpr char const* JOB_SCHEMA_DATA = R"(' + + json.dumps(self.schema, indent=2, separators=(',', ': ')) + + ')";', file=f) # Update hashes last to ensure that this will be rerun in the # event of a failure. @@ -251,6 +264,24 @@ class Main: def prepare(self, data): self.decls = [] self.init = [] + self.jdata = { + # option: { + # tables: set(), + # help: string, + # QXXXQ something for registering handler + # } + } + + def add_jdata(flag, table): + nonlocal self + if table == 'help': + self.help_options.add(f'--{flag}') + elif flag in self.jdata: + self.jdata[flag]['tables'].add(table) + else: + self.jdata[flag] = { + 'tables': set([table]), + } self.init.append('auto b = [this](void (ArgParser::*f)()) {') self.init.append(' return QPDFArgParser::bindBare(f, this);') @@ -275,7 +306,7 @@ class Main: self.decls.append('') for o in data['options']: table = o['table'] - + table_prefix = o.get('prefix', table) if table == 'main': self.init.append('this->ap.selectMainOptionTable();') elif table == 'help': @@ -296,12 +327,14 @@ class Main: self.decls.append(f'void {identifier}();') self.init.append(f'this->ap.addBare("{i}", ' f'b(&ArgParser::{identifier}));') + add_jdata(i, table_prefix) for i in o.get('optional_parameter', []): self.options_without_help.add(f'--{i}') identifier = self.to_identifier(i, prefix, False) self.decls.append(f'void {identifier}(char *);') self.init.append(f'this->ap.addOptionalParameter("{i}", ' f'p(&ArgParser::{identifier}));') + add_jdata(i, table_prefix) for i, v in o.get('required_parameter', {}).items(): self.options_without_help.add(f'--{i}') identifier = self.to_identifier(i, prefix, False) @@ -309,6 +342,7 @@ class Main: self.init.append(f'this->ap.addRequiredParameter("{i}", ' f'p(&ArgParser::{identifier})' f', "{v}");') + add_jdata(i, table_prefix) for i, v in o.get('required_choices', {}).items(): self.options_without_help.add(f'--{i}') identifier = self.to_identifier(i, prefix, False) @@ -316,6 +350,7 @@ class Main: self.init.append(f'this->ap.addChoices("{i}", ' f'p(&ArgParser::{identifier})' f', true, {v}_choices);') + add_jdata(i, table_prefix) for i, v in o.get('optional_choices', {}).items(): self.options_without_help.add(f'--{i}') identifier = self.to_identifier(i, prefix, False) @@ -323,11 +358,13 @@ class Main: self.init.append(f'this->ap.addChoices("{i}", ' f'p(&ArgParser::{identifier})' f', false, {v}_choices);') + add_jdata(i, table_prefix) if table not in ('main', 'help'): identifier = self.to_identifier(table, 'argEnd', False) self.decls.append(f'void {identifier}();') for o in data['options']: table = o['table'] + table_prefix = o.get('prefix', table) if 'from_table' not in o: continue if table == 'main': @@ -341,6 +378,79 @@ class Main: for j in ft['options']: self.init.append('this->ap.copyFromOtherTable' f'("{j}", "{other_table}");') + add_jdata(j, table_prefix) + + def generate_schema(self, data): + # XXX check data['json'] against what we know from jdata. + # Ultimately be able to generate a schema as well as + # JSONHandler and registering stuff. + + # Check to make sure that every command-line option is + # represented either in data['json'] or data['no-json']. + + # Build a list of options that we expect. If an option appears + # once, we just expect to see it once. If it appears in more + # than one options table, we need to see a separate version of + # it for each option table. It is represented prepended in + # job.yml with the table prefix. The table prefix is removed + # in the schema. + expected = {} + for k, v in self.jdata.items(): + tables = v['tables'] + if len(tables) == 1: + expected[k] = {**v} + else: + for t in sorted(tables): + expected[f'{t}.{k}'] = {**v} + for _, v in expected.items(): + del v['tables'] + options_seen = set(data['no-json']) + + self.schema = {} + + def option_to_json_key(s): + return self.to_identifier(s, '', False) + + # Walk through the json information building the schema as we + # go. This verifies consistency between command-line options + # and the json section of the data and builds up a schema by + # populating with help information as available. + def build_schema(j, s): + for k, v in j.items(): + if not (k in expected or + k.startswith('_') or + isinstance(v, str)): + raise Exception(f'json: unknown key {k}') + if k.startswith('_'): + schema_key = k[1:] + else: + schema_key = re.sub(r'[^\.]+\.', '', k) + schema_key = option_to_json_key(schema_key) + schema_value = v + if k in expected: + options_seen.add(re.sub('^_', '', k)) + if v is None: + schema_value = re.sub( + r'--(\S+)', + lambda x: option_to_json_key(x.group(1)), + expected[k]['help']) + if (isinstance(v, dict)): + schema_value = {} + build_schema(v, schema_value) + elif (isinstance(v, list)): + if len(v) != 1: + raise Exception('json contains array with length != 1') + if isinstance(v[0], dict): + schema_value = [{}] + build_schema(v[0], schema_value[0]) + elif schema_value is None: + raise Exception(f'unknown schema value for {k}') + s[schema_key] = schema_value + + build_schema(data['json'], self.schema) + if options_seen != set(expected.keys()): + raise Exception('missing from json: ' + + str(set(expected.keys()) - options_seen)) def check_keys(self, what, d, exp): if not isinstance(d, dict): @@ -351,7 +461,8 @@ class Main: exit(f'{what}: unknown keys = {extra}') def validate(self, data): - self.check_keys('top', data, set(['choices', 'options'])) + self.check_keys('top', data, set( + ['choices', 'options', 'no-json', 'json'])) for o in data['options']: self.check_keys('top', o, set( ['table', 'prefix', 'bare', 'positional', @@ -363,7 +474,10 @@ class Main: if const: identifier = f'{prefix}_{identifier.upper()}' else: - identifier = f'{prefix}_{identifier.lower()}' + if prefix: + identifier = f'{prefix}_{identifier.lower()}' + else: + identifier = identifier.lower() identifier = re.sub(r'_([a-z])', lambda x: x.group(1).upper(), identifier).replace('_', '') diff --git a/job.sums b/job.sums index 2387d64b..07c08c43 100644 --- a/job.sums +++ b/job.sums @@ -1,8 +1,9 @@ # Generated by generate_auto_job -generate_auto_job b70f64314f1ae1f100fa6a11975dee5f7669038e2a619b6c9da1e5230db1dd1b -job.yml 8177cadf41096efdc174f04daadfe5d98c592ad44ad10cb96537521fd79a801a +generate_auto_job 0758b244fc4e2d3e440883072d2740bc4cdb26c5aa8de938f028afd7d83fad79 +job.yml 2856c2635d42f0a58717d3ffce3125816d8f98ff17245c4b7a0669d70cd68b84 libqpdf/qpdf/auto_job_decl.hh 97395ecbe590b23ae04d6cce2080dbd0e998917ff5eeaa5c6aafa91041d3cd6a libqpdf/qpdf/auto_job_help.hh 2653faaf59415bec81c3a85d426239d52b609ac24faba34ec2d26f00710dd2c6 libqpdf/qpdf/auto_job_init.hh 465bf46769559ceb77110d1b9d3293ba9b3595850b49848c31aeabd10aadb4ad +libqpdf/qpdf/auto_job_schema.hh c91a4e182e088797b70dda94af03ca32d360f3564890132da2a8bdc3c4432423 manual/_ext/qpdf.py 855fe12de5af7a10bb24be6ecc4d5dff4c84ac58cf388a13be6bbb394346a67d manual/cli.rst b136c7f33a538c580b081a7e802c27635aad2a4229efa0eb0736466116b7aa90 diff --git a/job.yml b/job.yml index fa15e290..dec43e90 100644 --- a/job.yml +++ b/job.yml @@ -217,3 +217,161 @@ options: required_parameter: prefix: prefix password: password +no-json: + - preserve-unreferenced-resources +json: + # The structure of this section defines what the json input to + # QPDFJob looks like. If a key starts with underscore or has a value + # that is a string, it does not map to a command-line argument. If + # value is null, its properties and help come from other information + # known by generate_auto_job. This information is used to construct + # a "schema" (as in JSON.hh) for the json input to QPDFJob. The + # leading underscore is removed. + _input: + _file: + _name: "input filename" + main.password: + password-file: + empty: + _output: + _file: + _name: "output filename" + replace-input: + split-pages: + _options: + qdf: + preserve-unreferenced: + newline-before-endstream: + normalize-content: + stream-data: + compress-streams: + recompress-flate: + decode-level: + decrypt: + static-aes-iv: + static-id: + no-original-object-ids: + copy-encryption: + encryption-file-password: + linearize: + linearize-pass1: + object-streams: + min-version: + force-version: + progress: + encrypt: + user-password: "user password" + owner-password: "owner password" + key-length: "key length: 48, 128, 256" + _40-bit: + Enc40.annotate: + Enc40.extract: + Enc40.modify: + Enc40.print: + _128-bit: + Enc128.accessibility: + Enc128.annotate: + Enc128.assemble: + Enc128.cleartext-metadata: + Enc128.extract: + Enc128.form: + Enc128.modify-other: + Enc128.modify: + Enc128.print: + force-V4: + use-aes: + _256-bit: + Enc256.accessibility: + Enc256.annotate: + Enc256.assemble: + Enc256.cleartext-metadata: + Enc256.extract: + Enc256.form: + Enc256.modify-other: + Enc256.modify: + Enc256.print: + allow-insecure: + force-R5: + _options: + allow-weak-crypto: + deterministic-id: + keep-files-open: + keep-files-open-threshold: + no-warn: + verbose: + warning-exit-0: + ignore-xref-streams: + password-is-hex-key: + password-mode: + suppress-password-recovery: + suppress-recovery: + _inspect: + check: + check-linearization: + filtered-stream-data: + is-encrypted: + raw-stream-data: + requires-password: + show-encryption: + show-encryption-key: + show-linearization: + show-npages: + show-object: + show-pages: + show-xref: + with-images: + list-attachments: + show-attachment: + json: + json-key: + - null + json-object: + - null + _transform: + coalesce-contents: + compression-level: + externalize-inline-images: + ii-min-bytes: + remove-unreferenced-resources: + _modify: + add-attachment: + - file: "attachment to add" + creationdate: + description: + filename: + key: + mimetype: + moddate: + replace: + remove-attachment: + copy-attachments-from: + - file: "attachment source filename" + CopyAtt.password: + prefix: + collate: + flatten-annotations: + flatten-rotation: + generate-appearances: + keep-inline-images: + oi-min-area: + oi-min-height: + oi-min-width: + optimize-images: + pages: + - file: "source for for pages" + Pages.password: + range: "page range" + remove-page-labels: + rotate: + overlay: + file: "source file for overlay" + UO.password: + from: + repeat: + to: + underlay: + file: "source file for underlay" + UO.password: + from: + repeat: + to: diff --git a/libqpdf/qpdf/auto_job_schema.hh b/libqpdf/qpdf/auto_job_schema.hh new file mode 100644 index 00000000..9ecdc507 --- /dev/null +++ b/libqpdf/qpdf/auto_job_schema.hh @@ -0,0 +1,176 @@ +static constexpr char const* JOB_SCHEMA_DATA = R"({ + "input": { + "file": { + "name": "input filename", + "password": "specify password", + "passwordFile": "read password from a file" + }, + "empty": "empty input file" + }, + "output": { + "file": { + "name": "output filename" + }, + "replaceInput": "replace input with output", + "splitPages": "write pages to separate files", + "options": { + "qdf": "enable viewing PDF code in a text editor", + "preserveUnreferenced": "preserve unreferenced objects", + "newlineBeforeEndstream": "force a newline before endstream", + "normalizeContent": "fix newlines in content streams", + "streamData": "control stream compression", + "compressStreams": "compress uncompressed streams", + "recompressFlate": "uncompress and recompress flate", + "decodeLevel": "control which streams to uncompress", + "decrypt": "remove encryption from input file", + "staticAesIv": "use a fixed AES vector", + "staticId": "use a fixed document ID", + "noOriginalObjectIds": "omit original object ID in qdf", + "copyEncryption": "copy another file's encryption details", + "encryptionFilePassword": "supply password for copyEncryption", + "linearize": "linearize (web-optimize) output", + "linearizePass1": "save pass 1 of linearization", + "objectStreams": "control use of object streams", + "minVersion": "set minimum PDF version", + "forceVersion": "set output PDF version", + "progress": "show progress when writing", + "encrypt": { + "userPassword": "user password", + "ownerPassword": "owner password", + "keyLength": "key length: 48, 128, 256", + "40Bit": { + "annotate": "restrict document annotation", + "extract": "restrict text/graphic extraction", + "modify": "restrict document modification", + "print": "restrict printing" + }, + "128Bit": { + "accessibility": "restrict document accessibility", + "annotate": "restrict document annotation", + "assemble": "restrict document assembly", + "cleartextMetadata": "don't encrypt metadata", + "extract": "restrict text/graphic extraction", + "form": "restrict form filling", + "modifyOther": "restrict other modifications", + "modify": "restrict document modification", + "print": "restrict printing", + "forceV4": "force V=4 in encryption dictionary", + "useAes": "use AES with 128-bit encryption" + }, + "256Bit": { + "accessibility": "restrict document accessibility", + "annotate": "restrict document annotation", + "assemble": "restrict document assembly", + "cleartextMetadata": "don't encrypt metadata", + "extract": "restrict text/graphic extraction", + "form": "restrict form filling", + "modifyOther": "restrict other modifications", + "modify": "restrict document modification", + "print": "restrict printing", + "allowInsecure": "allow empty owner passwords", + "forceR5": "use unsupported R=5 encryption" + } + } + } + }, + "options": { + "allowWeakCrypto": "allow insecure cryptographic algorithms", + "deterministicId": "generate ID deterministically", + "keepFilesOpen": "manage keeping multiple files open", + "keepFilesOpenThreshold": "set threshold for keepFilesOpen", + "noWarn": "suppress printing warning messages", + "verbose": "print additional information", + "warningExit0": "exit 0 even with warnings", + "ignoreXrefStreams": "use xref tables rather than streams", + "passwordIsHexKey": "provide hex-encoded encryption key", + "passwordMode": "tweak how qpdf encodes passwords", + "suppressPasswordRecovery": "don't try different password encodings", + "suppressRecovery": "suppress error recovery" + }, + "inspect": { + "check": "partially check whether PDF is valid", + "checkLinearization": "check linearization tables", + "filteredStreamData": "show filtered stream data", + "isEncrypted": "silently test whether a file is encrypted", + "rawStreamData": "show raw stream data", + "requiresPassword": "silently test a file's password", + "showEncryption": "information about encrypted files", + "showEncryptionKey": "show key with showEncryption", + "showLinearization": "show linearization hint tables", + "showNpages": "show number of pages", + "showObject": "show contents of an object", + "showPages": "display page dictionary information", + "showXref": "show cross reference data", + "withImages": "include image details with showPages", + "listAttachments": "list embedded files", + "showAttachment": "export an embedded file", + "json": "show file in json format", + "jsonKey": [ + null + ], + "jsonObject": [ + null + ] + }, + "transform": { + "coalesceContents": "combine content streams", + "compressionLevel": "set compression level for flate", + "externalizeInlineImages": "convert inline to regular images", + "iiMinBytes": "set minimum size for externalizeInlineImages", + "removeUnreferencedResources": "remove unreferenced page resources" + }, + "modify": { + "addAttachment": [ + { + "file": "attachment to add", + "creationdate": "set attachment's creation date", + "description": "set attachment's description", + "filename": "set attachment's displayed filename", + "key": "specify attachment key", + "mimetype": "attachment mime type, e.g. application/pdf", + "moddate": "set attachment's modification date", + "replace": "replace attachment with same key" + } + ], + "removeAttachment": "remove an embedded file", + "copyAttachmentsFrom": [ + { + "file": "attachment source filename", + "password": "specify password", + "prefix": "key prefix for copying attachments" + } + ], + "collate": "collate with pages", + "flattenAnnotations": "push annotations into content", + "flattenRotation": "remove rotation from page dictionary", + "generateAppearances": "generate appearances for form fields", + "keepInlineImages": "exclude inline images from optimization", + "oiMinArea": "minimum area for optimizeImages", + "oiMinHeight": "minimum height for optimizeImages", + "oiMinWidth": "minimum width for optimizeImages", + "optimizeImages": "use efficient compression for images", + "pages": [ + { + "file": "source for for pages", + "password": "specify password", + "range": "page range" + } + ], + "removePageLabels": "remove explicit page numbers", + "rotate": "rotate pages", + "overlay": { + "file": "source file for overlay", + "password": "specify password", + "from": "source pages for underlay/overlay", + "repeat": "overlay/underlay pages to repeat", + "to": "destination pages for underlay/overlay" + }, + "underlay": { + "file": "source file for underlay", + "password": "specify password", + "from": "source pages for underlay/overlay", + "repeat": "overlay/underlay pages to repeat", + "to": "destination pages for underlay/overlay" + } + } +})";