Incorporate job schema generation into generate_auto_job

This commit is contained in:
Jay Berkenbilt 2022-01-22 11:25:55 -05:00
parent b9cd693a5b
commit 1c8d53465f
5 changed files with 459 additions and 7 deletions

View File

@ -128,7 +128,10 @@ Command-line arguments are closely coupled with QPDFJob. To add a new
command-line argument, add the option to the appropriate table in
job.yml. This will automatically declare a method in the private
ArgParser class in QPDFJob_argv.cc which you have to implement. The
implementation should make calls to methods in QPDFJob.
implementation should make calls to methods in QPDFJob. Then, add the
same option to job.yml in one of two places: the no-json section if
it is to be excluded from the job json structure, or the json section
at the place where it should appear in the json structure.
The build will fail until the new option is documented in
manual/cli.rst. To do that, create documentation for the option by

View File

@ -5,6 +5,7 @@ import argparse
import hashlib
import re
import yaml
import json
whoami = os.path.basename(sys.argv[0])
BANNER = f'''//
@ -29,6 +30,7 @@ class Main:
'decl': 'libqpdf/qpdf/auto_job_decl.hh',
'init': 'libqpdf/qpdf/auto_job_init.hh',
'help': 'libqpdf/qpdf/auto_job_help.hh',
'schema': 'libqpdf/qpdf/auto_job_schema.hh',
}
SUMS = 'job.sums'
@ -199,6 +201,9 @@ class Main:
raise Exception(
f'help for unknown option {option},'
f' lineno={lineno}')
if option not in self.help_options:
# QXXXQ also need to exclude help table
self.jdata[option[2:]]['help'] = short_text
print(f'ap.addOptionHelp("{option}", "{topic}",'
f' "{short_text}", R"({long_text})");', file=f)
help_lines += 1
@ -226,9 +231,12 @@ class Main:
with open('job.yml', 'r') as f:
data = yaml.safe_load(f.read())
self.validate(data)
self.options_without_help = set(
# Add the built-in help options to tables that we populate as
# we read job.yml since we won't encounter these in job.yml
self.help_options = set(
['--completion-bash', '--completion-zsh', '--help']
)
self.options_without_help = set(self.help_options)
self.prepare(data)
with open(self.DESTS['decl'], 'w') as f:
print(BANNER, file=f)
@ -242,6 +250,11 @@ class Main:
with open('manual/cli.rst', 'r') as df:
print(BANNER, file=f)
self.generate_doc(df, f)
self.generate_schema(data)
with open(self.DESTS['schema'], 'w') as f:
print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
json.dumps(self.schema, indent=2, separators=(',', ': ')) +
')";', file=f)
# Update hashes last to ensure that this will be rerun in the
# event of a failure.
@ -251,6 +264,24 @@ class Main:
def prepare(self, data):
self.decls = []
self.init = []
self.jdata = {
# option: {
# tables: set(),
# help: string,
# QXXXQ something for registering handler
# }
}
def add_jdata(flag, table):
    # Record that command-line option `flag` (given without its
    # leading dashes) belongs to option table `table`.
    if table == 'help':
        # Options in the help table are collected in help_options
        # rather than in jdata.
        self.help_options.add(f'--{flag}')
        return
    entry = self.jdata.get(flag)
    if entry is None:
        # First time this option is seen: start its set of tables.
        self.jdata[flag] = {'tables': {table}}
    else:
        entry['tables'].add(table)
self.init.append('auto b = [this](void (ArgParser::*f)()) {')
self.init.append(' return QPDFArgParser::bindBare(f, this);')
@ -275,7 +306,7 @@ class Main:
self.decls.append('')
for o in data['options']:
table = o['table']
table_prefix = o.get('prefix', table)
if table == 'main':
self.init.append('this->ap.selectMainOptionTable();')
elif table == 'help':
@ -296,12 +327,14 @@ class Main:
self.decls.append(f'void {identifier}();')
self.init.append(f'this->ap.addBare("{i}", '
f'b(&ArgParser::{identifier}));')
add_jdata(i, table_prefix)
for i in o.get('optional_parameter', []):
self.options_without_help.add(f'--{i}')
identifier = self.to_identifier(i, prefix, False)
self.decls.append(f'void {identifier}(char *);')
self.init.append(f'this->ap.addOptionalParameter("{i}", '
f'p(&ArgParser::{identifier}));')
add_jdata(i, table_prefix)
for i, v in o.get('required_parameter', {}).items():
self.options_without_help.add(f'--{i}')
identifier = self.to_identifier(i, prefix, False)
@ -309,6 +342,7 @@ class Main:
self.init.append(f'this->ap.addRequiredParameter("{i}", '
f'p(&ArgParser::{identifier})'
f', "{v}");')
add_jdata(i, table_prefix)
for i, v in o.get('required_choices', {}).items():
self.options_without_help.add(f'--{i}')
identifier = self.to_identifier(i, prefix, False)
@ -316,6 +350,7 @@ class Main:
self.init.append(f'this->ap.addChoices("{i}", '
f'p(&ArgParser::{identifier})'
f', true, {v}_choices);')
add_jdata(i, table_prefix)
for i, v in o.get('optional_choices', {}).items():
self.options_without_help.add(f'--{i}')
identifier = self.to_identifier(i, prefix, False)
@ -323,11 +358,13 @@ class Main:
self.init.append(f'this->ap.addChoices("{i}", '
f'p(&ArgParser::{identifier})'
f', false, {v}_choices);')
add_jdata(i, table_prefix)
if table not in ('main', 'help'):
identifier = self.to_identifier(table, 'argEnd', False)
self.decls.append(f'void {identifier}();')
for o in data['options']:
table = o['table']
table_prefix = o.get('prefix', table)
if 'from_table' not in o:
continue
if table == 'main':
@ -341,6 +378,79 @@ class Main:
for j in ft['options']:
self.init.append('this->ap.copyFromOtherTable'
f'("{j}", "{other_table}");')
add_jdata(j, table_prefix)
def generate_schema(self, data):
    """Build self.schema from the 'json' section of the job data.

    Walks data['json'] and checks it against the command-line options
    collected in self.jdata, so that every option is accounted for
    either in data['json'] or in data['no-json'].

    data: mapping loaded from job.yml; the 'json' and 'no-json' keys
        are read here.

    Sets self.schema to a nested structure of dicts, one-element
    lists, and strings. Raises Exception if the json section contains
    an unknown key, an array whose length is not 1, a key with no
    usable value, or if the options seen do not match the options
    expected.
    """
    # XXX Ultimately be able to generate a schema as well as
    # JSONHandler and registering stuff.

    # Build the set of option names that we expect to see. If an
    # option appears in only one option table, we expect to see it
    # once under its own name. If it appears in more than one option
    # table, we expect a separate entry for each table, written in
    # job.yml as "<table prefix>.<option>". The table prefix is
    # removed in the schema.
    expected = {}
    for option, info in self.jdata.items():
        # Copy everything except 'tables' so self.jdata is untouched.
        details = {k: v for k, v in info.items() if k != 'tables'}
        tables = info['tables']
        if len(tables) == 1:
            expected[option] = details
        else:
            for t in sorted(tables):
                expected[f'{t}.{option}'] = dict(details)
    options_seen = set(data['no-json'])

    self.schema = {}

    def option_to_json_key(s):
        return self.to_identifier(s, '', False)

    # Walk through the json information building the schema as we
    # go. This verifies consistency between command-line options
    # and the json section of the data and builds up a schema by
    # populating with help information as available.
    def build_schema(j, s):
        for k, v in j.items():
            if not (k in expected or
                    k.startswith('_') or
                    isinstance(v, str)):
                raise Exception(f'json: unknown key {k}')
            if k.startswith('_'):
                schema_key = k[1:]
            else:
                # Drop the "<table prefix>." part, if any.
                schema_key = re.sub(r'[^\.]+\.', '', k)
            schema_key = option_to_json_key(schema_key)
            schema_value = v
            if k in expected:
                options_seen.add(re.sub('^_', '', k))
                if v is None:
                    help_text = expected[k].get('help')
                    if help_text is None:
                        # Without this check the failure would be a
                        # bare KeyError: 'help'.
                        raise Exception(
                            f'json: no help text known for {k}')
                    # Rewrite --option references inside the help
                    # text as the corresponding json keys.
                    schema_value = re.sub(
                        r'--(\S+)',
                        lambda x: option_to_json_key(x.group(1)),
                        help_text)
            if isinstance(v, dict):
                schema_value = {}
                build_schema(v, schema_value)
            elif isinstance(v, list):
                if len(v) != 1:
                    raise Exception('json contains array with length != 1')
                if isinstance(v[0], dict):
                    schema_value = [{}]
                    build_schema(v[0], schema_value[0])
            elif schema_value is None:
                raise Exception(f'unknown schema value for {k}')
            s[schema_key] = schema_value

    build_schema(data['json'], self.schema)

    # Report mismatches in both directions. Reporting only the
    # missing options would print an empty set when the real problem
    # is an entry (such as a stale no-json item) that does not
    # correspond to any known option.
    expected_keys = set(expected)
    missing = expected_keys - options_seen
    unknown = options_seen - expected_keys
    problems = []
    if missing:
        problems.append('missing from json: ' + str(sorted(missing)))
    if unknown:
        problems.append('listed but not a known option: ' +
                        str(sorted(unknown)))
    if problems:
        raise Exception('; '.join(problems))
def check_keys(self, what, d, exp):
if not isinstance(d, dict):
@ -351,7 +461,8 @@ class Main:
exit(f'{what}: unknown keys = {extra}')
def validate(self, data):
self.check_keys('top', data, set(['choices', 'options']))
self.check_keys('top', data, set(
['choices', 'options', 'no-json', 'json']))
for o in data['options']:
self.check_keys('top', o, set(
['table', 'prefix', 'bare', 'positional',
@ -363,7 +474,10 @@ class Main:
if const:
identifier = f'{prefix}_{identifier.upper()}'
else:
identifier = f'{prefix}_{identifier.lower()}'
if prefix:
identifier = f'{prefix}_{identifier.lower()}'
else:
identifier = identifier.lower()
identifier = re.sub(r'_([a-z])',
lambda x: x.group(1).upper(),
identifier).replace('_', '')

View File

@ -1,8 +1,9 @@
# Generated by generate_auto_job
generate_auto_job b70f64314f1ae1f100fa6a11975dee5f7669038e2a619b6c9da1e5230db1dd1b
job.yml 8177cadf41096efdc174f04daadfe5d98c592ad44ad10cb96537521fd79a801a
generate_auto_job 0758b244fc4e2d3e440883072d2740bc4cdb26c5aa8de938f028afd7d83fad79
job.yml 2856c2635d42f0a58717d3ffce3125816d8f98ff17245c4b7a0669d70cd68b84
libqpdf/qpdf/auto_job_decl.hh 97395ecbe590b23ae04d6cce2080dbd0e998917ff5eeaa5c6aafa91041d3cd6a
libqpdf/qpdf/auto_job_help.hh 2653faaf59415bec81c3a85d426239d52b609ac24faba34ec2d26f00710dd2c6
libqpdf/qpdf/auto_job_init.hh 465bf46769559ceb77110d1b9d3293ba9b3595850b49848c31aeabd10aadb4ad
libqpdf/qpdf/auto_job_schema.hh c91a4e182e088797b70dda94af03ca32d360f3564890132da2a8bdc3c4432423
manual/_ext/qpdf.py 855fe12de5af7a10bb24be6ecc4d5dff4c84ac58cf388a13be6bbb394346a67d
manual/cli.rst b136c7f33a538c580b081a7e802c27635aad2a4229efa0eb0736466116b7aa90

158
job.yml
View File

@ -217,3 +217,161 @@ options:
required_parameter:
prefix: prefix
password: password
no-json:
- preserve-unreferenced-resources
json:
# The structure of this section defines what the json input to
# QPDFJob looks like. If a key starts with underscore or has a value
# that is a string, it does not map to a command-line argument. If
# the value is null, its properties and help come from other
# information known by generate_auto_job. This information is used to
# construct a "schema" (as in JSON.hh) for the json input to QPDFJob.
# The leading underscore is removed from such keys in the schema.
_input:
_file:
_name: "input filename"
main.password:
password-file:
empty:
_output:
_file:
_name: "output filename"
replace-input:
split-pages:
_options:
qdf:
preserve-unreferenced:
newline-before-endstream:
normalize-content:
stream-data:
compress-streams:
recompress-flate:
decode-level:
decrypt:
static-aes-iv:
static-id:
no-original-object-ids:
copy-encryption:
encryption-file-password:
linearize:
linearize-pass1:
object-streams:
min-version:
force-version:
progress:
encrypt:
user-password: "user password"
owner-password: "owner password"
key-length: "key length: 40, 128, 256"
_40-bit:
Enc40.annotate:
Enc40.extract:
Enc40.modify:
Enc40.print:
_128-bit:
Enc128.accessibility:
Enc128.annotate:
Enc128.assemble:
Enc128.cleartext-metadata:
Enc128.extract:
Enc128.form:
Enc128.modify-other:
Enc128.modify:
Enc128.print:
force-V4:
use-aes:
_256-bit:
Enc256.accessibility:
Enc256.annotate:
Enc256.assemble:
Enc256.cleartext-metadata:
Enc256.extract:
Enc256.form:
Enc256.modify-other:
Enc256.modify:
Enc256.print:
allow-insecure:
force-R5:
_options:
allow-weak-crypto:
deterministic-id:
keep-files-open:
keep-files-open-threshold:
no-warn:
verbose:
warning-exit-0:
ignore-xref-streams:
password-is-hex-key:
password-mode:
suppress-password-recovery:
suppress-recovery:
_inspect:
check:
check-linearization:
filtered-stream-data:
is-encrypted:
raw-stream-data:
requires-password:
show-encryption:
show-encryption-key:
show-linearization:
show-npages:
show-object:
show-pages:
show-xref:
with-images:
list-attachments:
show-attachment:
json:
json-key:
- null
json-object:
- null
_transform:
coalesce-contents:
compression-level:
externalize-inline-images:
ii-min-bytes:
remove-unreferenced-resources:
_modify:
add-attachment:
- file: "attachment to add"
creationdate:
description:
filename:
key:
mimetype:
moddate:
replace:
remove-attachment:
copy-attachments-from:
- file: "attachment source filename"
CopyAtt.password:
prefix:
collate:
flatten-annotations:
flatten-rotation:
generate-appearances:
keep-inline-images:
oi-min-area:
oi-min-height:
oi-min-width:
optimize-images:
pages:
- file: "source for pages"
Pages.password:
range: "page range"
remove-page-labels:
rotate:
overlay:
file: "source file for overlay"
UO.password:
from:
repeat:
to:
underlay:
file: "source file for underlay"
UO.password:
from:
repeat:
to:

View File

@ -0,0 +1,176 @@
static constexpr char const* JOB_SCHEMA_DATA = R"({
"input": {
"file": {
"name": "input filename",
"password": "specify password",
"passwordFile": "read password from a file"
},
"empty": "empty input file"
},
"output": {
"file": {
"name": "output filename"
},
"replaceInput": "replace input with output",
"splitPages": "write pages to separate files",
"options": {
"qdf": "enable viewing PDF code in a text editor",
"preserveUnreferenced": "preserve unreferenced objects",
"newlineBeforeEndstream": "force a newline before endstream",
"normalizeContent": "fix newlines in content streams",
"streamData": "control stream compression",
"compressStreams": "compress uncompressed streams",
"recompressFlate": "uncompress and recompress flate",
"decodeLevel": "control which streams to uncompress",
"decrypt": "remove encryption from input file",
"staticAesIv": "use a fixed AES vector",
"staticId": "use a fixed document ID",
"noOriginalObjectIds": "omit original object ID in qdf",
"copyEncryption": "copy another file's encryption details",
"encryptionFilePassword": "supply password for copyEncryption",
"linearize": "linearize (web-optimize) output",
"linearizePass1": "save pass 1 of linearization",
"objectStreams": "control use of object streams",
"minVersion": "set minimum PDF version",
"forceVersion": "set output PDF version",
"progress": "show progress when writing",
"encrypt": {
"userPassword": "user password",
"ownerPassword": "owner password",
"keyLength": "key length: 48, 128, 256",
"40Bit": {
"annotate": "restrict document annotation",
"extract": "restrict text/graphic extraction",
"modify": "restrict document modification",
"print": "restrict printing"
},
"128Bit": {
"accessibility": "restrict document accessibility",
"annotate": "restrict document annotation",
"assemble": "restrict document assembly",
"cleartextMetadata": "don't encrypt metadata",
"extract": "restrict text/graphic extraction",
"form": "restrict form filling",
"modifyOther": "restrict other modifications",
"modify": "restrict document modification",
"print": "restrict printing",
"forceV4": "force V=4 in encryption dictionary",
"useAes": "use AES with 128-bit encryption"
},
"256Bit": {
"accessibility": "restrict document accessibility",
"annotate": "restrict document annotation",
"assemble": "restrict document assembly",
"cleartextMetadata": "don't encrypt metadata",
"extract": "restrict text/graphic extraction",
"form": "restrict form filling",
"modifyOther": "restrict other modifications",
"modify": "restrict document modification",
"print": "restrict printing",
"allowInsecure": "allow empty owner passwords",
"forceR5": "use unsupported R=5 encryption"
}
}
}
},
"options": {
"allowWeakCrypto": "allow insecure cryptographic algorithms",
"deterministicId": "generate ID deterministically",
"keepFilesOpen": "manage keeping multiple files open",
"keepFilesOpenThreshold": "set threshold for keepFilesOpen",
"noWarn": "suppress printing warning messages",
"verbose": "print additional information",
"warningExit0": "exit 0 even with warnings",
"ignoreXrefStreams": "use xref tables rather than streams",
"passwordIsHexKey": "provide hex-encoded encryption key",
"passwordMode": "tweak how qpdf encodes passwords",
"suppressPasswordRecovery": "don't try different password encodings",
"suppressRecovery": "suppress error recovery"
},
"inspect": {
"check": "partially check whether PDF is valid",
"checkLinearization": "check linearization tables",
"filteredStreamData": "show filtered stream data",
"isEncrypted": "silently test whether a file is encrypted",
"rawStreamData": "show raw stream data",
"requiresPassword": "silently test a file's password",
"showEncryption": "information about encrypted files",
"showEncryptionKey": "show key with showEncryption",
"showLinearization": "show linearization hint tables",
"showNpages": "show number of pages",
"showObject": "show contents of an object",
"showPages": "display page dictionary information",
"showXref": "show cross reference data",
"withImages": "include image details with showPages",
"listAttachments": "list embedded files",
"showAttachment": "export an embedded file",
"json": "show file in json format",
"jsonKey": [
null
],
"jsonObject": [
null
]
},
"transform": {
"coalesceContents": "combine content streams",
"compressionLevel": "set compression level for flate",
"externalizeInlineImages": "convert inline to regular images",
"iiMinBytes": "set minimum size for externalizeInlineImages",
"removeUnreferencedResources": "remove unreferenced page resources"
},
"modify": {
"addAttachment": [
{
"file": "attachment to add",
"creationdate": "set attachment's creation date",
"description": "set attachment's description",
"filename": "set attachment's displayed filename",
"key": "specify attachment key",
"mimetype": "attachment mime type, e.g. application/pdf",
"moddate": "set attachment's modification date",
"replace": "replace attachment with same key"
}
],
"removeAttachment": "remove an embedded file",
"copyAttachmentsFrom": [
{
"file": "attachment source filename",
"password": "specify password",
"prefix": "key prefix for copying attachments"
}
],
"collate": "collate with pages",
"flattenAnnotations": "push annotations into content",
"flattenRotation": "remove rotation from page dictionary",
"generateAppearances": "generate appearances for form fields",
"keepInlineImages": "exclude inline images from optimization",
"oiMinArea": "minimum area for optimizeImages",
"oiMinHeight": "minimum height for optimizeImages",
"oiMinWidth": "minimum width for optimizeImages",
"optimizeImages": "use efficient compression for images",
"pages": [
{
"file": "source for for pages",
"password": "specify password",
"range": "page range"
}
],
"removePageLabels": "remove explicit page numbers",
"rotate": "rotate pages",
"overlay": {
"file": "source file for overlay",
"password": "specify password",
"from": "source pages for underlay/overlay",
"repeat": "overlay/underlay pages to repeat",
"to": "destination pages for underlay/overlay"
},
"underlay": {
"file": "source file for underlay",
"password": "specify password",
"from": "source pages for underlay/overlay",
"repeat": "overlay/underlay pages to repeat",
"to": "destination pages for underlay/overlay"
}
}
})";