2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-29 00:10:54 +00:00
qpdf/generate_auto_job
Jay Berkenbilt ea96330bb6 QPDFJob json: flatten json structure
Flatten everything to make it easier to map command-line flags to
json. The old structure was an illusion anyway because there was no
mechanism to enforce that things were in the right place. This also
helps with future flexibility.
2022-01-31 18:16:09 -05:00

630 lines
25 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import sys
import argparse
import hashlib
import re
import yaml
import json
import filecmp
from contextlib import contextmanager
# Name of this script as invoked; used in messages and in BANNER.
whoami = os.path.split(sys.argv[0])[1]

# Comment header printed at the top of the generated files.
BANNER = '\n'.join([
    '//',
    f'// This file is automatically generated by {whoami}.',
    '// Edits will be automatically overwritten if the build is',
    '// run in maintainer mode.',
    '//',
])
def warn(*args, **kwargs):
    """Print to standard error.

    Positional and keyword arguments are forwarded to print(); the
    output stream is always sys.stderr.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
@contextmanager
def write_file(filename):
    """Context manager that writes filename by way of a temporary file.

    Yields a text-mode file object opened on ``filename + '.tmp'``.
    When the block finishes normally, the temporary file is removed
    if filename already exists with identical contents (leaving the
    existing file untouched); otherwise it replaces filename.

    If the block raises, the temporary file is removed, filename is
    left as it was, and the exception propagates.
    """
    tmpfile = filename + '.tmp'
    try:
        with open(tmpfile, 'w') as f:
            yield f
    except BaseException:
        # Don't leave a partially written temporary file behind.
        if os.path.exists(tmpfile):
            os.unlink(tmpfile)
        raise
    if os.path.exists(filename) and filecmp.cmp(filename, tmpfile, False):
        os.unlink(tmpfile)
    else:
        # os.replace overwrites an existing destination on every
        # platform; os.rename fails on Windows if filename exists.
        os.replace(tmpfile, filename)
class Main:
    """Generate files for QPDFJob from job.yml and manual/cli.rst."""

    # Input files whose checksums are tracked alongside the outputs.
    SOURCES = [
        whoami,
        'manual/_ext/qpdf.py',
        'job.yml',
        'manual/cli.rst',
    ]
    # Output files, keyed by a short name. Others are added in top.
    DESTS = dict(
        decl='libqpdf/qpdf/auto_job_decl.hh',
        init='libqpdf/qpdf/auto_job_init.hh',
        help='libqpdf/qpdf/auto_job_help.hh',
        schema='libqpdf/qpdf/auto_job_schema.hh',
        json_decl='libqpdf/qpdf/auto_job_json_decl.hh',
        json_init='libqpdf/qpdf/auto_job_json_init.hh',
    )
    # File in which the checksums are stored.
    SUMS = 'job.sums'
def main(self, args=sys.argv[1:], prog=whoami):
    """Parse the command-line arguments and hand the result to top."""
    self.top(self.parse_args(args, prog))
def parse_args(self, args, prog):
    """Parse command-line arguments.

    args is the list of argument strings and prog the program name
    shown in usage messages. Exactly one of --check and --generate
    must be given. Returns the argparse namespace with boolean
    attributes ``check`` and ``generate``.
    """
    parser = argparse.ArgumentParser(
        prog=prog,
        description='Generate files for QPDFJob',
    )
    mxg = parser.add_mutually_exclusive_group(required=True)
    # --check never rewrites anything: top exits with an error when
    # the recorded checksums don't match, so the help says that.
    mxg.add_argument('--check',
                     help='exit with an error if generated files'
                     ' are not up to date',
                     action='store_true', default=False)
    mxg.add_argument('--generate',
                     help='generate files from sources',
                     action='store_true', default=False)
    return parser.parse_args(args)
def top(self, options):
    """Load job.yml and run the mode selected on the command line.

    Every option table in job.yml that names a ``config`` gets its
    own output file registered in DESTS. If the recorded checksums
    match the files on disk, exit successfully without doing
    anything. Otherwise --check exits with an error message and
    --generate regenerates the files.
    """
    with open('job.yml', 'r') as f:
        data = yaml.safe_load(f.read())
    # Register the extra outputs on a per-instance copy so that the
    # class-level DESTS table is not modified.
    self.DESTS = dict(self.DESTS)
    self.config_decls = {}
    self.declared_configs = set()
    for o in data['options']:
        config = o.get('config', None)
        if config is not None:
            self.DESTS[config] = f'include/qpdf/auto_job_{config}.hh'
            self.config_decls[config] = []
    # sys.exit rather than the exit() helper, which the site module
    # only installs for interactive use.
    if self.check_hashes():
        sys.exit(0)
    elif options.check:
        sys.exit(f'{whoami}: auto job inputs have changed')
    elif options.generate:
        self.generate(data)
    else:
        sys.exit(f'{whoami} unknown mode')
def get_hashes(self):
    """Return a dict mapping file name to SHA-256 hex digest.

    Covers every file in SOURCES and every value of DESTS, visited
    in sorted order. Files that do not exist are left out.
    """
    digests = {}
    paths = sorted(list(self.SOURCES) + list(self.DESTS.values()))
    for path in paths:
        try:
            with open(path, 'rb') as stream:
                contents = stream.read()
        except FileNotFoundError:
            continue
        digests[path] = hashlib.sha256(contents).hexdigest()
    return digests
def check_hashes(self):
    """Return True if the checksums stored in SUMS match the files.

    The SUMS file is read as lines of the form ``name digest``;
    lines that don't have that shape (such as the leading comment)
    are ignored. The result is compared with get_hashes(). A SUMS
    file that is missing or can't be read counts as a mismatch.
    """
    hashes = self.get_hashes()
    old_hashes = {}
    try:
        with open(self.SUMS, 'r') as f:
            for line in f:
                m = re.match(r'^(\S+) (\S+)\s*$', line)
                if m:
                    old_hashes[m.group(1)] = m.group(2)
    except (OSError, UnicodeError):
        # No usable checksum file: report a mismatch so the caller
        # regenerates. Other exceptions indicate a bug and are not
        # swallowed.
        return False
    return old_hashes == hashes
def update_hashes(self):
    """Write the current checksums from get_hashes() to the SUMS file.

    The file starts with a comment line naming this script, followed
    by one ``name digest`` line per file.
    """
    entries = [
        f'{name} {digest}' for name, digest in self.get_hashes().items()
    ]
    with open(self.SUMS, 'w') as out:
        for text in (f'# Generated by {whoami}', *entries):
            print(text, file=out)
def generate_doc(self, df, f):
    """Turn the help directives read from df into C++ written to f.

    df is scanned line by line with a small state machine that
    recognizes ``.. help-topic NAME: short text`` and
    ``.. qpdf:option:: --flag`` followed by an indented
    ``.. help: short text`` directive. The indented text under a
    directive is its long text, which ends at the first line that is
    neither blank nor indented. Each topic becomes an addHelpTopic
    call and each option an addOptionHelp call.

    Options that receive help are removed from
    self.options_without_help, and for options outside
    self.help_options the short text is stored in self.jdata.

    Raises Exception for malformed input, for references to unknown
    ``--help=`` topics, and if any option is left without help.
    """
    st_top, st_topic, st_option, st_option_help = range(4)
    state = st_top

    indent = None
    topic = None
    option = None
    short_text = None
    long_text = None

    # Generate a bunch of short static functions rather than a big
    # member function for help. Some compilers have problems with
    # very large member functions in classes in anonymous
    # namespaces.
    help_files = 0
    help_lines = 0

    self.all_topics = set(self.options_without_help)
    self.referenced_topics = set()

    def set_indent(x):
        # Text belonging to a directive is indented by the width of
        # the directive's leading ".. " prefix.
        nonlocal indent
        indent = ' ' * len(x)

    def append_long_text(line, topic):
        # Accumulate one line of long text. Returns True when line
        # terminates the text, at which point long_text is final.
        nonlocal long_text
        if line == '\n':
            long_text += '\n'
            return False
        if line.startswith(indent):
            long_text += line[len(indent):]
            return False
        long_text = long_text.strip()
        if long_text == '':
            raise Exception(f'missing long text for {topic}')
        long_text += '\n'
        if 'help' not in topic:
            # Help for --help itself has --help=... not
            # referring to specific options.
            for ref in re.finditer(r'--help=([^\.\s]+)', long_text):
                self.referenced_topics.add(ref.group(1))
        return True

    for lineno, line in enumerate(df.readlines(), 1):
        if help_lines == 0:
            # Start a new static function, closing the previous one.
            if help_files > 0:
                print('}', file=f)
            help_files += 1
            help_lines += 1
            print(f'static void add_help_{help_files}(QPDFArgParser& ap)\n'
                  '{', file=f)
        if state == st_top:
            m = re.match(r'^(\s*\.\. )help-topic (\S+): (.*)$', line)
            if m:
                set_indent(m.group(1))
                topic = m.group(2)
                short_text = m.group(3)
                long_text = ''
                state = st_topic
                continue
            m = re.match(
                r'^(\s*\.\. )qpdf:option:: (([^=\[\s]+)([\[= ](.+))?)$',
                line)
            if m:
                if topic is None:
                    raise Exception('option seen before topic')
                set_indent(m.group(1))
                option = m.group(3)
                synopsis = m.group(2)
                if synopsis.endswith('`'):
                    raise Exception(
                        f'stray ` at end of option line (line {lineno})')
                # When the option takes arguments, the long text
                # begins with the full synopsis.
                long_text = synopsis + '\n' if synopsis != option else ''
                state = st_option
                continue
        elif state == st_topic:
            if append_long_text(line, topic):
                self.all_topics.add(topic)
                print(f'ap.addHelpTopic("{topic}", "{short_text}",'
                      f' R"({long_text})");', file=f)
                help_lines += 1
                state = st_top
        elif state == st_option:
            if not (line == '\n' or line.startswith(indent)):
                raise Exception('option without help text')
            m = re.match(r'^(\s*\.\. )help: (.*)$', line)
            if m:
                set_indent(m.group(1))
                short_text = m.group(2)
                state = st_option_help
        elif state == st_option_help:
            if append_long_text(line, option):
                if option not in self.options_without_help:
                    raise Exception(
                        f'help for unknown option {option},'
                        f' lineno={lineno}')
                self.options_without_help.remove(option)
                if option not in self.help_options:
                    self.jdata[option[2:]]['help'] = short_text
                print(f'ap.addOptionHelp("{option}", "{topic}",'
                      f' "{short_text}", R"({long_text})");', file=f)
                help_lines += 1
                state = st_top
        if help_lines == 20:
            help_lines = 0
    print('}', file=f)
    print('static void add_help(QPDFArgParser& ap)\n{', file=f)
    for n in range(1, help_files + 1):
        print(f' add_help_{n}(ap);', file=f)
    print('ap.addHelpFooter("For detailed help, visit'
          ' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f)
    print('}\n', file=f)
    for ref in self.referenced_topics:
        if ref not in self.all_topics:
            raise Exception(f'help text referenced --help={ref}')
    if self.options_without_help:
        raise Exception(
            'Options without help: ' +
            ', '.join(self.options_without_help))
def generate(self, data):
    """Regenerate every output file from data (the parsed job.yml).

    Validates data, builds the declaration and initialization
    tables, writes each file in DESTS, and finally records new
    checksums.
    """
    warn(f'{whoami}: regenerating auto job files')
    self.validate(data)
    # Add the built-in help options to tables that we populate as
    # we read job.yml since we won't encounter these in job.yml
    self.help_options = {'--completion-bash', '--completion-zsh', '--help'}
    self.options_without_help = set(self.help_options)
    self.prepare(data)

    def emit(dest, lines):
        # Write BANNER followed by lines, one per line, to a DESTS file.
        with write_file(self.DESTS[dest]) as out:
            print(BANNER, file=out)
            for text in lines:
                print(text, file=out)

    emit('decl', self.decls)
    emit('init', self.init)
    with write_file(self.DESTS['help']) as out, \
            open('manual/cli.rst', 'r') as df:
        print(BANNER, file=out)
        self.generate_doc(df, out)
    # The schema and the json tables are populated by generate_schema,
    # which uses the help text collected by generate_doc.
    self.generate_schema(data)
    schema_text = json.dumps(self.schema, indent=2, separators=(',', ': '))
    with write_file(self.DESTS['schema']) as out:
        print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
              schema_text +
              ')";', file=out)
    for config, decls in self.config_decls.items():
        emit(config, decls)
    emit('json_decl', self.json_decls)
    emit('json_init', self.json_init)

    # Update hashes last to ensure that this will be rerun in the
    # event of a failure.
    self.update_hashes()
    # DON'T ADD CODE TO generate AFTER update_hashes
def handle_trivial(self, i, identifier, cfg, prefix, kind, v):
    """Register option i as a direct call to a config object method.

    Appends to self.init a registration whose lambda calls
    ``cfg->identifier``. v is the parameter name for
    'required_parameter' and the choices table name for the
    '*_choices' kinds. The method's declaration is added to
    self.config_decls[cfg] once per distinct signature.
    """
    with_param = f'[this](char *x){{{cfg}->{identifier}(x);}}'
    if kind == 'bare':
        registration = (f'this->ap.addBare("{i}", '
                        f'[this](){{{cfg}->{identifier}();}});')
    elif kind == 'optional_parameter':
        registration = f'this->ap.addOptionalParameter("{i}", {with_param});'
    elif kind == 'required_parameter':
        registration = (f'this->ap.addRequiredParameter("{i}", {with_param}'
                        f', "{v}");')
    elif kind == 'required_choices':
        registration = (f'this->ap.addChoices("{i}", {with_param}'
                        f', true, {v}_choices);')
    elif kind == 'optional_choices':
        registration = (f'this->ap.addChoices("{i}", {with_param}'
                        f', false, {v}_choices);')
    else:
        registration = None
    if registration is not None:
        self.init.append(registration)

    # Generate declarations for config methods separately by
    # config object.
    config_prefix = prefix + 'Config'
    arg = '' if kind == 'bare' else 'char const* parameter'
    fn = f'{config_prefix}* {identifier}({arg})'
    if fn in self.declared_configs:
        return
    self.declared_configs.add(fn)
    self.config_decls[cfg].append(f'QPDF_DLL {fn};')
def handle_flag(self, i, identifier, kind, v):
    """Register option i with an ArgParser member function handler.

    Appends the declaration of ``identifier`` to self.decls and the
    matching registration to self.init. v is the parameter name for
    'required_parameter' and the choices table name for the
    '*_choices' kinds. Unrecognized kinds are ignored.
    """
    handler = f'&ArgParser::{identifier}'
    if kind == 'bare':
        decl = f'void {identifier}();'
        registration = f'this->ap.addBare("{i}", b({handler}));'
    else:
        decl = f'void {identifier}(char *);'
        if kind == 'optional_parameter':
            registration = (
                f'this->ap.addOptionalParameter("{i}", p({handler}));')
        elif kind == 'required_parameter':
            registration = (
                f'this->ap.addRequiredParameter("{i}", p({handler})'
                f', "{v}");')
        elif kind == 'required_choices':
            registration = (
                f'this->ap.addChoices("{i}", p({handler})'
                f', true, {v}_choices);')
        elif kind == 'optional_choices':
            registration = (
                f'this->ap.addChoices("{i}", p({handler})'
                f', false, {v}_choices);')
        else:
            return
    self.decls.append(decl)
    self.init.append(registration)
def prepare(self, data):
    """Build the declaration and initialization tables from data.

    Resets self.decls, self.init, self.json_decls, self.json_init,
    self.jdata and self.by_table, then walks data['choices'] and
    data['options']. Every flag is added to
    self.options_without_help and recorded in self.jdata (flags of
    the help table go to self.help_options instead). Flags are
    dispatched to handle_flag when their table has no config or
    lists them as manual, and to handle_trivial otherwise.
    """
    self.decls = []
    self.init = []
    self.json_decls = []
    self.json_init = []
    self.jdata = {}
    self.by_table = {}

    def add_jdata(flag, table, details):
        # Remember which tables flag appears in and with what kind.
        if table == 'help':
            self.help_options.add(f'--{flag}')
        else:
            entry = self.jdata.setdefault(flag, {'tables': {}})
            entry['tables'][table] = details

    self.init.extend([
        'auto b = [this](void (ArgParser::*f)()) {',
        ' return QPDFArgParser::bindBare(f, this);',
        '};',
        'auto p = [this](void (ArgParser::*f)(char *)) {',
        ' return QPDFArgParser::bindParam(f, this);',
        '};',
        '',
    ])

    # One null-terminated array per choices table, shared by the
    # command-line and json initialization code.
    for name, values in data['choices'].items():
        quoted = ''.join(f'"{value}", ' for value in values)
        array = f'static char const* {name}_choices[] = {{{quoted}0}};'
        self.init.append(array)
        self.json_init.append(array)
    self.init.append('')
    self.json_init.append('')

    # Name constants for every table other than main and help.
    for o in data['options']:
        table = o['table']
        if table in ('main', 'help'):
            continue
        constant = self.to_identifier(table, 'O', True)
        self.decls.append(
            f'static constexpr char const* {constant} = "{table}";')
    self.decls.append('')

    for o in data['options']:
        table = o['table']
        config = o.get('config', None)
        table_prefix = o.get('prefix', '')
        arg_prefix = 'arg' + table_prefix
        config_prefix = o.get('config_prefix', table_prefix)
        manual = o.get('manual', [])
        json_prefix = table_prefix or table
        self.by_table[json_prefix] = {
            'config': config,
            'manual': manual,
        }
        is_sub_table = table not in ('main', 'help')
        if table == 'main':
            self.init.append('this->ap.selectMainOptionTable();')
        elif table == 'help':
            self.init.append('this->ap.selectHelpOptionTable();')
        else:
            identifier = self.to_identifier(table, 'argEnd', False)
            self.init.append(f'this->ap.registerOptionTable("{table}",'
                             f' b(&ArgParser::{identifier}));')
        if o.get('positional', False):
            self.decls.append(f'void {arg_prefix}Positional(char*);')
            self.init.append('this->ap.addPositional('
                             f'p(&ArgParser::{arg_prefix}Positional));')

        # Collect the table's flags as name -> [kind, value].
        flags = {}
        for kind in ('bare', 'optional_parameter'):
            for name in o.get(kind, []):
                flags[name] = [kind, None]
        for kind in ('required_parameter',
                     'required_choices',
                     'optional_choices'):
            for name, value in o.get(kind, {}).items():
                flags[name] = [kind, value]

        for name, [kind, value] in flags.items():
            self.options_without_help.add(f'--{name}')
            add_jdata(name, json_prefix, [kind, value])
            if config is None or name in manual:
                identifier = self.to_identifier(name, arg_prefix, False)
                self.handle_flag(name, identifier, kind, value)
            else:
                identifier = self.to_identifier(name, '', False)
                self.handle_trivial(
                    name, identifier, config, config_prefix, kind, value)

        if is_sub_table:
            identifier = self.to_identifier(table, 'argEnd', False)
            self.decls.append(f'void {identifier}();')
def handle_json_trivial(self, flag_key, fdata):
    """Append a json handler that calls ``config->flag_key`` directly.

    fdata is the flag's entry with a 'tables' dict mapping table
    name to [kind, value]. The kind and value of the last table
    listed are used, together with that table's config from
    self.by_table.
    """
    config = None
    for t, [kind, v] in fdata['tables'].items():
        # We have determined that all tables, if multiple, have
        # the same config.
        config = self.by_table[t]['config']
    with_param = f'[this](char const* p) {{ {config}->{flag_key}(p); }}'
    if kind == 'bare':
        handler = f'addBare([this]() {{ {config}->{flag_key}(); }});'
    elif kind in ('optional_parameter', 'required_parameter'):
        # No optional parameters in json
        handler = f'addParameter({with_param});'
    elif kind in ('optional_choices', 'required_choices'):
        # No optional choices in json
        handler = f'addChoices({v}_choices, {with_param});'
    else:
        return
    self.json_init.append(handler)
def handle_json_manual(self, path):
    """Declare and call a hand-written ``setup...`` method for path.

    The method name is 'setup' followed by path with each '.'
    removed and the letter or digit after it upper-cased.
    """
    def capitalize(match):
        return match.group(1).upper()

    method = re.sub(r'\.([a-zA-Z0-9])', capitalize, f'setup{path}')
    self.json_decls.append(f'void {method}();')
    self.json_init.append(f'{method}();')
def option_to_json_key(self, s):
    """Return the json key for option name s.

    This is to_identifier with no prefix in its non-constant form.
    """
    json_key = self.to_identifier(s, '', False)
    return json_key
def flag_to_schema_key(self, k):
    """Return the schema key for the json entry named k.

    A leading '_' is dropped; otherwise everything up to and
    including the last '.' (the table prefix) is removed. The
    remainder is passed through option_to_json_key.
    """
    bare = k[1:] if k.startswith('_') else re.sub(r'[^\.]+\.', '', k)
    return self.option_to_json_key(bare)
def build_schema(self, j, path, flag, expected, options_seen):
    """Recursively build the schema for json node j.

    j is a node from the json section of job.yml, path its dotted
    location, and flag its key as written there. expected maps the
    flags that must appear to their data; each one found is added to
    options_seen. Handler declarations and initialization code are
    appended to self.json_decls and self.json_init along the way.

    Returns the schema value: a dict or single-element list
    mirroring j, or for a leaf either j itself or, when j is None,
    the flag's help text with ``--option`` references rewritten as
    json keys.

    Raises Exception for unknown keys, described keys that don't
    start with '_', and arrays whose length is not 1.
    """
    if flag in expected:
        options_seen.add(flag)
    elif isinstance(j, str):
        if not flag.startswith('_'):
            raise Exception(f'json: {flag} has a description'
                            ' but doesn\'t start with _')
    elif flag != '' and not flag.startswith('_'):
        raise Exception(f'json: unknown key {flag}')

    if isinstance(j, dict):
        result = {}
        if flag:
            # The top-level dictionary (empty flag) gets no handlers.
            identifier = self.to_identifier(path, '', False)
            self.json_decls.append(f'void begin{identifier}(JSON);')
            self.json_decls.append(f'void end{identifier}();')
            self.json_init.append(
                f'beginDict(bindJSON(&Handlers::begin{identifier}),'
                f' bindBare(&Handlers::end{identifier})); // {path}')
        for child_flag, child in j.items():
            schema_key = self.flag_to_schema_key(child_flag)
            self.json_init.append(f'pushKey("{schema_key}");')
            result[schema_key] = self.build_schema(
                child, f'{path}.{schema_key}', child_flag,
                expected, options_seen)
            self.json_init.append(f'popHandler(); // key: {schema_key}')
        return result

    if isinstance(j, list):
        if len(j) != 1:
            raise Exception('json contains array with length != 1')
        identifier = self.to_identifier(path, '', False)
        self.json_decls.append(f'void begin{identifier}Array(JSON);')
        self.json_decls.append(f'void end{identifier}Array();')
        self.json_init.append(
            f'beginArray(bindJSON(&Handlers::begin{identifier}Array),'
            f' bindBare(&Handlers::end{identifier}Array));'
            f' // {path}[]')
        element = self.build_schema(j[0], path, flag,
                                    expected, options_seen)
        self.json_init.append(
            f'popHandler(); // array: {path}[]')
        return [element]

    # Leaf node.
    leaf = j
    if leaf is None:
        leaf = re.sub(
            r'--(\S+)',
            lambda x: self.option_to_json_key(x.group(1)),
            expected[flag]['help'])
    # A flag is trivial when no table lists it as manual and all of
    # its tables share one config.
    is_trivial = flag in expected
    if is_trivial:
        common_config = None
        for t in expected[flag]['tables']:
            tdata = self.by_table[t]
            if flag in tdata['manual']:
                is_trivial = False
            if common_config is None:
                common_config = tdata['config']
            elif common_config != tdata['config']:
                is_trivial = False
    config_key = self.flag_to_schema_key(flag)
    if is_trivial:
        self.handle_json_trivial(config_key, expected[flag])
    else:
        self.handle_json_manual(path)
    return leaf
def generate_schema(self, data):
    """Build self.schema from data['json'] and check its coverage.

    Raises Exception if the set of options seen while building the
    schema differs from the set of expected options.
    """
    # Check to make sure that every command-line option is
    # represented in data['json'].

    # Build a list of options that we expect. If an option appears
    # once, we just expect to see it once. If it appears in more
    # than one options table, we need to see a separate version of
    # it for each option table. It is represented in job.yml
    # prepended with the table prefix. The table prefix is removed
    # in the schema.
    expected = {}
    for flag, info in self.jdata.items():
        tables = info['tables']
        if len(tables) == 1:
            keys = [flag]
        else:
            keys = [f'{t}.{flag}' for t in sorted(tables)]
        for key in keys:
            expected[key] = {**info}
    options_seen = set()

    # Walk through the json information building the schema as we
    # go. This verifies consistency between command-line options
    # and the json section of the data and builds up a schema by
    # populating with help information as available.
    self.schema = self.build_schema(
        data['json'], '', '', expected, options_seen)
    wanted = set(expected.keys())
    if options_seen != wanted:
        raise Exception('missing from json: ' +
                        str(wanted - options_seen))
def check_keys(self, what, d, exp):
    """Exit with an error unless d is a dict with only expected keys.

    what is a label used in the error message, d the object to
    check, and exp the set of permitted keys. Terminates via
    SystemExit with a message if d is not a dictionary or has keys
    outside exp; returns None otherwise.
    """
    # sys.exit rather than the exit() helper, which the site module
    # only installs for interactive use.
    if not isinstance(d, dict):
        sys.exit(f'{what} is not a dictionary')
    extra = set(d) - exp
    if extra:
        sys.exit(f'{what}: unknown keys = {extra}')
def validate(self, data):
    """Check the keys of data and of every entry in data['options'].

    Problems are reported through check_keys. Option tables are
    labelled by their position in the list so that an error can be
    told apart from one in the top-level dictionary.
    """
    self.check_keys('top', data, set(
        ['choices', 'options', 'json']))
    for index, o in enumerate(data['options']):
        self.check_keys(f'options[{index}]', o, set(
            ['table', 'prefix', 'config', 'config_prefix',
             'manual', 'bare', 'positional',
             'optional_parameter', 'required_parameter',
             'required_choices', 'optional_choices']))
def to_identifier(self, label, prefix, const):
    """Convert label into a C++ identifier.

    Characters other than ASCII letters and digits become '_'. With
    const true, the result is ``prefix_LABEL`` in upper case.
    Otherwise prefix (if not empty) is joined on with '_', each
    lower-case letter following a '_' is capitalized, and all '_'
    are removed, giving a camelCase name.
    """
    cleaned = re.sub(r'[^a-zA-Z0-9]', '_', label)
    if const:
        return f'{prefix}_{cleaned.upper()}'
    joined = f'{prefix}_{cleaned}' if prefix else cleaned
    camel = re.sub(r'_([a-z])',
                   lambda x: x.group(1).upper(),
                   joined)
    return camel.replace('_', '')
if __name__ == '__main__':
    try:
        # All file names used by the script are relative to the
        # directory that contains it.
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        Main().main()
    except KeyboardInterrupt:
        # sys.exit rather than the exit() helper, which the site
        # module only installs for interactive use.
        sys.exit(130)