2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 02:49:00 +00:00
qpdf/generate_auto_job
Jay Berkenbilt c62ab2ee9f QPDFJob: use pointers instead of references for Config
Why? The main methods that create them return smart pointers so that
users can initialize them when needed, which you can't do with
references. Returning pointers instead of references makes for a more
uniform interface.
2022-01-30 13:11:03 -05:00

549 lines
21 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import sys
import argparse
import hashlib
import re
import yaml
import json
import filecmp
from contextlib import contextmanager
# Base name of the script as invoked; it appears in diagnostics, in the
# generated banner, and in the checksum file header.
whoami = os.path.basename(sys.argv[0])
# C++ comment block printed at the top of generated files to mark them
# as machine-generated.
BANNER = f'''//
// This file is automatically generated by {whoami}.
// Edits will be automatically overwritten if the build is
// run in maintainer mode.
//'''
def warn(*args, **kwargs):
    """Print a diagnostic message to standard error.

    Takes the same positional and keyword arguments as ``print``; the
    destination is always ``sys.stderr``.
    """
    print(*args, **kwargs, file=sys.stderr)
@contextmanager
def write_file(filename):
    """Write ``filename`` through a temporary file, replacing it only on change.

    Yields a text-mode file object opened on ``filename + '.tmp'``. When
    the ``with`` body finishes normally, the temporary file is compared
    byte-for-byte with any existing ``filename``: if they are identical
    the temporary file is discarded (leaving the existing file, and its
    timestamp, untouched); otherwise it replaces ``filename``.

    If the ``with`` body raises, the partially written temporary file is
    removed and the exception propagates; ``filename`` is left as it was.
    """
    tmpfile = filename + '.tmp'
    try:
        with open(tmpfile, 'w') as f:
            yield f
    except BaseException:
        # Don't leave a half-written temporary file behind on failure.
        try:
            os.unlink(tmpfile)
        except OSError:
            pass
        raise
    if os.path.exists(filename) and filecmp.cmp(filename, tmpfile, False):
        os.unlink(tmpfile)
    else:
        # os.replace overwrites an existing destination on every
        # platform; os.rename raises on Windows when it exists.
        os.replace(tmpfile, filename)
# Driver for the QPDFJob code generator: holds the lists of input and
# output files and implements the --check and --generate modes.
class Main:
# Input files whose contents are hashed to detect changes (see
# get_hashes). The script itself is included.
SOURCES = [
whoami,
'manual/_ext/qpdf.py',
'job.yml',
'manual/cli.rst',
]
# Generated output files keyed by a short name. These are hashed
# together with SOURCES.
DESTS = {
'decl': 'libqpdf/qpdf/auto_job_decl.hh',
'init': 'libqpdf/qpdf/auto_job_init.hh',
'help': 'libqpdf/qpdf/auto_job_help.hh',
'schema': 'libqpdf/qpdf/auto_job_schema.hh',
# Others are added in top
}
# File in which the checksums of SOURCES and DESTS are recorded.
SUMS = 'job.sums'
def main(self, args=sys.argv[1:], prog=whoami):
    """Entry point: parse ``args`` and hand the result to ``top``.

    ``args`` defaults to the process command line (without the program
    name) and ``prog`` to the script's base name.
    """
    self.top(self.parse_args(args, prog))
def parse_args(self, args, prog):
    """Parse the command line and return the argparse namespace.

    Exactly one of ``--check`` or ``--generate`` must be given; argparse
    exits with a usage error otherwise. The returned namespace has the
    boolean attributes ``check`` and ``generate``.
    """
    parser = argparse.ArgumentParser(
        prog=prog,
        description='Generate files for QPDFJob',
    )
    mxg = parser.add_mutually_exclusive_group(required=True)
    # --check never writes anything: it only reports (via the exit
    # status) whether the recorded checksums still match.
    mxg.add_argument('--check',
                     help='check that generated files are up to date;'
                     ' exit with an error if they are not',
                     action='store_true', default=False)
    mxg.add_argument('--generate',
                     help='generate files from sources',
                     action='store_true', default=False)
    return parser.parse_args(args)
def top(self, options):
    """Load job.yml and run the mode selected on the command line.

    Registers one additional generated header per ``config`` named in
    the ``options`` section of job.yml, then:

    * exits with status 0 if the recorded checksums are current;
    * otherwise, with ``--check``, exits with an error message;
    * otherwise, with ``--generate``, regenerates all outputs.
    """
    with open('job.yml', 'r') as f:
        data = yaml.safe_load(f.read())
    # Extend a per-instance copy so the class-level DESTS table is not
    # modified as a side effect of running one instance.
    self.DESTS = dict(self.DESTS)
    self.config_decls = {}
    self.declared_configs = set()
    for o in data['options']:
        config = o.get('config', None)
        if config is not None:
            self.DESTS[config] = f'include/qpdf/auto_job_{config}.hh'
            self.config_decls[config] = []
    # sys.exit is used rather than the exit() helper, which is only
    # present when the site module has been loaded.
    if self.check_hashes():
        sys.exit(0)
    elif options.check:
        sys.exit(f'{whoami}: auto job inputs have changed')
    elif options.generate:
        self.generate(data)
    else:
        sys.exit(f'{whoami} unknown mode')
def get_hashes(self):
    """Return ``{path: sha256 hex digest}`` for every existing input/output.

    The paths considered are all of ``SOURCES`` plus the values of
    ``DESTS``, visited in sorted order. Files that do not exist are
    simply left out of the result.
    """
    digests = {}
    candidates = [*self.SOURCES, *self.DESTS.values()]
    candidates.sort()
    for path in candidates:
        try:
            with open(path, 'rb') as stream:
                contents = stream.read()
        except FileNotFoundError:
            continue
        digests[path] = hashlib.sha256(contents).hexdigest()
    return digests
def check_hashes(self):
    """Return True if the recorded checksums match the files on disk.

    The checksum file (``self.SUMS``) is read as lines of the form
    ``<path> <digest>``; lines that do not have that shape (such as the
    leading comment) are ignored. A missing or unreadable checksum file
    counts as "not matching" rather than as an error.
    """
    current = self.get_hashes()
    recorded = {}
    try:
        with open(self.SUMS, 'r') as f:
            for line in f:
                m = re.match(r'^(\S+) (\S+)\s*$', line)
                if m:
                    recorded[m.group(1)] = m.group(2)
    except (OSError, UnicodeError):
        # No usable checksum file: treat everything as out of date.
        # Only I/O and decoding problems are tolerated here so that
        # genuine programming errors are not silently hidden.
        return False
    return recorded == current
def update_hashes(self):
    """Rewrite the checksum file from the current state of the files.

    The file starts with a ``# Generated by ...`` comment followed by
    one ``<path> <digest>`` line per entry returned by ``get_hashes``.
    """
    entries = self.get_hashes()
    with open(self.SUMS, 'w') as out:
        out.write(f'# Generated by {whoami}\n')
        out.writelines(
            f'{path} {digest}\n' for path, digest in entries.items())
def generate_doc(self, df, f):
    """Turn help markup read from ``df`` into C++ help code written to ``f``.

    ``df`` is an iterable of text lines (the caller passes the open
    manual source) and ``f`` is a writable text stream. Three kinds of
    marker lines are recognized:

    * ``.. help-topic NAME: SHORT`` starts a help topic;
    * ``.. qpdf:option:: --opt[=value]`` starts an option;
    * ``.. help: SHORT`` (indented under an option) starts its help.

    The indented text following a topic or ``help:`` marker is its long
    text; it ends at the first non-blank line that is not indented
    accordingly, at which point an ``ap.addHelpTopic`` or
    ``ap.addOptionHelp`` call is emitted.

    Side effects: sets ``self.all_topics`` and ``self.referenced_topics``,
    removes documented options from ``self.options_without_help`` and
    records each option's short help in ``self.jdata`` (except for
    options in ``self.help_options``).

    Raises ``Exception`` for malformed markup, help for an unknown
    option, a ``--help=`` reference to an unknown topic, or options that
    were left without help.
    """
    st_top = 0
    st_topic = 1
    st_option = 2
    st_option_help = 3
    state = st_top

    indent = None
    topic = None
    option = None
    short_text = None
    long_text = None

    # Generate a bunch of short static functions rather than a big
    # member function for help. Some compilers have problems with
    # very large member functions in classes in anonymous
    # namespaces.
    help_files = 0
    help_lines = 0

    self.all_topics = set(self.options_without_help)
    self.referenced_topics = set()

    def set_indent(x):
        # The text belonging to a marker is indented by the width of
        # the marker's own prefix.
        nonlocal indent
        indent = ' ' * len(x)

    def append_long_text(line, topic):
        # Accumulate one more line of long text. Returns True when
        # ``line`` terminates the text (and is therefore consumed),
        # False while the text is still being collected.
        nonlocal long_text
        if line == '\n':
            long_text += '\n'
        elif line.startswith(indent):
            long_text += line[len(indent):]
        else:
            long_text = long_text.strip()
            if long_text == '':
                raise Exception(f'missing long text for {topic}')
            long_text += '\n'
            for i in re.finditer(r'--help=([^\.\s]+)', long_text):
                self.referenced_topics.add(i.group(1))
            return True
        return False

    for lineno, line in enumerate(df, start=1):
        if help_lines == 0:
            # Start a new chunk function, closing the previous one.
            if help_files > 0:
                print('}', file=f)
            help_files += 1
            help_lines += 1
            print(f'static void add_help_{help_files}(QPDFArgParser& ap)\n'
                  '{', file=f)
        if state == st_top:
            m = re.match(r'^(\s*\.\. )help-topic (\S+): (.*)$', line)
            if m:
                set_indent(m.group(1))
                topic = m.group(2)
                short_text = m.group(3)
                long_text = ''
                state = st_topic
                continue
            m = re.match(
                r'^(\s*\.\. )qpdf:option:: (([^=\s]+)([= ](.+))?)$',
                line)
            if m:
                if topic is None:
                    raise Exception('option seen before topic')
                set_indent(m.group(1))
                option = m.group(3)
                synopsis = m.group(2)
                if synopsis.endswith('`'):
                    raise Exception(
                        f'stray ` at end of option line (line {lineno})')
                # When the option takes a value, the long text begins
                # with the full synopsis line.
                if synopsis != option:
                    long_text = synopsis + '\n'
                else:
                    long_text = ''
                state = st_option
                continue
        elif state == st_topic:
            if append_long_text(line, topic):
                self.all_topics.add(topic)
                print(f'ap.addHelpTopic("{topic}", "{short_text}",'
                      f' R"({long_text})");', file=f)
                help_lines += 1
                state = st_top
        elif state == st_option:
            if line == '\n' or line.startswith(indent):
                m = re.match(r'^(\s*\.\. )help: (.*)$', line)
                if m:
                    set_indent(m.group(1))
                    short_text = m.group(2)
                    state = st_option_help
            else:
                raise Exception('option without help text')
        elif state == st_option_help:
            if append_long_text(line, option):
                if option in self.options_without_help:
                    self.options_without_help.remove(option)
                else:
                    raise Exception(
                        f'help for unknown option {option},'
                        f' lineno={lineno}')
                if option not in self.help_options:
                    # jdata is keyed by option name without the
                    # leading '--'.
                    self.jdata[option[2:]]['help'] = short_text
                print(f'ap.addOptionHelp("{option}", "{topic}",'
                      f' "{short_text}", R"({long_text})");', file=f)
                help_lines += 1
                state = st_top
        if help_lines == 20:
            # Chunk is full; the next line opens a new function.
            help_lines = 0
    print('}', file=f)
    print('static void add_help(QPDFArgParser& ap)\n{', file=f)
    for i in range(help_files):
        print(f' add_help_{i+1}(ap);', file=f)
    print('ap.addHelpFooter("For detailed help, visit'
          ' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f)
    print('}\n', file=f)
    # Sorted so that the reported name does not depend on set ordering.
    for i in sorted(self.referenced_topics - self.all_topics):
        raise Exception(f'help text referenced --help={i}')
    if self.options_without_help:
        raise Exception(
            'Options without help: ' +
            ', '.join(sorted(self.options_without_help)))
def generate(self, data):
    """Regenerate every output file from ``data`` (the parsed job.yml).

    Validates the data, builds the declaration/initialization tables,
    writes each generated file, and finally records fresh checksums.
    """
    def emit(dest_key, lines):
        # Write the banner followed by ``lines`` to the named output.
        with write_file(self.DESTS[dest_key]) as out:
            print(BANNER, file=out)
            for text in lines:
                print(text, file=out)

    warn(f'{whoami}: regenerating auto job files')
    self.validate(data)
    # Add the built-in help options to tables that we populate as
    # we read job.yml since we won't encounter these in job.yml
    self.help_options = set(
        ['--completion-bash', '--completion-zsh', '--help']
    )
    self.options_without_help = set(self.help_options)
    self.prepare(data)
    emit('decl', self.decls)
    emit('init', self.init)
    with write_file(self.DESTS['help']) as f, \
            open('manual/cli.rst', 'r') as df:
        print(BANNER, file=f)
        self.generate_doc(df, f)
    self.generate_schema(data)
    with write_file(self.DESTS['schema']) as f:
        print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
              json.dumps(self.schema, indent=2, separators=(',', ': ')) +
              ')";', file=f)
    for config_name, declarations in self.config_decls.items():
        emit(config_name, declarations)

    # Update hashes last to ensure that this will be rerun in the
    # event of a failure.
    self.update_hashes()
    # DON'T ADD CODE TO generate AFTER update_hashes
def handle_trivial(self, i, identifier, cfg, prefix, kind, v):
    """Emit code for an option that forwards directly to a config method.

    Appends to ``self.init`` the argument-parser registration for option
    ``i`` (a lambda calling ``cfg->identifier``), and appends the
    matching method declaration to ``self.config_decls[cfg]`` unless an
    identical declaration was already recorded in
    ``self.declared_configs``. ``kind`` selects the registration form
    and ``v`` is the parameter name or choices-table name where one is
    needed.
    """
    forward_param = f'[this](char *x){{{cfg}->{identifier}(x);}}'
    registration = None
    if kind == 'bare':
        forward_bare = f'[this](){{{cfg}->{identifier}();}}'
        registration = f'this->ap.addBare("{i}", {forward_bare});'
    elif kind == 'optional_parameter':
        registration = (
            f'this->ap.addOptionalParameter("{i}", {forward_param});')
    elif kind == 'required_parameter':
        registration = (
            f'this->ap.addRequiredParameter("{i}", {forward_param}'
            f', "{v}");')
    elif kind in ('required_choices', 'optional_choices'):
        required = 'true' if kind == 'required_choices' else 'false'
        registration = (
            f'this->ap.addChoices("{i}", {forward_param}'
            f', {required}, {v}_choices);')
    if registration is not None:
        self.init.append(registration)

    # Generate declarations for config methods separately by
    # config object.
    parameter_list = '' if kind == 'bare' else 'char const* parameter'
    fn = f'{prefix}Config* {identifier}({parameter_list})'
    if fn in self.declared_configs:
        return
    self.declared_configs.add(fn)
    self.config_decls[cfg].append(f'QPDF_DLL {fn};')
def handle_flag(self, i, identifier, kind, v):
    """Emit code for an option handled by an ``ArgParser`` member function.

    Appends the member-function declaration to ``self.decls`` and the
    argument-parser registration for option ``i`` to ``self.init``.
    ``kind`` selects the registration form; ``v`` is the parameter name
    or choices-table name where one is needed. Unrecognized kinds emit
    nothing.
    """
    if kind == 'bare':
        self.decls.append(f'void {identifier}();')
        self.init.append(
            f'this->ap.addBare("{i}", b(&ArgParser::{identifier}));')
        return

    bound = f'p(&ArgParser::{identifier})'
    registrations = {
        'optional_parameter':
            f'this->ap.addOptionalParameter("{i}", {bound});',
        'required_parameter':
            f'this->ap.addRequiredParameter("{i}", {bound}, "{v}");',
        'required_choices':
            f'this->ap.addChoices("{i}", {bound}, true, {v}_choices);',
        'optional_choices':
            f'this->ap.addChoices("{i}", {bound}, false, {v}_choices);',
    }
    registration = registrations.get(kind)
    if registration is None:
        return
    # Every non-bare handler receives the option's parameter.
    self.decls.append(f'void {identifier}(char *);')
    self.init.append(registration)
def prepare(self, data):
    """Build the declaration and initialization tables from job.yml data.

    Populates ``self.decls`` and ``self.init`` (lines of generated C++),
    ``self.jdata`` (per-option record of the tables an option appears
    in), and adds every option to ``self.options_without_help``.
    Options in the ``help`` table are added to ``self.help_options``
    instead of ``self.jdata``. The per-option code is produced by
    ``handle_flag`` or ``handle_trivial``.
    """
    self.decls = []
    self.init = []
    self.jdata = {
        # option: {
        #   tables: set(),
        #   help: string,
        #   QXXXQ something for registering handler
        # }
    }

    def add_jdata(flag, table):
        # Record that ``flag`` appears in ``table``.
        if table == 'help':
            self.help_options.add(f'--{flag}')
        elif flag in self.jdata:
            self.jdata[flag]['tables'].add(table)
        else:
            self.jdata[flag] = {
                'tables': {table},
            }

    # Helpers used by the generated registrations to bind member
    # functions to this ArgParser instance.
    self.init.append('auto b = [this](void (ArgParser::*f)()) {')
    self.init.append(' return QPDFArgParser::bindBare(f, this);')
    self.init.append('};')
    self.init.append('auto p = [this](void (ArgParser::*f)(char *)) {')
    self.init.append(' return QPDFArgParser::bindParam(f, this);')
    self.init.append('};')
    self.init.append('')
    # One null-terminated array of strings per named choice list.
    for k, v in data['choices'].items():
        entries = ''.join(f'"{i}", ' for i in v)
        self.init.append(f'char const* {k}_choices[] = {{{entries}0}};')
    self.init.append('')
    # Name constants for every option table other than main and help.
    for o in data['options']:
        table = o['table']
        if table in ('main', 'help'):
            continue
        i = self.to_identifier(table, 'O', True)
        self.decls.append(f'static constexpr char const* {i} = "{table}";')
    self.decls.append('')
    for o in data['options']:
        table = o['table']
        config = o.get('config', None)
        table_prefix = o.get('prefix', '')
        arg_prefix = 'arg' + table_prefix
        if table == 'main':
            self.init.append('this->ap.selectMainOptionTable();')
        elif table == 'help':
            self.init.append('this->ap.selectHelpOptionTable();')
        else:
            identifier = self.to_identifier(table, 'argEnd', False)
            self.init.append(f'this->ap.registerOptionTable("{table}",'
                             f' b(&ArgParser::{identifier}));')
        if o.get('positional', False):
            self.decls.append(f'void {arg_prefix}Positional(char*);')
            self.init.append('this->ap.addPositional('
                             f'p(&ArgParser::{arg_prefix}Positional));')
        # Collect option -> (kind, value). Kinds are visited in a fixed
        # order, so a later kind wins if an option is listed twice.
        flags = {}
        for kind in ('bare', 'optional_parameter'):
            for i in o.get(kind, []):
                flags[i] = (kind, None)
        for kind in ('required_parameter', 'required_choices',
                     'optional_choices'):
            for i, v in o.get(kind, {}).items():
                flags[i] = (kind, v)
        for i, (kind, v) in flags.items():
            self.options_without_help.add(f'--{i}')
            add_jdata(i, table_prefix or table)
            # Options without a config object, or explicitly marked
            # manual, get a hand-written ArgParser handler.
            if config is None or i in o.get('manual', []):
                identifier = self.to_identifier(i, arg_prefix, False)
                self.handle_flag(i, identifier, kind, v)
            else:
                identifier = self.to_identifier(i, '', False)
                prefix = o.get('config_prefix', table_prefix)
                self.handle_trivial(
                    i, identifier, config, prefix, kind, v)

        if table not in ('main', 'help'):
            identifier = self.to_identifier(table, 'argEnd', False)
            self.decls.append(f'void {identifier}();')
def generate_schema(self, data):
    """Build ``self.schema`` from the ``json`` section of job.yml.

    Walks ``data['json']`` recursively, converting keys to schema keys
    and filling in help text for options whose value is left empty, and
    checks that every command-line option recorded in ``self.jdata`` is
    accounted for in either ``data['json']`` or ``data['no-json']``.

    Raises ``Exception`` on an unknown json key, an array whose length
    is not 1, an entry with no usable value, an option missing from the
    json information, or a name that is not a known option.
    """
    # XXX check data['json'] against what we know from jdata.
    # Ultimately be able to generate a schema as well as
    # JSONHandler and registering stuff.

    # Check to make sure that every command-line option is
    # represented either in data['json'] or data['no-json'].

    # Build a list of options that we expect. If an option appears
    # once, we just expect to see it once. If it appears in more
    # than one options table, we need to see a separate version of
    # it for each option table. It is represented prepended in
    # job.yml with the table prefix. The table prefix is removed
    # in the schema.
    expected = {}
    for k, v in self.jdata.items():
        tables = v['tables']
        # Each expected entry is a private copy of the option's
        # record without its 'tables' member.
        info = {key: val for key, val in v.items() if key != 'tables'}
        if len(tables) == 1:
            expected[k] = info
        else:
            for t in sorted(tables):
                expected[f'{t}.{k}'] = dict(info)
    options_seen = set(data['no-json'])

    self.schema = {}

    def option_to_json_key(s):
        return self.to_identifier(s, '', False)

    # Walk through the json information building the schema as we
    # go. This verifies consistency between command-line options
    # and the json section of the data and builds up a schema by
    # populating with help information as available.
    def build_schema(j, s):
        for k, v in j.items():
            if not (k in expected or
                    k.startswith('_') or
                    isinstance(v, str)):
                raise Exception(f'json: unknown key {k}')
            if k.startswith('_'):
                schema_key = k[1:]
            else:
                # Drop any "table." qualifier.
                schema_key = re.sub(r'[^\.]+\.', '', k)
            schema_key = option_to_json_key(schema_key)
            schema_value = v
            if k in expected:
                options_seen.add(re.sub('^_', '', k))
                if v is None:
                    # No explicit text: use the option's help with
                    # option names rewritten as json keys.
                    schema_value = re.sub(
                        r'--(\S+)',
                        lambda x: option_to_json_key(x.group(1)),
                        expected[k]['help'])
            if isinstance(v, dict):
                schema_value = {}
                build_schema(v, schema_value)
            elif isinstance(v, list):
                if len(v) != 1:
                    raise Exception('json contains array with length != 1')
                if isinstance(v[0], dict):
                    schema_value = [{}]
                    build_schema(v[0], schema_value[0])
            elif schema_value is None:
                raise Exception(f'unknown schema value for {k}')
            s[schema_key] = schema_value

    build_schema(data['json'], self.schema)
    expected_keys = set(expected)
    missing = expected_keys - options_seen
    if missing:
        raise Exception('missing from json: ' + str(missing))
    # Report names that are not options separately; lumping them in
    # with "missing" would print an empty set.
    unknown = options_seen - expected_keys
    if unknown:
        raise Exception('json/no-json name unknown options: ' +
                        str(sorted(unknown)))
def check_keys(self, what, d, exp):
    """Exit with an error unless ``d`` is a dict with only known keys.

    ``what`` labels the item in the error message and ``exp`` is the
    set of permitted keys. Keys in ``exp`` that are absent from ``d``
    are not an error.
    """
    # sys.exit is used rather than the exit() helper, which is only
    # present when the site module has been loaded.
    if not isinstance(d, dict):
        sys.exit(f'{what} is not a dictionary')
    extra = set(d.keys()) - exp
    if extra:
        sys.exit(f'{what}: unknown keys = {extra}')
def validate(self, data):
    """Check job.yml data for unknown keys.

    Verifies the top-level keys and the keys of each entry in
    ``data['options']`` via ``check_keys``, which exits with a message
    on the first problem. Each options entry is labelled with its
    position so that the message identifies the offending entry.
    """
    self.check_keys('top', data, set(
        ['choices', 'options', 'no-json', 'json']))
    option_keys = set(
        ['table', 'prefix', 'config', 'config_prefix',
         'manual', 'bare', 'positional',
         'optional_parameter', 'required_parameter',
         'required_choices', 'optional_choices'])
    for n, o in enumerate(data['options']):
        self.check_keys(f'options[{n}]', o, option_keys)
def to_identifier(self, label, prefix, const):
    """Convert an option or table label into a C++ identifier.

    Characters other than ASCII letters and digits are treated as word
    separators. With ``const`` true the result is
    ``<prefix>_<LABEL_IN_UPPER_CASE>``. Otherwise the label is
    lower-cased, ``prefix`` (if any) is put in front, and the result is
    camel-cased with all separators removed.
    """
    sanitized = re.sub(r'[^a-zA-Z0-9]', '_', label)
    if const:
        return f'{prefix}_{sanitized.upper()}'
    words = sanitized.lower()
    if prefix:
        words = f'{prefix}_{words}'
    # Capitalize the letter after each separator, then drop whatever
    # separators are left (e.g. those followed by a digit).
    camel = re.sub(r'_([a-z])', lambda m: m.group(1).upper(), words)
    return camel.replace('_', '')
if __name__ == '__main__':
    try:
        # All file names used by the generator are relative to the
        # directory that contains this script.
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        Main().main()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C instead of printing a traceback.
        # sys.exit is used rather than the exit() helper, which is only
        # present when the site module has been loaded.
        sys.exit(130)