2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-31 14:01:59 +00:00
qpdf/generate_auto_job

591 lines
22 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python3
import os
import sys
import argparse
import hashlib
import re
import yaml
import json
import filecmp
from contextlib import contextmanager
whoami = os.path.basename(sys.argv[0])
BANNER = f'''//
// This file is automatically generated by {whoami}.
// Edits will be automatically overwritten if the build is
// run in maintainer mode.
//'''
def warn(*args, **kwargs):
print(*args, file=sys.stderr, **kwargs)
@contextmanager
def write_file(filename):
tmpfile = filename + '.tmp'
with open(tmpfile, 'w') as f:
yield f
if os.path.exists(filename) and filecmp.cmp(filename, tmpfile, False):
os.unlink(tmpfile)
else:
os.rename(tmpfile, filename)
# QXXXQ
# These need manual handlers.
complex = set([
'add-attachment',
'copy-attachments-from',
'encrypt',
'overlay',
'pages',
'underlay',
])
# QXXXQ
# These are trivial but not in main and so need a different config
# object. Some are in more than one table.
not_yet = set([
'accessibility',
'allow-insecure',
'annotate',
'assemble',
'cleartext-metadata',
'creationdate',
'decode-level',
'description',
'extract',
'filename',
'force-R5',
'force-V4',
'form',
'from',
'job-json-file',
'key',
'mimetype',
'moddate',
'modify',
'modify-other',
'object-streams',
'password-file',
'password-mode',
'print',
'remove-unreferenced-resources',
'repeat',
'replace',
'rotate',
'show-object',
'stream-data',
'to',
'use-aes',
])
class Main:
SOURCES = [
whoami,
'manual/_ext/qpdf.py',
'job.yml',
'manual/cli.rst',
]
DESTS = {
'decl': 'libqpdf/qpdf/auto_job_decl.hh',
'init': 'libqpdf/qpdf/auto_job_init.hh',
'help': 'libqpdf/qpdf/auto_job_help.hh',
'schema': 'libqpdf/qpdf/auto_job_schema.hh',
}
SUMS = 'job.sums'
def main(self, args=sys.argv[1:], prog=whoami):
options = self.parse_args(args, prog)
self.top(options)
def parse_args(self, args, prog):
parser = argparse.ArgumentParser(
prog=prog,
description='Generate files for QPDFJob',
)
mxg = parser.add_mutually_exclusive_group(required=True)
mxg.add_argument('--check',
help='update checksums if files are not up to date',
action='store_true', default=False)
mxg.add_argument('--generate',
help='generate files from sources',
action='store_true', default=False)
return parser.parse_args(args)
def top(self, options):
if self.check_hashes():
exit(0)
elif options.check:
exit(f'{whoami}: auto job inputs have changed')
elif options.generate:
self.generate()
else:
exit(f'{whoami} unknown mode')
def get_hashes(self):
hashes = {}
for i in sorted([*self.SOURCES, *self.DESTS.values()]):
m = hashlib.sha256()
try:
with open(i, 'rb') as f:
m.update(f.read())
hashes[i] = m.hexdigest()
except FileNotFoundError:
pass
return hashes
def check_hashes(self):
hashes = self.get_hashes()
match = False
try:
old_hashes = {}
with open(self.SUMS, 'r') as f:
for line in f.readlines():
m = re.match(r'^(\S+) (\S+)\s*$', line)
if m:
old_hashes[m.group(1)] = m.group(2)
match = old_hashes == hashes
except Exception:
pass
return match
def update_hashes(self):
hashes = self.get_hashes()
with open(self.SUMS, 'w') as f:
print(f'# Generated by {whoami}', file=f)
for k, v in hashes.items():
print(f'{k} {v}', file=f)
def generate_doc(self, df, f):
st_top = 0
st_topic = 1
st_option = 2
st_option_help = 3
state = st_top
indent = None
topic = None
option = None
short_text = None
long_text = None
# Generate a bunch of short static functions rather than a big
# member function for help. Some compilers have problems with
# very large member functions in classes in anonymous
# namespaces.
help_files = 0
help_lines = 0
self.all_topics = set(self.options_without_help)
self.referenced_topics = set()
def set_indent(x):
nonlocal indent
indent = ' ' * len(x)
def append_long_text(line, topic):
nonlocal indent, long_text
if line == '\n':
long_text += '\n'
elif line.startswith(indent):
long_text += line[len(indent):]
else:
long_text = long_text.strip()
if long_text == '':
raise Exception(f'missing long text for {topic}')
long_text += '\n'
for i in re.finditer(r'--help=([^\.\s]+)', long_text):
self.referenced_topics.add(i.group(1))
return True
return False
lineno = 0
for line in df.readlines():
if help_lines == 0:
if help_files > 0:
print('}', file=f)
help_files += 1
help_lines += 1
print(f'static void add_help_{help_files}(QPDFArgParser& ap)\n'
'{', file=f)
lineno += 1
if state == st_top:
m = re.match(r'^(\s*\.\. )help-topic (\S+): (.*)$', line)
if m:
set_indent(m.group(1))
topic = m.group(2)
short_text = m.group(3)
long_text = ''
state = st_topic
continue
m = re.match(
r'^(\s*\.\. )qpdf:option:: (([^=\s]+)([= ](.+))?)$',
line)
if m:
if topic is None:
raise Exception('option seen before topic')
set_indent(m.group(1))
option = m.group(3)
synopsis = m.group(2)
if synopsis.endswith('`'):
raise Exception(
f'stray ` at end of option line (line {lineno})')
if synopsis != option:
long_text = synopsis + '\n'
else:
long_text = ''
state = st_option
continue
elif state == st_topic:
if append_long_text(line, topic):
self.all_topics.add(topic)
print(f'ap.addHelpTopic("{topic}", "{short_text}",'
f' R"({long_text})");', file=f)
help_lines += 1
state = st_top
elif state == st_option:
if line == '\n' or line.startswith(indent):
m = re.match(r'^(\s*\.\. )help: (.*)$', line)
if m:
set_indent(m.group(1))
short_text = m.group(2)
state = st_option_help
else:
raise Exception('option without help text')
state = st_top
elif state == st_option_help:
if append_long_text(line, option):
if option in self.options_without_help:
self.options_without_help.remove(option)
else:
raise Exception(
f'help for unknown option {option},'
f' lineno={lineno}')
if option not in self.help_options:
# QXXXQ also need to exclude help table
self.jdata[option[2:]]['help'] = short_text
print(f'ap.addOptionHelp("{option}", "{topic}",'
f' "{short_text}", R"({long_text})");', file=f)
help_lines += 1
state = st_top
if help_lines == 20:
help_lines = 0
print('}', file=f)
print('static void add_help(QPDFArgParser& ap)\n{', file=f)
for i in range(help_files):
print(f' add_help_{i+1}(ap);', file=f)
print('ap.addHelpFooter("For detailed help, visit'
' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f)
print('}\n', file=f)
for i in self.referenced_topics:
if i not in self.all_topics:
raise Exception(f'help text referenced --help={i}')
for i in self.options_without_help:
raise Exception(
'Options without help: ' +
', '.join(self.options_without_help))
def generate(self):
warn(f'{whoami}: regenerating auto job files')
with open('job.yml', 'r') as f:
data = yaml.safe_load(f.read())
self.validate(data)
# Add the built-in help options to tables that we populate as
# we read job.yml since we won't encounter these in job.yml
self.help_options = set(
['--completion-bash', '--completion-zsh', '--help']
)
self.options_without_help = set(self.help_options)
self.prepare(data)
with write_file(self.DESTS['decl']) as f:
print(BANNER, file=f)
for i in self.decls:
print(i, file=f)
with write_file(self.DESTS['init']) as f:
print(BANNER, file=f)
for i in self.init:
print(i, file=f)
with write_file(self.DESTS['help']) as f:
with open('manual/cli.rst', 'r') as df:
print(BANNER, file=f)
self.generate_doc(df, f)
self.generate_schema(data)
with write_file(self.DESTS['schema']) as f:
print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
json.dumps(self.schema, indent=2, separators=(',', ': ')) +
')";', file=f)
# Update hashes last to ensure that this will be rerun in the
# event of a failure.
self.update_hashes()
# DON'T ADD CODE TO generate AFTER update_hashes
def handle_trivial(self, i, identifier, cfg, kind, v):
# QXXXQ could potentially generate declarations for config
# methods separately by config object
if kind == 'bare':
self.init.append(f'this->ap.addBare("{i}", '
f'[this](){{{cfg}->{identifier}();}});')
elif kind == 'optional_parameter':
self.init.append(f'this->ap.addOptionalParameter("{i}", '
f'[this](char *x){{{cfg}->{identifier}(x);}});')
elif kind == 'required_parameter':
self.init.append(f'this->ap.addRequiredParameter("{i}", '
f'[this](char *x){{{cfg}->{identifier}(x);}}'
f', "{v}");')
elif kind == 'required_choices':
self.init.append(f'this->ap.addChoices("{i}", '
f'[this](char *x){{{cfg}->{identifier}(x);}}'
f', true, {v}_choices);')
elif kind == 'optional_choices':
self.init.append(f'this->ap.addChoices("{i}", '
f'[this](char *x){{{cfg}->{identifier}(x);}}'
f', false, {v}_choices);')
def handle_flag(self, i, identifier, kind, v):
if kind == 'bare':
self.decls.append(f'void {identifier}();')
self.init.append(f'this->ap.addBare("{i}", '
f'b(&ArgParser::{identifier}));')
elif kind == 'optional_parameter':
self.decls.append(f'void {identifier}(char *);')
self.init.append(f'this->ap.addOptionalParameter("{i}", '
f'p(&ArgParser::{identifier}));')
elif kind == 'required_parameter':
self.decls.append(f'void {identifier}(char *);')
self.init.append(f'this->ap.addRequiredParameter("{i}", '
f'p(&ArgParser::{identifier})'
f', "{v}");')
elif kind == 'required_choices':
self.decls.append(f'void {identifier}(char *);')
self.init.append(f'this->ap.addChoices("{i}", '
f'p(&ArgParser::{identifier})'
f', true, {v}_choices);')
elif kind == 'optional_choices':
self.decls.append(f'void {identifier}(char *);')
self.init.append(f'this->ap.addChoices("{i}", '
f'p(&ArgParser::{identifier})'
f', false, {v}_choices);')
def prepare(self, data):
self.decls = []
self.init = []
self.jdata = {
# option: {
# tables: set(),
# help: string,
# QXXXQ something for registering handler
# }
}
def add_jdata(flag, table):
nonlocal self
if table == 'help':
self.help_options.add(f'--{flag}')
elif flag in self.jdata:
self.jdata[flag]['tables'].add(table)
else:
self.jdata[flag] = {
'tables': set([table]),
}
self.init.append('auto b = [this](void (ArgParser::*f)()) {')
self.init.append(' return QPDFArgParser::bindBare(f, this);')
self.init.append('};')
self.init.append('auto p = [this](void (ArgParser::*f)(char *)) {')
self.init.append(' return QPDFArgParser::bindParam(f, this);')
self.init.append('};')
self.init.append('')
for k, v in data['choices'].items():
s = f'char const* {k}_choices[] = {{'
for i in v:
s += f'"{i}", '
self.init.append(s + '0};')
self.init.append('')
for o in data['options']:
table = o['table']
if table in ('main', 'help'):
continue
i = self.to_identifier(table, 'O', True)
self.decls.append(f'static constexpr char const* {i} = "{table}";')
self.decls.append('')
for o in data['options']:
table = o['table']
config = o.get('config', None)
table_prefix = o.get('prefix', table)
if table == 'main':
self.init.append('this->ap.selectMainOptionTable();')
elif table == 'help':
self.init.append('this->ap.selectHelpOptionTable();')
else:
identifier = self.to_identifier(table, 'argEnd', False)
self.init.append(f'this->ap.registerOptionTable("{table}",'
f' b(&ArgParser::{identifier}));')
prefix = 'arg' + o.get('prefix', '')
if o.get('positional', False):
identifier = self.to_identifier(i, prefix, False)
self.decls.append(f'void {prefix}Positional(char*);')
self.init.append('this->ap.addPositional('
f'p(&ArgParser::{prefix}Positional));')
flags = {}
for i in o.get('bare', []):
flags[i] = ['bare', None]
for i in o.get('optional_parameter', []):
flags[i] = ['optional_parameter', None]
for i, v in o.get('required_parameter', {}).items():
flags[i] = ['required_parameter', v]
for i, v in o.get('required_choices', {}).items():
flags[i] = ['required_choices', v]
for i, v in o.get('optional_choices', {}).items():
flags[i] = ['optional_choices', v]
self.options_without_help.add(f'--{i}')
for i, [kind, v] in flags.items():
self.options_without_help.add(f'--{i}')
add_jdata(i, table_prefix)
# QXXXQ complex, not_yet
if i in complex or i in not_yet or config is None:
identifier = self.to_identifier(i, prefix, False)
self.handle_flag(i, identifier, kind, v)
else:
identifier = self.to_identifier(i, '', False)
self.handle_trivial(i, identifier, config, kind, v)
if table not in ('main', 'help'):
identifier = self.to_identifier(table, 'argEnd', False)
self.decls.append(f'void {identifier}();')
for o in data['options']:
table = o['table']
table_prefix = o.get('prefix', table)
if 'from_table' not in o:
continue
if table == 'main':
self.init.append('this->ap.selectMainOptionTable();')
elif table == 'help':
self.init.append('this->ap.selectHelpOptionTable();')
else:
self.init.append(f'this->ap.selectOptionTable("{table}");')
ft = o['from_table']
other_table = ft['table']
for j in ft['options']:
self.init.append('this->ap.copyFromOtherTable'
f'("{j}", "{other_table}");')
add_jdata(j, table_prefix)
def generate_schema(self, data):
# XXX check data['json'] against what we know from jdata.
# Ultimately be able to generate a schema as well as
# JSONHandler and registering stuff.
# Check to make sure that every command-line option is
# represented either in data['json'] or data['no-json'].
# Build a list of options that we expect. If an option appears
# once, we just expect to see it once. If it appears in more
# than one options table, we need to see a separate version of
# it for each option table. It is represented prepended in
# job.yml with the table prefix. The table prefix is removed
# in the schema.
expected = {}
for k, v in self.jdata.items():
tables = v['tables']
if len(tables) == 1:
expected[k] = {**v}
else:
for t in sorted(tables):
expected[f'{t}.{k}'] = {**v}
for _, v in expected.items():
del v['tables']
options_seen = set(data['no-json'])
self.schema = {}
def option_to_json_key(s):
return self.to_identifier(s, '', False)
# Walk through the json information building the schema as we
# go. This verifies consistency between command-line options
# and the json section of the data and builds up a schema by
# populating with help information as available.
def build_schema(j, s):
for k, v in j.items():
if not (k in expected or
k.startswith('_') or
isinstance(v, str)):
raise Exception(f'json: unknown key {k}')
if k.startswith('_'):
schema_key = k[1:]
else:
schema_key = re.sub(r'[^\.]+\.', '', k)
schema_key = option_to_json_key(schema_key)
schema_value = v
if k in expected:
options_seen.add(re.sub('^_', '', k))
if v is None:
schema_value = re.sub(
r'--(\S+)',
lambda x: option_to_json_key(x.group(1)),
expected[k]['help'])
if (isinstance(v, dict)):
schema_value = {}
build_schema(v, schema_value)
elif (isinstance(v, list)):
if len(v) != 1:
raise Exception('json contains array with length != 1')
if isinstance(v[0], dict):
schema_value = [{}]
build_schema(v[0], schema_value[0])
elif schema_value is None:
raise Exception(f'unknown schema value for {k}')
s[schema_key] = schema_value
build_schema(data['json'], self.schema)
if options_seen != set(expected.keys()):
raise Exception('missing from json: ' +
str(set(expected.keys()) - options_seen))
def check_keys(self, what, d, exp):
if not isinstance(d, dict):
exit(f'{what} is not a dictionary')
actual = set(d.keys())
extra = actual - exp
if extra:
exit(f'{what}: unknown keys = {extra}')
def validate(self, data):
self.check_keys('top', data, set(
['choices', 'options', 'no-json', 'json']))
for o in data['options']:
self.check_keys('top', o, set(
['table', 'prefix', 'config', 'bare', 'positional',
'optional_parameter', 'required_parameter',
'required_choices', 'optional_choices', 'from_table']))
def to_identifier(self, label, prefix, const):
identifier = re.sub(r'[^a-zA-Z0-9]', '_', label)
if const:
identifier = f'{prefix}_{identifier.upper()}'
else:
if prefix:
identifier = f'{prefix}_{identifier.lower()}'
else:
identifier = identifier.lower()
identifier = re.sub(r'_([a-z])',
lambda x: x.group(1).upper(),
identifier).replace('_', '')
return identifier
if __name__ == '__main__':
try:
os.chdir(os.path.dirname(os.path.realpath(__file__)))
Main().main()
except KeyboardInterrupt:
exit(130)