#!/usr/bin/env python3
"""Generate the QPDFJob auto_job_*.hh files from job.yml and manual/cli.rst.

Run with --generate to rewrite the generated files and the checksum
file, or with --check to fail if the recorded checksums no longer
match the current inputs and outputs.
"""
import os
import sys
import argparse
import hashlib
import re
import json
import filecmp
from contextlib import contextmanager

whoami = os.path.basename(sys.argv[0])
BANNER = f'''//
// This file is automatically generated by {whoami}.
// Edits will be automatically overwritten if the build is
// run in maintainer mode.
//'''


def warn(*args, **kwargs):
    """Print to standard error; accepts the same arguments as print()."""
    print(*args, file=sys.stderr, **kwargs)


@contextmanager
def write_file(filename):
    """Yield a text file object whose contents end up in *filename*.

    Output is written to ``filename + '.tmp'`` first. On normal exit,
    the temporary file is discarded if it is byte-identical to an
    existing *filename* (so the existing file is left untouched);
    otherwise it replaces *filename*. If the body of the ``with``
    statement raises, the temporary file is removed, *filename* is not
    modified, and the exception propagates.
    """
    tmpfile = filename + '.tmp'
    try:
        with open(tmpfile, 'w') as f:
            yield f
    except BaseException:
        # Don't leave a partially written temporary file behind.
        try:
            os.unlink(tmpfile)
        except OSError:
            pass
        raise
    if os.path.exists(filename) and filecmp.cmp(filename, tmpfile, False):
        os.unlink(tmpfile)
    else:
        # os.replace overwrites an existing destination on every
        # platform; os.rename fails on Windows if it exists.
        os.replace(tmpfile, filename)


class Main:
    """Driver that checks checksums and regenerates the auto job files."""

    # Input files whose contents are included in the checksums.
    SOURCES = [
        whoami,
        'manual/_ext/qpdf.py',
        'job.yml',
        'manual/cli.rst',
    ]
    # Generated files, keyed by the kind of content they hold.
    DESTS = {
        'decl': 'libqpdf/qpdf/auto_job_decl.hh',
        'init': 'libqpdf/qpdf/auto_job_init.hh',
        'help': 'libqpdf/qpdf/auto_job_help.hh',
        'schema': 'libqpdf/qpdf/auto_job_schema.hh',
        # Others are added in top
    }
    # File in which the checksums of SOURCES and DESTS are recorded.
    SUMS = 'job.sums'

    def main(self, args=sys.argv[1:], prog=whoami):
        """Parse the command line *args* and run the selected mode."""
        options = self.parse_args(args, prog)
        self.top(options)

    def parse_args(self, args, prog):
        """Return parsed options; exactly one of --check / --generate."""
        parser = argparse.ArgumentParser(
            prog=prog,
            description='Generate files for QPDFJob',
        )
        mxg = parser.add_mutually_exclusive_group(required=True)
        mxg.add_argument('--check',
                         help='update checksums if files are not up to date',
                         action='store_true', default=False)
        mxg.add_argument('--generate',
                         help='generate files from sources',
                         action='store_true', default=False)
        return parser.parse_args(args)

    def top(self, options):
        """Load job.yml and either verify checksums or regenerate files.

        Exits with status 0 when the recorded checksums match. Otherwise
        exits with an error message in --check mode, or regenerates all
        files in --generate mode.
        """
        # PyYAML is the only third-party module this script needs;
        # importing it here keeps argument parsing (including --help)
        # and the helper functions usable when it is not installed.
        import yaml

        with open('job.yml', 'r') as f:
            data = yaml.safe_load(f.read())
        self.config_decls = {}
        self.declared_configs = set()
        for o in data['options']:
            config = o.get('config', None)
            if config is not None:
                # Each config object gets its own generated header.
                self.DESTS[config] = f'include/qpdf/auto_job_{config}.hh'
                self.config_decls[config] = []
        if self.check_hashes():
            sys.exit(0)
        elif options.check:
            sys.exit(f'{whoami}: auto job inputs have changed')
        elif options.generate:
            self.generate(data)
        else:
            sys.exit(f'{whoami} unknown mode')

    def get_hashes(self):
        """Return {path: sha256 hex digest} for SOURCES and DESTS.

        Files that do not exist are omitted from the result.
        """
        hashes = {}
        for path in sorted([*self.SOURCES, *self.DESTS.values()]):
            try:
                with open(path, 'rb') as f:
                    hashes[path] = hashlib.sha256(f.read()).hexdigest()
            except FileNotFoundError:
                pass
        return hashes

    def check_hashes(self):
        """Return True if the checksums in SUMS match the current files.

        An unreadable or missing SUMS file counts as a mismatch.
        """
        hashes = self.get_hashes()
        try:
            old_hashes = {}
            with open(self.SUMS, 'r') as f:
                for line in f:
                    # Lines that are not "<name> <digest>" (such as the
                    # header comment) are ignored.
                    m = re.match(r'^(\S+) (\S+)\s*$', line)
                    if m:
                        old_hashes[m.group(1)] = m.group(2)
        except (OSError, UnicodeError):
            return False
        return old_hashes == hashes

    def update_hashes(self):
        """Rewrite SUMS with the checksums of the current files."""
        hashes = self.get_hashes()
        with open(self.SUMS, 'w') as f:
            print(f'# Generated by {whoami}', file=f)
            for k, v in hashes.items():
                print(f'{k} {v}', file=f)

    def generate_doc(self, df, f):
        """Translate help directives read from *df* into C++ written to *f*.

        *df* is scanned line by line with a small state machine that
        recognizes ``.. help-topic NAME: SHORT`` and
        ``.. qpdf:option:: --OPT`` followed by ``.. help: SHORT``; the
        indented text following a topic or a ``help:`` line is its long
        text. Each completed entry becomes an ``ap.addHelpTopic`` or
        ``ap.addOptionHelp`` call. Options that receive help are removed
        from ``self.options_without_help`` and, unless they are help
        options, their short text is stored in ``self.jdata``.

        Raises Exception for malformed input, for a ``--help=TOPIC``
        reference to an unknown topic, and if any option is left
        without help.
        """
        st_top = 0
        st_topic = 1
        st_option = 2
        st_option_help = 3
        state = st_top

        indent = None
        topic = None
        option = None
        short_text = None
        long_text = None

        # Generate a bunch of short static functions rather than a big
        # member function for help. Some compilers have problems with
        # very large member functions in classes in anonymous
        # namespaces.

        help_files = 0
        help_lines = 0

        self.all_topics = set(self.options_without_help)
        self.referenced_topics = set()

        def set_indent(x):
            nonlocal indent
            indent = ' ' * len(x)

        def append_long_text(line, topic):
            # Accumulate one line of long text. Returns True when
            # *line* is the first line that is neither blank nor
            # indented, i.e. when the long text is complete.
            nonlocal long_text
            if line == '\n':
                long_text += '\n'
            elif line.startswith(indent):
                long_text += line[len(indent):]
            else:
                long_text = long_text.strip()
                if long_text == '':
                    raise Exception(f'missing long text for {topic}')
                long_text += '\n'
                for i in re.finditer(r'--help=([^\.\s]+)', long_text):
                    self.referenced_topics.add(i.group(1))
                return True
            return False

        lineno = 0
        for line in df.readlines():
            if help_lines == 0:
                # Start a new add_help_N function, closing the
                # previous one if there was one.
                if help_files > 0:
                    print('}', file=f)
                help_files += 1
                help_lines += 1
                print(f'static void add_help_{help_files}(QPDFArgParser& ap)\n'
                      '{', file=f)
            lineno += 1
            if state == st_top:
                m = re.match(r'^(\s*\.\. )help-topic (\S+): (.*)$', line)
                if m:
                    set_indent(m.group(1))
                    topic = m.group(2)
                    short_text = m.group(3)
                    long_text = ''
                    state = st_topic
                    continue
                m = re.match(
                    r'^(\s*\.\. )qpdf:option:: (([^=\s]+)([= ](.+))?)$',
                    line)
                if m:
                    if topic is None:
                        raise Exception('option seen before topic')
                    set_indent(m.group(1))
                    option = m.group(3)
                    synopsis = m.group(2)
                    if synopsis.endswith('`'):
                        raise Exception(
                            f'stray ` at end of option line (line {lineno})')
                    # When the option takes an argument, the long text
                    # starts with the full synopsis.
                    if synopsis != option:
                        long_text = synopsis + '\n'
                    else:
                        long_text = ''
                    state = st_option
                    continue
            elif state == st_topic:
                if append_long_text(line, topic):
                    self.all_topics.add(topic)
                    print(f'ap.addHelpTopic("{topic}", "{short_text}",'
                          f' R"({long_text})");', file=f)
                    help_lines += 1
                    state = st_top
            elif state == st_option:
                if line == '\n' or line.startswith(indent):
                    m = re.match(r'^(\s*\.\. )help: (.*)$', line)
                    if m:
                        set_indent(m.group(1))
                        short_text = m.group(2)
                        state = st_option_help
                else:
                    raise Exception('option without help text')
            elif state == st_option_help:
                if append_long_text(line, option):
                    if option in self.options_without_help:
                        self.options_without_help.remove(option)
                    else:
                        raise Exception(
                            f'help for unknown option {option},'
                            f' lineno={lineno}')
                    if option not in self.help_options:
                        # option[2:] drops the leading "--".
                        self.jdata[option[2:]]['help'] = short_text
                    print(f'ap.addOptionHelp("{option}", "{topic}",'
                          f' "{short_text}", R"({long_text})");', file=f)
                    help_lines += 1
                    state = st_top
            if help_lines == 20:
                # Force the next line to open a new function.
                help_lines = 0
        print('}', file=f)
        print('static void add_help(QPDFArgParser& ap)\n{', file=f)
        for i in range(help_files):
            print(f' add_help_{i+1}(ap);', file=f)
        print('ap.addHelpFooter("For detailed help, visit'
              ' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f)
        print('}\n', file=f)
        for i in self.referenced_topics:
            if i not in self.all_topics:
                raise Exception(f'help text referenced --help={i}')
        if self.options_without_help:
            raise Exception(
                'Options without help: ' +
                ', '.join(sorted(self.options_without_help)))

    def generate(self, data):
        """Validate *data* and write every generated file, then SUMS."""
        warn(f'{whoami}: regenerating auto job files')
        self.validate(data)
        # Add the built-in help options to tables that we populate as
        # we read job.yml since we won't encounter these in job.yml
        self.help_options = set(
            ['--completion-bash', '--completion-zsh', '--help']
        )
        self.options_without_help = set(self.help_options)
        self.prepare(data)
        with write_file(self.DESTS['decl']) as f:
            print(BANNER, file=f)
            for i in self.decls:
                print(i, file=f)
        with write_file(self.DESTS['init']) as f:
            print(BANNER, file=f)
            for i in self.init:
                print(i, file=f)
        with write_file(self.DESTS['help']) as f:
            with open('manual/cli.rst', 'r') as df:
                print(BANNER, file=f)
                self.generate_doc(df, f)
        # generate_schema needs the help text that generate_doc stored
        # in self.jdata, so it has to run after generate_doc.
        self.generate_schema(data)
        with write_file(self.DESTS['schema']) as f:
            print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
                  json.dumps(self.schema, indent=2, separators=(',', ': ')) +
                  ')";', file=f)
        for k, v in self.config_decls.items():
            with write_file(self.DESTS[k]) as f:
                print(BANNER, file=f)
                for i in v:
                    print(i, file=f)

        # Update hashes last to ensure that this will be rerun in the
        # event of a failure.
        self.update_hashes()
        # DON'T ADD CODE TO generate AFTER update_hashes

    def handle_trivial(self, i, identifier, cfg, prefix, kind, v):
        """Register option *i* as a direct call to a config method.

        Appends to ``self.init`` the argument parser registration whose
        handler calls ``cfg->identifier(...)``, and appends the
        corresponding method declaration to ``self.config_decls[cfg]``
        unless an identical declaration was already emitted. *kind*
        selects the registration; *v* is the parameter name or the name
        of the choices table, depending on *kind*.
        """
        decl_arg = 1
        if kind == 'bare':
            decl_arg = 0
            self.init.append(f'this->ap.addBare("{i}", '
                             f'[this](){{{cfg}->{identifier}();}});')
        elif kind == 'optional_parameter':
            self.init.append(f'this->ap.addOptionalParameter("{i}", '
                             f'[this](char *x){{{cfg}->{identifier}(x);}});')
        elif kind == 'required_parameter':
            self.init.append(f'this->ap.addRequiredParameter("{i}", '
                             f'[this](char *x){{{cfg}->{identifier}(x);}}'
                             f', "{v}");')
        elif kind == 'required_choices':
            self.init.append(f'this->ap.addChoices("{i}", '
                             f'[this](char *x){{{cfg}->{identifier}(x);}}'
                             f', true, {v}_choices);')
        elif kind == 'optional_choices':
            self.init.append(f'this->ap.addChoices("{i}", '
                             f'[this](char *x){{{cfg}->{identifier}(x);}}'
                             f', false, {v}_choices);')
        # Generate declarations for config methods separately by
        # config object.
        config_prefix = prefix + 'Config'
        arg = ''
        if decl_arg:
            arg = 'char const* parameter'
        fn = f'{config_prefix}& {identifier}({arg})'
        if fn not in self.declared_configs:
            self.declared_configs.add(fn)
            self.config_decls[cfg].append(f'QPDF_DLL {fn};')

    def handle_flag(self, i, identifier, kind, v):
        """Register option *i* with an ArgParser member function handler.

        Appends the member function declaration to ``self.decls`` and
        the argument parser registration to ``self.init``. *kind*
        selects the registration; *v* is the parameter name or the name
        of the choices table, depending on *kind*.
        """
        if kind == 'bare':
            self.decls.append(f'void {identifier}();')
            self.init.append(f'this->ap.addBare("{i}", '
                             f'b(&ArgParser::{identifier}));')
        elif kind == 'optional_parameter':
            self.decls.append(f'void {identifier}(char *);')
            self.init.append(f'this->ap.addOptionalParameter("{i}", '
                             f'p(&ArgParser::{identifier}));')
        elif kind == 'required_parameter':
            self.decls.append(f'void {identifier}(char *);')
            self.init.append(f'this->ap.addRequiredParameter("{i}", '
                             f'p(&ArgParser::{identifier})'
                             f', "{v}");')
        elif kind == 'required_choices':
            self.decls.append(f'void {identifier}(char *);')
            self.init.append(f'this->ap.addChoices("{i}", '
                             f'p(&ArgParser::{identifier})'
                             f', true, {v}_choices);')
        elif kind == 'optional_choices':
            self.decls.append(f'void {identifier}(char *);')
            self.init.append(f'this->ap.addChoices("{i}", '
                             f'p(&ArgParser::{identifier})'
                             f', false, {v}_choices);')

    def prepare(self, data):
        """Build ``self.decls``, ``self.init`` and ``self.jdata`` from *data*.

        Every flag found in ``data['options']`` is also added (with a
        leading ``--``) to ``self.options_without_help``; flags in the
        ``help`` table are added to ``self.help_options`` instead of
        ``self.jdata``.
        """
        self.decls = []
        self.init = []
        self.jdata = {
            # option: {
            #   tables: set(),
            #   help: string,
            #   QXXXQ something for registering handler
            # }
        }

        def add_jdata(flag, table):
            if table == 'help':
                self.help_options.add(f'--{flag}')
            elif flag in self.jdata:
                self.jdata[flag]['tables'].add(table)
            else:
                self.jdata[flag] = {
                    'tables': set([table]),
                }

        self.init.append('auto b = [this](void (ArgParser::*f)()) {')
        self.init.append(' return QPDFArgParser::bindBare(f, this);')
        self.init.append('};')
        self.init.append('auto p = [this](void (ArgParser::*f)(char *)) {')
        self.init.append(' return QPDFArgParser::bindParam(f, this);')
        self.init.append('};')
        self.init.append('')
        for k, v in data['choices'].items():
            values = ''.join(f'"{i}", ' for i in v)
            self.init.append(f'char const* {k}_choices[] = {{' +
                             values + '0};')
        self.init.append('')
        for o in data['options']:
            table = o['table']
            if table in ('main', 'help'):
                continue
            i = self.to_identifier(table, 'O', True)
            self.decls.append(f'static constexpr char const* {i} = "{table}";')
        self.decls.append('')
        for o in data['options']:
            table = o['table']
            config = o.get('config', None)
            table_prefix = o.get('prefix', '')
            arg_prefix = 'arg' + table_prefix
            if table == 'main':
                self.init.append('this->ap.selectMainOptionTable();')
            elif table == 'help':
                self.init.append('this->ap.selectHelpOptionTable();')
            else:
                identifier = self.to_identifier(table, 'argEnd', False)
                self.init.append(f'this->ap.registerOptionTable("{table}",'
                                 f' b(&ArgParser::{identifier}));')
            if o.get('positional', False):
                self.decls.append(f'void {arg_prefix}Positional(char*);')
                self.init.append('this->ap.addPositional('
                                 f'p(&ArgParser::{arg_prefix}Positional));')
            # flag name -> [kind, parameter name or choices table name]
            flags = {}
            for i in o.get('bare', []):
                flags[i] = ['bare', None]
            for i in o.get('optional_parameter', []):
                flags[i] = ['optional_parameter', None]
            for i, v in o.get('required_parameter', {}).items():
                flags[i] = ['required_parameter', v]
            for i, v in o.get('required_choices', {}).items():
                flags[i] = ['required_choices', v]
            for i, v in o.get('optional_choices', {}).items():
                flags[i] = ['optional_choices', v]

            for i, [kind, v] in flags.items():
                self.options_without_help.add(f'--{i}')
                add_jdata(i, table_prefix or table)
                if config is None or i in o.get('manual', []):
                    identifier = self.to_identifier(i, arg_prefix, False)
                    self.handle_flag(i, identifier, kind, v)
                else:
                    identifier = self.to_identifier(i, '', False)
                    prefix = o.get('config_prefix', table_prefix)
                    self.handle_trivial(
                        i, identifier, config, prefix, kind, v)

            if table not in ('main', 'help'):
                identifier = self.to_identifier(table, 'argEnd', False)
                self.decls.append(f'void {identifier}();')

    def generate_schema(self, data):
        """Build ``self.schema`` from ``data['json']`` and ``self.jdata``.

        Raises Exception if ``data['json']`` contains an unknown key or
        a list whose length is not 1, or if some expected option
        appears in neither ``data['json']`` nor ``data['no-json']``.
        """
        # XXX check data['json'] against what we know from jdata.
        # Ultimately be able to generate a schema as well as
        # JSONHandler and registering stuff.

        # Check to make sure that every command-line option is
        # represented either in data['json'] or data['no-json'].

        # Build a list of options that we expect. If an option appears
        # once, we just expect to see it once. If it appears in more
        # than one options table, we need to see a separate version of
        # it for each option table. It is represented prepended in
        # job.yml with the table prefix. The table prefix is removed
        # in the schema.
        expected = {}
        for k, v in self.jdata.items():
            tables = v['tables']
            if len(tables) == 1:
                expected[k] = {**v}
            else:
                for t in sorted(tables):
                    expected[f'{t}.{k}'] = {**v}
        for v in expected.values():
            del v['tables']
        options_seen = set(data['no-json'])

        self.schema = {}

        def option_to_json_key(s):
            return self.to_identifier(s, '', False)

        # Walk through the json information building the schema as we
        # go. This verifies consistency between command-line options
        # and the json section of the data and builds up a schema by
        # populating with help information as available.
        def build_schema(j, s):
            for k, v in j.items():
                if not (k in expected or
                        k.startswith('_') or
                        isinstance(v, str)):
                    raise Exception(f'json: unknown key {k}')
                if k.startswith('_'):
                    schema_key = k[1:]
                else:
                    # Strip the table prefix ("table.option").
                    schema_key = re.sub(r'[^\.]+\.', '', k)
                schema_key = option_to_json_key(schema_key)
                schema_value = v
                if k in expected:
                    options_seen.add(re.sub('^_', '', k))
                    if v is None:
                        # Use the option's short help, with any
                        # --option names rewritten as JSON keys.
                        schema_value = re.sub(
                            r'--(\S+)',
                            lambda x: option_to_json_key(x.group(1)),
                            expected[k]['help'])
                if isinstance(v, dict):
                    schema_value = {}
                    build_schema(v, schema_value)
                elif isinstance(v, list):
                    if len(v) != 1:
                        raise Exception('json contains array with length != 1')
                    if isinstance(v[0], dict):
                        schema_value = [{}]
                        build_schema(v[0], schema_value[0])
                elif schema_value is None:
                    raise Exception(f'unknown schema value for {k}')
                s[schema_key] = schema_value

        build_schema(data['json'], self.schema)
        if options_seen != set(expected.keys()):
            raise Exception('missing from json: ' +
                            str(set(expected.keys()) - options_seen))

    def check_keys(self, what, d, exp):
        """Exit with an error unless *d* is a dict whose keys are in *exp*.

        *what* names the object being checked in the error message.
        """
        if not isinstance(d, dict):
            sys.exit(f'{what} is not a dictionary')
        extra = set(d.keys()) - exp
        if extra:
            sys.exit(f'{what}: unknown keys = {extra}')

    def validate(self, data):
        """Exit with an error if *data* or an options entry has unknown keys."""
        self.check_keys('top', data, set(
            ['choices', 'options', 'no-json', 'json']))
        for n, o in enumerate(data['options']):
            self.check_keys(f'options[{n}]', o, set(
                ['table', 'prefix', 'config', 'config_prefix',
                 'manual', 'bare', 'positional',
                 'optional_parameter', 'required_parameter',
                 'required_choices', 'optional_choices']))

    def to_identifier(self, label, prefix, const):
        """Convert *label* into an identifier.

        Characters other than ASCII letters and digits become
        underscores. With *const* true the result is
        ``PREFIX_UPPER_CASE``; otherwise it is lowerCamelCase with
        *prefix* (if non-empty) at the front.
        """
        identifier = re.sub(r'[^a-zA-Z0-9]', '_', label)
        if const:
            identifier = f'{prefix}_{identifier.upper()}'
        else:
            if prefix:
                identifier = f'{prefix}_{identifier.lower()}'
            else:
                identifier = identifier.lower()
            identifier = re.sub(r'_([a-z])',
                                lambda x: x.group(1).upper(),
                                identifier).replace('_', '')
        return identifier


if __name__ == '__main__':
    try:
        # All paths used by this script are relative to its own
        # directory.
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        Main().main()
    except KeyboardInterrupt:
        sys.exit(130)