mirror of
https://github.com/qpdf/qpdf.git
synced 2024-11-14 08:44:04 +00:00
953 lines
40 KiB
Python
Executable File
953 lines
40 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import hashlib
|
|
import re
|
|
import yaml
|
|
import json
|
|
import filecmp
|
|
from contextlib import contextmanager
|
|
|
|
# The purpose of this code is to automatically generate various parts
|
|
# of the QPDFJob class. It is fairly complicated and extremely
|
|
# bespoke, so understanding it is important if modifications are to be
|
|
# made.
|
|
|
|
# Documentation of QPDFJob is divided among three places:
|
|
#
|
|
# * "HOW TO ADD A COMMAND-LINE ARGUMENT" in README-maintainer provides
|
|
# a quick reminder for how to add a command-line argument
|
|
#
|
|
# * This file has a detailed explanation about how QPDFJob and
|
|
# generate_auto_job work together
|
|
#
|
|
# * The manual ("QPDFJob Design" in qpdf-job.rst) discusses the design
|
|
# approach, rationale, and evolution of QPDFJob.
|
|
#
|
|
# QPDFJob solved the problem of moving extensive functionality that
|
|
# lived in qpdf.cc into the library. The QPDFJob class consists of
|
|
# four major sections:
|
|
#
|
|
# * The run() method and its subsidiaries are responsible for
|
|
# performing the actual operations on PDF files. This is implemented
|
|
# in QPDFJob.cc
|
|
#
|
|
# * The nested Config class and the other classes it creates provide
|
|
# an API for setting up a QPDFJob instance and correspond to the
|
|
# command-line arguments of the qpdf executable. This is implemented
|
|
# in QPDFJob_config.cc
|
|
#
|
|
# * The argument parsing code reads an argv array and calls
|
|
# configuration methods. This is implemented in QPDFJob_argv.cc. The
|
|
# argument parsing logic itself is implemented in the QPDFArgParser
|
|
# class.
|
|
#
|
|
# * The job JSON handling code, which reads a QPDFJob JSON file and
|
|
# calls configuration methods. This is implemented in
|
|
# QPDFJob_json.cc. The JSON parsing code is in the JSON class. A
|
|
# sax-like JSON handler class that calls callbacks in response to
|
|
# items in the JSON is implemented in the JSONHandler class.
|
|
#
|
|
# This code has the job of ensuring that configuration, command-line
|
|
# arguments, and JSON are all consistent and complete so that a
|
|
# developer or user can freely move among those different ways of
|
|
# interacting with QPDFJob in a predictable fashion. In addition, help
|
|
# information for each option appears in manual/cli.rst, and that
|
|
# information is used in the creation of the job JSON schema and to supply
|
|
# help text to QPDFArgParser. This code also ensures that there is an
|
|
# exact match between options in job.yml and options in cli.rst.
|
|
#
|
|
# The job.yml file contains the data that drives this code. To
|
|
# understand job.yml, here are some important concepts.
|
|
#
|
|
# QPDFArgParser option table. There is support for positional
|
|
# arguments, options consisting of flags and optional parameters, and
|
|
# subparsers that start with a regular parameterless flag, have their
|
|
# own positional and option sections, and are terminated with -- by
|
|
# itself. Examples of this include --encrypt and --pages. An "option
|
|
# table" contains an optional positional argument handler and a list
|
|
# of valid options with specifications about their parameters. There
|
|
# are three kinds of option tables:
|
|
#
|
|
# * The built-in "help" option table contains help commands, like
|
|
# --help and --version, that are only valid when they appear as the
|
|
# single command-line argument.
|
|
#
|
|
# * The "main" option table contains the options that are valid
|
|
# starting at the beginning of argument parsing.
|
|
#
|
|
# * A named option table can be started manually by the argument
|
|
# parsing code to switch the argument parser's context. Switching
|
|
# the parser to a new option table is manual (via a call to
|
|
# selectOptionTable). Context reverts to the main option table
|
|
# automatically when -- is encountered.
|
|
#
|
|
# In QPDFJob.hh, there is a Config class for each option table except
|
|
# help.
|
|
#
|
|
# Option type: bare, required/optional parameter, required/optional
|
|
# choices. A bare argument is just a flag, like --qdf. A parameter
|
|
# option takes an arbitrary parameter, like --password. A choices
|
|
# option takes one of a fixed list of choices, like --object-streams.
|
|
# If a parameter or choices option's parameter is optional, the empty
|
|
# string may be specified as an option, such as --collate (or
|
|
# --collate=). For a bare option, --option= is always the same as just
|
|
# --option. This makes it possible to switch an option from bare to
|
|
# optional choice to optional parameter all without breaking
|
|
# compatibility.
|
|
#
|
|
# JSON "schema". This is a qpdf-specific "schema" for JSON. It is not
|
|
# related to any kind of standard JSON schema. It is described in
|
|
# JSON.hh and in the manual. QPDFJob uses the JSON "schema" in a mode
|
|
# in which keys in the schema are all optional in the JSON object.
|
|
#
|
|
# Here is the mapping between configuration, argv, and JSON.
|
|
#
|
|
# The help options table is implemented solely for argv processing and
|
|
# has no counterpart in configuration or JSON.
|
|
#
|
|
# The config() method returns a shared pointer to a Config object.
|
|
# Every command-line option in the main option table has a
|
|
# corresponding method in Config whose name is the option converted to
|
|
# camel case. For bare options and options with optional parameters, a
|
|
# version exists that takes no arguments. For other than bare options,
|
|
# a version exists, possibly in addition, that takes a std::string
|
|
# const&. For example, the --qdf flag implies a qdf() method in
|
|
# Config, and the --object-streams flag implies an
|
|
# objectStreams(std::string const&) method in Config. For flags in
|
|
# option tables, the method is declared inside a config class specific
|
|
# to the option table. The mapping between option tables and config
|
|
# classes is explicit in job.yml. Positional arguments are handled
|
|
# individually and manually -- see QPDFJob.hh in the CONFIGURATION
|
|
# section for details. See examples/qpdf-job.cc for an example.
|
|
#
|
|
# To understand the rest, start at main and follow comments in the
|
|
# code.
|
|
|
|
# Base name of this script as it was invoked. It is used as a prefix in
# messages, in the banners below, and as the first entry in Main.SOURCES.
whoami = os.path.basename(sys.argv[0])

# Comment header printed at the top of the generated C++ header files.
BANNER = f'''//
// This file is automatically generated by {whoami}.
// Edits will be automatically overwritten if the build is
// run in maintainer mode.
//
// clang-format off
//'''

# Comment header printed at the top of the generated man page.
MAN_BANNER = f'''.\\"
.\\" This file is automatically generated by {whoami}.
.\\" Edits will be automatically overwritten if the build is
.\\" run in maintainer mode.
.\\"
'''
|
|
|
|
def warn(*args, **kwargs):
    """Print a message to standard error.

    Positional and keyword arguments are passed straight through to
    print(); the output stream is always sys.stderr.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
|
|
|
|
|
|
@contextmanager
def write_file(filename):
    """Write a file through a temporary, touching the target only on change.

    Yields a text file object open on ``filename + '.tmp'``. When the
    ``with`` block finishes normally, the temporary file is compared
    byte for byte with any existing ``filename``: if they are identical
    the temporary file is discarded and the target is left untouched;
    otherwise the temporary file replaces the target.

    If the ``with`` block raises, the temporary file is removed, the
    target is left as it was, and the exception propagates.
    """
    tmpfile = filename + '.tmp'
    try:
        with open(tmpfile, 'w') as f:
            yield f
    except BaseException:
        # Don't leave a partially written temporary file behind.
        try:
            os.unlink(tmpfile)
        except FileNotFoundError:
            pass
        raise
    if os.path.exists(filename) and filecmp.cmp(filename, tmpfile, False):
        os.unlink(tmpfile)
    else:
        # os.replace overwrites an existing target on every platform;
        # os.rename raises on Windows when the target already exists.
        os.replace(tmpfile, filename)
|
|
|
|
|
|
class Main:
    """Checks and regenerates the automatically generated QPDFJob files."""

    # SOURCES is a list of source files whose contents are used by
    # this program. If they change, we are out of date.
    SOURCES = [
        # Keep this list in sync with CMakeLists.txt: auto_job_inputs
        whoami,
        'CMakeLists.txt',
        'manual/_ext/qpdf.py',
        'job.yml',
        'manual/cli.rst',
        'manual/qpdf.1.in',
    ]
    # DESTS is a map to the output files this code generates. These
    # generated files, as well as those added to DESTS later in the
    # code, are included in various places by QPDFJob.hh or any of the
    # implementing QPDFJob*.cc files.
    DESTS = {
        # Keep this list in sync with CMakeLists.txt: auto_job_outputs
        'decl': 'libqpdf/qpdf/auto_job_decl.hh',
        'init': 'libqpdf/qpdf/auto_job_init.hh',
        'help': 'libqpdf/qpdf/auto_job_help.hh',
        'schema': 'libqpdf/qpdf/auto_job_schema.hh',
        'json_decl': 'libqpdf/qpdf/auto_job_json_decl.hh',
        'json_init': 'libqpdf/qpdf/auto_job_json_init.hh',
        'man': 'manual/qpdf.1',
        # Others are added in top
    }
    # SUMS contains a checksum for each source and destination and is
    # used to detect whether we're up to date without having to force
    # recompilation all the time. This way the build can invoke this
    # script unconditionally without causing stuff to rebuild every
    # time.
    SUMS = 'job.sums'
|
|
|
|
def main(self, args=sys.argv[1:], prog=whoami):
    """Parse the command line and hand the resulting options to top().

    args: argument list to parse (defaults to the process arguments).
    prog: program name shown by the argument parser.
    """
    self.top(self.parse_args(args, prog))
|
|
|
|
def parse_args(self, args, prog):
    """Parse command-line arguments.

    Exactly one of --check or --generate must be given; argparse exits
    with a usage error otherwise.

    args: list of argument strings.
    prog: program name used in usage and error messages.
    Returns the argparse namespace with boolean attributes ``check``
    and ``generate``.
    """
    parser = argparse.ArgumentParser(
        prog=prog,
        description='Generate files for QPDFJob',
    )
    mxg = parser.add_mutually_exclusive_group(required=True)
    # In check mode nothing is written: top() exits successfully when
    # the checksums match and with an error when they don't.
    mxg.add_argument('--check',
                     help='check that generated files are up to date;'
                     ' exit with an error if they are not',
                     action='store_true', default=False)
    mxg.add_argument('--generate',
                     help='generate files from sources',
                     action='store_true', default=False)
    return parser.parse_args(args)
|
|
|
|
def top(self, options):
    """Load job.yml and either verify or regenerate the generated files.

    If the recorded checksums match, the process exits with status 0
    regardless of mode. Otherwise --check exits with an error message
    and --generate regenerates everything.

    options: namespace returned by parse_args().
    """
    with open('job.yml', 'r') as f:
        data = yaml.safe_load(f.read())
    # config_decls maps a config key from an option in "options"
    # (from job.yml) to a list of declarations. A declaration is
    # generated for each config method for that option table.
    self.config_decls = {}
    # Keep track of which configs we've declared since we can have
    # option tables share a config class, as with the encryption
    # tables.
    self.declared_configs = set()

    # Update DESTS -- see above. This ensures that each config
    # class's contents are included in job.sums.
    for o in data['options']:
        config = o.get('config', None)
        if config is not None:
            self.DESTS[config] = f'include/qpdf/auto_job_{config}.hh'
            self.config_decls[config] = []

    # Use sys.exit rather than the exit() helper: the latter is
    # installed by the site module for interactive use and is not
    # guaranteed to exist.
    if self.check_hashes():
        sys.exit(0)
    elif options.check:
        sys.exit(f'{whoami}: auto job inputs have changed')
    elif options.generate:
        self.generate(data)
    else:
        sys.exit(f'{whoami} unknown mode')
|
|
|
|
def get_hashes(self):
|
|
hashes = {}
|
|
for i in sorted([*self.SOURCES, *self.DESTS.values()]):
|
|
m = hashlib.sha256()
|
|
try:
|
|
with open(i, 'rb') as f:
|
|
m.update(f.read())
|
|
hashes[i] = m.hexdigest()
|
|
except FileNotFoundError:
|
|
pass
|
|
return hashes
|
|
|
|
def check_hashes(self):
|
|
hashes = self.get_hashes()
|
|
match = False
|
|
try:
|
|
old_hashes = {}
|
|
with open(self.SUMS, 'r') as f:
|
|
for line in f.readlines():
|
|
m = re.match(r'^(\S+) (\S+)\s*$', line)
|
|
if m:
|
|
old_hashes[m.group(1)] = m.group(2)
|
|
match = old_hashes == hashes
|
|
if not match:
|
|
# Write to stdout, not stderr. What we write to stderr
|
|
# is visible in a normal build. Writing to stdout will
|
|
# hide it in that case but expose it if you directly
|
|
# run ./generate_auto_job --check as in CI.
|
|
print(f'*** {whoami} hash mismatches ***')
|
|
match = False
|
|
for k, v in hashes.items():
|
|
if k not in old_hashes:
|
|
print(f' {k} is not in job.sums')
|
|
elif v != old_hashes[k]:
|
|
print(f' {k} was modified')
|
|
for k in old_hashes:
|
|
if k not in hashes:
|
|
print(f' {k} disappeared')
|
|
except Exception:
|
|
pass
|
|
return match
|
|
|
|
def update_hashes(self):
    """Rewrite the SUMS file from the current get_hashes() result.

    The file starts with a "# Generated by" comment line followed by
    one ``path checksum`` line per file.
    """
    entries = [f'# Generated by {whoami}']
    entries.extend(f'{k} {v}' for k, v in self.get_hashes().items())
    with open(self.SUMS, 'w') as f:
        for entry in entries:
            print(entry, file=f)
|
|
|
|
def generate_doc(self, df, f, f_man):
|
|
st_top = 0
|
|
st_topic = 1
|
|
st_option = 2
|
|
st_option_help = 3
|
|
state = st_top
|
|
|
|
indent = None
|
|
topic = None
|
|
option = None
|
|
short_text = None
|
|
long_text = None
|
|
|
|
# Generate a bunch of short static functions rather than a big
|
|
# member function for help. Some compilers have problems with
|
|
# very large member functions in classes in anonymous
|
|
# namespaces.
|
|
|
|
help_files = 0
|
|
help_lines = 0
|
|
|
|
self.all_topics = set(self.options_without_help)
|
|
self.referenced_topics = set()
|
|
|
|
def set_indent(x):
|
|
nonlocal indent
|
|
indent = ' ' * len(x)
|
|
|
|
def append_long_text(line, topic):
|
|
nonlocal indent, long_text
|
|
if line == '\n':
|
|
long_text += '\n'
|
|
elif line.startswith(indent):
|
|
long_text += line[len(indent):]
|
|
else:
|
|
long_text = long_text.strip()
|
|
if long_text == '':
|
|
raise Exception(f'missing long text for {topic}')
|
|
long_text += '\n'
|
|
if 'help' not in topic:
|
|
# Help for --help itself has --help=... not
|
|
# referring to specific options.
|
|
for i in re.finditer(r'--help=([^\.\s]+)', long_text):
|
|
self.referenced_topics.add(i.group(1))
|
|
return True
|
|
return False
|
|
|
|
def manify(text):
|
|
lines = text.split('\n')
|
|
out = []
|
|
last_was_item = False
|
|
for line in lines:
|
|
if line.startswith('- '):
|
|
last_was_item = True
|
|
out.append('.IP \\[bu]')
|
|
out.append(line[2:])
|
|
elif last_was_item and line.startswith(' '):
|
|
out.append(line[2:])
|
|
else:
|
|
last_was_item = False
|
|
out.append(line)
|
|
return '\n'.join(out)
|
|
|
|
last_option_topic = ''
|
|
lineno = 0
|
|
for line in df.readlines():
|
|
if help_lines == 0:
|
|
if help_files > 0:
|
|
print('}', file=f)
|
|
help_files += 1
|
|
help_lines += 1
|
|
print(f'static void add_help_{help_files}(QPDFArgParser& ap)\n'
|
|
'{', file=f)
|
|
lineno += 1
|
|
if state == st_top:
|
|
m = re.match(r'^(\s*\.\. )help-topic (\S+): (.*)$', line)
|
|
if m:
|
|
set_indent(m.group(1))
|
|
topic = m.group(2)
|
|
short_text = m.group(3)
|
|
long_text = ''
|
|
state = st_topic
|
|
continue
|
|
m = re.match(
|
|
r'^(\s*\.\. )qpdf:option:: (([^=\[\s]+)([\[= ](.+))?)$',
|
|
line)
|
|
if m:
|
|
if topic is None:
|
|
raise Exception('option seen before topic')
|
|
set_indent(m.group(1))
|
|
option = m.group(3)
|
|
synopsis = m.group(2)
|
|
if synopsis.endswith('`'):
|
|
raise Exception(
|
|
f'stray ` at end of option line (line {lineno})')
|
|
if synopsis != option:
|
|
long_text = synopsis + '\n'
|
|
else:
|
|
long_text = ''
|
|
state = st_option
|
|
continue
|
|
elif state == st_topic:
|
|
if append_long_text(line, topic):
|
|
self.all_topics.add(topic)
|
|
print(f'ap.addHelpTopic("{topic}", "{short_text}",'
|
|
f' R"({long_text})");', file=f)
|
|
print(f'.SH {topic.upper()} ({short_text})', file=f_man)
|
|
print(manify(long_text), file=f_man, end='')
|
|
help_lines += 1
|
|
state = st_top
|
|
elif state == st_option:
|
|
if line == '\n' or line.startswith(indent):
|
|
m = re.match(r'^(\s*\.\. )help: (.*)$', line)
|
|
if m:
|
|
set_indent(m.group(1))
|
|
short_text = m.group(2)
|
|
state = st_option_help
|
|
else:
|
|
raise Exception('option without help text')
|
|
elif state == st_option_help:
|
|
if append_long_text(line, option):
|
|
if option in self.options_without_help:
|
|
self.options_without_help.remove(option)
|
|
else:
|
|
raise Exception(
|
|
f'help for unknown option {option},'
|
|
f' lineno={lineno}')
|
|
if option not in self.help_options:
|
|
self.jdata[option[2:]]['help'] = short_text
|
|
print(f'ap.addOptionHelp("{option}", "{topic}",'
|
|
f' "{short_text}", R"({long_text})");', file=f)
|
|
if last_option_topic != topic:
|
|
print('.PP\nRelated Options:', file=f_man)
|
|
last_option_topic = topic
|
|
print(f'.TP\n.B {option} \\-\\- {short_text}', file=f_man)
|
|
print(manify(long_text), file=f_man, end='')
|
|
help_lines += 1
|
|
state = st_top
|
|
if help_lines == 20:
|
|
help_lines = 0
|
|
print('}', file=f)
|
|
print('static void add_help(QPDFArgParser& ap)\n{', file=f)
|
|
for i in range(help_files):
|
|
print(f' add_help_{i+1}(ap);', file=f)
|
|
print('ap.addHelpFooter("For detailed help, visit'
|
|
' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f)
|
|
print('}\n', file=f)
|
|
print('''.SH SEE ALSO
|
|
.PP
|
|
For a summary of qpdf's options, please run \\fBqpdf \\-\\-help\\fR.
|
|
A complete manual can be found at https://qpdf.readthedocs.io.
|
|
''', file=f_man, end='')
|
|
for i in self.referenced_topics:
|
|
if i not in self.all_topics:
|
|
raise Exception(f'help text referenced --help={i}')
|
|
for i in self.options_without_help:
|
|
raise Exception(
|
|
'Options without help: ' +
|
|
', '.join(self.options_without_help))
|
|
|
|
def generate(self, data):
    """Regenerate every output file from job.yml data and the manual.

    data: parsed contents of job.yml.
    Raises Exception if no VERSION line is found in CMakeLists.txt.
    """
    def write_with_banner(dest_key, lines):
        # Write BANNER followed by the given lines to a DESTS entry.
        with write_file(self.DESTS[dest_key]) as out:
            print(BANNER, file=out)
            for entry in lines:
                print(entry, file=out)

    warn(f'{whoami}: regenerating auto job files')
    self.validate(data)

    # The last line starting with "VERSION " supplies the version.
    version = None
    with open('CMakeLists.txt', 'r') as cmake:
        for raw in cmake:
            stripped = raw.strip()
            if stripped.startswith('VERSION '):
                version = stripped.split(' ')[1]
    if version is None:
        raise Exception("can't read version from CMakeLists.txt")

    # Keep track of which options are help options since they are
    # handled specially. Add the built-in help options to tables
    # that we populate as we read job.yml since we won't encounter
    # these in job.yml
    self.help_options = {'--completion-bash', '--completion-zsh', '--help'}
    # Keep track of which options we have encountered but haven't
    # seen help text for. This enables us to report if any option
    # is missing help.
    self.options_without_help = set(self.help_options)

    # Compute the information needed for generated files and write
    # the files.
    self.prepare(data)
    write_with_banner('decl', self.decls)
    write_with_banner('init', self.init)
    with write_file(self.DESTS['help']) as f, \
            write_file(self.DESTS['man']) as f_man:
        print(MAN_BANNER, file=f_man, end='')
        with open('manual/qpdf.1.in', 'r') as m_in:
            for raw in m_in:
                print(raw.replace('@PROJECT_VERSION@', version),
                      file=f_man, end='')
        with open('manual/cli.rst', 'r') as df:
            print(BANNER, file=f)
            self.generate_doc(df, f, f_man)

    # Compute the json files after the config and arg parsing
    # files. We need to have full information about all the
    # options before we can generate the schema. Generating the
    # schema also generates the json header files.
    self.generate_schema(data)
    schema_text = json.dumps(self.schema, indent=2, separators=(',', ': '))
    with write_file(self.DESTS['schema']) as f:
        print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
              schema_text +
              ')";', file=f)
    for config_key, declarations in self.config_decls.items():
        write_with_banner(config_key, declarations)
    write_with_banner('json_decl', self.json_decls)
    write_with_banner('json_init', self.json_init)

    # Update hashes last to ensure that this will be rerun in the
    # event of a failure.
    self.update_hashes()
    # DON'T ADD CODE TO generate AFTER update_hashes
|
|
|
|
def handle_trivial(self, i, identifier, cfg, prefix, kind, v):
|
|
# A "trivial" option is one whose handler does nothing other
|
|
# than to call the config method with the same name (switched
|
|
# to camelCase).
|
|
decl_arg = 1
|
|
decl_arg_optional = False
|
|
if kind == 'bare':
|
|
decl_arg = 0
|
|
self.init.append(f'this->ap.addBare("{i}", '
|
|
f'[this](){{{cfg}->{identifier}();}});')
|
|
elif kind == 'required_parameter':
|
|
self.init.append(
|
|
f'this->ap.addRequiredParameter("{i}", '
|
|
f'[this](std::string const& x){{{cfg}->{identifier}(x);}}'
|
|
f', "{v}");')
|
|
elif kind == 'optional_parameter':
|
|
decl_arg_optional = True
|
|
self.init.append(
|
|
f'this->ap.addOptionalParameter("{i}", '
|
|
f'[this](std::string const& x){{{cfg}->{identifier}(x);}});')
|
|
elif kind == 'required_choices':
|
|
self.init.append(
|
|
f'this->ap.addChoices("{i}", '
|
|
f'[this](std::string const& x){{{cfg}->{identifier}(x);}}'
|
|
f', true, {v}_choices);')
|
|
elif kind == 'optional_choices':
|
|
decl_arg_optional = True
|
|
self.init.append(
|
|
f'this->ap.addChoices("{i}", '
|
|
f'[this](std::string const& x){{{cfg}->{identifier}(x);}}'
|
|
f', false, {v}_choices);')
|
|
|
|
# Generate declarations for config methods separately by
|
|
# config object.
|
|
config_prefix = prefix + 'Config'
|
|
arg = ''
|
|
if decl_arg:
|
|
arg = 'std::string const& parameter'
|
|
fn = f'{config_prefix}* {identifier}({arg})'
|
|
if fn not in self.declared_configs:
|
|
self.declared_configs.add(fn)
|
|
self.config_decls[cfg].append(f'QPDF_DLL {fn};')
|
|
if decl_arg_optional:
|
|
# Rather than making the parameter optional, add an
|
|
# overloaded method that takes no arguments. This
|
|
# strategy enables us to change an option from bare to
|
|
# optional_parameter or optional_choices without
|
|
# breaking binary compatibility. The overloaded
|
|
# methods both have to be implemented manually. They
|
|
# are not automatically called, so if you forget,
|
|
# someone will get a link error if they try to call
|
|
# one.
|
|
self.config_decls[cfg].append(
|
|
f'QPDF_DLL {config_prefix}* {identifier}();')
|
|
|
|
def handle_flag(self, i, identifier, kind, v):
|
|
# For flags that require manual handlers, declare the handler
|
|
# and register it. They have to be implemented manually in
|
|
# QPDFJob_argv.cc. You get compiler/linker errors for any
|
|
# missing methods.
|
|
if kind == 'bare':
|
|
self.decls.append(f'void {identifier}();')
|
|
self.init.append(f'this->ap.addBare("{i}", '
|
|
f'b(&ArgParser::{identifier}));')
|
|
elif kind == 'required_parameter':
|
|
self.decls.append(f'void {identifier}(std::string const&);')
|
|
self.init.append(f'this->ap.addRequiredParameter("{i}", '
|
|
f'p(&ArgParser::{identifier})'
|
|
f', "{v}");')
|
|
elif kind == 'optional_parameter':
|
|
self.decls.append(f'void {identifier}(std::string const&);')
|
|
self.init.append(f'this->ap.addOptionalParameter("{i}", '
|
|
f'p(&ArgParser::{identifier}));')
|
|
elif kind == 'required_choices':
|
|
self.decls.append(f'void {identifier}(std::string const&);')
|
|
self.init.append(f'this->ap.addChoices("{i}", '
|
|
f'p(&ArgParser::{identifier})'
|
|
f', true, {v}_choices);')
|
|
elif kind == 'optional_choices':
|
|
self.decls.append(f'void {identifier}(std::string const&);')
|
|
self.init.append(f'this->ap.addChoices("{i}", '
|
|
f'p(&ArgParser::{identifier})'
|
|
f', false, {v}_choices);')
|
|
|
|
def prepare(self, data):
|
|
self.decls = [] # argv handler declarations
|
|
self.init = [] # initialize arg parsing code
|
|
self.json_decls = [] # json handler declarations
|
|
self.json_init = [] # initialize json handlers
|
|
self.jdata = {} # running data used for json generate
|
|
self.by_table = {} # table information by name for easy lookup
|
|
|
|
def add_jdata(flag, table, details):
|
|
# Keep track of each flag and where it appears so we can
|
|
# check consistency between the json information and the
|
|
# options section.
|
|
nonlocal self
|
|
if table == 'help':
|
|
self.help_options.add(f'--{flag}')
|
|
elif flag in self.jdata:
|
|
self.jdata[flag]['tables'][table] = details
|
|
else:
|
|
self.jdata[flag] = {
|
|
'tables': {table: details},
|
|
}
|
|
|
|
# helper functions
|
|
self.init.append('auto b = [this](void (ArgParser::*f)()) {')
|
|
self.init.append(' return QPDFArgParser::bindBare(f, this);')
|
|
self.init.append('};')
|
|
self.init.append(
|
|
'auto p = [this](void (ArgParser::*f)(std::string const&)) {')
|
|
self.init.append(' return QPDFArgParser::bindParam(f, this);')
|
|
self.init.append('};')
|
|
self.init.append('')
|
|
|
|
# static variables for each set of choices for choices options
|
|
for k, v in data['choices'].items():
|
|
s = f'static char const* {k}_choices[] = {{'
|
|
for i in v:
|
|
s += f'"{i}", '
|
|
s += '0};'
|
|
self.init.append(s)
|
|
self.json_init.append(s)
|
|
self.init.append('')
|
|
self.json_init.append('')
|
|
|
|
# constants for the table names to reduce hard-coding strings
|
|
# in the handlers
|
|
for o in data['options']:
|
|
table = o['table']
|
|
if table in ('main', 'help'):
|
|
continue
|
|
i = self.to_identifier(table, 'O', True)
|
|
self.decls.append(f'static constexpr char const* {i} = "{table}";')
|
|
self.decls.append('')
|
|
|
|
# Walk through all the options adding declarations for the
|
|
# option handlers and initialization code to register the
|
|
# handlers in QPDFArgParser. For "trivial" cases,
|
|
# QPDFArgParser will call the corresponding config method
|
|
# automatically. Otherwise, it will declare a handler that you
|
|
# have to explicitly implement.
|
|
|
|
# If you add a new option table, you have to set config to the
|
|
# name of a member variable that you declare in the ArgParser
|
|
# class in QPDFJob_argv.cc. Then there should be an option in
|
|
# the main table, also listed as manual in job.yml, that
|
|
# switches to it. See implementations of any of the existing
|
|
# options that do this for examples.
|
|
for o in data['options']:
|
|
table = o['table']
|
|
config = o.get('config', None)
|
|
table_prefix = o.get('prefix', '')
|
|
arg_prefix = 'arg' + table_prefix
|
|
config_prefix = o.get('config_prefix', table_prefix)
|
|
manual = o.get('manual', [])
|
|
json_prefix = table_prefix or table
|
|
self.by_table[json_prefix] = {
|
|
'config': config,
|
|
'manual': manual,
|
|
}
|
|
if table == 'main':
|
|
self.init.append('this->ap.selectMainOptionTable();')
|
|
elif table == 'help':
|
|
self.init.append('this->ap.selectHelpOptionTable();')
|
|
else:
|
|
identifier = self.to_identifier(table, 'argEnd', False)
|
|
self.init.append(f'this->ap.registerOptionTable("{table}",'
|
|
f' b(&ArgParser::{identifier}));')
|
|
if o.get('positional', False):
|
|
self.decls.append(
|
|
f'void {arg_prefix}Positional(std::string const&);')
|
|
self.init.append('this->ap.addPositional('
|
|
f'p(&ArgParser::{arg_prefix}Positional));')
|
|
|
|
flags = {}
|
|
for i in o.get('bare', []):
|
|
flags[i] = ['bare', None]
|
|
for i, v in o.get('required_parameter', {}).items():
|
|
flags[i] = ['required_parameter', v]
|
|
for i in o.get('optional_parameter', []):
|
|
flags[i] = ['optional_parameter', None]
|
|
for i, v in o.get('required_choices', {}).items():
|
|
flags[i] = ['required_choices', v]
|
|
for i, v in o.get('optional_choices', {}).items():
|
|
flags[i] = ['optional_choices', v]
|
|
self.options_without_help.add(f'--{i}')
|
|
|
|
for i, [kind, v] in flags.items():
|
|
self.options_without_help.add(f'--{i}')
|
|
add_jdata(i, json_prefix, [kind, v])
|
|
if config is None or i in manual:
|
|
identifier = self.to_identifier(i, arg_prefix, False)
|
|
self.handle_flag(i, identifier, kind, v)
|
|
else:
|
|
identifier = self.to_identifier(i, '', False)
|
|
self.handle_trivial(
|
|
i, identifier, config, config_prefix, kind, v)
|
|
|
|
# Subsidiary options tables need end methods to do any
|
|
# final checking within the option table. Final checking
|
|
# for the main option table is handled by
|
|
# checkConfiguration, which is called explicitly in the
|
|
# QPDFJob code.
|
|
if table not in ('main', 'help'):
|
|
identifier = self.to_identifier(table, 'argEnd', False)
|
|
self.decls.append(f'void {identifier}();')
|
|
|
|
def handle_json_trivial(self, flag_key, fdata):
|
|
config = None
|
|
for t, [kind, v] in fdata['tables'].items():
|
|
# We have determined that all tables, if multiple, have
|
|
# the same config.
|
|
tdata = self.by_table[t]
|
|
config = tdata['config']
|
|
if kind == 'bare':
|
|
self.json_init.append(
|
|
f'addBare([this]() {{ {config}->{flag_key}(); }});')
|
|
elif kind == 'required_parameter' or kind == 'optional_parameter':
|
|
# Optional parameters end up just being the empty string,
|
|
# so the handler has to deal with it. The empty string is
|
|
# also allowed for non-optional.
|
|
self.json_init.append(
|
|
f'addParameter([this](std::string const& p)'
|
|
f' {{ {config}->{flag_key}(p); }});')
|
|
elif kind == 'required_choices':
|
|
self.json_init.append(
|
|
f'addChoices({v}_choices, true,'
|
|
f' [this](std::string const& p)'
|
|
f' {{ {config}->{flag_key}(p); }});')
|
|
elif kind == 'optional_choices':
|
|
self.json_init.append(
|
|
f'addChoices({v}_choices, false,'
|
|
f' [this](std::string const& p)'
|
|
f' {{ {config}->{flag_key}(p); }});')
|
|
|
|
def handle_json_manual(self, path):
|
|
method = re.sub(r'\.([a-zA-Z0-9])',
|
|
lambda x: x.group(1).upper(),
|
|
f'setup{path}')
|
|
self.json_decls.append(f'void {method}();')
|
|
self.json_init.append(f'{method}();')
|
|
|
|
def option_to_json_key(self, s):
|
|
return self.to_identifier(s, '', False)
|
|
|
|
def flag_to_schema_key(self, k):
|
|
if k.startswith('_'):
|
|
schema_key = k[1:]
|
|
else:
|
|
schema_key = re.sub(r'[^\.]+\.', '', k)
|
|
return self.option_to_json_key(schema_key)
|
|
|
|
def build_schema(self, j, path, flag, expected, options_seen):
|
|
# j: the part of data from "json" in job.yml as we traverse it
|
|
# path: a string representation of the path in the json
|
|
# flag: the command-line flag
|
|
# expected: a map of command-line options we expect to eventually see
|
|
# options_seen: which options we have seen so far
|
|
|
|
# As described in job.yml, the json can have keys that don't
|
|
# map to options. This includes keys whose values are
|
|
# dictionaries as well as keys that correspond to positional
|
|
# arguments. These start with _ and get their help from
|
|
# job.yml. Things that correspond to options get their help
|
|
# from the help text we gathered from cli.rst.
|
|
|
|
if flag in expected:
|
|
options_seen.add(flag)
|
|
elif flag.startswith('__'):
|
|
# This marks a flag that has no JSON equivalent because it
|
|
# is handled in some other fashion.
|
|
options_seen.add(flag[2:])
|
|
return
|
|
elif isinstance(j, str):
|
|
if not flag.startswith('_'):
|
|
raise Exception(f'json: {flag} has a description'
|
|
' but doesn\'t start with _')
|
|
elif not (flag == '' or flag.startswith('_')):
|
|
raise Exception(f'json: unknown key {flag}')
|
|
|
|
# The logic here is subtle and makes sense if you understand
|
|
# how our JSON schemas work. They are described in JSON.hh,
|
|
# but basically, if you see a dictionary, the schema should
|
|
# have a dictionary with the same keys whose values are
|
|
# descriptive. If you see an array, the array should have
|
|
# single member that describes each element of the array. See
|
|
# JSON.hh for details.
|
|
|
|
# See comments in QPDFJob_json.cc in the Handlers class
|
|
# declaration to understand how and why the methods called
|
|
# here work. The idea is that Handlers keeps a stack of
|
|
# JSONHandler shared pointers so that we can register our
|
|
# handlers in the right place as we go.
|
|
if isinstance(j, dict):
|
|
schema_value = {}
|
|
if flag:
|
|
identifier = self.to_identifier(path, '', False)
|
|
self.json_decls.append(f'void begin{identifier}(JSON);')
|
|
self.json_decls.append(f'void end{identifier}();')
|
|
self.json_init.append(
|
|
f'beginDict(bindJSON(&Handlers::begin{identifier}),'
|
|
f' bindBare(&Handlers::end{identifier})); // {path}')
|
|
for k, v in j.items():
|
|
schema_key = self.flag_to_schema_key(k)
|
|
subpath = f'{path}.{schema_key}'
|
|
self.json_init.append(f'pushKey("{schema_key}");')
|
|
schema_value[schema_key] = self.build_schema(
|
|
v, subpath, k, expected, options_seen)
|
|
self.json_init.append(f'popHandler(); // key: {schema_key}')
|
|
elif isinstance(j, list):
|
|
if len(j) != 1:
|
|
raise Exception('json contains array with length != 1')
|
|
identifier = self.to_identifier(path, '', False)
|
|
self.json_decls.append(f'void begin{identifier}Array(JSON);')
|
|
self.json_decls.append(f'void end{identifier}Array();')
|
|
self.json_init.append(
|
|
f'beginArray(bindJSON(&Handlers::begin{identifier}Array),'
|
|
f' bindBare(&Handlers::end{identifier}Array));'
|
|
f' // {path}[]')
|
|
schema_value = [
|
|
self.build_schema(j[0], path, flag,
|
|
expected, options_seen)
|
|
]
|
|
self.json_init.append(
|
|
f'popHandler(); // array: {path}[]')
|
|
else:
|
|
schema_value = j
|
|
if schema_value is None:
|
|
schema_value = re.sub(
|
|
r'--([^\s=]+)',
|
|
lambda x: self.option_to_json_key(x.group(1)),
|
|
expected[flag]['help'])
|
|
is_trivial = False
|
|
if flag in expected:
|
|
is_trivial = True
|
|
common_config = None
|
|
for t in expected[flag]['tables']:
|
|
tdata = self.by_table[t]
|
|
if flag in tdata['manual']:
|
|
is_trivial = False
|
|
if common_config is None:
|
|
common_config = tdata['config']
|
|
elif common_config != tdata['config']:
|
|
is_trivial = False
|
|
config_key = self.flag_to_schema_key(flag)
|
|
if is_trivial:
|
|
self.handle_json_trivial(config_key, expected[flag])
|
|
else:
|
|
self.handle_json_manual(path)
|
|
return schema_value
|
|
|
|
def generate_schema(self, data):
|
|
# Check to make sure that every command-line option is
|
|
# represented in data['json']. Build a list of options that we
|
|
# expect. If an option appears once, we just expect to see it
|
|
# once. If it appears in more than one options table, we need
|
|
# to see a separate version of it for each option table. It is
|
|
# represented in job.yml prepended with the table prefix. The
|
|
# table prefix is removed in the schema. Example: "password"
|
|
# appears multiple times, so the json section of job.yml has
|
|
# main.password, uo.password, etc. But most options appear
|
|
# only once, so we can just list them as they are. There is a
|
|
# nearly exact match between option tables and dictionary in
|
|
# the job json schema, but it's not perfect because of how
|
|
# positional arguments are handled, so we have to do this
|
|
# extra work. Information about which tables a particular
|
|
# option appeared in is gathered up in prepare().
|
|
expected = {}
|
|
for k, v in self.jdata.items():
|
|
tables = v['tables']
|
|
if len(tables) == 1:
|
|
expected[k] = {**v}
|
|
else:
|
|
for t in sorted(tables):
|
|
expected[f'{t}.{k}'] = {**v}
|
|
options_seen = set()
|
|
|
|
# Walk through the json information building the schema as we
|
|
# go. This verifies consistency between command-line options
|
|
# and the json section of the data and builds up a schema by
|
|
# populating with help information as available. In addition
|
|
# to generating the schema, we declare and register json
|
|
# handlers that correspond with it. That way, we can first
|
|
# check a job JSON file against the schema, and if it matches,
|
|
# we have fewer error opportunities while calling handlers.
|
|
self.schema = self.build_schema(
|
|
data['json'], '', '', expected, options_seen)
|
|
if options_seen != set(expected.keys()):
|
|
raise Exception('missing from json: ' +
|
|
str(set(expected.keys()) - options_seen))
|
|
|
|
def check_keys(self, what, d, exp):
    """Exit with an error unless d is a dict whose keys are all in exp.

    what: label used to identify d in the error message.
    d: object to check.
    exp: set of permitted keys.

    Raises SystemExit carrying the error message on failure.
    """
    # sys.exit is used instead of the exit() helper, which the site
    # module provides for interactive use and which may be absent.
    if not isinstance(d, dict):
        sys.exit(f'{what} is not a dictionary')
    actual = set(d.keys())
    extra = actual - exp
    if extra:
        sys.exit(f'{what}: unknown keys = {extra}')
|
|
|
|
def validate(self, data):
|
|
self.check_keys('top', data, set(
|
|
['choices', 'options', 'json']))
|
|
for o in data['options']:
|
|
self.check_keys('top', o, set(
|
|
['table', 'prefix', 'config', 'config_prefix',
|
|
'manual', 'bare', 'positional',
|
|
'optional_parameter', 'required_parameter',
|
|
'required_choices', 'optional_choices']))
|
|
|
|
def to_identifier(self, label, prefix, const):
    """Convert a label such as an option name into an identifier.

    Every character of label other than an ASCII letter or digit is
    treated as a word separator.

    label: text to convert.
    prefix: text placed in front of the converted label.
    const: if true, return "<prefix>_<LABEL>" in upper case with
        separators kept as underscores; otherwise return camelCase
        with all underscores removed.
    """
    words = re.sub(r'[^a-zA-Z0-9]', '_', label)
    if const:
        return f'{prefix}_{words.upper()}'
    if prefix:
        words = f'{prefix}_{words}'
    camel = re.sub(r'_([a-z])', lambda m: m.group(1).upper(), words)
    return camel.replace('_', '')
|
|
|
|
|
|
if __name__ == '__main__':
    try:
        # All paths used by this script are relative to the directory
        # that contains it.
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        Main().main()
    except KeyboardInterrupt:
        # sys.exit rather than the site-provided exit() helper, which
        # is intended for interactive use and may not be available.
        sys.exit(130)
|