mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-23 07:08:30 +00:00
Simplify --json-output
Now --json-output just changes defaults. Allow output file with --json.
This commit is contained in:
parent
80acfc3826
commit
5f4224f31a
37
TODO
37
TODO
@ -69,47 +69,12 @@ Soon: Break ground on "Document-level work"
|
||||
JSON v2 fixes
|
||||
=============
|
||||
|
||||
* Unify code between QPDFJob::doJSONObjects and QPDF::writeJSON. Make
|
||||
sure that the "qpdf" key is always present when json-output is
|
||||
specified.
|
||||
|
||||
* Change the name of the "qpdf-v2" key to "qpdf". Use that in place of
|
||||
"objects" and change its content to a two-element array whose first
|
||||
element is metadata required (or useful) for parsing and whose
|
||||
second element contains the actual data. Use of an array is the only
|
||||
way to ensure that the metadata is guaranteed to be parsed before we
|
||||
start parsing the objects. Example:
|
||||
|
||||
{
|
||||
"qpdf": [
|
||||
{
|
||||
"jsonversion": 2,
|
||||
"pdfversion": "1.3",
|
||||
"pushedinheritedpageresources": false,
|
||||
"calledgetallpages": false,
|
||||
"maxobjectid": 10
|
||||
},
|
||||
{
|
||||
... objects ...
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
This implies a few things:
|
||||
|
||||
* Still need to test pushedinheritedpageresources and
|
||||
calledgetallpages and check/use their values when reading
|
||||
|
||||
* Fix --json-help
|
||||
* Rethink QPDF::writeJSON. Maybe provide a simpler overload?
|
||||
|
||||
* When reading back in, we'll have to call
|
||||
pushInheritedAttributesToPage or getAllPages based on the values
|
||||
of the metadata.
|
||||
|
||||
* Test --json with --json-stream-data and --json-output with
|
||||
--json-stream-data=none. Recheck writeJSON's handling of the
|
||||
pipeline argument.
|
||||
|
||||
* Support json v2 in the C API. At a minimum, write_json,
|
||||
create_from_json, and update_from_json need to be there and should
|
||||
take the same kinds of functions as the C API for logger.
|
||||
|
@ -133,7 +133,7 @@ class QPDF
|
||||
QPDF_DLL
|
||||
void updateFromJSON(std::shared_ptr<InputSource>);
|
||||
|
||||
// Write qpdf json format to the pipeline "p". The only supported
|
||||
// Write qpdf JSON format to the pipeline "p". The only supported
|
||||
// version is 2.
|
||||
//
|
||||
// If the value of "complete" is true, a complete JSON object
|
||||
|
@ -554,7 +554,7 @@ class QPDFJob
|
||||
void setEncryptionOptions(QPDF&, QPDFWriter&);
|
||||
void maybeFixWritePassword(int R, std::string& password);
|
||||
void writeOutfile(QPDF& pdf);
|
||||
void writeJSON(Pipeline* p, QPDF& pdf, bool complete, bool& first_key);
|
||||
void writeJSON(QPDF& pdf);
|
||||
|
||||
// JSON
|
||||
void doJSON(QPDF& pdf, Pipeline*);
|
||||
|
6
job.sums
6
job.sums
@ -8,10 +8,10 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c
|
||||
include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1
|
||||
job.yml f9564f18b08a45d17328af43652645771d3498471820c858b8c9013a193e1412
|
||||
libqpdf/qpdf/auto_job_decl.hh 7844eba58edffb9494b19e8eca6fd59a24d6e152ca606c3b07da569f753df2da
|
||||
libqpdf/qpdf/auto_job_help.hh db2e4350c700e064b204e3e20d4fee4eddfe312b28092afcf608b4b6863d30e5
|
||||
libqpdf/qpdf/auto_job_help.hh 700d7600b34588169c80f3e325e39e592e2f5c1af1cdac16614150ff38424b40
|
||||
libqpdf/qpdf/auto_job_init.hh fd1635a5ad6ba16b7ae008467145560a59a5ecfd10d29c5ef7cd0d8347747cd2
|
||||
libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297
|
||||
libqpdf/qpdf/auto_job_json_init.hh 59545578a2e47c660ff98516ed53f06638be75eb4658e2a09d32cc08e0cb7268
|
||||
libqpdf/qpdf/auto_job_schema.hh 9d543cd4a43eafffc2c4b8a6fee29e399c271c52cb6f7d417ae5497b3c1127dc
|
||||
libqpdf/qpdf/auto_job_schema.hh 5352ef1be1ad7cc6f4f36dab88f2937d278e6bd3a0e2d46259794dc226c8ba6b
|
||||
manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
|
||||
manual/cli.rst 8e1f443c6fa000e023e516c318df4d04d58233d4d8648907c4a71f0ea5722bca
|
||||
manual/cli.rst bbce4cfb662a96c8df0c8563f8065844b77aca7b4ec6385955546b9a455d9953
|
||||
|
@ -680,8 +680,15 @@ QPDFJob::checkConfiguration()
|
||||
" an output file is specified");
|
||||
} else if (m->split_pages) {
|
||||
usage("--split-pages may not be used with --replace-input");
|
||||
} else if (m->json_version) {
|
||||
usage("--json may not be used with --replace-input");
|
||||
}
|
||||
}
|
||||
if (m->json_version && (m->outfilename == nullptr)) {
|
||||
// The output file is optional with --json for backward
|
||||
// compatibility and defaults to standard output.
|
||||
m->outfilename = QUtil::make_shared_cstr("-");
|
||||
}
|
||||
if (m->infilename == nullptr) {
|
||||
usage("an input file name is required");
|
||||
} else if (
|
||||
@ -1116,6 +1123,7 @@ QPDFJob::doJSONObject(
|
||||
void
|
||||
QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf)
|
||||
{
|
||||
if (m->json_version == 1) {
|
||||
JSON::writeDictionaryKey(p, first, "objects", 1);
|
||||
bool first_object = true;
|
||||
JSON::writeDictionaryOpen(p, first_object, 1);
|
||||
@ -1135,6 +1143,27 @@ QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf)
|
||||
doJSONObject(p, first_object, "trailer", trailer);
|
||||
}
|
||||
JSON::writeDictionaryClose(p, first_object, 1);
|
||||
} else {
|
||||
std::set<std::string> json_objects;
|
||||
if (this->m->json_objects.count("trailer")) {
|
||||
json_objects.insert("trailer");
|
||||
}
|
||||
auto wanted = getWantedJSONObjects();
|
||||
for (auto const& og: wanted) {
|
||||
std::ostringstream s;
|
||||
s << "obj:" << og.unparse(' ') << " R";
|
||||
json_objects.insert(s.str());
|
||||
}
|
||||
pdf.writeJSON(
|
||||
this->m->json_version,
|
||||
p,
|
||||
false,
|
||||
first,
|
||||
this->m->decode_level,
|
||||
this->m->json_stream_data,
|
||||
this->m->json_stream_prefix,
|
||||
json_objects);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -1777,7 +1806,7 @@ void
|
||||
QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
|
||||
{
|
||||
// qpdf guarantees that no new top-level keys whose names start
|
||||
// with "xdata" will be added. These are reserved for users.
|
||||
// with "x-" will be added. These are reserved for users.
|
||||
|
||||
std::string captured_json;
|
||||
std::shared_ptr<Pl_String> pl_str;
|
||||
@ -1788,6 +1817,12 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
|
||||
|
||||
bool first = true;
|
||||
JSON::writeDictionaryOpen(p, first, 0);
|
||||
|
||||
if (m->json_output) {
|
||||
// Exclude version and parameters to keep the output file
|
||||
// minimal. The JSON version is inside the "qpdf" key for
|
||||
// version 2.
|
||||
} else {
|
||||
// This version is updated every time a non-backward-compatible
|
||||
// change is made to the JSON format. Clients of the JSON are to
|
||||
// ignore unrecognized keys, so we only update the version of a
|
||||
@ -1813,7 +1848,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
|
||||
j_params.addDictionaryMember(
|
||||
"decodelevel", JSON::makeString(decode_level_str));
|
||||
JSON::writeDictionaryItem(p, first, "parameters", j_params, 1);
|
||||
|
||||
}
|
||||
bool all_keys = m->json_keys.empty();
|
||||
// The list of selectable top-level keys is duplicated in the
|
||||
// following places: job.yml, QPDFJob::json_schema, and
|
||||
@ -1850,11 +1885,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
|
||||
// qpdf/objects/objectinfo without other keys.
|
||||
if (all_keys || m->json_keys.count("objects") ||
|
||||
m->json_keys.count("qpdf")) {
|
||||
if (this->m->json_version == 1) {
|
||||
doJSONObjects(p, first, pdf);
|
||||
} else {
|
||||
writeJSON(p, pdf, false, first);
|
||||
}
|
||||
}
|
||||
if (this->m->json_version == 1) {
|
||||
// "objectinfo" is not needed for version >1 since you can
|
||||
@ -1889,9 +1920,6 @@ QPDFJob::doInspection(QPDF& pdf)
|
||||
if (m->check) {
|
||||
doCheck(pdf);
|
||||
}
|
||||
if (m->json_version) {
|
||||
doJSON(pdf, &cout);
|
||||
}
|
||||
if (m->show_npages) {
|
||||
QTC::TC("qpdf", "QPDFJob npages");
|
||||
cout << pdf.getRoot().getKey("/Pages").getKey("/Count").getIntValue()
|
||||
@ -3337,9 +3365,8 @@ QPDFJob::writeOutfile(QPDF& pdf)
|
||||
} else if (strcmp(m->outfilename.get(), "-") == 0) {
|
||||
m->outfilename = nullptr;
|
||||
}
|
||||
if (this->m->json_output) {
|
||||
bool unused = true;
|
||||
writeJSON(nullptr, pdf, true, unused);
|
||||
if (this->m->json_version) {
|
||||
writeJSON(pdf);
|
||||
} else {
|
||||
// QPDFWriter must have block scope so the output file will be
|
||||
// closed after write() finishes.
|
||||
@ -3393,52 +3420,30 @@ QPDFJob::writeOutfile(QPDF& pdf)
|
||||
}
|
||||
|
||||
void
|
||||
QPDFJob::writeJSON(Pipeline* p, QPDF& pdf, bool complete, bool& first_key)
|
||||
QPDFJob::writeJSON(QPDF& pdf)
|
||||
{
|
||||
// File pipeline must have block scope so it will be closed
|
||||
// after write.
|
||||
std::shared_ptr<QUtil::FileCloser> fc;
|
||||
std::shared_ptr<Pipeline> fp;
|
||||
std::string file_prefix = this->m->json_stream_prefix;
|
||||
if (m->outfilename.get()) {
|
||||
QTC::TC("qpdf", "QPDFJob write json to file");
|
||||
if (file_prefix.empty()) {
|
||||
file_prefix = this->m->outfilename.get();
|
||||
if (this->m->json_stream_prefix.empty()) {
|
||||
this->m->json_stream_prefix = this->m->outfilename.get();
|
||||
}
|
||||
fc = std::make_shared<QUtil::FileCloser>(
|
||||
QUtil::safe_fopen(this->m->outfilename.get(), "w"));
|
||||
fp = std::make_shared<Pl_StdioFile>("json output", fc->f);
|
||||
} else if (
|
||||
(this->m->json_stream_data == qpdf_sj_file) && file_prefix.empty()) {
|
||||
(this->m->json_stream_data == qpdf_sj_file) &&
|
||||
this->m->json_stream_prefix.empty()) {
|
||||
QTC::TC("qpdf", "QPDFJob need json-stream-prefix for stdout");
|
||||
usage("please specify --json-stream-prefix since the input file "
|
||||
"name is unknown");
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFJob write json to stdout");
|
||||
if (p == nullptr) {
|
||||
fp = this->m->log->getInfo();
|
||||
this->m->log->saveToStandardOutput(true);
|
||||
fp = this->m->log->getSave();
|
||||
}
|
||||
}
|
||||
if (p == nullptr) {
|
||||
p = fp.get();
|
||||
}
|
||||
std::set<std::string> json_objects;
|
||||
if (this->m->json_objects.count("trailer")) {
|
||||
json_objects.insert("trailer");
|
||||
}
|
||||
auto wanted = getWantedJSONObjects();
|
||||
for (auto const& og: wanted) {
|
||||
std::ostringstream s;
|
||||
s << "obj:" << og.unparse(' ') << " R";
|
||||
json_objects.insert(s.str());
|
||||
}
|
||||
pdf.writeJSON(
|
||||
this->m->json_version,
|
||||
p,
|
||||
complete,
|
||||
first_key,
|
||||
this->m->decode_level,
|
||||
this->m->json_stream_data,
|
||||
file_prefix,
|
||||
json_objects);
|
||||
doJSON(pdf, fp.get());
|
||||
}
|
||||
|
@ -244,7 +244,6 @@ QPDFJob::Config::json(std::string const& parameter)
|
||||
if ((o.m->json_version < 1) || (o.m->json_version > JSON::LATEST)) {
|
||||
usage(std::string("unsupported json version ") + parameter);
|
||||
}
|
||||
o.m->require_outfile = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
@ -297,14 +296,7 @@ QPDFJob::Config*
|
||||
QPDFJob::Config::jsonOutput(std::string const& parameter)
|
||||
{
|
||||
o.m->json_output = true;
|
||||
if (parameter.empty() || (parameter == "latest")) {
|
||||
o.m->json_version = JSON::LATEST;
|
||||
} else {
|
||||
o.m->json_version = QUtil::string_to_int(parameter.c_str());
|
||||
}
|
||||
if ((o.m->json_version < 2) || (o.m->json_version > JSON::LATEST)) {
|
||||
usage(std::string("unsupported json output version ") + parameter);
|
||||
}
|
||||
json(parameter);
|
||||
if (!o.m->json_stream_data_set) {
|
||||
// No need to set json_stream_data_set -- that indicates
|
||||
// explicit use of --json-stream-data.
|
||||
@ -313,9 +305,7 @@ QPDFJob::Config::jsonOutput(std::string const& parameter)
|
||||
if (!o.m->decode_level_set) {
|
||||
o.m->decode_level = qpdf_dl_none;
|
||||
}
|
||||
if (o.m->json_keys.empty()) {
|
||||
o.m->json_keys.insert("qpdf");
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -803,7 +803,9 @@ depth in the JSON section of the manual. "version" may be a
|
||||
specific version or "latest" (the default). Run qpdf --json-help
|
||||
for a description of the generated JSON object.
|
||||
)");
|
||||
ap.addOptionHelp("--json-help", "json", "show format of JSON output", R"(Describe the format of the JSON output by writing to standard
|
||||
ap.addOptionHelp("--json-help", "json", "show format of JSON output", R"(--json-help[=version]
|
||||
|
||||
Describe the format of the JSON output by writing to standard
|
||||
output a JSON object with the same keys and with values
|
||||
containing descriptive text.
|
||||
)");
|
||||
@ -838,17 +840,17 @@ which is to use the output file name. Whatever is given here
|
||||
will be appended with -nnn to create the name of the file that
|
||||
will contain the data for the stream stream in object nnn.
|
||||
)");
|
||||
ap.addOptionHelp("--json-output", "json", "serialize to JSON", R"(--json-output[=version]
|
||||
ap.addOptionHelp("--json-output", "json", "apply defaults for JSON serialization", R"(--json-output[=version]
|
||||
|
||||
The output file will be qpdf JSON format at the given version.
|
||||
"version" may be a specific version or "latest" (the default).
|
||||
The only supported version is 2. See also --json-stream-data,
|
||||
--json-stream-prefix, and --decode-level.
|
||||
Implies --json=version. Changes default values for certain
|
||||
options so that the JSON output written is the most faithful
|
||||
representation of the original PDF and contains no additional
|
||||
JSON keys. See also --json-stream-data, --json-stream-prefix,
|
||||
and --decode-level.
|
||||
)");
|
||||
ap.addOptionHelp("--json-input", "json", "input file is qpdf JSON", R"(Treat the input file as a JSON file in qpdf JSON format as
|
||||
written by qpdf --json-output. See the "qpdf JSON Format"
|
||||
section of the manual for information about how to use this
|
||||
option.
|
||||
ap.addOptionHelp("--json-input", "json", "input file is qpdf JSON", R"(Treat the input file as a JSON file in qpdf JSON format. See the
|
||||
"qpdf JSON Format" section of the manual for information about
|
||||
how to use this option.
|
||||
)");
|
||||
ap.addOptionHelp("--update-from-json", "json", "update a PDF from qpdf JSON", R"(--update-from-json=qpdf-json-file
|
||||
|
||||
|
@ -28,7 +28,7 @@ static constexpr char const* JOB_SCHEMA_DATA = R"({
|
||||
"forceVersion": "set output PDF version",
|
||||
"progress": "show progress when writing",
|
||||
"splitPages": "write pages to separate files",
|
||||
"jsonOutput": "serialize to JSON",
|
||||
"jsonOutput": "apply defaults for JSON serialization",
|
||||
"encrypt": {
|
||||
"userPassword": "user password",
|
||||
"ownerPassword": "owner password",
|
||||
|
@ -3194,7 +3194,16 @@ Related Options
|
||||
:qpdf:ref:`--json-help` option to get a description of the JSON
|
||||
object.
|
||||
|
||||
.. qpdf:option:: --json-help
|
||||
Starting with qpdf 11, when this option is specified, an output
|
||||
file is optional (for backward compatibility) and defaults to
|
||||
standard output. You may specify an output file to write the JSON
|
||||
to a file rather than standard output.
|
||||
|
||||
Stream data is only included if :qpdf:ref:`--json-output` is
|
||||
specified or if a value other than ``none`` is passed to
|
||||
:qpdf:ref:`--json-stream-data`.
|
||||
|
||||
.. qpdf:option:: --json-help[=version]
|
||||
|
||||
.. help: show format of JSON output
|
||||
|
||||
@ -3202,12 +3211,13 @@ Related Options
|
||||
output a JSON object with the same keys and with values
|
||||
containing descriptive text.
|
||||
|
||||
Describe the format of the JSON output by writing to standard
|
||||
output a JSON object with the same structure as the JSON generated
|
||||
by qpdf. In the output written by ``--json-help``, each key's value
|
||||
is a description of the key. The specific contract guaranteed by
|
||||
qpdf in its JSON representation is explained in more detail in the
|
||||
:ref:`json`.
|
||||
Describe the format of the corresponding version of JSON output by
|
||||
writing to standard output a JSON object with the same structure as
|
||||
the JSON generated by qpdf. In the output written by
|
||||
``--json-help``, each key's value is a description of the key. The
|
||||
specific contract guaranteed by qpdf in its JSON representation is
|
||||
explained in more detail in the :ref:`json`. The default version of
|
||||
help is version ``2``, as with the :qpdf:ref:`--json` flag.
|
||||
|
||||
.. qpdf:option:: --json-key=key
|
||||
|
||||
@ -3233,11 +3243,9 @@ Related Options
|
||||
objects will be shown.
|
||||
|
||||
This option is repeatable. If given, only specified objects will be
|
||||
shown in the ``"objects"`` key of the JSON output. Otherwise, all
|
||||
objects will be shown. For qpdf JSON version 1, this also affects
|
||||
the ``"objectinfo"`` key, which is not present in version 2. This
|
||||
option may be used with :qpdf:ref:`--json` and also with
|
||||
:qpdf:ref:`--json-output`.
|
||||
shown in the objects dictionary in the JSON output. Otherwise, all
|
||||
objects will be shown. See :ref:`json` for details about the qpdf
|
||||
JSON format.
|
||||
|
||||
.. qpdf:option:: --json-stream-data={none|inline|file}
|
||||
|
||||
@ -3281,28 +3289,30 @@ Related Options
|
||||
|
||||
.. qpdf:option:: --json-output[=version]
|
||||
|
||||
.. help: serialize to JSON
|
||||
.. help: apply defaults for JSON serialization
|
||||
|
||||
The output file will be qpdf JSON format at the given version.
|
||||
"version" may be a specific version or "latest" (the default).
|
||||
The only supported version is 2. See also --json-stream-data,
|
||||
--json-stream-prefix, and --decode-level.
|
||||
Implies --json=version. Changes default values for certain
|
||||
options so that the JSON output written is the most faithful
|
||||
representation of the original PDF and contains no additional
|
||||
JSON keys. See also --json-stream-data, --json-stream-prefix,
|
||||
and --decode-level.
|
||||
|
||||
The output file, instead of being a PDF file, will be a JSON file
|
||||
in qpdf JSON format at the given version. ``version`` may be a
|
||||
specific version or ``latest`` (the default). The only supported
|
||||
version is 2. See also :qpdf:ref:`--json-stream-data` and
|
||||
:qpdf:ref:`--json-stream-prefix`. This option also changes the
|
||||
following defaults:
|
||||
Implies :qpdf:ref:`--json` at the specified version. This option
|
||||
changes several default values, all of which can be overridden by
|
||||
specifying the stated option:
|
||||
|
||||
- The default value for :qpdf:ref:`--json-stream-data` changes from
|
||||
``none`` to ``inline``.
|
||||
|
||||
- The default decode level for stream data becomes ``none``, but you can
|
||||
override it with :qpdf:ref:`--decode-level`.
|
||||
- The default value for :qpdf:ref:`--decode-level` changes from
|
||||
``generalized`` to ``none``.
|
||||
|
||||
- Only the ``"qpdf"`` key is included in the JSON output, but you
|
||||
can add additional keys with :qpdf:ref:`--json-key`.
|
||||
- By default, only the ``"qpdf"`` key is included in the JSON
|
||||
output, but you can add additional keys with
|
||||
:qpdf:ref:`--json-key`.
|
||||
|
||||
- Excludes the ``"version"`` and ``"parameters"`` keys from the
|
||||
JSON output.
|
||||
|
||||
If you want to look at the contents of streams easily as you would
|
||||
in QDF mode (see :ref:`qdf`), you can use
|
||||
@ -3313,15 +3323,15 @@ Related Options
|
||||
|
||||
.. help: input file is qpdf JSON
|
||||
|
||||
Treat the input file as a JSON file in qpdf JSON format as
|
||||
written by qpdf --json-output. See the "qpdf JSON Format"
|
||||
section of the manual for information about how to use this
|
||||
option.
|
||||
Treat the input file as a JSON file in qpdf JSON format. See the
|
||||
"qpdf JSON Format" section of the manual for information about
|
||||
how to use this option.
|
||||
|
||||
Treat the input file as a JSON file in qpdf JSON format as written
|
||||
by ``qpdf --json-output``. The input file must be complete and
|
||||
include all stream data. For information about converting between
|
||||
PDF and JSON, please see :ref:`json`.
|
||||
Treat the input file as a JSON file in qpdf JSON format. The input
|
||||
file must be complete and include all stream data. The JSON version
|
||||
must be at least 2. All top-level keys are ignored except for
|
||||
``"qpdf"``. For information about converting between PDF and JSON,
|
||||
please see :ref:`json`.
|
||||
|
||||
.. qpdf:option:: --update-from-json=qpdf-json-file
|
||||
|
||||
|
214
manual/json.rst
214
manual/json.rst
@ -24,27 +24,28 @@ represents the contents of a PDF file. This is distinct from the
|
||||
interacting with qpdf the way the command-line tool does. For
|
||||
information about that, see :ref:`qpdf-job`.
|
||||
|
||||
The qpdf JSON format is specific to qpdf. There are two ways to use
|
||||
qpdf JSON:
|
||||
The qpdf JSON format is specific to qpdf. With JSON version 2, the
|
||||
:qpdf:ref:`--json` command-line flag causes creation of a JSON
|
||||
representation of all the objects in a PDF file. This includes an
|
||||
unambiguous representation of the PDF object structure and also
|
||||
provides JSON-formatted summaries of other information about the file.
|
||||
This functionality is built into ``QPDFJob`` and can be accessed from
|
||||
the ``qpdf`` command-line tool or from the ``QPDFJob`` C or C++ API.
|
||||
|
||||
- The :qpdf:ref:`--json` command-line flag causes creation of a JSON
|
||||
representation of all the objects in a PDF file, excluding stream
|
||||
data. This includes an unambiguous representation of the PDF object
|
||||
structure and also provides JSON-formatted summaries of other
|
||||
information about the file. This functionality is built into
|
||||
``QPDFJob`` and can be accessed from the ``qpdf`` command-line tool
|
||||
or from the ``QPDFJob`` C or C++ API.
|
||||
|
||||
- qpdf can create a JSON file that completely represents a PDF file.
|
||||
By default, stream data is omitted, but it can be included by
|
||||
specifying the :qpdf:ref:`--json-stream-data` option. With stream data
|
||||
included, the generated JSON file completely represents a PDF file.
|
||||
You can think of this as using JSON as an *alternative syntax* for
|
||||
representing a PDF file. Using qpdf JSON, it is possible to
|
||||
convert a PDF file to JSON, manipulate the structure or contents of
|
||||
the objects at a low level, and convert the results back to a PDF
|
||||
file. This functionality can be accessed from the command-line with
|
||||
the :qpdf:ref:`--json-output`, :qpdf:ref:`--json-input`, and
|
||||
:qpdf:ref:`--update-from-json` flags, or from the API using the
|
||||
``QPDF::writeJSON``, ``QPDF::createFromJSON``, and
|
||||
``QPDF::updateFromJSON`` methods.
|
||||
representing a PDF file. Using qpdf JSON, it is possible to convert a
|
||||
PDF file to JSON, manipulate the structure or contents of the objects
|
||||
at a low level, and convert the results back to a PDF file. This
|
||||
functionality can be accessed from the command-line with the
|
||||
:qpdf:ref:`--json-input` and :qpdf:ref:`--update-from-json` flags, or
|
||||
from the API using the ``QPDF::writeJSON``, ``QPDF::createFromJSON``,
|
||||
and ``QPDF::updateFromJSON`` methods. The :qpdf:ref:`--json-output`
|
||||
flag changes a handful of defaults so that the resulting JSON is as
|
||||
close as possible to the original input and is ready for being
|
||||
converted back to PDF.
|
||||
|
||||
.. _json-terminology:
|
||||
|
||||
@ -120,18 +121,53 @@ qpdf JSON Object Representation
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This section describes the representation of PDF objects in qpdf JSON
|
||||
version 2. PDF objects are represented within the ``"objects"``
|
||||
dictionary of a qpdf JSON file. This is true both for PDF serialized
|
||||
to JSON (:qpdf:ref:`--json-output`, ``QPDF::writeJSON``) or objects as
|
||||
they appear in the output of ``qpdf`` with the :qpdf:ref:`--json`
|
||||
option.
|
||||
version 2. PDF objects are represented within the ``"qpdf"`` entry of
|
||||
a qpdf JSON file. The ``"qpdf"`` entry is a two-element array. The
|
||||
first element is a dictionary containing header-like information about
|
||||
the file such as the PDF version. The second element is a dictionary
|
||||
containing all the objects in the PDF file. We refer to this as the
|
||||
*objects dictionary*.
|
||||
|
||||
Each key in the ``"objects"`` dictionary is either ``"trailer"`` or a
|
||||
string of the form ``"obj:O G R"`` where ``O`` and ``G`` are the
|
||||
object and generation numbers and ``R`` is the literal string ``R``.
|
||||
This is the PDF syntax for the indirect object reference prepended by
|
||||
``obj:``. The value, representing the object itself, is a JSON object
|
||||
whose structure is described below.
|
||||
The first element contains the following keys:
|
||||
|
||||
- ``"jsonversion"`` -- a number indicating the JSON version used for
|
||||
writing. This will always be ``2``.
|
||||
|
||||
- ``"pdfversion"`` -- a string containing PDF version as indicated in
|
||||
the PDF header (e.g. ``"1.7"``, ``"2.0"``)
|
||||
|
||||
- ``"pushedinheritedpageresources"`` -- a boolean indicating whether
|
||||
the library pushed inherited resources down to the page level.
|
||||
Certain library calls cause this to happen, and qpdf needs to know
|
||||
when reading a JSON file back in whether it should do this as it may
|
||||
cause certain objects to be renumbered.
|
||||
|
||||
- ``"calledgetallpages"`` -- a boolean indicating whether
|
||||
``getAllPages`` was called prior to writing the JSON output. This
|
||||
method causes page tree repair to occur, which may renumber some
|
||||
objects (in very rare cases of corrupted page trees), so qpdf needs
|
||||
to know this information when reading a JSON file back in.
|
||||
|
||||
- ``"maxobjectid"`` -- a number indicating the object ID of the
|
||||
highest numbered object in the file. This is provided to make it
|
||||
easier for software that wants to add new objects to the file as you
|
||||
can safely start with one above that number when creating new
|
||||
objects. Note that the value of ``"maxobjectid"`` may be higher than
|
||||
the actual maximum object that appears in the input PDF since it
|
||||
takes into consideration any dangling indirect object references
|
||||
from the original file. This prevents you from unwittingly creating
|
||||
an object that doesn't exist but that is referenced, which may have
|
||||
unintended side effects. (The PDF specification explicitly allows
|
||||
dangling references and says to treat them as nulls. This can happen
|
||||
if objects are removed from a PDF file.)
|
||||
|
||||
The second element is the objects dictionary. Each key in the objects
|
||||
dictionary is either ``"trailer"`` or a string of the form ``"obj:O G
|
||||
R"`` where ``O`` and ``G`` are the object and generation numbers and
|
||||
``R`` is the literal string ``R``. This is the PDF syntax for the
|
||||
indirect object reference prepended by ``obj:``. The value,
|
||||
representing the object itself, is a JSON object whose structure is
|
||||
described below.
|
||||
|
||||
Top-level Stream Objects
|
||||
Stream objects are represented as a JSON object with the single key
|
||||
@ -143,6 +179,7 @@ Top-level Stream Objects
|
||||
|
||||
- ``none``: stream data is not represented; no other keys are
|
||||
present
|
||||
specified.
|
||||
|
||||
- ``inline``: the stream data appears as a base64-encoded string as
|
||||
the value of the ``"data"`` key
|
||||
@ -249,57 +286,6 @@ Object Values
|
||||
the string representations of names and whose values are
|
||||
representations of PDF objects.
|
||||
|
||||
.. _json.output:
|
||||
|
||||
qpdf JSON Output
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
The format of the JSON written by qpdf's :qpdf:ref:`--json-output`
|
||||
flag or the ``QPDF::writeJSON`` API call is a JSON object consisting
|
||||
of a single key: ``"qpdf"``. This may be the only key, or it may be
|
||||
embedded in the output of ``qpdf --json``. Unknown keys are ignored
|
||||
for future compatibility. It is guaranteed that qpdf will never add
|
||||
any keys whose names start with ``xdata``, so users are free to add
|
||||
their own metadata using keys whose names start with ``xdata`` without
|
||||
fear of clashing with a future version of qpdf.
|
||||
|
||||
The ``"qpdf"`` key points to a two-element JSON array. The first element is
|
||||
a JSON object with the following keys:
|
||||
|
||||
- ``"jsonversion"`` -- a number indicating the JSON version used for
|
||||
writing. This will always be ``2``.
|
||||
|
||||
- ``"pdfversion"`` -- a string containing PDF version as indicated in
|
||||
the PDF header (e.g. ``"1.7"``, ``"2.0"``)
|
||||
|
||||
- ``pushedinheritedpageresources`` -- a boolean indicating whether
|
||||
the library pushed inherited resources down to the page level.
|
||||
Certain library calls cause this to happen, and qpdf needs to know
|
||||
when reading a JSON file back in whether it should do this as it may
|
||||
cause certain objects to be renumbered.
|
||||
|
||||
- ``calledgetallpages`` -- a boolean indicating whether
|
||||
``getAllPages`` was called prior to writing the JSON output. This
|
||||
method causes page tree repair to occur, which may renumber some
|
||||
objects (in very rare cases of corrupted page trees), so qpdf needs
|
||||
to know this information when reading a JSON file back in.
|
||||
|
||||
- ``"maxobjectid"`` -- a number indicating the object ID of the
|
||||
highest numbered object in the file. This is provided to make it
|
||||
easier for software that wants to add new objects to the file as you
|
||||
can safely start with one above that number when creating new
|
||||
objects. Note that the value of ``"maxobjectid"`` may be higher than
|
||||
the actual maximum object that appears in the input PDF since it
|
||||
takes into consideration any dangling indirect object references
|
||||
from the original file. This prevents you from unwittingly creating
|
||||
an object that doesn't exist but that is referenced, which may have
|
||||
unintended side effects. (The PDF specification explicitly allows
|
||||
dangling references and says to treat them as nulls. This can happen
|
||||
if objects are removed from a PDF file.)
|
||||
|
||||
The second element is a JSON object containing the actual PDF objects
|
||||
as described in :ref:`json.objects`.
|
||||
|
||||
Note that writing JSON output is done by ``QPDF``, not ``QPDFWriter``.
|
||||
As such, none of the things ``QPDFWriter`` does apply. This includes
|
||||
recompression of streams, renumbering of objects, anything to do with
|
||||
@ -325,7 +311,7 @@ qpdf JSON format.
|
||||
"pdfversion": "1.3",
|
||||
"pushedinheritedpageresources": false,
|
||||
"calledgetallpages": false,
|
||||
"maxobjectid": 5,
|
||||
"maxobjectid": 5
|
||||
},
|
||||
{
|
||||
"obj:1 0 R": {
|
||||
@ -389,8 +375,7 @@ qpdf JSON format.
|
||||
qpdf JSON Input
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
Output in the JSON output format described in :ref:`json.output` can
|
||||
be used in two different ways:
|
||||
The qpdf JSON output can be used in two different ways:
|
||||
|
||||
- By using the :qpdf:ref:`--json-input` flag or calling
|
||||
``QPDF::createFromJSON`` in place of ``QPDF::processFile``, a qpdf
|
||||
@ -408,8 +393,11 @@ Here are some important things to know about qpdf JSON input.
|
||||
- When a qpdf JSON file is used as the primary input file, it must be
|
||||
complete. This means
|
||||
|
||||
- A JSON version number must be specified with the ``"jsonversion"``
|
||||
key in the first array element
|
||||
|
||||
- A PDF version number must be specified with the ``"pdfversion"``
|
||||
key
|
||||
key in the first array element
|
||||
|
||||
- Stream data must be present for all streams
|
||||
|
||||
@ -422,6 +410,9 @@ Here are some important things to know about qpdf JSON input.
|
||||
- ``"maxobjectid"`` is ignored, so it is not necessary to update it
|
||||
when adding new objects.
|
||||
|
||||
- ``"calledgetallpages"`` and ``"pushedinheritedpageresources"`` are
|
||||
treated as false if omitted.
|
||||
|
||||
- ``"/Length"`` is ignored in all stream dictionaries. qpdf doesn't
|
||||
put it there when it creates JSON output, and it is not necessary
|
||||
to add it.
|
||||
@ -432,14 +423,13 @@ Here are some important things to know about qpdf JSON input.
|
||||
- Unknown keys at the top level of the file, within ``objects``,
|
||||
at the top level of each individual object (inside the object that
|
||||
has the ``"value"`` or ``"stream"`` key) and directly within
|
||||
``"stream"`` are ignored for future compatibility. You should
|
||||
avoid putting your own values in those places if you wish to avoid
|
||||
risking that your JSON files will not work in future versions of
|
||||
qpdf. The exception to this advice is at the top level of the
|
||||
overall file where it is explicitly supported for you to add your
|
||||
own keys. For example, you could add your own metadata at the top
|
||||
level, and qpdf will ignore it. Note that extra top-level keys are
|
||||
not preserved when qpdf reads your JSON file.
|
||||
``"stream"`` are ignored for future compatibility. This includes
|
||||
other top-level keys generated by ``qpdf`` itself (such as
|
||||
``"pages"``). As such, those keys don't have to be consistent with
|
||||
the ``"qpdf"`` key if modifying a JSON file for conversion back to
|
||||
PDF. If you wish to store application-specific metadata, you can
|
||||
do so by adding a key whose name starts with ``x-``. qpdf is
|
||||
guaranteed not to add any of its own keys that start with ``x-``.
|
||||
|
||||
- When qpdf reads a PDF file, the internal object numbers are always
|
||||
preserved. However, when qpdf writes a file using ``QPDFWriter``,
|
||||
@ -458,9 +448,9 @@ Here are some important things to know about qpdf JSON input.
|
||||
# edit pdf.json
|
||||
qpdf in.pdf out.pdf --update-from-json=pdf.json
|
||||
|
||||
The following will not produce predictable results because
|
||||
``out.pdf`` won't have the same object numbers as ``pdf.json`` and
|
||||
``in.pdf``.
|
||||
The following will produce unpredictable and probably incorrect
|
||||
results because ``out.pdf`` won't have the same object numbers as
|
||||
``pdf.json`` and ``in.pdf``.
|
||||
|
||||
::
|
||||
|
||||
@ -658,15 +648,16 @@ be aware of:
|
||||
- If a PDF file has certain types of errors in its pages tree (such as
|
||||
page objects that are direct or multiple pages sharing the same
|
||||
object ID), qpdf will automatically repair the pages tree. If you
|
||||
specify ``"objects"`` (and, with qpdf JSON version 1, also
|
||||
specify ``"qpdf"`` (or, with qpdf JSON version 1, ``"objects"`` or
|
||||
``"objectinfo"``) without any other keys, you will see the original
|
||||
pages tree without any corrections. If you specify any of the keys that
|
||||
require page tree traversal (for example, ``"pages"``,
|
||||
``"outlines"``, or ``"pagelabel"``), then ``"objects"`` (and
|
||||
``"objectinfo"``) will show the repaired page tree so that object
|
||||
references will be consistent throughout the file. This is not an
|
||||
issue with :qpdf:ref:`--json-output`, which doesn't repair the pages
|
||||
tree.
|
||||
``"outlines"``, or ``"pagelabel"``), then ``"qpdf"`` (and
|
||||
``"objects"`` and ``"objectinfo"``) will show the repaired page
|
||||
tree so that object references will be consistent throughout the
|
||||
file. You can tell if this has happened by looking at the
|
||||
``"calledgetallpages"`` and ``"pushedinheritedpageresources"``
|
||||
fields in the first element of the ``"qpdf"`` array.
|
||||
|
||||
- While qpdf guarantees that keys present in the help will be present
|
||||
in the output, those fields may be null or empty if the information
|
||||
@ -743,16 +734,17 @@ version 2.
|
||||
dictionary containing either a ``"value"`` key or a ``"stream"``
|
||||
key, making it possible to distinguish streams from other objects.
|
||||
|
||||
- The ``"objectinfo"`` key has been removed in favor of a
|
||||
representation in ``"objects"`` that differentiates between a stream
|
||||
and other kinds of objects. In v1, it was not possible to tell a
|
||||
stream from a dictionary within ``"objects"``.
|
||||
- The ``"objectinfo"`` and ``"objects"`` keys have been removed in
|
||||
favor of a representation in ``"qpdf"`` that includes header
|
||||
information and differentiates between a stream and other kinds of
|
||||
objects. In v1, it was not possible to tell a stream from a
|
||||
dictionary within ``"objects"``, and the PDF version was not
|
||||
captured at all.
|
||||
|
||||
- Within the ``"objects"`` dictionary, keys are now ``"obj:O G R"``
|
||||
where ``O`` and ``G`` are the object and generation number.
|
||||
``"trailer"`` remains the key for the trailer dictionary. In v1, the
|
||||
``obj:`` prefix was not present. The rationale for this change is as
|
||||
follows:
|
||||
- Within the objects dictionary, keys are now ``"obj:O G R"`` where
|
||||
``O`` and ``G`` are the object and generation number. ``"trailer"``
|
||||
remains the key for the trailer dictionary. In v1, the ``obj:``
|
||||
prefix was not present. The rationale for this change is as follows:
|
||||
|
||||
- Having a unique prefix (``obj:``) makes it much easier to search
|
||||
in the JSON file for the definition of an object
|
||||
|
Loading…
x
Reference in New Issue
Block a user