diff --git a/TODO b/TODO index 18317fd4..d33141ba 100644 --- a/TODO +++ b/TODO @@ -50,18 +50,6 @@ Output JSON v2 Before starting on v2 format: -* Some if not all of the json output functionality should move from - QPDFJob to QPDF. There can top-level QPDF methods that take a - pipeline and write the JSON serialization to it. For things that - generate smaller amounts of output (constant-size stuff, lists of - attachments), we can also have a version that returns a string. For - the benefit of users of other languages, we can have something that - takes a FILE* or writes to stdout as well. This would be a good time - to make sure all the information from --check and other - informational options (--show-linearization, --show-encryption, - --show-xref, --list-attachments, --show-npages) is available in the - json output. - * Writing objects should write in numerical order with the trailer at the end. @@ -70,15 +58,18 @@ Before starting on v2 format: the input), not by overwriting, in case this has any unwanted side effects. -* Figure out how/whether to do schema checks with incremental write. - Consider changing the contract to allow fields to be absent even - when present in the schema. It's reasonable for people to check for - presence of a key. Most languages make this easy to do. - General things to remember: * deprecate getJSON without a version +* Make sure all the information from --check and other informational + options (--show-linearization, --show-encryption, --show-xref, + --list-attachments, --show-npages) is available in the json output. + +* Consider changing the contract to allow fields to be absent even + when present in the schema. It's reasonable for people to check for + presence of a key. Most languages make this easy to do. + * The choices for json_key (job.yml) will be different for v1 and v2. That information is already duplicated in multiple places. diff --git a/job.sums b/job.sums index 5bbb7688..b59e0cff 100644 --- a/job.sums +++ b/job.sums @@ -14,4 +14,4 @@ libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a947431 libqpdf/qpdf/auto_job_json_init.hh e7047a7c83737adfaae49abc295a579bb9b9e0a4644e911d1656a604cb202208 libqpdf/qpdf/auto_job_schema.hh cbbcae166cfecbdbdeb40c5a30870e03604a019a8b4f7a217d554a82431d2e5f manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 -manual/cli.rst 8a5a12351df6a42f91d6d271b2d065a843d8daa5125d8723d474e4180d7abbf1 +manual/cli.rst 8684ca1f601f2832cded52d1b2f74730f97b7b85b57e31a399231731fbe80d26 diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 1e456f24..468a0046 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -1592,6 +1593,13 @@ QPDFJob::json_out_schema_v1() void QPDFJob::doJSON(QPDF& pdf, Pipeline* p) { + std::string captured_json; + std::shared_ptr pl_str; + if (this->m->test_json_schema) { + pl_str = std::make_shared("capture json", p, captured_json); + p = pl_str.get(); + } + JSON j = JSON::makeDictionary(); // This version is updated every time a non-backward-compatible // change is made to the JSON format. Clients of the JSON are to @@ -1651,23 +1659,22 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) doJSONObjectinfo(pdf, j); } - // Check against schema + *p << j.unparse() << "\n"; - JSON schema = json_schema(&m->json_keys); - std::list errors; - if (!j.checkSchema(schema, errors)) { - *(this->m->cerr) - << "QPDFJob didn't create JSON that complies with its own rules.\n\ -Please report this as a bug at\n\ - https://github.com/qpdf/qpdf/issues/new\n\ -ideally with the file that caused the error and the output below. Thanks!\n\ -\n"; - for (auto const& error: errors) { - *(this->m->cerr) << error << std::endl; + if (this->m->test_json_schema) { + // Check against schema + JSON schema = json_schema(&m->json_keys); + std::list errors; + JSON captured = JSON::parse(captured_json); + if (!captured.checkSchema(schema, errors)) { + *(this->m->cerr) << "QPDFJob didn't create JSON that complies with" + " its own rules." + << std::endl; + for (auto const& error: errors) { + *(this->m->cerr) << error << std::endl; + } } } - - *p << j.unparse() << "\n"; } void diff --git a/manual/cli.rst b/manual/cli.rst index a853afd3..97ee85d7 100644 --- a/manual/cli.rst +++ b/manual/cli.rst @@ -3336,6 +3336,9 @@ Related Options This is used by qpdf's test suite to check consistency between the output of ``qpdf --json`` and the output of ``qpdf --json-help``. + This option causes an extra copy of the generated json to appear in + memory and is therefore unsuitable for use with large files. This + is why it's also not on by default. .. _unicode-passwords: diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 3b26c9c8..4ac6a0ec 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -1124,7 +1124,8 @@ foreach my $d (@json_files) } my $in = "$file.pdf"; $td->runtest("json $out", - {$td->COMMAND => ['qpdf', '--json', @$xargs, $in]}, + {$td->COMMAND => + ['qpdf', '--json=1', '--test-json-schema', @$xargs, $in]}, {$td->FILE => "$out.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); }