mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-31 14:01:59 +00:00
Make "objects" and "pages" consistent in JSON output
This commit is contained in:
parent
6b576797cd
commit
8b25de24c9
11
ChangeLog
11
ChangeLog
@ -1,3 +1,14 @@
|
||||
2022-05-04 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* json v1 output: make "pages" and "objects" consistent.
|
||||
Previously, "objects" always reflected the objects exactly as they
|
||||
appeared in the original file, while "pages" reflected objects
|
||||
after repair of the pages tree. This could be misleading. Now, if
|
||||
"pages" is specified, "objects" shows the effects of repairing the
|
||||
page tree, and if not, it doesn't. This makes no difference for
|
||||
correct PDF files that don't have problems in the pages tree. JSON
|
||||
v2 will behave in a similar way.
|
||||
|
||||
2022-05-03 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add new Pipeline class Pl_String which appends to a std::string&
|
||||
|
@ -511,6 +511,7 @@
|
||||
"unfilterable",
|
||||
"unparse",
|
||||
"unpickling",
|
||||
"unrepaired",
|
||||
"unretrieved",
|
||||
"unversioned",
|
||||
"upages",
|
||||
|
@ -1618,15 +1618,7 @@ QPDFJob::doJSON(QPDF& pdf)
|
||||
bool all_keys = m->json_keys.empty();
|
||||
// The list of selectable top-level keys is duplicated in the
|
||||
// following places: job.yml, QPDFJob::json_schema, and
|
||||
// QPDFJob::doJSON. We do objects and objectinfo first so they
|
||||
// reflect the original file without any side effects caused by
|
||||
// other operations, such as repairing the pages tree.
|
||||
if (all_keys || m->json_keys.count("objects")) {
|
||||
doJSONObjects(pdf, j);
|
||||
}
|
||||
if (all_keys || m->json_keys.count("objectinfo")) {
|
||||
doJSONObjectinfo(pdf, j);
|
||||
}
|
||||
// QPDFJob::doJSON.
|
||||
if (all_keys || m->json_keys.count("pages")) {
|
||||
doJSONPages(pdf, j);
|
||||
}
|
||||
@ -1646,6 +1638,17 @@ QPDFJob::doJSON(QPDF& pdf)
|
||||
doJSONAttachments(pdf, j);
|
||||
}
|
||||
|
||||
// We do objects and objectinfo last so their information is
|
||||
// consistent with repairing the page tree. To see the original
|
||||
// file with any page tree problems and the page tree not
|
||||
// flattened, select objects/objectinfo without other keys.
|
||||
if (all_keys || m->json_keys.count("objects")) {
|
||||
doJSONObjects(pdf, j);
|
||||
}
|
||||
if (all_keys || m->json_keys.count("objectinfo")) {
|
||||
doJSONObjectinfo(pdf, j);
|
||||
}
|
||||
|
||||
// Check against schema
|
||||
|
||||
JSON schema = json_schema(&m->json_keys);
|
||||
|
@ -147,6 +147,16 @@ For the most part, the built-in JSON help tells you everything you need
|
||||
to know about the JSON format, but there are a few non-obvious things to
|
||||
be aware of:
|
||||
|
||||
- If a PDF file has certain types of errors in its pages tree (such as
|
||||
page objects that are direct or multiple pages sharing the same
|
||||
object ID), qpdf will automatically repair the pages tree. If you
|
||||
specify ``"objects"`` and/or ``"objectinfo"`` without any other
|
||||
keys, you will see the original pages tree without any corrections.
|
||||
If you specify any of the keys that require page tree traversal (for
|
||||
example, ``"pages"``, ``"outlines"``, or ``"pagelabel"``), then
|
||||
``"objects"`` and ``"objectinfo"`` will show the repaired page tree
|
||||
so that object references will be consistent throughout the file.
|
||||
|
||||
- While qpdf guarantees that keys present in the help will be present
|
||||
in the output, those fields may be null or empty if the information
|
||||
is not known or absent in the file. Also, if you specify
|
||||
|
@ -125,6 +125,13 @@ For a detailed list of changes, please see the file
|
||||
|
||||
- Other changes
|
||||
|
||||
- In JSON v1 mode, the ``"objects"`` key now reflects the repaired
|
||||
pages tree if ``"pages"`` (or any other key that has the side
|
||||
effect of repairing the page tree) is specified. To see the
|
||||
original objects with any unrepaired page tree errors, specify
|
||||
``"objects"`` and/or ``"objectinfo"`` by themselves. This is
|
||||
consistent with how JSON v2 behaves.
|
||||
|
||||
- A new chapter on contributing to qpdf has been added to the
|
||||
documentation. See :ref:`contributing`.
|
||||
|
||||
|
@ -2829,7 +2829,7 @@ $td->runtest("check output",
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Page Tree Issues ---");
|
||||
$n_tests += 9;
|
||||
$n_tests += 11;
|
||||
|
||||
$td->runtest("linearize duplicated pages",
|
||||
{$td->COMMAND =>
|
||||
@ -2864,14 +2864,22 @@ $td->runtest("show direct pages",
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
# Json mode for direct and duplicated pages illustrates that the
|
||||
# "objects" section still shows the original objects before correction
|
||||
# but the "pages" section shows the pages with their new object
|
||||
# numbers.
|
||||
# "objects" section shows the original objects before correction when
|
||||
# "pages" is not output but after correction when it is.
|
||||
foreach my $f (qw(page_api_2 direct-pages))
|
||||
{
|
||||
$td->runtest("json for $f",
|
||||
{$td->COMMAND => "qpdf --json=latest $f.pdf"},
|
||||
{$td->FILE => "$f-json.out", $td->EXIT_STATUS => 0},
|
||||
$td->runtest("json for $f (objects only)",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --json=latest $f.pdf" .
|
||||
" --json-key=objects --json-key=objectinfo"},
|
||||
{$td->FILE => "$f-json-objects.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("json for $f (with pages)",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --json=latest $f.pdf" .
|
||||
" --json-key=objects --json-key=objectinfo" .
|
||||
" --json-key=pages"},
|
||||
{$td->FILE => "$f-json-pages.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
}
|
||||
|
||||
|
@ -1,37 +1,4 @@
|
||||
{
|
||||
"acroform": {
|
||||
"fields": [],
|
||||
"hasacroform": false,
|
||||
"needappearances": false
|
||||
},
|
||||
"attachments": {},
|
||||
"encrypt": {
|
||||
"capabilities": {
|
||||
"accessibility": true,
|
||||
"extract": true,
|
||||
"moddifyannotations": true,
|
||||
"modify": true,
|
||||
"modifyassembly": true,
|
||||
"modifyforms": true,
|
||||
"modifyother": true,
|
||||
"printhigh": true,
|
||||
"printlow": true
|
||||
},
|
||||
"encrypted": false,
|
||||
"ownerpasswordmatched": false,
|
||||
"parameters": {
|
||||
"P": 0,
|
||||
"R": 0,
|
||||
"V": 0,
|
||||
"bits": 0,
|
||||
"filemethod": "none",
|
||||
"key": null,
|
||||
"method": "none",
|
||||
"streammethod": "none",
|
||||
"stringmethod": "none"
|
||||
},
|
||||
"userpasswordmatched": false
|
||||
},
|
||||
"objectinfo": {
|
||||
"1 0 R": {
|
||||
"stream": {
|
||||
@ -145,30 +112,6 @@
|
||||
"/Size": 7
|
||||
}
|
||||
},
|
||||
"outlines": [],
|
||||
"pagelabels": [],
|
||||
"pages": [
|
||||
{
|
||||
"contents": [
|
||||
"3 0 R"
|
||||
],
|
||||
"images": [],
|
||||
"label": null,
|
||||
"object": "7 0 R",
|
||||
"outlines": [],
|
||||
"pageposfrom1": 1
|
||||
},
|
||||
{
|
||||
"contents": [
|
||||
"3 0 R"
|
||||
],
|
||||
"images": [],
|
||||
"label": null,
|
||||
"object": "8 0 R",
|
||||
"outlines": [],
|
||||
"pageposfrom1": 2
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"decodelevel": "generalized"
|
||||
},
|
157
qpdf/qtest/qpdf/direct-pages-json-pages.out
Normal file
157
qpdf/qtest/qpdf/direct-pages-json-pages.out
Normal file
@ -0,0 +1,157 @@
|
||||
{
|
||||
"objectinfo": {
|
||||
"1 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"2 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"3 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": true,
|
||||
"length": 44
|
||||
}
|
||||
},
|
||||
"4 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"5 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"6 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"7 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"8 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"objects": {
|
||||
"1 0 R": {
|
||||
"/Pages": "2 0 R",
|
||||
"/Type": "/Catalog"
|
||||
},
|
||||
"2 0 R": {
|
||||
"/Count": 2,
|
||||
"/Kids": [
|
||||
"7 0 R",
|
||||
"8 0 R"
|
||||
],
|
||||
"/Type": "/Pages"
|
||||
},
|
||||
"3 0 R": {
|
||||
"/Length": "4 0 R"
|
||||
},
|
||||
"4 0 R": 44,
|
||||
"5 0 R": {
|
||||
"/BaseFont": "/Helvetica",
|
||||
"/Encoding": "/WinAnsiEncoding",
|
||||
"/Name": "/F1",
|
||||
"/Subtype": "/Type1",
|
||||
"/Type": "/Font"
|
||||
},
|
||||
"6 0 R": [
|
||||
"/PDF",
|
||||
"/Text"
|
||||
],
|
||||
"7 0 R": {
|
||||
"/Contents": "3 0 R",
|
||||
"/MediaBox": [
|
||||
0,
|
||||
0,
|
||||
612,
|
||||
792
|
||||
],
|
||||
"/Parent": "2 0 R",
|
||||
"/Resources": {
|
||||
"/Font": {
|
||||
"/F1": "5 0 R"
|
||||
},
|
||||
"/ProcSet": "6 0 R"
|
||||
},
|
||||
"/Type": "/Page"
|
||||
},
|
||||
"8 0 R": {
|
||||
"/Contents": "3 0 R",
|
||||
"/MediaBox": [
|
||||
0,
|
||||
0,
|
||||
612,
|
||||
792
|
||||
],
|
||||
"/Parent": "2 0 R",
|
||||
"/Resources": {
|
||||
"/Font": {
|
||||
"/F1": "5 0 R"
|
||||
},
|
||||
"/ProcSet": "6 0 R"
|
||||
},
|
||||
"/Type": "/Page"
|
||||
},
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"\u0013#¥fi|WzfsU…©6ŸÎ<",
|
||||
"7,¿DöÛ‹«`Ù&<\u000f\u000bÒj"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 7
|
||||
}
|
||||
},
|
||||
"pages": [
|
||||
{
|
||||
"contents": [
|
||||
"3 0 R"
|
||||
],
|
||||
"images": [],
|
||||
"label": null,
|
||||
"object": "7 0 R",
|
||||
"outlines": [],
|
||||
"pageposfrom1": 1
|
||||
},
|
||||
{
|
||||
"contents": [
|
||||
"3 0 R"
|
||||
],
|
||||
"images": [],
|
||||
"label": null,
|
||||
"object": "8 0 R",
|
||||
"outlines": [],
|
||||
"pageposfrom1": 2
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"decodelevel": "generalized"
|
||||
},
|
||||
"version": 1
|
||||
}
|
160
qpdf/qtest/qpdf/page_api_2-json-objects.out
Normal file
160
qpdf/qtest/qpdf/page_api_2-json-objects.out
Normal file
@ -0,0 +1,160 @@
|
||||
{
|
||||
"objectinfo": {
|
||||
"1 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"10 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"2 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"3 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"4 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"5 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"6 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": true,
|
||||
"length": 47
|
||||
}
|
||||
},
|
||||
"7 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"8 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"9 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": true,
|
||||
"length": 47
|
||||
}
|
||||
}
|
||||
},
|
||||
"objects": {
|
||||
"1 0 R": {
|
||||
"/Pages": "3 0 R",
|
||||
"/Type": "/Catalog"
|
||||
},
|
||||
"10 0 R": 47,
|
||||
"2 0 R": {
|
||||
"/CreationDate": "D:20120621124041",
|
||||
"/Producer": "Apex PDFWriter"
|
||||
},
|
||||
"3 0 R": {
|
||||
"/Count": 3,
|
||||
"/Kids": [
|
||||
"4 0 R",
|
||||
"4 0 R",
|
||||
"5 0 R"
|
||||
],
|
||||
"/Type": "/Pages"
|
||||
},
|
||||
"4 0 R": {
|
||||
"/Contents": "6 0 R",
|
||||
"/MediaBox": [
|
||||
0,
|
||||
0,
|
||||
612,
|
||||
792
|
||||
],
|
||||
"/Parent": "3 0 R",
|
||||
"/Resources": {
|
||||
"/Font": {
|
||||
"/F1": "8 0 R"
|
||||
},
|
||||
"/ProcSet": [
|
||||
"/PDF",
|
||||
"/Text"
|
||||
]
|
||||
},
|
||||
"/Type": "/Page"
|
||||
},
|
||||
"5 0 R": {
|
||||
"/Contents": "9 0 R",
|
||||
"/MediaBox": [
|
||||
0,
|
||||
0,
|
||||
612,
|
||||
792
|
||||
],
|
||||
"/Parent": "3 0 R",
|
||||
"/Resources": {
|
||||
"/Font": {
|
||||
"/F1": "8 0 R"
|
||||
},
|
||||
"/ProcSet": [
|
||||
"/PDF",
|
||||
"/Text"
|
||||
]
|
||||
},
|
||||
"/Type": "/Page"
|
||||
},
|
||||
"6 0 R": {
|
||||
"/Length": "7 0 R"
|
||||
},
|
||||
"7 0 R": 47,
|
||||
"8 0 R": {
|
||||
"/BaseFont": "/Times-Roman",
|
||||
"/Encoding": "/WinAnsiEncoding",
|
||||
"/Subtype": "/Type1",
|
||||
"/Type": "/Font"
|
||||
},
|
||||
"9 0 R": {
|
||||
"/Length": "10 0 R"
|
||||
},
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"û˘·ƒÿ{5⁄\u0005Ú−S*º‘o",
|
||||
"÷\u0017ž³QY¿ÔÀ\u000f\u0012−¼ý˜\u0002"
|
||||
],
|
||||
"/Info": "2 0 R",
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 11
|
||||
}
|
||||
},
|
||||
"parameters": {
|
||||
"decodelevel": "generalized"
|
||||
},
|
||||
"version": 1
|
||||
}
|
@ -1,37 +1,4 @@
|
||||
{
|
||||
"acroform": {
|
||||
"fields": [],
|
||||
"hasacroform": false,
|
||||
"needappearances": false
|
||||
},
|
||||
"attachments": {},
|
||||
"encrypt": {
|
||||
"capabilities": {
|
||||
"accessibility": true,
|
||||
"extract": true,
|
||||
"moddifyannotations": true,
|
||||
"modify": true,
|
||||
"modifyassembly": true,
|
||||
"modifyforms": true,
|
||||
"modifyother": true,
|
||||
"printhigh": true,
|
||||
"printlow": true
|
||||
},
|
||||
"encrypted": false,
|
||||
"ownerpasswordmatched": false,
|
||||
"parameters": {
|
||||
"P": 0,
|
||||
"R": 0,
|
||||
"V": 0,
|
||||
"bits": 0,
|
||||
"filemethod": "none",
|
||||
"key": null,
|
||||
"method": "none",
|
||||
"streammethod": "none",
|
||||
"stringmethod": "none"
|
||||
},
|
||||
"userpasswordmatched": false
|
||||
},
|
||||
"objectinfo": {
|
||||
"1 0 R": {
|
||||
"stream": {
|
||||
@ -47,6 +14,13 @@
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"11 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
"is": false,
|
||||
"length": null
|
||||
}
|
||||
},
|
||||
"2 0 R": {
|
||||
"stream": {
|
||||
"filter": null,
|
||||
@ -110,6 +84,26 @@
|
||||
"/Type": "/Catalog"
|
||||
},
|
||||
"10 0 R": 47,
|
||||
"11 0 R": {
|
||||
"/Contents": "6 0 R",
|
||||
"/MediaBox": [
|
||||
0,
|
||||
0,
|
||||
612,
|
||||
792
|
||||
],
|
||||
"/Parent": "3 0 R",
|
||||
"/Resources": {
|
||||
"/Font": {
|
||||
"/F1": "8 0 R"
|
||||
},
|
||||
"/ProcSet": [
|
||||
"/PDF",
|
||||
"/Text"
|
||||
]
|
||||
},
|
||||
"/Type": "/Page"
|
||||
},
|
||||
"2 0 R": {
|
||||
"/CreationDate": "D:20120621124041",
|
||||
"/Producer": "Apex PDFWriter"
|
||||
@ -118,7 +112,7 @@
|
||||
"/Count": 3,
|
||||
"/Kids": [
|
||||
"4 0 R",
|
||||
"4 0 R",
|
||||
"11 0 R",
|
||||
"5 0 R"
|
||||
],
|
||||
"/Type": "/Pages"
|
||||
@ -186,8 +180,6 @@
|
||||
"/Size": 11
|
||||
}
|
||||
},
|
||||
"outlines": [],
|
||||
"pagelabels": [],
|
||||
"pages": [
|
||||
{
|
||||
"contents": [
|
Loading…
Reference in New Issue
Block a user