2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-03 23:27:25 +00:00

Test --update-from-json

This commit is contained in:
Jay Berkenbilt 2022-05-20 11:10:12 -04:00
parent ef955b04b5
commit d065098089
17 changed files with 849 additions and 48 deletions

35
TODO
View File

@ -58,15 +58,10 @@ Some of this documentation has drifted from the actual implementation.
Make sure pages tree repair generates warnings.
* Reread from perspective of update
* Test all ignore cases with QTC
* Test case of correct file with dict before data/datafile
* Have a test case if possible that exercises the object description
which means we need some kind of semantic error that gets caught
after creation.
* Test invalid data, invalid data file
* Tests: round-trip through json, round-trip through qpdf --qdf
* Test to see if we get CR/NL on Windows, which is okay
Try to never flatten pages tree. Make sure we do something reasonable
with pages tree repair. The problem is that if pages tree repair is
@ -104,22 +99,20 @@ JSON to PDF:
Have --json-input and --update-from-json. With --json-input, the json
file must be complete, meaning all stream data, the trailer, and the
PDF version must be present. In --update-from-json, an object
explicitly set to null (not "value": null) is deleted. For streams
with no stream data, the dictionary is updated but the data is left
untouched. Other things that are omitted are left alone. Make sure
document that, when writing a PDF file from QPDF, there is no
expectation of object numbers being preserved. As such,
--update-from-json can only be used to update the exact file that the
json was created from. You can put multiple objects in the update
file, but you can't use a json from one file to update the output of a
previous update since the object numbers will have changed. Note that,
when creating from a JSON, object numbers are preserved in the
resulting QPDF object but still modified by QPDFWriter for the output.
This would be visible by combining --json-output and --json-input.
Also using --qdf with --create-from-json would show original object
IDs in comments. It will be important to capture this in the
documentation.
PDF version must be present. For streams with no stream data, the
dictionary is updated but the data is left untouched. Other things
that are omitted are left alone. Make sure to document that, when writing
a PDF file from QPDF, there is no expectation of object numbers being
preserved. As such, --update-from-json can only be used to update the
exact file that the json was created from. You can put multiple
objects in the update file, but you can't use a json from one file to
update the output of a previous update since the object numbers will
have changed. Note that, when creating from a JSON, object numbers are
preserved in the resulting QPDF object but still modified by
QPDFWriter for the output. This would be visible by combining
--json-output and --json-input. Also using --qdf with
--create-from-json would show original object IDs in comments. It will
be important to capture this in the documentation.
When reading a JSON string, any string that doesn't look like a name
or indirect object or start with "b:" or "u:" should be considered an

View File

@ -1065,6 +1065,7 @@ class QPDF
bool saw_dict;
bool saw_data;
bool saw_datafile;
bool this_stream_needs_data;
std::vector<state_e> state_stack;
std::vector<QPDFObjectHandle> object_stack;
std::set<QPDFObjGen> reserved;

View File

@ -102,7 +102,8 @@ QPDF::JSONReactor::JSONReactor(
saw_stream(false),
saw_dict(false),
saw_data(false),
saw_datafile(false)
saw_datafile(false),
this_stream_needs_data(false)
{
state_stack.push_back(st_initial);
}
@ -111,8 +112,11 @@ void
QPDF::JSONReactor::error(size_t offset, std::string const& msg)
{
this->errors = true;
this->pdf.warn(
qpdf_e_json, this->cur_object, QIntC::to_offset(offset), msg);
std::string object = this->cur_object;
if (is->getName() != pdf.getFilename()) {
object += " from " + is->getName();
}
this->pdf.warn(qpdf_e_json, object, QIntC::to_offset(offset), msg);
}
bool
@ -196,20 +200,22 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
QTC::TC("qpdf", "QPDF_json stream no dict");
error(value.getStart(), "\"stream\" is missing \"dict\"");
}
if (must_be_complete) {
if (saw_data == saw_datafile) {
if (saw_data == saw_datafile) {
if (this_stream_needs_data) {
QTC::TC("qpdf", "QPDF_json data datafile both or neither");
error(
value.getStart(),
"\"stream\" must have exactly one of \"data\" or "
"new \"stream\" must have exactly one of \"data\" or "
"\"datafile\"");
} else if (saw_datafile) {
QTC::TC("qpdf", "QPDF_json data and datafile");
error(
value.getStart(),
"existing \"stream\" may at most one of \"data\" or "
"\"datafile\"");
} else {
QTC::TC("qpdf", "QPDF_json no stream data in update mode");
}
} else if (saw_data && saw_datafile) {
// QXXXQ
/// QTC::TC("qpdf", "QPDF_json data and datafile");
error(
value.getStart(),
"\"stream\" may at most one of \"data\" or \"datafile\"");
}
}
} else if ((state == st_stream) || (state == st_object)) {
@ -320,7 +326,6 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
nestedState(key, value, st_trailer);
this->cur_object = "trailer";
} else if (std::regex_match(key, m, OBJ_KEY_RE)) {
// QXXXQ remember to handle null for delete
object_stack.push_back(reserveObject(m[1].str(), m[2].str()));
nestedState(key, value, st_object_top);
this->cur_object = key;
@ -347,9 +352,11 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
} else if (key == "stream") {
this->saw_stream = true;
nestedState(key, value, st_stream);
this->this_stream_needs_data = false;
if (tos.isStream()) {
// QXXXQ reusing -- need QTC
QTC::TC("qpdf", "QPDF_json updating existing stream");
} else {
this->this_stream_needs_data = true;
replacement =
pdf.reserveStream(tos.getObjectID(), tos.getGeneration());
replaceObject(tos, replacement);
@ -386,12 +393,11 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
throw std::logic_error("no object on stack in st_stream");
}
auto tos = object_stack.back();
auto uninitialized = QPDFObjectHandle();
if (!tos.isStream()) {
// QXXXQ QTC in update mode
error(value.getStart(), "this object is not a stream");
parse_error = true;
} else if (key == "dict") {
throw std::logic_error("top of stack is not stream in st_stream");
}
auto uninitialized = QPDFObjectHandle();
if (key == "dict") {
this->saw_dict = true;
// Since a stream dictionary must be a dictionary, we can
// use nestedState to transition to st_value.
@ -509,11 +515,12 @@ QPDF::JSONReactor::makeObject(JSON const& value)
"JSONReactor::makeObject didn't initialize the object");
}
// QXXXQ include object number in description
result.setObjectDescription(
&this->pdf,
this->is->getName() + " offset " +
QUtil::uint_to_string(value.getStart()));
std::string description = this->is->getName();
if (!this->cur_object.empty()) {
description += " " + this->cur_object + ",";
}
description += " offset " + QUtil::uint_to_string(value.getStart());
result.setObjectDescription(&this->pdf, description);
return result;
}

View File

@ -676,3 +676,6 @@ QPDF_json ignore unknown key in object_top 0
QPDF_json ignore unknown key in trailer 0
QPDF_json ignore unknown key in stream 0
QPDF_json non-trivial null reserved 0
QPDF_json data and datafile 0
QPDF_json no stream data in update mode 0
QPDF_json updating existing stream 0

View File

@ -171,5 +171,35 @@ $td->runtest("check PDF",
{$td->FILE => "a.pdf"},
{$td->FILE => "b.pdf"});
# Replace mode tests
#
# Exercises qpdf's --update-from-json option against good13.pdf.

# Error case: run the update with qpdf-json-update-errors.json. The
# command's output is compared against update-from-json-errors.out and
# the command is expected to exit with status 2.
$n_tests += 1;
$td->runtest("create PDF for replace",
{$td->COMMAND => "qpdf good13.pdf a.pdf" .
" --update-from-json=qpdf-json-update-errors.json"},
{$td->FILE => "update-from-json-errors.out",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
# Success cases: each entry is the base name of an update file
# ("<name>.json") that has a matching expected result file
# ("<name>-updated.pdf").
my @update_files = (
"update-stream-dict-only",
"update-stream-data",
"replace-with-stream",
"various-updates",
);
# Two runtest calls are made per update file (apply, then compare).
$n_tests += 2 * scalar(@update_files);
foreach my $f (@update_files) {
# Apply the update, writing a.pdf with --qdf and --static-id; the
# command must produce no output and exit with status 0.
$td->runtest("update: $f",
{$td->COMMAND =>
"qpdf good13.pdf a.pdf --qdf --static-id" .
" --update-from-json=$f.json"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
# Compare the generated a.pdf with the expected output file.
$td->runtest("$f: check updated",
{$td->FILE => "a.pdf"},
{$td->FILE => "$f-updated.pdf"});
}
cleanup();
$td->report($n_tests);

View File

@ -1,7 +1,7 @@
WARNING: qjson-obj-key-errors.json (obj:2 0 R, offset 218): object must have exactly one of "value" or "stream"
WARNING: qjson-obj-key-errors.json (obj:3 0 R, offset 516): object must have exactly one of "value" or "stream"
WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 684): "stream" is missing "dict"
WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 684): "stream" must have exactly one of "data" or "datafile"
WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 774): "stream" must have exactly one of "data" or "datafile"
WARNING: qjson-obj-key-errors.json (obj:4 0 R, offset 684): new "stream" must have exactly one of "data" or "datafile"
WARNING: qjson-obj-key-errors.json (obj:5 0 R, offset 774): new "stream" must have exactly one of "data" or "datafile"
WARNING: qjson-obj-key-errors.json (trailer, offset 1152): "trailer" is missing "value"
qpdf: qjson-obj-key-errors.json: errors found in JSON

View File

@ -1,5 +1,5 @@
WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 118): "stream.dict" must be a dictionary
WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 118): unrecognized string value
WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 98): "stream" must have exactly one of "data" or "datafile"
WARNING: qjson-stream-dict-not-dict.json (obj:1 0 R, offset 98): new "stream" must have exactly one of "data" or "datafile"
WARNING: qjson-stream-dict-not-dict.json: "qpdf-v2.objects.trailer" was not seen
qpdf: qjson-stream-dict-not-dict.json: errors found in JSON

View File

@ -0,0 +1,20 @@
{
"qpdf-v2": {
"objects": {
"obj:4 0 R": {
"stream": {
"data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
"datafile": "file-too",
"dict": {}
}
},
"obj:20 0 R": {
"stream": {
"dict": {
"/Bad": "string-value"
}
}
}
}
}
}

View File

@ -0,0 +1,182 @@
%PDF-2.0
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 3 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 7 0
2 0 obj
<<
/dangling-ref-for-json-test [
4 0 R
]
/hex#20strings [
(Potato)
<01020300040560>
(AB)
]
/indirect 5 0 R
/names [
/nesting
/hex#20strings
/text#2fplain
]
/nesting <<
/a [
1
2
<<
/x (y)
>>
[
(z)
]
]
/b <<
/ (legal)
/a [
1
2
]
>>
>>
/strings [
(one)
<24a2>
()
(\(\))
(\()
(\))
(a\f\b\t\r\nb)
(")
("")
("\("\)")
<410042>
(a\nb)
(a b)
<efbbbfcf80>
<efbbbff09fa594>
]
>>
endobj
%% Original object ID: 2 0
3 0 obj
<<
/Count 1
/Kids [
7 0 R
]
/Type /Pages
>>
endobj
%% Original object ID: 9 0
4 0 obj
null
endobj
%% Original object ID: 8 0
5 0 obj
<<
/K /V
/Length 6 0 R
>>
stream
new-stream-here
endstream
endobj
6 0 obj
16
endobj
%% Page 1
%% Original object ID: 3 0
7 0 obj
<<
/Contents 8 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 10 0 R
>>
/ProcSet 11 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
%% Original object ID: 4 0
8 0 obj
<<
/Length 9 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
9 0 obj
44
endobj
%% Original object ID: 6 0
10 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 5 0
11 0 obj
[
/PDF
/Text
]
endobj
xref
0 12
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000756 00000 n
0000000855 00000 n
0000000903 00000 n
0000000982 00000 n
0000001038 00000 n
0000001282 00000 n
0000001381 00000 n
0000001427 00000 n
0000001573 00000 n
trailer <<
/QTest 2 0 R
/Root 1 0 R
/Size 12
/ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
>>
startxref
1609
%%EOF

View File

@ -0,0 +1,16 @@
{
"qpdf-v2": {
"pdfversion": "2.0",
"maxobjectid": 9,
"objects": {
"obj:8 0 R": {
"stream": {
"data": "bmV3LXN0cmVhbS1oZXJlCg==",
"dict": {
"/K": "/V"
}
}
}
}
}
}

View File

@ -0,0 +1,4 @@
WARNING: good13.pdf (obj:4 0 R from qpdf-json-update-errors.json, offset 73): existing "stream" may at most one of "data" or "datafile"
WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 313): unrecognized string value
WARNING: good13.pdf (obj:20 0 R from qpdf-json-update-errors.json, offset 271): new "stream" must have exactly one of "data" or "datafile"
qpdf: qpdf-json-update-errors.json: errors found in JSON

View File

@ -0,0 +1,171 @@
%PDF-2.0
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 3 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 7 0
2 0 obj
<<
/dangling-ref-for-json-test [
4 0 R
]
/hex#20strings [
(Potato)
<01020300040560>
(AB)
]
/indirect 5 0 R
/names [
/nesting
/hex#20strings
/text#2fplain
]
/nesting <<
/a [
1
2
<<
/x (y)
>>
[
(z)
]
]
/b <<
/ (legal)
/a [
1
2
]
>>
>>
/strings [
(one)
<24a2>
()
(\(\))
(\()
(\))
(a\f\b\t\r\nb)
(")
("")
("\("\)")
<410042>
(a\nb)
(a b)
<efbbbfcf80>
<efbbbff09fa594>
]
>>
endobj
%% Original object ID: 2 0
3 0 obj
<<
/Count 1
/Kids [
6 0 R
]
/Type /Pages
>>
endobj
%% Original object ID: 9 0
4 0 obj
null
endobj
%% Original object ID: 8 0
5 0 obj
(hello)
endobj
%% Page 1
%% Original object ID: 3 0
6 0 obj
<<
/Contents 7 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 9 0 R
>>
/ProcSet 10 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
%% Original object ID: 4 0
7 0 obj
<<
/Length 8 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Salad) Tj
ET
endstream
endobj
8 0 obj
43
endobj
%% Original object ID: 6 0
9 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 5 0
10 0 obj
[
/PDF
/Text
]
endobj
xref
0 11
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000756 00000 n
0000000855 00000 n
0000000903 00000 n
0000000964 00000 n
0000001207 00000 n
0000001305 00000 n
0000001351 00000 n
0000001496 00000 n
trailer <<
/QTest 2 0 R
/Root 1 0 R
/Size 11
/ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
>>
startxref
1532
%%EOF

View File

@ -0,0 +1,20 @@
{
"qpdf-v2": {
"pdfversion": "2.0",
"maxobjectid": 9,
"objects": {
"obj:1 0 R": {
"value": {
"/Pages": "2 0 R",
"/Type": "/Catalog"
}
},
"obj:4 0 R": {
"stream": {
"data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoU2FsYWQpIFRqCkVUCg==",
"dict": {}
}
}
}
}
}

View File

@ -0,0 +1,172 @@
%PDF-2.0
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 3 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 7 0
2 0 obj
<<
/dangling-ref-for-json-test [
4 0 R
]
/hex#20strings [
(Potato)
<01020300040560>
(AB)
]
/indirect 5 0 R
/names [
/nesting
/hex#20strings
/text#2fplain
]
/nesting <<
/a [
1
2
<<
/x (y)
>>
[
(z)
]
]
/b <<
/ (legal)
/a [
1
2
]
>>
>>
/strings [
(one)
<24a2>
()
(\(\))
(\()
(\))
(a\f\b\t\r\nb)
(")
("")
("\("\)")
<410042>
(a\nb)
(a b)
<efbbbfcf80>
<efbbbff09fa594>
]
>>
endobj
%% Original object ID: 2 0
3 0 obj
<<
/Count 1
/Kids [
6 0 R
]
/Type /Pages
>>
endobj
%% Original object ID: 9 0
4 0 obj
null
endobj
%% Original object ID: 8 0
5 0 obj
(hello)
endobj
%% Page 1
%% Original object ID: 3 0
6 0 obj
<<
/Contents 7 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 9 0 R
>>
/ProcSet 10 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
%% Original object ID: 4 0
7 0 obj
<<
/Potato (salad)
/Length 8 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
8 0 obj
44
endobj
%% Original object ID: 6 0
9 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 5 0
10 0 obj
[
/PDF
/Text
]
endobj
xref
0 11
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000756 00000 n
0000000855 00000 n
0000000903 00000 n
0000000964 00000 n
0000001207 00000 n
0000001324 00000 n
0000001370 00000 n
0000001515 00000 n
trailer <<
/QTest 2 0 R
/Root 1 0 R
/Size 11
/ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
>>
startxref
1551
%%EOF

View File

@ -0,0 +1,15 @@
{
"qpdf-v2": {
"pdfversion": "2.0",
"maxobjectid": 9,
"objects": {
"obj:4 0 R": {
"stream": {
"dict": {
"/Potato": "u:salad"
}
}
}
}
}
}

View File

@ -0,0 +1,128 @@
%PDF-2.0
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 3 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 7 0
2 0 obj
<<
/indirect 4 0 R
/now-#cf#80 [
5 0 R
]
/strings [
(one)
<feff03c0>
]
>>
endobj
%% Original object ID: 2 0
3 0 obj
<<
/Count 1
/Kids [
6 0 R
]
/Type /Pages
>>
endobj
%% Original object ID: 8 0
4 0 obj
[
(hello)
7 0 R
]
endobj
%% Original object ID: 9 0
5 0 obj
3.14159
endobj
%% Page 1
%% Original object ID: 3 0
6 0 obj
<<
/Contents 8 0 R
/MediaBox [
0
0
612
792
]
/Parent 3 0 R
/Resources <<
/Font <<
/F1 10 0 R
>>
>>
/Type /Page
>>
endobj
%% Original object ID: 10 0
7 0 obj
(this is new)
endobj
%% Contents for page 1
%% Original object ID: 4 0
8 0 obj
<<
/Length 9 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
9 0 obj
44
endobj
%% Original object ID: 6 0
10 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
xref
0 11
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000272 00000 n
0000000371 00000 n
0000000436 00000 n
0000000497 00000 n
0000000699 00000 n
0000000779 00000 n
0000000878 00000 n
0000000924 00000 n
trailer <<
/QTest 2 0 R
/Root 1 0 R
/Size 11
/ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
>>
startxref
1043
%%EOF

View File

@ -0,0 +1,39 @@
{
"qpdf-v2": {
"pdfversion": "2.0",
"maxobjectid": 9,
"objects": {
"obj:5 0 R": {
"value": null
},
"obj:7 0 R": {
"value": {
"/now-π": [
"9 0 R"
],
"/indirect": "8 0 R",
"/strings": [
"u:one",
"u:π"
]
}
},
"obj:8 0 R": {
"value": ["u:hello", "10 0 R"]
},
"obj:9 0 R": {
"value": 3.14159
},
"obj:10 0 R": {
"value": "u:this is new"
},
"trailer": {
"value": {
"/QTest": "7 0 R",
"/Root": "1 0 R",
"/Size": 9
}
}
}
}
}