mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Add "n:/pdf-name" to qpdf JSON for binary names (fixes #1072)
This commit is contained in:
parent
bb12a7ff8d
commit
4400ce84ee
@ -1,5 +1,12 @@
|
|||||||
2023-12-21 Jay Berkenbilt <ejb@ql.org>
|
2023-12-21 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Fix to QPDF JSON: the syntax "n:/pdf-syntax" is now accepted as
|
||||||
|
an alternative way to represent names. This can be used for any
|
||||||
|
name (e.g. "n:/text#2fplain"), but it is necessary when the name
|
||||||
|
contains binary characters. For example, /one#a0two must be
|
||||||
|
represented as "n:/one#a0two" since the single byte a0 is not
|
||||||
|
valid UTF-8 and so cannot appear in JSON. Fixes #1072.
|
||||||
|
|
||||||
* From M. Holger: Refactor QPDFParser for performance. See #1059
|
* From M. Holger: Refactor QPDFParser for performance. See #1059
|
||||||
for a discussion.
|
for a discussion.
|
||||||
|
|
||||||
|
@ -57,6 +57,14 @@ QPDF_Name::getJSON(int json_version)
|
|||||||
if (json_version == 1) {
|
if (json_version == 1) {
|
||||||
return JSON::makeString(normalizeName(this->name));
|
return JSON::makeString(normalizeName(this->name));
|
||||||
} else {
|
} else {
|
||||||
return JSON::makeString(this->name);
|
bool has_8bit_chars;
|
||||||
|
bool is_valid_utf8;
|
||||||
|
bool is_utf16;
|
||||||
|
QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);
|
||||||
|
if (!has_8bit_chars || is_valid_utf8) {
|
||||||
|
return JSON::makeString(this->name);
|
||||||
|
} else {
|
||||||
|
return JSON::makeString("n:" + normalizeName(this->name));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -144,6 +144,12 @@ is_name(std::string const& v)
|
|||||||
return ((v.length() > 1) && (v.at(0) == '/'));
|
return ((v.length() > 1) && (v.at(0) == '/'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
is_pdf_name(std::string const& v)
|
||||||
|
{
|
||||||
|
return ((v.length() > 3) && (v.substr(0, 3) == "n:/"));
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
QPDF::test_json_validators()
|
QPDF::test_json_validators()
|
||||||
{
|
{
|
||||||
@ -740,6 +746,8 @@ QPDF::JSONReactor::makeObject(JSON const& value)
|
|||||||
result = QPDFObjectHandle::newString(QUtil::hex_decode(str));
|
result = QPDFObjectHandle::newString(QUtil::hex_decode(str));
|
||||||
} else if (is_name(str_v)) {
|
} else if (is_name(str_v)) {
|
||||||
result = QPDFObjectHandle::newName(str_v);
|
result = QPDFObjectHandle::newName(str_v);
|
||||||
|
} else if (is_pdf_name(str_v)) {
|
||||||
|
result = QPDFObjectHandle::parse(str_v.substr(2));
|
||||||
} else {
|
} else {
|
||||||
QTC::TC("qpdf", "QPDF_json unrecognized string value");
|
QTC::TC("qpdf", "QPDF_json unrecognized string value");
|
||||||
error(value.getStart(), "unrecognized string value");
|
error(value.getStart(), "unrecognized string value");
|
||||||
|
@ -258,6 +258,12 @@ Object Values
|
|||||||
syntax resolved. For example, the name whose canonical form (per
|
syntax resolved. For example, the name whose canonical form (per
|
||||||
the PDF specification) is ``text/plain`` would be represented in
|
the PDF specification) is ``text/plain`` would be represented in
|
||||||
JSON as ``"/text/plain"`` and in PDF as ``"/text#2fplain"``.
|
JSON as ``"/text/plain"`` and in PDF as ``"/text#2fplain"``.
|
||||||
|
Starting with qpdf 11.7.0, the syntax ``"n:/pdf-syntax"`` is
|
||||||
|
accepted as an alternative. This can be used for any name (e.g.
|
||||||
|
``"n:/text#2fplain"``), but it is necessary when the name contains
|
||||||
|
binary characters. For example, ``/one#a0two`` must be represented
|
||||||
|
as ``"n:/one#a0two"`` since the single byte ``a0`` is not valid in
|
||||||
|
JSON, which requires valid UTF-8.
|
||||||
|
|
||||||
- Indirect object references are represented as JSON strings that
|
- Indirect object references are represented as JSON strings that
|
||||||
look like a PDF indirect object reference and have the form
|
look like a PDF indirect object reference and have the form
|
||||||
@ -824,7 +830,8 @@ version 2.
|
|||||||
- Names are shown in qpdf's canonical form rather than in PDF
|
- Names are shown in qpdf's canonical form rather than in PDF
|
||||||
syntax. (Example: the PDF-syntax name ``/text#2fplain`` appeared
|
syntax. (Example: the PDF-syntax name ``/text#2fplain`` appeared
|
||||||
as ``"/text#2fplain"`` in v1 but appears as ``"/text/plain"`` in
|
as ``"/text#2fplain"`` in v1 but appears as ``"/text/plain"`` in
|
||||||
v2.
|
v2. In qpdf 11.7.0, a fix was made to accept ``"n:/pdf-syntax"``
|
||||||
|
for names containing binary characters.)
|
||||||
|
|
||||||
- The top-level representation of an object in ``"objects"`` is a
|
- The top-level representation of an object in ``"objects"`` is a
|
||||||
dictionary containing either a ``"value"`` key or a ``"stream"``
|
dictionary containing either a ``"value"`` key or a ``"stream"``
|
||||||
|
@ -45,6 +45,13 @@ Planned changes for future 12.x (subject to change):
|
|||||||
reference streams, linearization hint streams, and object
|
reference streams, linearization hint streams, and object
|
||||||
streams. This has been fixed.
|
streams. This has been fixed.
|
||||||
|
|
||||||
|
- Fix to QPDF JSON: the syntax ``"n:/pdf-syntax"`` is now accepted
|
||||||
|
as an alternative way to represent names. This can be used for
|
||||||
|
any name (e.g. ``"n:/text#2fplain"``), but it is necessary when
|
||||||
|
the name contains binary characters. For example, ``/one#a0two``
|
||||||
|
must be represented as ``"n:/one#a0two"`` since the single byte
|
||||||
|
``a0`` is not valid UTF-8 and so cannot appear in JSON.
|
||||||
|
|
||||||
- Build Enhancements:
|
- Build Enhancements:
|
||||||
|
|
||||||
- The qpdf test suite now passes when qpdf is linked with an
|
- The qpdf test suite now passes when qpdf is linked with an
|
||||||
|
@ -61,6 +61,7 @@ my @goodfiles = (
|
|||||||
'form-fields-and-annotations.pdf',
|
'form-fields-and-annotations.pdf',
|
||||||
'need-appearances.pdf',
|
'need-appearances.pdf',
|
||||||
'fxo-blue.pdf',
|
'fxo-blue.pdf',
|
||||||
|
'weird-tokens.pdf',
|
||||||
);
|
);
|
||||||
$n_tests += 6 * scalar(@goodfiles);
|
$n_tests += 6 * scalar(@goodfiles);
|
||||||
|
|
||||||
@ -341,5 +342,21 @@ $td->runtest("check C API write to JSON stream",
|
|||||||
{$td->FILE => "auto-4"},
|
{$td->FILE => "auto-4"},
|
||||||
{$td->FILE => "qpdf-ctest-47-4"});
|
{$td->FILE => "qpdf-ctest-47-4"});
|
||||||
|
|
||||||
|
# Bugs #1072 and #1079 illustrate cases that qpdf-json got wrong. In
|
||||||
|
# #1072, it was noticed that name tokens containing binary characters
|
||||||
|
# (using #xx) would generate invalid JSON, even though qpdf's own JSON
|
||||||
|
# parser would accept it. Also, the JSON spec allows real numbers in
|
||||||
|
# scientific notation, but the PDF spec does not.
|
||||||
|
$n_tests += 2;
|
||||||
|
$td->runtest("handle binary names",
|
||||||
|
{$td->COMMAND =>
|
||||||
|
"qpdf --json-output weird-tokens.pdf a.json"},
|
||||||
|
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||||
|
# Round-trip is tested above.
|
||||||
|
$td->runtest("check json",
|
||||||
|
{$td->FILE => "a.json"},
|
||||||
|
{$td->FILE => "weird-tokens.json"},
|
||||||
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
|
||||||
cleanup();
|
cleanup();
|
||||||
$td->report($n_tests);
|
$td->report($n_tests);
|
||||||
|
83
qpdf/qtest/qpdf/weird-tokens.json
Normal file
83
qpdf/qtest/qpdf/weird-tokens.json
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
{
|
||||||
|
"qpdf": [
|
||||||
|
{
|
||||||
|
"jsonversion": 2,
|
||||||
|
"pdfversion": "2.0",
|
||||||
|
"pushedinheritedpageresources": false,
|
||||||
|
"calledgetallpages": false,
|
||||||
|
"maxobjectid": 6
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"obj:1 0 R": {
|
||||||
|
"value": {
|
||||||
|
"/Extra": [
|
||||||
|
"u:Names with binary data",
|
||||||
|
"n:/ABCDEF+#ba#da#cc#e5",
|
||||||
|
"/ABCEDEF+π",
|
||||||
|
"n:/one+#a0two",
|
||||||
|
"/text/plain",
|
||||||
|
"u:Very small/large reals",
|
||||||
|
0.00001,
|
||||||
|
1000000000000
|
||||||
|
],
|
||||||
|
"/Pages": "2 0 R",
|
||||||
|
"/Type": "/Catalog"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"obj:2 0 R": {
|
||||||
|
"value": {
|
||||||
|
"/Count": 1,
|
||||||
|
"/Kids": [
|
||||||
|
"3 0 R"
|
||||||
|
],
|
||||||
|
"/Type": "/Pages"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"obj:3 0 R": {
|
||||||
|
"value": {
|
||||||
|
"/Contents": "4 0 R",
|
||||||
|
"/MediaBox": [
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
612,
|
||||||
|
792
|
||||||
|
],
|
||||||
|
"/Parent": "2 0 R",
|
||||||
|
"/Resources": {
|
||||||
|
"/Font": {
|
||||||
|
"/F1": "6 0 R"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"/Type": "/Page"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"obj:4 0 R": {
|
||||||
|
"stream": {
|
||||||
|
"data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
|
||||||
|
"dict": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"obj:5 0 R": {
|
||||||
|
"value": 44
|
||||||
|
},
|
||||||
|
"obj:6 0 R": {
|
||||||
|
"value": {
|
||||||
|
"/BaseFont": "/Helvetica",
|
||||||
|
"/Encoding": "/WinAnsiEncoding",
|
||||||
|
"/Subtype": "/Type1",
|
||||||
|
"/Type": "/Font"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"trailer": {
|
||||||
|
"value": {
|
||||||
|
"/ID": [
|
||||||
|
"b:42841c13bbf709d79a200fa1691836f8",
|
||||||
|
"b:728c020f464c3cf7e02c12605fa7d88b"
|
||||||
|
],
|
||||||
|
"/Root": "1 0 R",
|
||||||
|
"/Size": 7
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
95
qpdf/qtest/qpdf/weird-tokens.pdf
Normal file
95
qpdf/qtest/qpdf/weird-tokens.pdf
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
%PDF-2.0
|
||||||
|
%¿÷¢þ
|
||||||
|
%QDF-1.0
|
||||||
|
|
||||||
|
1 0 obj
|
||||||
|
<<
|
||||||
|
/Extra [
|
||||||
|
(Names with binary data)
|
||||||
|
/ABCDEF+#ba#da#cc#e5
|
||||||
|
/ABCEDEF+#cf#80
|
||||||
|
/one+#a0two
|
||||||
|
/text#2fplain
|
||||||
|
(Very small/large reals)
|
||||||
|
0.00001
|
||||||
|
1000000000000
|
||||||
|
]
|
||||||
|
/Pages 2 0 R
|
||||||
|
/Type /Catalog
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
2 0 obj
|
||||||
|
<<
|
||||||
|
/Count 1
|
||||||
|
/Kids [
|
||||||
|
3 0 R
|
||||||
|
]
|
||||||
|
/Type /Pages
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
%% Page 1
|
||||||
|
3 0 obj
|
||||||
|
<<
|
||||||
|
/Contents 4 0 R
|
||||||
|
/MediaBox [
|
||||||
|
0
|
||||||
|
0
|
||||||
|
612
|
||||||
|
792
|
||||||
|
]
|
||||||
|
/Parent 2 0 R
|
||||||
|
/Resources <<
|
||||||
|
/Font <<
|
||||||
|
/F1 6 0 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
/Type /Page
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
%% Contents for page 1
|
||||||
|
4 0 obj
|
||||||
|
<<
|
||||||
|
/Length 5 0 R
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 24 Tf
|
||||||
|
72 720 Td
|
||||||
|
(Potato) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
5 0 obj
|
||||||
|
44
|
||||||
|
endobj
|
||||||
|
|
||||||
|
6 0 obj
|
||||||
|
<<
|
||||||
|
/BaseFont /Helvetica
|
||||||
|
/Encoding /WinAnsiEncoding
|
||||||
|
/Subtype /Type1
|
||||||
|
/Type /Font
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
xref
|
||||||
|
0 7
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000025 00000 n
|
||||||
|
0000000261 00000 n
|
||||||
|
0000000343 00000 n
|
||||||
|
0000000539 00000 n
|
||||||
|
0000000638 00000 n
|
||||||
|
0000000657 00000 n
|
||||||
|
trailer <<
|
||||||
|
/Root 1 0 R
|
||||||
|
/Size 7
|
||||||
|
/ID [<42841c13bbf709d79a200fa1691836f8><728c020f464c3cf7e02c12605fa7d88b>]
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
763
|
||||||
|
%%EOF
|
Loading…
Reference in New Issue
Block a user