mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 02:49:00 +00:00
Add "n:/pdf-name" to qpdf JSON for binary names (fixes #1072)
This commit is contained in:
parent
bb12a7ff8d
commit
4400ce84ee
@ -1,5 +1,12 @@
|
||||
2023-12-21 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Fix to QPDF JSON: the syntax "n:/pdf-syntax" is now accepted as
|
||||
an alternative way to represent names. This can be used for any
|
||||
name (e.g. "n:/text#2fplain"), but it is necessary when the name
|
||||
contains binary characters. For example, /one#a0two must be
|
||||
represented as "n:/one#a0two" since the single byte a0 is not
|
||||
valid in JSON, which must be UTF-8. Fixes #1072.
|
||||
|
||||
* From M. Holger: Refactor QPDFParser for performance. See #1059
|
||||
for a discussion.
|
||||
|
||||
|
@ -57,6 +57,14 @@ QPDF_Name::getJSON(int json_version)
|
||||
if (json_version == 1) {
|
||||
return JSON::makeString(normalizeName(this->name));
|
||||
} else {
|
||||
return JSON::makeString(this->name);
|
||||
bool has_8bit_chars;
|
||||
bool is_valid_utf8;
|
||||
bool is_utf16;
|
||||
QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);
|
||||
if (!has_8bit_chars || is_valid_utf8) {
|
||||
return JSON::makeString(this->name);
|
||||
} else {
|
||||
return JSON::makeString("n:" + normalizeName(this->name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -144,6 +144,12 @@ is_name(std::string const& v)
|
||||
return ((v.length() > 1) && (v.at(0) == '/'));
|
||||
}
|
||||
|
||||
// Return true if v is a qpdf JSON string that spells a name in PDF syntax:
// the literal prefix "n:/" followed by at least one more character
// (e.g. "n:/one#a0two"). The text after "n:" is PDF name syntax, including
// any #xx escapes.
static bool
is_pdf_name(std::string const& v)
{
    // compare() checks the prefix in place; substr() would allocate a
    // temporary string just to throw it away.
    return ((v.length() > 3) && (v.compare(0, 3, "n:/") == 0));
}
|
||||
|
||||
bool
|
||||
QPDF::test_json_validators()
|
||||
{
|
||||
@ -740,6 +746,8 @@ QPDF::JSONReactor::makeObject(JSON const& value)
|
||||
result = QPDFObjectHandle::newString(QUtil::hex_decode(str));
|
||||
} else if (is_name(str_v)) {
|
||||
result = QPDFObjectHandle::newName(str_v);
|
||||
} else if (is_pdf_name(str_v)) {
|
||||
result = QPDFObjectHandle::parse(str_v.substr(2));
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDF_json unrecognized string value");
|
||||
error(value.getStart(), "unrecognized string value");
|
||||
|
@ -258,6 +258,12 @@ Object Values
|
||||
syntax resolved. For example, the name whose canonical form (per
|
||||
the PDF specification) is ``text/plain`` would be represented in
|
||||
JSON as ``"/text/plain"`` and in PDF as ``/text#2fplain``.
|
||||
Starting with qpdf 11.7.0, the syntax ``"n:/pdf-syntax"`` is
|
||||
accepted as an alternative. This can be used for any name (e.g.
|
||||
``"n:/text#2fplain"``), but it is necessary when the name contains
|
||||
binary characters. For example, ``/one#a0two`` must be represented
|
||||
as ``"n:/one#a0two"`` since the single byte ``a0`` is not valid in
|
||||
JSON, which must be UTF-8.
|
||||
|
||||
- Indirect object references are represented as JSON strings that
|
||||
look like a PDF indirect object reference and have the form
|
||||
@ -824,7 +830,8 @@ version 2.
|
||||
- Names are shown in qpdf's canonical form rather than in PDF
|
||||
syntax. (Example: the PDF-syntax name ``/text#2fplain`` appeared
|
||||
as ``"/text#2fplain"`` in v1 but appears as ``"/text/plain"`` in
|
||||
v2.
|
||||
v2. In qpdf 11.7.0, a fix was made to accept ``"n:/pdf-syntax"``
|
||||
for names containing binary characters.
|
||||
|
||||
- The top-level representation of an object in ``"objects"`` is a
|
||||
dictionary containing either a ``"value"`` key or a ``"stream"``
|
||||
|
@ -45,6 +45,13 @@ Planned changes for future 12.x (subject to change):
|
||||
reference streams, linearization hint streams, and object
|
||||
streams. This has been fixed.
|
||||
|
||||
- Fix to QPDF JSON: the syntax ``"n:/pdf-syntax"`` is now accepted
|
||||
as an alternative way to represent names. This can be used for
|
||||
any name (e.g. ``"n:/text#2fplain"``), but it is necessary when
|
||||
the name contains binary characters. For example, ``/one#a0two``
|
||||
must be represented as ``"n:/one#a0two"`` since the single byte
|
||||
``a0`` is not valid in JSON, which must be UTF-8.
|
||||
|
||||
- Build Enhancements:
|
||||
|
||||
- The qpdf test suite now passes when qpdf is linked with an
|
||||
|
@ -61,6 +61,7 @@ my @goodfiles = (
|
||||
'form-fields-and-annotations.pdf',
|
||||
'need-appearances.pdf',
|
||||
'fxo-blue.pdf',
|
||||
'weird-tokens.pdf',
|
||||
);
|
||||
$n_tests += 6 * scalar(@goodfiles);
|
||||
|
||||
@ -341,5 +342,21 @@ $td->runtest("check C API write to JSON stream",
|
||||
{$td->FILE => "auto-4"},
|
||||
{$td->FILE => "qpdf-ctest-47-4"});
|
||||
|
||||
# Bugs #1072 and #1079 illustrate cases that qpdf-json got wrong. In
|
||||
# #1072, it was noticed that name tokens containing binary characters
|
||||
# (using #xx) would generate invalid JSON, even though qpdf's own JSON
|
||||
# parser would accept it. Also, the JSON spec allows real numbers in
|
||||
# scientific notation, but the PDF spec does not.
|
||||
$n_tests += 2;
|
||||
$td->runtest("handle binary names",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --json-output weird-tokens.pdf a.json"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||
# Round-trip is tested above.
|
||||
$td->runtest("check json",
|
||||
{$td->FILE => "a.json"},
|
||||
{$td->FILE => "weird-tokens.json"},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
cleanup();
|
||||
$td->report($n_tests);
|
||||
|
83
qpdf/qtest/qpdf/weird-tokens.json
Normal file
83
qpdf/qtest/qpdf/weird-tokens.json
Normal file
@ -0,0 +1,83 @@
|
||||
{
|
||||
"qpdf": [
|
||||
{
|
||||
"jsonversion": 2,
|
||||
"pdfversion": "2.0",
|
||||
"pushedinheritedpageresources": false,
|
||||
"calledgetallpages": false,
|
||||
"maxobjectid": 6
|
||||
},
|
||||
{
|
||||
"obj:1 0 R": {
|
||||
"value": {
|
||||
"/Extra": [
|
||||
"u:Names with binary data",
|
||||
"n:/ABCDEF+#ba#da#cc#e5",
|
||||
"/ABCEDEF+π",
|
||||
"n:/one+#a0two",
|
||||
"/text/plain",
|
||||
"u:Very small/large reals",
|
||||
0.00001,
|
||||
1000000000000
|
||||
],
|
||||
"/Pages": "2 0 R",
|
||||
"/Type": "/Catalog"
|
||||
}
|
||||
},
|
||||
"obj:2 0 R": {
|
||||
"value": {
|
||||
"/Count": 1,
|
||||
"/Kids": [
|
||||
"3 0 R"
|
||||
],
|
||||
"/Type": "/Pages"
|
||||
}
|
||||
},
|
||||
"obj:3 0 R": {
|
||||
"value": {
|
||||
"/Contents": "4 0 R",
|
||||
"/MediaBox": [
|
||||
0,
|
||||
0,
|
||||
612,
|
||||
792
|
||||
],
|
||||
"/Parent": "2 0 R",
|
||||
"/Resources": {
|
||||
"/Font": {
|
||||
"/F1": "6 0 R"
|
||||
}
|
||||
},
|
||||
"/Type": "/Page"
|
||||
}
|
||||
},
|
||||
"obj:4 0 R": {
|
||||
"stream": {
|
||||
"data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
|
||||
"dict": {}
|
||||
}
|
||||
},
|
||||
"obj:5 0 R": {
|
||||
"value": 44
|
||||
},
|
||||
"obj:6 0 R": {
|
||||
"value": {
|
||||
"/BaseFont": "/Helvetica",
|
||||
"/Encoding": "/WinAnsiEncoding",
|
||||
"/Subtype": "/Type1",
|
||||
"/Type": "/Font"
|
||||
}
|
||||
},
|
||||
"trailer": {
|
||||
"value": {
|
||||
"/ID": [
|
||||
"b:42841c13bbf709d79a200fa1691836f8",
|
||||
"b:728c020f464c3cf7e02c12605fa7d88b"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 7
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
95
qpdf/qtest/qpdf/weird-tokens.pdf
Normal file
95
qpdf/qtest/qpdf/weird-tokens.pdf
Normal file
@ -0,0 +1,95 @@
|
||||
%PDF-2.0
|
||||
%¿÷¢þ
|
||||
%QDF-1.0
|
||||
|
||||
1 0 obj
|
||||
<<
|
||||
/Extra [
|
||||
(Names with binary data)
|
||||
/ABCDEF+#ba#da#cc#e5
|
||||
/ABCEDEF+#cf#80
|
||||
/one+#a0two
|
||||
/text#2fplain
|
||||
(Very small/large reals)
|
||||
0.00001
|
||||
1000000000000
|
||||
]
|
||||
/Pages 2 0 R
|
||||
/Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<<
|
||||
/Count 1
|
||||
/Kids [
|
||||
3 0 R
|
||||
]
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Page 1
|
||||
3 0 obj
|
||||
<<
|
||||
/Contents 4 0 R
|
||||
/MediaBox [
|
||||
0
|
||||
0
|
||||
612
|
||||
792
|
||||
]
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 6 0 R
|
||||
>>
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Contents for page 1
|
||||
4 0 obj
|
||||
<<
|
||||
/Length 5 0 R
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
/F1 24 Tf
|
||||
72 720 Td
|
||||
(Potato) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
5 0 obj
|
||||
44
|
||||
endobj
|
||||
|
||||
6 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding
|
||||
/Subtype /Type1
|
||||
/Type /Font
|
||||
>>
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 7
|
||||
0000000000 65535 f
|
||||
0000000025 00000 n
|
||||
0000000261 00000 n
|
||||
0000000343 00000 n
|
||||
0000000539 00000 n
|
||||
0000000638 00000 n
|
||||
0000000657 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 7
|
||||
/ID [<42841c13bbf709d79a200fa1691836f8><728c020f464c3cf7e02c12605fa7d88b>]
|
||||
>>
|
||||
startxref
|
||||
763
|
||||
%%EOF
|
Loading…
Reference in New Issue
Block a user