mirror of
https://github.com/qpdf/qpdf.git
synced 2024-10-31 19:02:30 +00:00
Implement JSON v2 for String
Also refine the heuristic for deciding whether to use hexadecimal notation for a string.
This commit is contained in:
parent
16f4f94cd9
commit
3246923cf2
@ -45,8 +45,32 @@ QPDF_String::unparse()
|
||||
JSON
|
||||
QPDF_String::getJSON(int json_version)
|
||||
{
|
||||
// QXXXQ
|
||||
return JSON::makeString(getUTF8Val());
|
||||
if (json_version == 1) {
|
||||
return JSON::makeString(getUTF8Val());
|
||||
}
|
||||
// See if we can unambiguously represent as Unicode.
|
||||
bool is_unicode = false;
|
||||
std::string result;
|
||||
std::string candidate = getUTF8Val();
|
||||
if (QUtil::is_utf16(this->val) || QUtil::is_explicit_utf8(this->val)) {
|
||||
is_unicode = true;
|
||||
result = candidate;
|
||||
} else if (!useHexString()) {
|
||||
std::string test;
|
||||
if (QUtil::utf8_to_pdf_doc(candidate, test, '?') &&
|
||||
(test == this->val)) {
|
||||
// This is a PDF-doc string that can be losslessly encoded
|
||||
// as Unicode.
|
||||
is_unicode = true;
|
||||
result = candidate;
|
||||
}
|
||||
}
|
||||
if (is_unicode) {
|
||||
result = "u:" + result;
|
||||
} else {
|
||||
result = "b:" + QUtil::hex_encode(this->val);
|
||||
}
|
||||
return JSON::makeString(result);
|
||||
}
|
||||
|
||||
QPDFObject::object_type_e
|
||||
@ -61,41 +85,32 @@ QPDF_String::getTypeName() const
|
||||
return "string";
|
||||
}
|
||||
|
||||
// Decide whether the raw bytes of this string (this->val) should be
// written in hexadecimal notation.
//
// Returns true when either
//   - at least one byte that fails the printability test below has a
//     value in the range 0..23, or
//   - more than one fifth of the bytes fail the printability test
//     (5 * non_ascii > val.length()).
// An empty string yields false, since 5 * 0 > 0 does not hold.
bool
|
||||
QPDF_String::useHexString() const
|
||||
{
|
||||
// Heuristic: use the hexadecimal representation of a string if
|
||||
// there are any non-printable (in PDF Doc encoding) characters or
|
||||
// if too large of a proportion of the string consists of
|
||||
// non-ASCII characters.
|
||||
bool nonprintable = false;
|
||||
// Counts every byte that fails the printability test, which includes
|
// low control bytes as well as bytes outside the ASCII range.
unsigned int non_ascii = 0;
|
||||
for (unsigned int i = 0; i < this->val.length(); ++i) {
|
||||
char ch = this->val.at(i);
|
||||
// NUL is tested explicitly because strchr() also matches the
|
// terminating '\0' of the literal, so strchr(..., 0) is non-null.
if ((ch == 0) ||
|
||||
// NOTE(review): is_ascii_printable is defined outside this view;
|
// presumably true for printable 7-bit ASCII -- confirm.
(!(is_ascii_printable(ch) || strchr("\n\r\t\b\f", ch)))) {
|
||||
// Only values 0..23 force hex output on their own. Where char is
|
// signed, bytes >= 0x80 are negative and fail the ch >= 0 test, so
|
// they only add to non_ascii below.
if ((ch >= 0) && (ch < 24)) {
|
||||
nonprintable = true;
|
||||
}
|
||||
++non_ascii;
|
||||
}
|
||||
}
|
||||
// Hex if a low control byte was seen, or if more than 20% of the
|
// bytes failed the printability test.
return (nonprintable || (5 * non_ascii > val.length()));
|
||||
}
|
||||
|
||||
std::string
|
||||
QPDF_String::unparse(bool force_binary)
|
||||
{
|
||||
bool use_hexstring = force_binary;
|
||||
if (!use_hexstring) {
|
||||
unsigned int nonprintable = 0;
|
||||
int consecutive_printable = 0;
|
||||
for (unsigned int i = 0; i < this->val.length(); ++i) {
|
||||
char ch = this->val.at(i);
|
||||
// Note: do not use locale to determine printability. The
|
||||
// PDF specification accepts arbitrary binary data. Some
|
||||
// locales imply multibyte characters. We'll consider
|
||||
// something printable if it is printable in 7-bit ASCII.
|
||||
// We'll code this manually rather than being rude and
|
||||
// setting locale.
|
||||
if ((ch == 0) ||
|
||||
(!(is_ascii_printable(ch) || strchr("\n\r\t\b\f", ch)))) {
|
||||
++nonprintable;
|
||||
consecutive_printable = 0;
|
||||
} else {
|
||||
if (++consecutive_printable > 5) {
|
||||
// If there are more than 5 consecutive printable
|
||||
// characters, I want to see them as such.
|
||||
nonprintable = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use hex notation if more than 20% of the characters are not
|
||||
// printable in plain ASCII.
|
||||
if (5 * nonprintable > val.length()) {
|
||||
use_hexstring = true;
|
||||
}
|
||||
}
|
||||
bool use_hexstring = force_binary || useHexString();
|
||||
std::string result;
|
||||
if (use_hexstring) {
|
||||
result += "<" + QUtil::hex_encode(this->val) + ">";
|
||||
|
@ -20,6 +20,7 @@ class QPDF_String: public QPDFObject
|
||||
std::string getUTF8Val() const;
|
||||
|
||||
private:
|
||||
bool useHexString() const;
|
||||
std::string val;
|
||||
};
|
||||
|
||||
|
Binary file not shown.
@ -65,8 +65,8 @@
|
||||
],
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"\u0013#¥fi|WzfsU…©6ŸÎ<",
|
||||
"7,¿DöÛ‹«`Ù&<\u000f\u000bÒj"
|
||||
"b:1323a5937c577a66735583a93698ce3c",
|
||||
"b:372cbf44f6db88ab60d9263c0f0bd26a"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 7
|
||||
|
@ -89,8 +89,8 @@
|
||||
},
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"\u0013#¥fi|WzfsU…©6ŸÎ<",
|
||||
"7,¿DöÛ‹«`Ù&<\u000f\u000bÒj"
|
||||
"b:1323a5937c577a66735583a93698ce3c",
|
||||
"b:372cbf44f6db88ab60d9263c0f0bd26a"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 7
|
||||
|
@ -9,7 +9,7 @@ three lines
|
||||
(string with \nCRLF and\nCR and\nLF)
|
||||
and another
|
||||
indentation
|
||||
(\001B%DEF)<01>
|
||||
<014225444546><01>
|
||||
<8a8b>
|
||||
(ab)
|
||||
<8c><dd> ) >
|
||||
|
@ -1,9 +1,9 @@
|
||||
{
|
||||
"/k1": "scalar1",
|
||||
"/k1": "u:scalar1",
|
||||
"/k2": 16059,
|
||||
"/k3": {
|
||||
"/a": "a",
|
||||
"/b": "conflict: seen",
|
||||
"/a": "u:a",
|
||||
"/b": "u:conflict: seen",
|
||||
"/c": [
|
||||
2,
|
||||
3
|
||||
@ -12,7 +12,7 @@
|
||||
"/y": 25,
|
||||
"/z": 26
|
||||
},
|
||||
"/e": "e"
|
||||
"/e": "u:e"
|
||||
},
|
||||
"/k4": {
|
||||
"/A": 65,
|
||||
@ -24,11 +24,11 @@
|
||||
"/k5": [
|
||||
"/one",
|
||||
2,
|
||||
"three",
|
||||
"u:three",
|
||||
[
|
||||
"/four"
|
||||
],
|
||||
"two"
|
||||
"u:two"
|
||||
]
|
||||
}
|
||||
/A
|
||||
|
@ -9,8 +9,8 @@
|
||||
"/Type": "/Catalog"
|
||||
},
|
||||
"2 0 R": {
|
||||
"/CreationDate": "D:20120621124041",
|
||||
"/Producer": "Apex PDFWriter"
|
||||
"/CreationDate": "u:D:20120621124041",
|
||||
"/Producer": "u:Apex PDFWriter"
|
||||
},
|
||||
"3 0 R": {
|
||||
"/Count": 3,
|
||||
@ -77,8 +77,8 @@
|
||||
"10 0 R": 47,
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"û˘·ƒÿ{5⁄\u0005Ú−S*º‘o",
|
||||
"÷\u0017ž³QY¿ÔÀ\u000f\u0012−¼ý˜\u0002"
|
||||
"b:fb18b786ff7b358705da8a532aba8f6f",
|
||||
"b:f7179eb35159bfd4c00f128abcfd1f02"
|
||||
],
|
||||
"/Info": "2 0 R",
|
||||
"/Root": "1 0 R",
|
||||
|
@ -41,8 +41,8 @@
|
||||
"/Type": "/Catalog"
|
||||
},
|
||||
"2 0 R": {
|
||||
"/CreationDate": "D:20120621124041",
|
||||
"/Producer": "Apex PDFWriter"
|
||||
"/CreationDate": "u:D:20120621124041",
|
||||
"/Producer": "u:Apex PDFWriter"
|
||||
},
|
||||
"3 0 R": {
|
||||
"/Count": 3,
|
||||
@ -129,8 +129,8 @@
|
||||
},
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"û˘·ƒÿ{5⁄\u0005Ú−S*º‘o",
|
||||
"÷\u0017ž³QY¿ÔÀ\u000f\u0012−¼ý˜\u0002"
|
||||
"b:fb18b786ff7b358705da8a532aba8f6f",
|
||||
"b:f7179eb35159bfd4c00f128abcfd1f02"
|
||||
],
|
||||
"/Info": "2 0 R",
|
||||
"/Root": "1 0 R",
|
||||
|
Loading…
Reference in New Issue
Block a user