Add new method QPDF_Name::analyzeJSONEncoding

Provide a custom method to check whether a name is valid utf8. Integrate
checking for characters that need to be escaped in JSON.
This commit is contained in:
m-holger 2024-02-10 12:03:28 +00:00
parent e2737ab646
commit 431987475b
3 changed files with 78 additions and 31 deletions

View File

@ -77,15 +77,11 @@ QPDF_Dictionary::getJSON(int json_version)
if (json_version == 1) {
j.addDictionaryMember(
QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version));
} else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) {
j.addDictionaryMember(iter.first, iter.second.getJSON(json_version));
} else {
bool has_8bit_chars;
bool is_valid_utf8;
bool is_utf16;
QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16);
std::string key = !has_8bit_chars || is_valid_utf8
? iter.first
: "n:" + QPDF_Name::normalizeName(iter.first);
j.addDictionaryMember(key, iter.second.getJSON(json_version));
j.addDictionaryMember(
"n:" + QPDF_Name::normalizeName(iter.first), iter.second.getJSON(json_version));
}
}
}
@ -100,18 +96,17 @@ QPDF_Dictionary::writeJSON(int json_version, JSON::Writer& p)
if (!iter.second.isNull()) {
p.writeNext();
if (json_version == 1) {
p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first)) << "\": ";
} else {
bool has_8bit_chars;
bool is_valid_utf8;
bool is_utf16;
QUtil::analyze_encoding(iter.first, has_8bit_chars, is_valid_utf8, is_utf16);
if (!has_8bit_chars || is_valid_utf8) {
p << "\"" << JSON::Writer::encode_string(iter.first) << "\": ";
p << "\"" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
<< "\": ";
} else if (auto res = QPDF_Name::analyzeJSONEncoding(iter.first); res.first) {
if (res.second) {
p << "\"" << iter.first << "\": ";
} else {
p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
<< "\": ";
p << "\"" << JSON::Writer::encode_string(iter.first) << "\": ";
}
} else {
p << "\"n:" << JSON::Writer::encode_string(QPDF_Name::normalizeName(iter.first))
<< "\": ";
}
iter.second.writeJSON(json_version, p);
}

View File

@ -3,6 +3,8 @@
#include <qpdf/JSON_writer.hh>
#include <qpdf/QUtil.hh>
#include <string_view>
QPDF_Name::QPDF_Name(std::string const& name) :
QPDFValue(::ot_name, "name"),
name(name)
@ -52,20 +54,65 @@ QPDF_Name::unparse()
return normalizeName(this->name);
}
// Scan name once and classify it for JSON output.
//
// Returns {is_valid_utf8, can_be_written_verbatim}:
//   {false, false} - name is not well-formed UTF-8: unexpected or missing continuation byte,
//                    overlong encoding, invalid lead byte, or a sequence cut off at the end.
//   {true, true}   - name is valid UTF-8 and contains no character that has to be escaped inside
//                    a JSON string.
//   {true, false}  - name is valid UTF-8 but contains a control character (< 0x20), '"' or '\'.
//
// NOTE(review): overlong forms are rejected, but UTF-16 surrogates (0xED 0xA0..0xBF ...) and code
// points above U+10FFFF (0xF4 followed by 0x90 or more, lead bytes 0xF5..0xF7) are accepted -
// confirm whether strict RFC 3629 validation is wanted here.
std::pair<bool, bool>
QPDF_Name::analyzeJSONEncoding(const std::string& name)
{
    int tail = 0;       // Number of continuation bytes still expected.
    bool tail2 = false; // Next byte could complete an overlong 3 octet sequence.
    bool tail3 = false; // Next byte could complete an overlong 4 octet sequence.
    bool needs_escaping = false;
    // Iterate over the string itself and convert each byte. A basic_string_view<unsigned char>
    // would need std::char_traits<unsigned char>, which the standard library is not required to
    // provide.
    for (char const ch: name) {
        auto const c = static_cast<unsigned char>(ch);
        if (tail) {
            if ((c & 0xc0) != 0x80) {
                // Expected a continuation byte (10xxxxxx).
                return {false, false};
            }
            if (tail2) {
                // After lead byte 0xe0 the first continuation byte must be >= 0xa0.
                if ((c & 0xe0) == 0x80) {
                    return {false, false};
                }
                tail2 = false;
            } else if (tail3) {
                // After lead byte 0xf0 the first continuation byte must be >= 0x90.
                if ((c & 0xf0) == 0x80) {
                    return {false, false};
                }
                tail3 = false;
            }
            tail--;
        } else if (c < 0x80) {
            if (!needs_escaping) {
                needs_escaping = c < 0x20 || c == '"' || c == '\\';
            }
        } else if ((c & 0xe0) == 0xc0) {
            if ((c & 0xfe) == 0xc0) {
                // 0xc0 and 0xc1 can only start overlong 2 octet sequences.
                return {false, false};
            }
            tail = 1;
        } else if ((c & 0xf0) == 0xe0) {
            tail2 = (c == 0xe0);
            tail = 2;
        } else if ((c & 0xf8) == 0xf0) {
            tail3 = (c == 0xf0);
            tail = 3;
        } else {
            // Stray continuation byte or a lead byte of the form 11111xxx.
            return {false, false};
        }
    }
    if (tail != 0) {
        // The name ends in the middle of a multi-byte sequence. Report the documented
        // {false, false} rather than leaking the escaping flag.
        return {false, false};
    }
    return {true, !needs_escaping};
}
// Build the JSON value for this name.
// For json_version 1 the result is a JSON string holding the normalizeName() form of the name.
// For any other version the name is passed to JSON::makeString unchanged when the encoding check
// succeeds, and otherwise as "n:" followed by the normalizeName() form.
// NOTE(review): this rendering shows the removed side (QUtil::analyze_encoding based check) and
// the added side (analyzeJSONEncoding based check) of the change together - confirm which lines
// are live before editing.
JSON
QPDF_Name::getJSON(int json_version)
{
if (json_version == 1) {
return JSON::makeString(normalizeName(this->name));
} else {
bool has_8bit_chars;
bool is_valid_utf8;
bool is_utf16;
QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);
if (!has_8bit_chars || is_valid_utf8) {
return JSON::makeString(this->name);
// Only res.first (valid UTF-8) is consulted here; res.second is not used in this function.
if (auto res = analyzeJSONEncoding(name); res.first) {
return JSON::makeString(name);
} else {
return JSON::makeString("n:" + normalizeName(this->name));
return JSON::makeString("n:" + normalizeName(name));
}
}
}
@ -76,12 +123,12 @@ QPDF_Name::writeJSON(int json_version, JSON::Writer& p)
if (json_version == 1) {
p << "\"" << JSON::Writer::encode_string(normalizeName(name)) << "\"";
} else {
bool has_8bit_chars;
bool is_valid_utf8;
bool is_utf16;
QUtil::analyze_encoding(this->name, has_8bit_chars, is_valid_utf8, is_utf16);
if (!has_8bit_chars || is_valid_utf8) {
p << "\"" << JSON::Writer::encode_string(name) << "\"";
if (auto res = analyzeJSONEncoding(name); res.first) {
if (res.second) {
p << "\"" << name << "\"";
} else {
p << "\"" << JSON::Writer::encode_string(name) << "\"";
}
} else {
p << "\"n:" << JSON::Writer::encode_string(normalizeName(name)) << "\"";
}

View File

@ -15,6 +15,11 @@ class QPDF_Name: public QPDFValue
// Put # into strings with characters unsuitable for name token
static std::string normalizeName(std::string const& name);
// Check whether name is valid utf-8 and whether it contains characters that require escaping.
// Return {false, false} if the name is not valid utf-8, otherwise return {true, true} if no
// characters require escaping or {true, false} if escaping is required.
static std::pair<bool, bool> analyzeJSONEncoding(std::string const& name);
std::string
getStringValue() const override
{