2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/QPDF_String.hh>
|
|
|
|
|
|
|
|
#include <qpdf/QUtil.hh>
|
2008-11-23 18:49:13 +00:00
|
|
|
#include <qpdf/QTC.hh>
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
// DO NOT USE ctype -- it is locale dependent for some things, and
|
|
|
|
// it's not worth the risk of including it in case it may accidentally
|
|
|
|
// be used.
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
// See above about ctype.
|
2019-06-20 23:35:23 -04:00
|
|
|
// Return true if ch lies in the 7-bit ASCII printable range, i.e. its
// value is between 32 and 126 inclusive. Deliberately hand-coded
// instead of using ctype so that the result never depends on locale.
static bool is_ascii_printable(char ch)
{
    if (ch < 32)
    {
        // Control characters (and negative values when char is signed).
        return false;
    }
    return (ch <= 126);
}
|
2019-06-20 23:35:23 -04:00
|
|
|
// Return true if ch is printable when interpreted as ISO Latin-1 as
// far as this file is concerned: either in the ASCII printable range
// (32 through 126) or with an unsigned byte value of 160 or above.
// Hand-coded rather than using ctype to stay locale-independent.
static bool is_iso_latin1_printable(char ch)
{
    // Work on the unsigned byte value so the test does not depend on
    // whether plain char is signed.
    unsigned char const code = static_cast<unsigned char>(ch);
    if (code >= 160)
    {
        return true;
    }
    return ((code >= 32) && (code <= 126));
}
|
|
|
|
|
|
|
|
// Construct a string object that stores a copy of the given bytes.
// The value is kept exactly as passed in.
QPDF_String::QPDF_String(std::string const& val) : val(val)
{
    // Nothing else to initialize.
}
|
|
|
|
|
|
|
|
// Destructor: no explicit cleanup is performed here.
QPDF_String::~QPDF_String()
{
}
|
|
|
|
|
2019-01-05 12:54:41 -05:00
|
|
|
// Create a new QPDF_String whose stored value is the result of
// passing utf8_val through QUtil::utf8_to_utf16. The object is
// allocated with new and returned as a raw pointer.
// NOTE(review): the caller presumably takes ownership -- confirm.
QPDF_String*
QPDF_String::new_utf16(std::string const& utf8_val)
{
    std::string const utf16_val = QUtil::utf8_to_utf16(utf8_val);
    return new QPDF_String(utf16_val);
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
std::string
|
|
|
|
QPDF_String::unparse()
|
|
|
|
{
|
|
|
|
return unparse(false);
|
|
|
|
}
|
|
|
|
|
2018-12-17 17:40:29 -05:00
|
|
|
// Return a JSON string built from this object's UTF-8 value (as
// produced by getUTF8Val()).
JSON
QPDF_String::getJSON()
{
    std::string const utf8 = this->getUTF8Val();
    return JSON::makeString(utf8);
}
|
|
|
|
|
2013-01-22 09:57:07 -05:00
|
|
|
// Identify this object as a string: always returns
// QPDFObject::ot_string.
QPDFObject::object_type_e
QPDF_String::getTypeCode() const
{
    QPDFObject::object_type_e const code = QPDFObject::ot_string;
    return code;
}
|
|
|
|
|
|
|
|
char const*
|
|
|
|
QPDF_String::getTypeName() const
|
|
|
|
{
|
|
|
|
return "string";
|
|
|
|
}
|
|
|
|
|
2008-04-29 12:55:25 +00:00
|
|
|
std::string
|
|
|
|
QPDF_String::unparse(bool force_binary)
|
|
|
|
{
|
|
|
|
bool use_hexstring = force_binary;
|
|
|
|
if (! use_hexstring)
|
|
|
|
{
|
|
|
|
unsigned int nonprintable = 0;
|
|
|
|
int consecutive_printable = 0;
|
|
|
|
for (unsigned int i = 0; i < this->val.length(); ++i)
|
|
|
|
{
|
2013-10-05 19:42:39 -04:00
|
|
|
char ch = this->val.at(i);
|
2009-10-17 17:31:52 +00:00
|
|
|
// Note: do not use locale to determine printability. The
|
|
|
|
// PDF specification accepts arbitrary binary data. Some
|
|
|
|
// locales imply multibyte characters. We'll consider
|
|
|
|
// something printable if it is printable in 7-bit ASCII.
|
|
|
|
// We'll code this manually rather than being rude and
|
|
|
|
// setting locale.
|
|
|
|
if ((ch == 0) || (! (is_ascii_printable(ch) ||
|
2008-04-29 12:55:25 +00:00
|
|
|
strchr("\n\r\t\b\f", ch))))
|
|
|
|
{
|
|
|
|
++nonprintable;
|
|
|
|
consecutive_printable = 0;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (++consecutive_printable > 5)
|
|
|
|
{
|
|
|
|
// If there are more than 5 consecutive printable
|
|
|
|
// characters, I want to see them as such.
|
|
|
|
nonprintable = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use hex notation if more than 20% of the characters are not
|
2009-10-17 17:31:52 +00:00
|
|
|
// printable in plain ASCII.
|
2008-04-29 12:55:25 +00:00
|
|
|
if (5 * nonprintable > val.length())
|
|
|
|
{
|
|
|
|
use_hexstring = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
std::string result;
|
|
|
|
if (use_hexstring)
|
|
|
|
{
|
2013-01-25 08:59:55 -05:00
|
|
|
result += "<" + QUtil::hex_encode(this->val) + ">";
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
result += "(";
|
|
|
|
for (unsigned int i = 0; i < this->val.length(); ++i)
|
|
|
|
{
|
2013-10-05 19:42:39 -04:00
|
|
|
char ch = this->val.at(i);
|
2008-04-29 12:55:25 +00:00
|
|
|
switch (ch)
|
|
|
|
{
|
|
|
|
case '\n':
|
|
|
|
result += "\\n";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\r':
|
|
|
|
result += "\\r";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\t':
|
|
|
|
result += "\\t";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\b':
|
|
|
|
result += "\\b";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\f':
|
|
|
|
result += "\\f";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '(':
|
|
|
|
result += "\\(";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ')':
|
|
|
|
result += "\\)";
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\\':
|
|
|
|
result += "\\\\";
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
if (is_iso_latin1_printable(ch))
|
|
|
|
{
|
2013-10-05 19:42:39 -04:00
|
|
|
result += this->val.at(i);
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2013-02-28 16:20:45 -05:00
|
|
|
result += "\\" + QUtil::int_to_string_base(
|
|
|
|
static_cast<int>(static_cast<unsigned char>(ch)),
|
|
|
|
8, 3);
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result += ")";
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string
|
|
|
|
QPDF_String::getVal() const
|
|
|
|
{
|
|
|
|
return this->val;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string
|
|
|
|
QPDF_String::getUTF8Val() const
|
|
|
|
{
|
2019-01-13 08:00:14 -05:00
|
|
|
if (QUtil::is_utf16(this->val))
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2019-01-13 08:00:14 -05:00
|
|
|
return QUtil::utf16_to_utf8(this->val);
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2019-01-13 08:00:14 -05:00
|
|
|
return QUtil::pdf_doc_to_utf8(this->val);
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|