2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-29 00:10:54 +00:00
qpdf/libqpdf/QPDF_String.cc
Jay Berkenbilt d71f05ca07 Fix sign and conversion warnings (major)
This makes all integer type conversions that have potential data loss
explicit with calls that do range checks and raise an exception. After
this commit, qpdf builds with no warnings when -Wsign-conversion
-Wconversion is used with gcc or clang or when -W3 -Wd4800 is used
with MSVC. This significantly reduces the likelihood of potential
crashes from bogus integer values.

There are some parts of the code that take int when they should take
size_t or an offset. Such places would make qpdf not support files
with more than 2^31 of something that usually wouldn't be so large. In
the event that such a file shows up and is valid, at least qpdf would
raise an error in the right spot so the issue could be legitimately
addressed rather than failing in some weird way because of a silent
overflow condition.
2019-06-21 13:17:21 -04:00

186 lines
3.4 KiB
C++

#include <qpdf/QPDF_String.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
// DO NOT USE ctype -- it is locale dependent for some things, and
// it's not worth the risk of including it in case it may accidentally
// be used.
#include <string.h>
// See above about ctype.
// Locale-independent test for a printable 7-bit ASCII character:
// true exactly for the codes 32 (space) through 126 ('~').
static bool is_ascii_printable(char ch)
{
    // Control characters land here, as do high-bit bytes on platforms
    // where plain char is signed.
    if (ch < 32)
    {
        return false;
    }
    // Rejects DEL (127) and, where plain char is unsigned, every
    // high-bit byte.
    return ch <= 126;
}
// Locale-independent test for a byte treated as printable ISO Latin-1:
// true for the codes 32 through 126 and for every code from 160 up.
static bool is_iso_latin1_printable(char ch)
{
    // Work on the unsigned byte value so the result does not depend on
    // whether plain char is signed on this platform.
    unsigned char const byte = static_cast<unsigned char>(ch);
    if (byte >= 160)
    {
        return true;
    }
    return (byte >= 32) && (byte <= 126);
}
// Construct a string object that stores its own copy of the bytes in
// val. The bytes are kept exactly as given; no conversion is done here.
QPDF_String::QPDF_String(std::string const& val)
    : val(val)
{
}
// Nothing to release explicitly in this destructor body.
QPDF_String::~QPDF_String() {}
// Create a new heap-allocated QPDF_String whose stored value is the
// result of passing utf8_val through QUtil::utf8_to_utf16.
// NOTE(review): the object is created with plain new; presumably the
// caller takes ownership of the returned pointer -- confirm at call sites.
QPDF_String*
QPDF_String::new_utf16(std::string const& utf8_val)
{
    std::string const utf16_val = QUtil::utf8_to_utf16(utf8_val);
    return new QPDF_String(utf16_val);
}
std::string
QPDF_String::unparse()
{
return unparse(false);
}
// Return a JSON string built from the value reported by getUTF8Val().
JSON
QPDF_String::getJSON()
{
    std::string const utf8 = this->getUTF8Val();
    return JSON::makeString(utf8);
}
// Report the object type code for string objects.
QPDFObject::object_type_e
QPDF_String::getTypeCode() const
{
    QPDFObject::object_type_e const code = QPDFObject::ot_string;
    return code;
}
// Report the type name for string objects. The returned pointer refers
// to a string literal, so it remains valid for the life of the program.
char const*
QPDF_String::getTypeName() const
{
    char const* const type_name = "string";
    return type_name;
}
// Produce the PDF syntax for this string.
//
// The result is a hex string ("<...>") when force_binary is true or
// when the stored bytes look like binary data. Otherwise it is a
// literal string ("(...)") in which newline, carriage return, tab,
// backspace, form feed, parentheses and backslash are backslash-escaped,
// bytes accepted by is_iso_latin1_printable are emitted unchanged, and
// every other byte is emitted as a backslash followed by the result of
// QUtil::int_to_string_base(byte, 8, 3) (presumably a zero-padded,
// three-digit octal number).
std::string
QPDF_String::unparse(bool force_binary)
{
    // Every index and counter that is compared against the string
    // length is a size_t. With narrower unsigned int variables, a
    // string of 2^32 bytes or more would wrap the loop index (and the
    // loop would never terminate) on platforms where size_t is wider
    // than unsigned int.
    size_t const len = this->val.length();

    bool use_hexstring = force_binary;
    if (! use_hexstring)
    {
        size_t nonprintable = 0;
        int consecutive_printable = 0;
        for (size_t i = 0; i < len; ++i)
        {
            char ch = this->val.at(i);
            // Note: do not use locale to determine printability. The
            // PDF specification accepts arbitrary binary data. Some
            // locales imply multibyte characters. We'll consider
            // something printable if it is printable in 7-bit ASCII.
            // We'll code this manually rather than being rude and
            // setting locale.
            //
            // The null byte is tested explicitly because strchr
            // treats the terminating null as part of the searched
            // string and would therefore report a match for it.
            if ((ch == 0) ||
                (! (is_ascii_printable(ch) ||
                    strchr("\n\r\t\b\f", ch))))
            {
                ++nonprintable;
                consecutive_printable = 0;
            }
            else if (++consecutive_printable > 5)
            {
                // If there are more than 5 consecutive printable
                // characters, I want to see them as such.
                nonprintable = 0;
                break;
            }
        }

        // Use hex notation if more than 20% of the characters are not
        // printable in plain ASCII. For non-negative integers,
        // (5 * nonprintable > len) is equivalent to
        // (nonprintable > len / 5) with truncating division; the
        // division form cannot overflow.
        if (nonprintable > len / 5)
        {
            use_hexstring = true;
        }
    }

    std::string result;
    if (use_hexstring)
    {
        result += "<" + QUtil::hex_encode(this->val) + ">";
    }
    else
    {
        result += "(";
        for (size_t i = 0; i < len; ++i)
        {
            char ch = this->val.at(i);
            switch (ch)
            {
              case '\n':
                result += "\\n";
                break;

              case '\r':
                result += "\\r";
                break;

              case '\t':
                result += "\\t";
                break;

              case '\b':
                result += "\\b";
                break;

              case '\f':
                result += "\\f";
                break;

              case '(':
                result += "\\(";
                break;

              case ')':
                result += "\\)";
                break;

              case '\\':
                result += "\\\\";
                break;

              default:
                if (is_iso_latin1_printable(ch))
                {
                    result += ch;
                }
                else
                {
                    // Go through unsigned char first so that a
                    // high-bit byte is never passed as a negative
                    // number.
                    result += "\\" + QUtil::int_to_string_base(
                        static_cast<int>(static_cast<unsigned char>(ch)),
                        8, 3);
                }
                break;
            }
        }
        result += ")";
    }

    return result;
}
std::string
QPDF_String::getVal() const
{
return this->val;
}
// Return the stored value converted to UTF-8. Values that
// QUtil::is_utf16 recognizes are converted with QUtil::utf16_to_utf8;
// everything else goes through QUtil::pdf_doc_to_utf8.
std::string
QPDF_String::getUTF8Val() const
{
    if (! QUtil::is_utf16(this->val))
    {
        return QUtil::pdf_doc_to_utf8(this->val);
    }
    return QUtil::utf16_to_utf8(this->val);
}