Further improvements to handling binary strings

This commit is contained in:
Jay Berkenbilt 2021-12-19 13:52:19 -05:00
parent 92613a1eec
commit ea73bf72e0
6 changed files with 83 additions and 13 deletions

View File

@ -1,3 +1,11 @@
2021-12-19 Jay Berkenbilt <ejb@ql.org>
* C API: clarify documentation around string lengths. Add two new
methods: qpdf_oh_get_binary_string_value and
qpdf_oh_new_binary_string to make the need to handle the length
and data separately more explicit in cases in which the string
data may contain embedded null characters.
2021-12-17 Jay Berkenbilt <ejb@ql.org>
* C API: simplify error handling for uncaught errors (never in a

View File

@ -61,10 +61,12 @@
* subsequent function calls, sometimes even to different
* functions. If you want a string to last past the next qpdf call
* or after a call to qpdf_cleanup, you should make a copy of it.
* It is possible for the internal string data to contain null
* characters. To handle that case, you call
* qpdf_get_last_string_length() to get the length of whatever
* string was just returned.
*
* Since it is possible for a PDF string to contain null
* characters, the data returned by a function whose result
* originates from a PDF string may also contain null characters.
* To handle that case, call qpdf_get_last_string_length() to get
* the length of whatever string was just returned. See STRING
* FUNCTIONS below.
*
* Most functions defined here have obvious counterparts that are
* methods to either QPDF or QPDFWriter. Please see comments in
@ -189,14 +191,6 @@ extern "C" {
QPDF_DLL
void qpdf_cleanup(qpdf_data* qpdf);
/* Return the length of the last string returned. This enables you
* to retrieve the entire string for cases in which a char*
* returned by one of the functions below points to a string with
* embedded null characters.
*/
QPDF_DLL
size_t qpdf_get_last_string_length(qpdf_data qpdf);
/* ERROR REPORTING */
/* Returns 1 if there is an error condition. The error condition
@ -716,10 +710,29 @@ extern "C" {
QPDF_DLL
char const* qpdf_oh_get_name(qpdf_data qpdf, qpdf_oh oh);
/* Return the length of the last string returned. This enables you
* to retrieve the entire string for cases in which a char*
* returned by one of the functions below points to a string with
* embedded null characters. The function
* qpdf_oh_get_binary_string_value takes a length pointer, which
* can be useful if you are retrieving the value of a string that
* is expected to contain binary data, such as a checksum or
* document ID. It is always valid to call
* qpdf_get_last_string_length, but it is usually not necessary as
* C strings returned by the library are only expected to be able
* to contain null characters if their values originate from PDF
* strings in the input.
*/
QPDF_DLL
size_t qpdf_get_last_string_length(qpdf_data qpdf);
QPDF_DLL
char const* qpdf_oh_get_string_value(qpdf_data qpdf, qpdf_oh oh);
QPDF_DLL
char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh);
/* Return the value of a string object and store the number of
 * bytes in that value in *length. Use the stored length rather than
 * strlen() on the result, since the data may contain embedded null
 * characters.
 */
QPDF_DLL
char const* qpdf_oh_get_binary_string_value(
qpdf_data qpdf, qpdf_oh oh, size_t* length);
QPDF_DLL
int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh);
@ -772,6 +785,12 @@ extern "C" {
qpdf_oh qpdf_oh_new_string(qpdf_data qpdf, char const* str);
QPDF_DLL
qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str);
/* Use qpdf_oh_new_binary_string for creating a string that may
* contain arbitrary binary data including embedded null characters.
* The length parameter gives the number of bytes to take from str.
*/
QPDF_DLL
qpdf_oh qpdf_oh_new_binary_string(
qpdf_data qpdf, char const* str, size_t length);
QPDF_DLL
qpdf_oh qpdf_oh_new_array(qpdf_data qpdf);
QPDF_DLL

View File

@ -1292,6 +1292,20 @@ char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh)
});
}
// Return the raw value of the string object referred to by oh and
// report its size in bytes through the length out-parameter.
//
// The value is copied into qpdf->tmp_string and the returned pointer
// is that string's c_str(), so the data may contain embedded null
// bytes and should be read using the reported length rather than
// strlen().
//
// length may be null, in which case no length is reported through
// it. When it is non-null it is always given a defined value: it is
// set to 0 before the lookup so that it agrees with the "" fallback
// value if the callback below is never invoked (presumably when
// do_with_oh cannot hand us a usable object -- do_with_oh is defined
// elsewhere in this file).
char const* qpdf_oh_get_binary_string_value(
    qpdf_data qpdf, qpdf_oh oh, size_t* length)
{
    if (length)
    {
        *length = 0;
    }
    return do_with_oh<char const*>(
        qpdf, oh,
        return_T<char const*>(""),
        [qpdf, length](QPDFObjectHandle& o) {
            QTC::TC("qpdf", "qpdf-c called qpdf_oh_get_binary_string_value");
            // Keep the bytes alive in the qpdf_data object so the
            // pointer handed back to the caller stays valid after
            // this function returns.
            qpdf->tmp_string = o.getStringValue();
            if (length)
            {
                *length = qpdf->tmp_string.length();
            }
            return qpdf->tmp_string.c_str();
        });
}
int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh)
{
return do_with_oh<int>(
@ -1425,6 +1439,14 @@ qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str)
return new_object(qpdf, QPDFObjectHandle::newUnicodeString(utf8_str));
}
// Create a new string object from exactly length bytes starting at
// str and return the handle produced by new_object. Because the byte
// count is supplied explicitly, null bytes inside the buffer are
// kept as part of the string.
qpdf_oh qpdf_oh_new_binary_string(
    qpdf_data qpdf, char const* str, size_t length)
{
    QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_binary_string");
    // The (pointer, count) constructor copies length bytes verbatim
    // instead of stopping at the first null byte.
    std::string bytes(str, length);
    return new_object(qpdf, QPDFObjectHandle::newString(bytes));
}
qpdf_oh qpdf_oh_new_array(qpdf_data qpdf)
{
QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_array");

View File

@ -46,6 +46,12 @@ For a detailed list of changes, please see the file
- C API Enhancements
- Many thanks to M. Holger whose contributions have heavily
influenced these C API enhancements. His several suggestions,
pull requests, questions, and critical reading of documentation
and comments have resulted in significant usability improvements
to the C API.
- Overhaul error handling for the object handle functions in the C API.
Some rare error conditions that would previously have caused a
crash are now trapped and reported, and the functions that
@ -80,6 +86,10 @@ For a detailed list of changes, please see the file
- Add ``qpdf_oh_get_type_code`` and ``qpdf_oh_get_type_name``.
- Add ``qpdf_oh_get_binary_string_value`` and
``qpdf_oh_new_binary_string`` for making it easier to deal with
strings that contain embedded null characters.
10.4.0: November 16, 2021
- Handling of Weak Cryptography Algorithms

View File

@ -781,8 +781,17 @@ static void test27(char const* infile,
assert(strcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null),
"one") == 0);
assert(qpdf_get_last_string_length(qpdf) == 7);
/* memcmp adds a character to verify the trailing null */
assert(memcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null),
"one\000two", 7) == 0);
"one\000two", 8) == 0);
size_t length = 0;
p_string_with_null = qpdf_oh_new_binary_string(qpdf, "potato\000salad", 12);
/* memcmp adds a character to verify the trailing null */
assert(memcmp(qpdf_oh_get_binary_string_value(
qpdf, p_string_with_null, &length),
"potato\000salad", 13) == 0);
assert(qpdf_get_last_string_length(qpdf) == 12);
assert(length == 12);
}
static void test28(char const* infile,

View File

@ -625,3 +625,5 @@ qpdf-c stream data buf set 1
qpdf-c called qpdf_oh_get_page_content_data 0
qpdf-c called qpdf_oh_replace_stream_data 0
qpdf-c silence oh errors 0
qpdf-c called qpdf_oh_get_binary_string_value 0
qpdf-c called qpdf_oh_new_binary_string 0