diff --git a/ChangeLog b/ChangeLog index 5f02ba43..23c213a6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2021-12-19 Jay Berkenbilt + + * C API: clarify documentation around string lengths. Add two new + methods: qpdf_oh_get_binary_string_value and + qpdf_oh_new_binary_string to make the need to handle the length + and data separately more explicit in cases in which the string + data may contain embedded null characters. + 2021-12-17 Jay Berkenbilt * C API: simplify error handling for uncaught errors (never in a diff --git a/include/qpdf/qpdf-c.h b/include/qpdf/qpdf-c.h index e261c7c1..7369e616 100644 --- a/include/qpdf/qpdf-c.h +++ b/include/qpdf/qpdf-c.h @@ -61,10 +61,12 @@ * subsequent function calls, sometimes even to different * functions. If you want a string to last past the next qpdf call * or after a call to qpdf_cleanup, you should make a copy of it. - * It is possible for the internal string data to contain null - * characters. To handle that case, you call - * qpdf_get_last_string_length() to get the length of whatever - * string was just returned. + * + * Since it is possible for a PDF string to contain null + * characters, a function that returns data originating from a PDF + * string may also contain null characters. To handle that case, + * you call qpdf_get_last_string_length() to get the length of + * whatever string was just returned. See STRING FUNCTIONS below. * * Most functions defined here have obvious counterparts that are * methods to either QPDF or QPDFWriter. Please see comments in @@ -189,14 +191,6 @@ extern "C" { QPDF_DLL void qpdf_cleanup(qpdf_data* qpdf); - /* Return the length of the last string returned. This enables you - * to retrieve the entire string for cases in which a char* - * returned by one of the functions below points to a string with - * embedded null characters. - */ - QPDF_DLL - size_t qpdf_get_last_string_length(qpdf_data qpdf); - /* ERROR REPORTING */ /* Returns 1 if there is an error condition. 
The error condition @@ -716,10 +710,29 @@ extern "C" { QPDF_DLL char const* qpdf_oh_get_name(qpdf_data qpdf, qpdf_oh oh); + /* Return the length of the last string returned. This enables you + * to retrieve the entire string for cases in which a char* + * returned by one of the functions below points to a string with + * embedded null characters. The function + * qpdf_oh_get_binary_string_value takes a length pointer, which + * can be useful if you are retrieving the value of a string that + * is expected to contain binary data, such as a checksum or + * document ID. It is always valid to call + * qpdf_get_last_string_length, but it is usually not necessary as + * C strings returned by the library are only expected to be able + * to contain null characters if their values originate from PDF + * strings in the input. + */ + QPDF_DLL + size_t qpdf_get_last_string_length(qpdf_data qpdf); + QPDF_DLL char const* qpdf_oh_get_string_value(qpdf_data qpdf, qpdf_oh oh); QPDF_DLL char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh); + QPDF_DLL + char const* qpdf_oh_get_binary_string_value( + qpdf_data qpdf, qpdf_oh oh, size_t* length); QPDF_DLL int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh); @@ -772,6 +785,12 @@ extern "C" { qpdf_oh qpdf_oh_new_string(qpdf_data qpdf, char const* str); QPDF_DLL qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str); + /* Use qpdf_oh_new_binary_string for creating a string that may + * contain arbitrary binary data including embedded null characters. 
+ */ + QPDF_DLL + qpdf_oh qpdf_oh_new_binary_string( + qpdf_data qpdf, char const* str, size_t length); QPDF_DLL qpdf_oh qpdf_oh_new_array(qpdf_data qpdf); QPDF_DLL diff --git a/libqpdf/qpdf-c.cc b/libqpdf/qpdf-c.cc index 9593a011..5f702272 100644 --- a/libqpdf/qpdf-c.cc +++ b/libqpdf/qpdf-c.cc @@ -1292,6 +1292,20 @@ char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh) }); } +char const* qpdf_oh_get_binary_string_value( + qpdf_data qpdf, qpdf_oh oh, size_t* length) +{ + return do_with_oh( + qpdf, oh, + return_T(""), + [qpdf, length](QPDFObjectHandle& o) { + QTC::TC("qpdf", "qpdf-c called qpdf_oh_get_binary_string_value"); + qpdf->tmp_string = o.getStringValue(); + *length = qpdf->tmp_string.length(); + return qpdf->tmp_string.c_str(); + }); +} + int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh) { return do_with_oh( @@ -1425,6 +1439,14 @@ qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str) return new_object(qpdf, QPDFObjectHandle::newUnicodeString(utf8_str)); } +qpdf_oh qpdf_oh_new_binary_string( + qpdf_data qpdf, char const* str, size_t length) +{ + QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_binary_string"); + return new_object( + qpdf, QPDFObjectHandle::newString(std::string(str, length))); +} + qpdf_oh qpdf_oh_new_array(qpdf_data qpdf) { QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_array"); diff --git a/manual/release-notes.rst b/manual/release-notes.rst index e5d73c90..08187ae5 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -46,6 +46,12 @@ For a detailed list of changes, please see the file - C API Enhancements + - Many thanks to M. Holger whose contributions have heavily + influenced these C API enhancements. His several suggestions, + pull requests, questions, and critical reading of documentation + and comments have resulted in significant usability improvements + to the C API. + - Overhaul error handling for the object handle functions C API. 
Some rare error conditions that would previously have caused a crash are now trapped and reported, and the functions that @@ -80,6 +86,10 @@ For a detailed list of changes, please see the file - Add ``qpdf_oh_get_type_code`` and ``qpdf_oh_get_type_name``. + - Add ``qpdf_oh_get_binary_string_value`` and + ``qpdf_oh_new_binary_string`` for making it easier to deal with + strings that contain embedded null characters. + 10.4.0: November 16, 2021 - Handling of Weak Cryptography Algorithms diff --git a/qpdf/qpdf-ctest.c b/qpdf/qpdf-ctest.c index dbad4e99..953b24d0 100644 --- a/qpdf/qpdf-ctest.c +++ b/qpdf/qpdf-ctest.c @@ -781,8 +781,17 @@ static void test27(char const* infile, assert(strcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null), "one") == 0); assert(qpdf_get_last_string_length(qpdf) == 7); + /* memcmp adds a character to verify the trailing null */ assert(memcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null), - "one\000two", 7) == 0); + "one\000two", 8) == 0); + size_t length = 0; + p_string_with_null = qpdf_oh_new_binary_string(qpdf, "potato\000salad", 12); + /* memcmp adds a character to verify the trailing null */ + assert(memcmp(qpdf_oh_get_binary_string_value( + qpdf, p_string_with_null, &length), + "potato\000salad", 13) == 0); + assert(qpdf_get_last_string_length(qpdf) == 12); + assert(length == 12); } static void test28(char const* infile, diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 034f8d8e..35417d4c 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -625,3 +625,5 @@ qpdf-c stream data buf set 1 qpdf-c called qpdf_oh_get_page_content_data 0 qpdf-c called qpdf_oh_replace_stream_data 0 qpdf-c silence oh errors 0 +qpdf-c called qpdf_oh_get_binary_string_value 0 +qpdf-c called qpdf_oh_new_binary_string 0