diff --git a/ChangeLog b/ChangeLog
index 64518c5e..a1211834 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2020-01-14 Jay Berkenbilt
+
+	* Add QUtil::call_main_from_wmain, a helper function that can be
+	called in the body of wmain to convert UTF-16 arguments to UTF-8
+	arguments and then call another main function.
+
 2020-01-13 Jay Berkenbilt
 
 	* QUtil::read_lines_from_file: add new versions that use FILE*,
diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index da136342..ef701cbb 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -362,6 +362,14 @@ namespace QUtil
     // command-line tool. May throw std::runtime_error.
     QPDF_DLL
     std::vector parse_numrange(char const* range, int max);
+
+    // Take an argv array consisting of wchar_t, as when wmain is
+    // invoked, convert all UTF-16 encoded strings to UTF-8, and call
+    // realmain with the converted, null-terminated argv. Returns
+    // the value returned by realmain.
+    QPDF_DLL
+    int call_main_from_wmain(int argc, wchar_t* argv[],
+                             std::function<int(int, char*[])> realmain);
 };
 
 #endif // QUTIL_HH
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 63adbed2..1d29bccf 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #ifdef _WIN32
 #include
 #include
@@ -2361,3 +2362,44 @@ QUtil::possible_repaired_encodings(std::string supplied)
     }
     return t;
 }
+
+int
+QUtil::call_main_from_wmain(int argc, wchar_t* argv[], std::function<int(int, char*[])> realmain)
+{
+    // argv contains UTF-16-encoded strings with a 16-bit wchar_t.
+    // Convert this to UTF-8-encoded strings for compatibility with
+    // other systems. That way realmain can just act like its
+    // arguments are UTF-8.
+
+    std::vector<std::shared_ptr<char>> utf8_argv;
+    for (int i = 0; i < argc; ++i)
+    {
+        // Serialize each 16-bit unit as two bytes, high byte first.
+        // Walk to the terminator directly rather than calling
+        // wcslen on every iteration.
+        std::string utf16;
+        for (wchar_t const* p = argv[i]; *p != 0; ++p)
+        {
+            unsigned short codepoint = static_cast<unsigned short>(*p);
+            utf16.append(1, static_cast<char>(
+                             QIntC::to_uchar(codepoint >> 8)));
+            utf16.append(1, static_cast<char>(
+                             QIntC::to_uchar(codepoint & 0xff)));
+        }
+        std::string utf8 = QUtil::utf16_to_utf8(utf16);
+        utf8_argv.push_back(std::shared_ptr<char>(QUtil::copy_string(utf8.c_str()), std::default_delete<char[]>()));
+    }
+    // utf8_argv holds the converted strings; new_argv is the plain
+    // char* array handed to realmain, with one extra slot for the
+    // terminating null pointer.
+    auto utf8_argv_sp =
+        std::shared_ptr<char*>(new char*[1+utf8_argv.size()], std::default_delete<char*[]>());
+    char** new_argv = utf8_argv_sp.get();
+    for (size_t i = 0; i < utf8_argv.size(); ++i)
+    {
+        new_argv[i] = utf8_argv.at(i).get();
+    }
+    argc = QIntC::to_int(utf8_argv.size());
+    new_argv[argc] = nullptr;
+    return realmain(argc, new_argv);
+}
diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out
index 5fe841ac..9da8cecf 100644
--- a/libtests/qtest/qutil/qutil.out
+++ b/libtests/qtest/qutil/qutil.out
@@ -105,3 +105,7 @@ rename file
 create file
 rename over existing
 delete file
+---- wmain
+ascii
+10 ÷ 2 = 5
+qwww÷π
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index 935cdfc2..b8429a44 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -543,6 +543,17 @@ void rename_delete_test()
     assert_no_file("old\xcf\x80.~tmp");
 }
 
+void wmain_test()
+{
+    auto realmain = [](int argc, char* argv[]) {
+        for (int i = 0; i < argc; ++i) { std::cout << argv[i] << std::endl; } return 0; };
+    wchar_t* argv[3];
+    argv[0] = const_cast<wchar_t*>(L"ascii");
+    argv[1] = const_cast<wchar_t*>(L"10 \xf7 2 = 5");
+    argv[2] = const_cast<wchar_t*>(L"qwww\xf7\x03c0");
+    QUtil::call_main_from_wmain(3, argv, realmain);
+}
+
 int main(int argc, char* argv[])
 {
     try
@@ -573,6 +584,8 @@ int main(int argc, char* argv[])
         hex_encode_decode_test();
         std::cout << "---- rename/delete" << std::endl;
         rename_delete_test();
+        std::cout << "---- wmain" << std::endl;
+        wmain_test();
     }
     catch (std::exception& e)
     {
diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc
index ea17e93c..2139d8b2 100644
--- a/qpdf/qpdf.cc
+++ b/qpdf/qpdf.cc
@@ -5304,36 +5304,7 @@ int realmain(int argc, char* argv[])
 extern "C"
 int wmain(int argc, wchar_t* argv[])
 {
-    // If wmain is supported, argv contains UTF-16-encoded strings
-    // with a 16-bit wchar_t. Convert this to UTF-8-encoded strings
-    // for compatibility with other systems. That way the rest of
-    // qpdf.cc can just act like arguments are UTF-8.
-    std::vector > utf8_argv;
-    for (int i = 0; i < argc; ++i)
-    {
-        std::string utf16;
-        for (size_t j = 0; j < wcslen(argv[i]); ++j)
-        {
-            unsigned short codepoint = static_cast(argv[i][j]);
-            utf16.append(1, static_cast(
-                             QIntC::to_uchar(codepoint >> 8)));
-            utf16.append(1, static_cast(
-                             QIntC::to_uchar(codepoint & 0xff)));
-        }
-        std::string utf8 = QUtil::utf16_to_utf8(utf16);
-        utf8_argv.push_back(
-            PointerHolder(true, QUtil::copy_string(utf8.c_str())));
-    }
-    PointerHolder utf8_argv_ph =
-        PointerHolder(true, new char*[1+utf8_argv.size()]);
-    char** new_argv = utf8_argv_ph.getPointer();
-    for (size_t i = 0; i < utf8_argv.size(); ++i)
-    {
-        new_argv[i] = utf8_argv.at(i).getPointer();
-    }
-    argc = QIntC::to_int(utf8_argv.size());
-    new_argv[argc] = 0;
-    return realmain(argc, new_argv);
+    return QUtil::call_main_from_wmain(argc, argv, realmain); // UTF-16 argv -> UTF-8
 }
 
 #else