From 011695dfdf52e7a83f0eeceb85d0d2c06e7df7da Mon Sep 17 00:00:00 2001
From: Jay Berkenbilt
Date: Sat, 20 Apr 2019 18:14:32 -0400
Subject: [PATCH] Support Unicode in filenames (fixes #298)

---
 ChangeLog                                    |  3 ++
 libqpdf/QUtil.cc                             | 35 +++++++++++++++++++-
 qpdf/qtest/qpdf.test                         | 14 +++++++--
 qpdf/qtest/qpdf/check-unicode-filename-1.out |  6 ++++
 qpdf/qtest/qpdf/check-unicode-filename-2.out |  6 ++++
 5 files changed, 61 insertions(+), 3 deletions(-)
 create mode 100644 qpdf/qtest/qpdf/check-unicode-filename-1.out
 create mode 100644 qpdf/qtest/qpdf/check-unicode-filename-2.out

diff --git a/ChangeLog b/ChangeLog
index e62317b8..8514e379 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 2019-04-20  Jay Berkenbilt
 
+	* Handle Unicode characters in filenames. The changes to support
+	Unicode on the CLI in Windows broke Unicode filenames. Fixes #298.
+
 	* Slightly tighten logic that determines whether an object is a
 	page. The previous logic was sometimes failing to preserve
 	annotations because they were passing the overly loose test for
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 7ea3f5e7..816c2dfa 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -354,11 +354,44 @@ FILE*
 QUtil::safe_fopen(char const* filename, char const* mode)
 {
     FILE* f = 0;
+#ifdef _WIN32
+    // Convert the utf-8 encoded filename argument to wchar_t*. First,
+    // convert to utf16, then to wchar_t*. Note that u16 will start
+    // with the UTF16 marker, which we skip.
+    std::string u16 = utf8_to_utf16(filename);
+    size_t len = u16.length();
+    size_t wlen = (len / 2) - 1;
+    PointerHolder<wchar_t> wfilenamep(true, new wchar_t[wlen + 1]);
+    wchar_t* wfilename = wfilenamep.getPointer();
+    wfilename[wlen] = 0;
+    for (size_t i = 2; i < len; i += 2)
+    {
+        wfilename[(i/2) - 1] =
+            static_cast<wchar_t>(
+                (static_cast<unsigned char>(u16.at(i)) << 8) +
+                static_cast<unsigned char>(u16.at(i+1)));
+    }
+    // fopen mode strings are ASCII, so widening each char suffices.
+    // The buffer must hold strlen(mode) + 1 elements, hence new[].
+    PointerHolder<wchar_t> wmodep(true, new wchar_t[strlen(mode) + 1]);
+    wchar_t* wmode = wmodep.getPointer();
+    wmode[strlen(mode)] = 0;
+    for (size_t i = 0; i < strlen(mode); ++i)
+    {
+        wmode[i] = mode[i];
+    }
+
 #ifdef _MSC_VER
-    errno_t err = fopen_s(&f, filename, mode);
+    errno_t err = _wfopen_s(&f, wfilename, wmode);
     if (err != 0)
     {
         errno = err;
+    }
+#else
+    f = _wfopen(wfilename, wmode);
+#endif
+    if (f == 0)
+    {
         throw_system_error(std::string("open ") + filename);
     }
 #else
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index 43219109..4dd0909f 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -135,7 +135,7 @@ foreach my $c (@completion_tests)
 show_ntests();
 # ----------
 $td->notify("--- Argument Parsing ---");
-$n_tests += 6;
+$n_tests += 8;
 
 $td->runtest("required argument",
              {$td->COMMAND => "qpdf --password minimal.pdf"},
@@ -167,6 +167,16 @@ $td->runtest("extra overlay filename",
              {$td->REGEXP => ".*overlay file already specified.*",
               $td->EXIT_STATUS => 2},
              $td->NORMALIZE_NEWLINES);
+foreach my $d (['auto-ü', 1], ['auto-öπ', 2])
+{
+    my ($u, $n) = @$d;
+    copy('minimal.pdf', "$u.pdf");
+    $td->runtest("unicode filename $u",
+                 {$td->COMMAND => "qpdf --check $u.pdf"},
+                 {$td->FILE => "check-unicode-filename-$n.out",
+                  $td->EXIT_STATUS => 0},
+                 $td->NORMALIZE_NEWLINES);
+}
 
 show_ntests();
 # ----------
@@ -4093,5 +4103,5 @@ sub get_md5_checksum
 sub cleanup
 {
     system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache");
-    system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf");
+    system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*.pdf");
 }
diff --git a/qpdf/qtest/qpdf/check-unicode-filename-1.out b/qpdf/qtest/qpdf/check-unicode-filename-1.out
new file mode 100644
index 00000000..7ff8c445
--- /dev/null
+++ b/qpdf/qtest/qpdf/check-unicode-filename-1.out
@@ -0,0 +1,6 @@
+checking auto-ü.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect
diff --git a/qpdf/qtest/qpdf/check-unicode-filename-2.out b/qpdf/qtest/qpdf/check-unicode-filename-2.out
new file mode 100644
index 00000000..62c32cb5
--- /dev/null
+++ b/qpdf/qtest/qpdf/check-unicode-filename-2.out
@@ -0,0 +1,6 @@
+checking auto-öπ.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+No syntax or stream encoding errors found; the file may still contain
+errors that qpdf cannot detect