Support Unicode in filenames (fixes #298)

This commit is contained in:
Jay Berkenbilt 2019-04-20 18:14:32 -04:00
parent 4ccb29912a
commit 011695dfdf
5 changed files with 59 additions and 3 deletions

View File

@ -1,5 +1,8 @@
2019-04-20 Jay Berkenbilt <ejb@ql.org>
* Handle Unicode characters in filenames. The changes to support
Unicode on the CLI in Windows broke Unicode filenames. Fixes #298.
* Slightly tighten logic that determines whether an object is a
page. The previous logic was sometimes failing to preserve
annotations because they were passing the overly loose test for

View File

@ -354,11 +354,42 @@ FILE*
QUtil::safe_fopen(char const* filename, char const* mode)
{
FILE* f = 0;
#ifdef _WIN32
// Convert the utf-8 encoded filename argument to wchar_t*. First,
// convert to utf16, then to wchar_t*. Note that u16 will start
// with the UTF16 marker, which we skip.
std::string u16 = utf8_to_utf16(filename);
size_t len = u16.length();
size_t wlen = (len / 2) - 1;
PointerHolder<wchar_t> wfilenamep(true, new wchar_t[wlen + 1]);
wchar_t* wfilename = wfilenamep.getPointer();
wfilename[wlen] = 0;
for (unsigned int i = 2; i < len; i += 2)
{
wfilename[(i/2) - 1] =
static_cast<wchar_t>(
(static_cast<unsigned char>(u16.at(i)) << 8) +
static_cast<unsigned char>(u16.at(i+1)));
}
PointerHolder<wchar_t> wmodep(true, new wchar_t(strlen(mode) + 1));
wchar_t* wmode = wmodep.getPointer();
wmode[strlen(mode)] = 0;
for (size_t i = 0; i < strlen(mode); ++i)
{
wmode[i] = mode[i];
}
#ifdef _MSC_VER
errno_t err = fopen_s(&f, filename, mode);
errno_t err = _wfopen_s(&f, wfilename, wmode);
if (err != 0)
{
errno = err;
}
#else
f = _wfopen(wfilename, wmode);
#endif
if (f == 0)
{
throw_system_error(std::string("open ") + filename);
}
#else

View File

@ -135,7 +135,7 @@ foreach my $c (@completion_tests)
show_ntests();
# ----------
$td->notify("--- Argument Parsing ---");
$n_tests += 6;
$n_tests += 8;
$td->runtest("required argument",
{$td->COMMAND => "qpdf --password minimal.pdf"},
@ -167,6 +167,16 @@ $td->runtest("extra overlay filename",
{$td->REGEXP => ".*overlay file already specified.*",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
# Unicode filename tests: each entry pairs a non-ASCII filename stem
# with the numeric suffix of its expected-output file.
foreach my $d (['auto-ü', 1], ['auto-öπ', 2])
{
my ($u, $n) = @$d;
# Create the input by copying minimal.pdf to "<stem>.pdf".
copy('minimal.pdf', "$u.pdf");
# Run "qpdf --check" on the copy; the output must match
# check-unicode-filename-<n>.out and the exit status must be 0.
$td->runtest("unicode filename $u",
{$td->COMMAND => "qpdf --check $u.pdf"},
{$td->FILE => "check-unicode-filename-$n.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
@ -4093,5 +4103,5 @@ sub get_md5_checksum
# Remove files left behind by the test run by shelling out to "rm -rf"
# with a set of glob patterns.
sub cleanup
{
system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache");
# NOTE(review): the next two commands overlap; the second one removes
# the same patterns plus auto-*.pdf, which matches the copies made by
# the unicode filename tests.
system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf");
system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*.pdf");
}

View File

@ -0,0 +1,6 @@
checking auto-ü.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect

View File

@ -0,0 +1,6 @@
checking auto-öπ.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect