2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-04 03:10:52 +00:00

Support Unicode in filenames (fixes #298)

This commit is contained in:
Jay Berkenbilt 2019-04-20 18:14:32 -04:00
parent 4ccb29912a
commit 011695dfdf
5 changed files with 59 additions and 3 deletions

View File

@ -1,5 +1,8 @@
2019-04-20 Jay Berkenbilt <ejb@ql.org> 2019-04-20 Jay Berkenbilt <ejb@ql.org>
* Handle Unicode characters in filenames. The changes to support
Unicode on the CLI in Windows broke Unicode filenames. Fixes #298.
* Slightly tighten logic that determines whether an object is a * Slightly tighten logic that determines whether an object is a
page. The previous logic was sometimes failing to preserve page. The previous logic was sometimes failing to preserve
annotations because they were passing the overly loose test for annotations because they were passing the overly loose test for

View File

@ -354,11 +354,42 @@ FILE*
QUtil::safe_fopen(char const* filename, char const* mode) QUtil::safe_fopen(char const* filename, char const* mode)
{ {
FILE* f = 0; FILE* f = 0;
#ifdef _WIN32
// Convert the utf-8 encoded filename argument to wchar_t*. First,
// convert to utf16, then to wchar_t*. Note that u16 will start
// with the UTF16 marker, which we skip.
std::string u16 = utf8_to_utf16(filename);
size_t len = u16.length();
// Two bytes per UTF-16 code unit; subtract one unit for the leading
// marker that is not copied into the wide string.
size_t wlen = (len / 2) - 1;
// One extra element holds the terminating null. NOTE(review): the
// `true` argument presumably tells PointerHolder this is an array
// allocation -- confirm against PointerHolder's declaration.
PointerHolder<wchar_t> wfilenamep(true, new wchar_t[wlen + 1]);
wchar_t* wfilename = wfilenamep.getPointer();
wfilename[wlen] = 0;
// Start at byte offset 2 to step over the two-byte marker, consuming
// one byte pair per iteration.
for (unsigned int i = 2; i < len; i += 2)
{
// Combine the pair into one code unit, treating the first byte as
// the high-order byte. The casts to unsigned char keep bytes >= 0x80
// from being sign-extended before the shift and add.
wfilename[(i/2) - 1] =
static_cast<wchar_t>(
(static_cast<unsigned char>(u16.at(i)) << 8) +
static_cast<unsigned char>(u16.at(i+1)));
}
PointerHolder<wchar_t> wmodep(true, new wchar_t(strlen(mode) + 1));
wchar_t* wmode = wmodep.getPointer();
wmode[strlen(mode)] = 0;
for (size_t i = 0; i < strlen(mode); ++i)
{
wmode[i] = mode[i];
}
#ifdef _MSC_VER #ifdef _MSC_VER
errno_t err = fopen_s(&f, filename, mode); errno_t err = _wfopen_s(&f, wfilename, wmode);
if (err != 0) if (err != 0)
{ {
errno = err; errno = err;
}
#else
f = _wfopen(wfilename, wmode);
#endif
if (f == 0)
{
throw_system_error(std::string("open ") + filename); throw_system_error(std::string("open ") + filename);
} }
#else #else

View File

@ -135,7 +135,7 @@ foreach my $c (@completion_tests)
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Argument Parsing ---"); $td->notify("--- Argument Parsing ---");
$n_tests += 6; $n_tests += 8;
$td->runtest("required argument", $td->runtest("required argument",
{$td->COMMAND => "qpdf --password minimal.pdf"}, {$td->COMMAND => "qpdf --password minimal.pdf"},
@ -167,6 +167,16 @@ $td->runtest("extra overlay filename",
{$td->REGEXP => ".*overlay file already specified.*", {$td->REGEXP => ".*overlay file already specified.*",
$td->EXIT_STATUS => 2}, $td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
# Run "qpdf --check" on copies of minimal.pdf whose names contain
# non-ASCII characters. Each entry pairs a base file name with the
# number of its expected-output file.
for my $case (['auto-ü', 1], ['auto-öπ', 2])
{
    my $base = $case->[0];
    my $out_num = $case->[1];
    my $pdf = "$base.pdf";
    copy('minimal.pdf', $pdf);
    $td->runtest("unicode filename $base",
                 {$td->COMMAND => "qpdf --check $pdf"},
                 {$td->FILE => "check-unicode-filename-$out_num.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests(); show_ntests();
# ---------- # ----------
@ -4093,5 +4103,5 @@ sub get_md5_checksum
sub cleanup sub cleanup
{ {
system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache"); system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache");
system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf"); system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*.pdf");
} }

View File

@ -0,0 +1,6 @@
checking auto-ü.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect

View File

@ -0,0 +1,6 @@
checking auto-öπ.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect