mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Improve Unicode filename testing
Remove dependency on the behavior of perl for reliable creation of Unicode file names on Windows.
This commit is contained in:
parent
7ff234a92f
commit
03e27709f3
8
TODO
8
TODO
@ -170,6 +170,14 @@ I find it useful to make reference to them in this list
|
||||
|
||||
* Pl_TIFFPredictor is pretty slow.
|
||||
|
||||
* Support for handling file names with Unicode characters in Windows
|
||||
is incomplete. qpdf seems to support them okay from a functionality
|
||||
standpoint, and the right thing happens if you pass in UTF-8
|
||||
encoded filenames to QPDF library routines in Windows (they are
|
||||
converted internally to wchar_t*), but file names are encoded in
|
||||
UTF-8 on output, which doesn't produce nice error messages or
|
||||
output on Windows in some cases.
|
||||
|
||||
* If we ever wanted to do anything more with character encoding, see
|
||||
../misc/character-encoding/, which includes a machine-readable dump
|
||||
of table D.2 in the ISO-32000 PDF spec. This shows the mapping
|
||||
|
@ -2612,6 +2612,31 @@ outfile.pdf</option>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</sect1>
|
||||
<sect1 id="ref.unicode-files">
|
||||
<title>A Note About Unicode File Names</title>
|
||||
<para>
|
||||
When strings are passed to qpdf library routines either as
|
||||
<literal>char*</literal> or as <literal>std::string</literal>,
|
||||
they are treated as byte arrays except where otherwise noted. When
|
||||
Unicode is desired, qpdf wants UTF-8 unless otherwise noted in
|
||||
comments in header files. In modern UNIX/Linux environments, this
|
||||
generally does the right thing. In Windows, it's a bit more
|
||||
complicated. Starting in qpdf 8.4.0, passwords that contain
|
||||
Unicode characters are handled much better, and starting in qpdf
|
||||
8.4.1, the library attempts to properly handle Unicode characters
|
||||
in filenames. In particular, in Windows, if a UTF-8 encoded string
|
||||
is used as a filename in either <classname>QPDF</classname> or
|
||||
<classname>QPDFWriter</classname>, it is internally converted to
|
||||
<literal>wchar_t*</literal>, and Unicode-aware Windows APIs are
|
||||
used. As such, qpdf will generally operate properly on files with
|
||||
non-ASCII characters in their names as long as the filenames are
|
||||
UTF-8 encoded for passing into the qpdf library API, but there are
|
||||
still some rough edges, such as the encoding of the filenames in
|
||||
error messages or CLI output messages. Patches or bug reports are
|
||||
welcome for any continuing issues with Unicode file names in
|
||||
Windows.
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
<chapter id="ref.json">
|
||||
<title>QPDF JSON</title>
|
||||
|
@ -5,7 +5,8 @@ BINS_qpdf = \
|
||||
test_large_file \
|
||||
test_pdf_doc_encoding \
|
||||
test_pdf_unicode \
|
||||
test_tokenizer
|
||||
test_tokenizer \
|
||||
test_unicode_filenames
|
||||
CBINS_qpdf = qpdf-ctest
|
||||
|
||||
TARGETS_qpdf = $(foreach B,$(BINS_qpdf) $(CBINS_qpdf),qpdf/$(OUTPUT_DIR)/$(call binname,$(B)))
|
||||
@ -20,6 +21,8 @@ TC_SRCS_qpdf = $(wildcard libqpdf/*.cc) $(wildcard qpdf/*.cc)
|
||||
|
||||
XCXXFLAGS_qpdf_qpdf := $(WINDOWS_WMAIN_COMPILE)
|
||||
XLDFLAGS_qpdf_qpdf := $(WINDOWS_WMAIN_LINK)
|
||||
XCXXFLAGS_qpdf_test_unicode_filenames := $(WINDOWS_WMAIN_COMPILE)
|
||||
XLDFLAGS_qpdf_test_unicode_filenames := $(WINDOWS_WMAIN_LINK)
|
||||
|
||||
$(foreach B,$(BINS_qpdf),$(eval \
|
||||
OBJS_$(B) = $(call src_to_obj,qpdf/$(B).cc)))
|
||||
|
@ -135,7 +135,7 @@ foreach my $c (@completion_tests)
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Argument Parsing ---");
|
||||
$n_tests += 8;
|
||||
$n_tests += 6;
|
||||
|
||||
$td->runtest("required argument",
|
||||
{$td->COMMAND => "qpdf --password minimal.pdf"},
|
||||
@ -167,10 +167,21 @@ $td->runtest("extra overlay filename",
|
||||
{$td->REGEXP => ".*overlay file already specified.*",
|
||||
$td->EXIT_STATUS => 2},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Unicode Filenames ---");
|
||||
$n_tests += 3;
|
||||
|
||||
$td->runtest("create unicode filenames",
|
||||
{$td->COMMAND => "test_unicode_filenames"},
|
||||
{$td->STRING => "created Unicode filenames\n",
|
||||
$td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
foreach my $d (['auto-ü', 1], ['auto-öπ', 2])
|
||||
{
|
||||
my ($u, $n) = @$d;
|
||||
copy('minimal.pdf', "$u.pdf");
|
||||
$td->runtest("unicode filename $u",
|
||||
{$td->COMMAND => "qpdf --check $u.pdf"},
|
||||
{$td->FILE => "check-unicode-filename-$n.out",
|
||||
|
81
qpdf/test_unicode_filenames.cc
Normal file
81
qpdf/test_unicode_filenames.cc
Normal file
@ -0,0 +1,81 @@
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <direct.h>
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Copy all remaining bytes from "in" to "out", then close both
// streams. If either stream is null, or if any read, write, or the
// final flush of the output fails, print a message to standard error
// and exit with status 2.
static void do_copy(FILE* in, FILE* out)
{
    if ((in == 0) || (out == 0))
    {
        std::cerr << "errors opening files" << std::endl;
        exit(2);
    }
    char buf[10240];
    size_t len = 0;
    bool failed = false;
    while ((len = fread(buf, 1, sizeof(buf), in)) > 0)
    {
        // A short write means the output stream is in error.
        if (fwrite(buf, 1, len, out) != len)
        {
            failed = true;
            break;
        }
    }
    // fread returns 0 both at end of file and on error, so the stream's
    // error indicator is the only way to tell the two apart.
    if (ferror(in))
    {
        failed = true;
    }
    fclose(in);
    // Closing flushes buffered output; a failure here means data was
    // lost even though every fwrite call appeared to succeed.
    if (fclose(out) != 0)
    {
        failed = true;
    }
    if (failed)
    {
        std::cerr << "errors reading or writing" << std::endl;
        exit(2);
    }
}
|
||||
|
||||
#ifdef WINDOWS_WMAIN
|
||||
|
||||
void copy(wchar_t const* outname)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
FILE* in = 0;
|
||||
_wfopen_s(&in, L"minimal.pdf", L"rb");
|
||||
FILE* out = 0;
|
||||
_wfopen_s(&out, outname, L"wb");
|
||||
#else
|
||||
FILE* in = _wfopen(L"minimal.pdf", L"rb");
|
||||
FILE* out = _wfopen(outname, L"wb");
|
||||
#endif
|
||||
do_copy(in, out);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
int wmain(int argc, wchar_t* argv[])
|
||||
{
|
||||
// Unicode
|
||||
wchar_t const* f1 = L"auto-\xfc.pdf";
|
||||
wchar_t const* f2 = L"auto-\xf6\x03c0.pdf";
|
||||
copy(f1);
|
||||
copy(f2);
|
||||
std::cout << "created Unicode filenames" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void copy(char const* outname)
|
||||
{
|
||||
FILE* in = fopen("minimal.pdf", "rb");
|
||||
FILE* out = fopen(outname, "wb");
|
||||
do_copy(in, out);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
// Explicit UTF-8 encoding
|
||||
char const* f1 = "auto-\xc3\xbc.pdf";
|
||||
char const* f2 = "auto-\xc3\xb6\xcf\x80.pdf";
|
||||
copy(f1);
|
||||
copy(f2);
|
||||
std::cout << "created Unicode filenames" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user