lib/scanner: Fix UTF-8 normalization on ZFS (fixes #4649)

It turns out that ZFS doesn't do any normalization when storing files,
but does do normalization "as part of any comparison process".

In practice, this seems to mean that if you LStat a normalized filename,
ZFS will return the FileInfo for the un-normalized version of that
filename.

This meant that our test to see whether a separate file with a
normalized version of the filename already exists was failing, as we
were detecting the same file.

The fix is to use os.SameFile, to see whether we're getting the same
FileInfo from the normalized and un-normalized versions of the same
filename.

One complication is that ZFS also seems to apply its magic to os.Rename,
meaning that we can't use it to rename an un-normalized file to its
normalized filename. Instead we have to move via a temporary object. If
the move to the temporary object fails, that's OK, we can skip it and
move on. If the move from the temporary object fails however, I'm not
sure of the best approach: the current one is to leave the temporary
file name as-is, and get Syncthing to synchronize it, so at least we
don't lose the file. I'm not sure if there are any implications of this
however.

As part of reworking normalizePath, I spotted that it appeared to be
returning the wrong thing: the doc and the surrounding code expect it
to return the normalized filename, but it was returning the
un-normalized one. I fixed this, but it seems suspicious that, if the
previous behaviour was incorrect, no one ever ran afoul of it. Maybe all
filesystems will do some searching and give you a normalized filename if
you request an unnormalized one.

As part of this, I found that TestNormalization was broken: it was
passing, when in fact one of the files it should have verified was
present was missing. Maybe this was related to the above issue with
normalizePath's return value, I'm not sure. Fixed en route.

Kindly tested by @khinsen on the forum, and it appears to work.

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4646
This commit is contained in:
Antony Male 2018-01-05 18:11:09 +00:00 committed by Jakob Borg
parent d87287c0d0
commit db03562d43
6 changed files with 70 additions and 31 deletions

View File

@ -299,6 +299,18 @@ func (f *BasicFilesystem) URI() string {
return strings.TrimPrefix(f.root, `\\?\`) return strings.TrimPrefix(f.root, `\\?\`)
} }
// SameFile reports whether fi1 and fi2 describe the same file, delegating
// the actual comparison to os.SameFile.
//
// Like os.SameFile, the result is always false unless both fi1 and fi2 were
// created by this package's Stat/Lstat method, i.e. unless both hold an
// fsFileInfo value.
func (f *BasicFilesystem) SameFile(fi1, fi2 FileInfo) bool {
	first, isFsInfo := fi1.(fsFileInfo)
	if !isFsInfo {
		return false
	}

	second, isFsInfo := fi2.(fsFileInfo)
	if !isFsInfo {
		return false
	}

	// Both values wrap an os.FileInfo; let the standard library decide.
	return os.SameFile(first.FileInfo, second.FileInfo)
}
// fsFile implements the fs.File interface on top of an os.File // fsFile implements the fs.File interface on top of an os.File
type fsFile struct { type fsFile struct {
*os.File *os.File

View File

@ -42,6 +42,7 @@ func (fs *errorFilesystem) Roots() ([]string, error)
func (fs *errorFilesystem) Usage(name string) (Usage, error) { return Usage{}, fs.err } func (fs *errorFilesystem) Usage(name string) (Usage, error) { return Usage{}, fs.err }
func (fs *errorFilesystem) Type() FilesystemType { return fs.fsType } func (fs *errorFilesystem) Type() FilesystemType { return fs.fsType }
func (fs *errorFilesystem) URI() string { return fs.uri } func (fs *errorFilesystem) URI() string { return fs.uri }
// SameFile always reports false, whatever fi1 and fi2 are.
func (fs *errorFilesystem) SameFile(fi1, fi2 FileInfo) bool { return false }
func (fs *errorFilesystem) Watch(path string, ignore Matcher, ctx context.Context, ignorePerms bool) (<-chan Event, error) { func (fs *errorFilesystem) Watch(path string, ignore Matcher, ctx context.Context, ignorePerms bool) (<-chan Event, error) {
return nil, fs.err return nil, fs.err
} }

View File

@ -43,6 +43,7 @@ type Filesystem interface {
Usage(name string) (Usage, error) Usage(name string) (Usage, error)
Type() FilesystemType Type() FilesystemType
URI() string URI() string
SameFile(fi1, fi2 FileInfo) bool
} }
// The File interface abstracts access to a regular file, being a somewhat // The File interface abstracts access to a regular file, being a somewhat

View File

@ -46,7 +46,7 @@ func IsTemporary(name string) bool {
return false return false
} }
func TempName(name string) string { func TempNameWithPrefix(name, prefix string) string {
tdir := filepath.Dir(name) tdir := filepath.Dir(name)
tbase := filepath.Base(name) tbase := filepath.Base(name)
if len(tbase) > maxFilenameLength { if len(tbase) > maxFilenameLength {
@ -54,6 +54,10 @@ func TempName(name string) string {
hash.Write([]byte(name)) hash.Write([]byte(name))
tbase = fmt.Sprintf("%x", hash.Sum(nil)) tbase = fmt.Sprintf("%x", hash.Sum(nil))
} }
tname := fmt.Sprintf("%s%s.tmp", TempPrefix, tbase) tname := fmt.Sprintf("%s%s.tmp", prefix, tbase)
return filepath.Join(tdir, tname) return filepath.Join(tdir, tname)
} }
// TempName returns the temporary file name for name, as built by
// TempNameWithPrefix with TempPrefix as the prefix.
func TempName(name string) string {
return TempNameWithPrefix(name, TempPrefix)
}

View File

@ -257,7 +257,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, fchan, dchan chan protoco
return skip return skip
} }
path, shouldSkip := w.normalizePath(path) path, shouldSkip := w.normalizePath(path, info)
if shouldSkip { if shouldSkip {
return skip return skip
} }
@ -419,7 +419,7 @@ func (w *walker) walkSymlink(ctx context.Context, relPath string, dchan chan pro
// normalizePath returns the normalized relative path (possibly after fixing // normalizePath returns the normalized relative path (possibly after fixing
// it on disk), or skip is true. // it on disk), or skip is true.
func (w *walker) normalizePath(path string) (normPath string, skip bool) { func (w *walker) normalizePath(path string, info fs.FileInfo) (normPath string, skip bool) {
if runtime.GOOS == "darwin" { if runtime.GOOS == "darwin" {
// Mac OS X file names should always be NFD normalized. // Mac OS X file names should always be NFD normalized.
normPath = norm.NFD.String(path) normPath = norm.NFD.String(path)
@ -430,33 +430,54 @@ func (w *walker) normalizePath(path string) (normPath string, skip bool) {
normPath = norm.NFC.String(path) normPath = norm.NFC.String(path)
} }
if path != normPath { if path == normPath {
// The file name was not normalized. // The file name is already normalized: nothing to do
return path, false
if !w.AutoNormalize {
// We're not authorized to do anything about it, so complain and skip.
l.Warnf("File name %q is not in the correct UTF8 normalization form; skipping.", path)
return "", true
}
// We will attempt to normalize it.
if _, err := w.Filesystem.Lstat(normPath); fs.IsNotExist(err) {
// Nothing exists with the normalized filename. Good.
if err = w.Filesystem.Rename(path, normPath); err != nil {
l.Infof(`Error normalizing UTF8 encoding of file "%s": %v`, path, err)
return "", true
}
l.Infof(`Normalized UTF8 encoding of file name "%s".`, path)
} else {
// There is something already in the way at the normalized
// file name.
l.Infof(`File "%s" path has UTF8 encoding conflict with another file; ignoring.`, path)
return "", true
}
} }
return path, false if !w.AutoNormalize {
// We're not authorized to do anything about it, so complain and skip.
l.Warnf("File name %q is not in the correct UTF8 normalization form; skipping.", path)
return "", true
}
// We will attempt to normalize it.
normInfo, err := w.Filesystem.Lstat(normPath)
if fs.IsNotExist(err) {
// Nothing exists with the normalized filename. Good.
if err = w.Filesystem.Rename(path, normPath); err != nil {
l.Infof(`Error normalizing UTF8 encoding of file "%s": %v`, path, err)
return "", true
}
l.Infof(`Normalized UTF8 encoding of file name "%s".`, path)
} else if w.Filesystem.SameFile(info, normInfo) {
// With some filesystems (ZFS), if there is an un-normalized path and you ask whether the normalized
// version exists, it responds with true. Therefore we need to check fs.SameFile as well.
// In this case, a call to Rename won't do anything, so we have to rename via a temp file.
// We don't want to use the standard syncthing prefix here, as that will result in the file being ignored
// and eventually deleted by Syncthing if the rename back fails.
tempPath := fs.TempNameWithPrefix(normPath, "")
if err = w.Filesystem.Rename(path, tempPath); err != nil {
l.Infof(`Error during normalizing UTF8 encoding of file "%s" (renamed to "%s"): %v`, path, tempPath, err)
return "", true
}
if err = w.Filesystem.Rename(tempPath, normPath); err != nil {
// I don't ever expect this to happen, but if it does, we should probably tell our caller that the normalized
// path is the temp path: that way at least the user's data still gets synced.
l.Warnf(`Error renaming "%s" to "%s" while normalizating UTF8 encoding: %v. You will want to rename this file back manually`, tempPath, normPath, err)
return tempPath, false
}
} else {
// There is something already in the way at the normalized
// file name.
l.Infof(`File "%s" path has UTF8 encoding conflict with another file; ignoring.`, path)
return "", true
}
return normPath, false
} }
func (w *walker) checkDir() error { func (w *walker) checkDir() error {

View File

@ -259,9 +259,9 @@ func TestNormalization(t *testing.T) {
files := fileList(tmp).testfiles() files := fileList(tmp).testfiles()
// We should have one file per combination, plus the directories // We should have one file per combination, plus the directories
// themselves // themselves, plus the "testdata/normalization" directory
expectedNum := numValid*numValid + numValid expectedNum := numValid*numValid + numValid + 1
if len(files) != expectedNum { if len(files) != expectedNum {
t.Errorf("Expected %d files, got %d", expectedNum, len(files)) t.Errorf("Expected %d files, got %d", expectedNum, len(files))
} }