lib/fs: Ignore normalization differences in case insensitive lookup (fixes #7677) (#7678)

2024-12-22 19:08:58 +00:00 · 2021-05-17 12:35:03 +02:00 · 2021-05-17 12:35:03 +02:00 · 97437cad64
commit 97437cad64
parent 5b90a98650
7 changed files with 93 additions and 27 deletions
--- a/lib/fs/basicfs_windows.go
+++ b/lib/fs/basicfs_windows.go
@ -157,9 +157,9 @@ func (f *BasicFilesystem) Roots() ([]string, error) {
 // pathseparator.
 func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (string, error) {
 	absPath = f.resolveWin83(absPath)
-	lowerAbsPath := UnicodeLowercase(absPath)
+	lowerAbsPath := UnicodeLowercaseNormalized(absPath)
 	for _, root := range roots {
-		lowerRoot := UnicodeLowercase(root)
+		lowerRoot := UnicodeLowercaseNormalized(root)
 		if lowerAbsPath+string(PathSeparator) == lowerRoot {
 			return ".", nil
 		}
@ -171,7 +171,7 @@ func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (strin
 }
 // rel returns the tail of path that is left after removing prefix and one
 // leading path separator. The prefix match is performed on the folded forms
 // of both strings, while the returned value is sliced from the original,
 // unfolded path.
 func rel(path, prefix string) string {
-	lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercase(path), UnicodeLowercase(prefix)), string(PathSeparator))
+	lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercaseNormalized(path), UnicodeLowercaseNormalized(prefix)), string(PathSeparator))
 	// NOTE(review): the slice offset is taken from the byte length of the
 	// folded remainder. Folding can change byte length (the folding test
 	// table maps "\u212A" to "k" and "A\xCC\x88" to "\xC3\xA4"), so for such
 	// names the offset may not line up with the original path and can land
 	// inside a multi-byte rune. Needs confirming whether callers can pass
 	// such paths.
 	return path[len(path)-len(lowerRel):]
 }
@ -193,8 +193,8 @@ func (f *BasicFilesystem) resolveWin83(absPath string) string {
 	}
 	// Failed getting the long path. Return the part of the path which is
 	// already a long path.
-	lowerRoot := UnicodeLowercase(f.root)
+	lowerRoot := UnicodeLowercaseNormalized(f.root)
-	for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercase(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
+	for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercaseNormalized(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
 		if !isMaybeWin83(absPath) {
 			return absPath
 		}
--- a/lib/fs/casefs.go
+++ b/lib/fs/casefs.go
@ -15,6 +15,7 @@ import (
 	"time"
 	lru "github.com/hashicorp/golang-lru"
 	"golang.org/x/text/unicode/norm"
 )
 const (
@ -375,7 +376,10 @@ func (f *caseFilesystem) checkCaseExisting(name string) error {
 	if err != nil {
 		return err
 	}
-	if realName != name {
+	// We normalize the normalization (hah!) of the strings before
 	// comparing, as we don't want to treat a normalization difference as a
 	// case conflict.
 	if norm.NFC.String(realName) != norm.NFC.String(name) {
 		return &ErrCaseConflict{name, realName}
 	}
 	return nil
@ -424,7 +428,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
 			lastLower := ""
 			for _, n := range dirNames {
 				node.children[n] = struct{}{}
-				lower := UnicodeLowercase(n)
+				lower := UnicodeLowercaseNormalized(n)
 				if lower != lastLower {
 					node.lowerToReal[lower] = n
 					lastLower = n
@ -437,7 +441,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
 		// Try to find a direct or case match
 		if _, ok := node.children[comp]; !ok {
-			comp, ok = node.lowerToReal[UnicodeLowercase(comp)]
+			comp, ok = node.lowerToReal[UnicodeLowercaseNormalized(comp)]
 			if !ok {
 				return "", ErrNotExist
 			}
--- a/lib/fs/fakefs.go
+++ b/lib/fs/fakefs.go
@ -186,7 +186,7 @@ type fakeEntry struct {
 func (fs *fakeFS) entryForName(name string) *fakeEntry {
 	// bug: lookup doesn't work through symlinks.
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 	name = filepath.ToSlash(name)
@ -285,7 +285,7 @@ func (fs *fakeFS) create(name string) (*fakeEntry, error) {
 	}
 	if fs.insens {
-		base = UnicodeLowercase(base)
+		base = UnicodeLowercaseNormalized(base)
 	}
 	if fs.withContent {
@ -373,7 +373,7 @@ func (fs *fakeFS) Mkdir(name string, perm FileMode) error {
 		return os.ErrExist
 	}
 	if fs.insens {
-		key = UnicodeLowercase(key)
+		key = UnicodeLowercaseNormalized(key)
 	}
 	if _, ok := entry.children[key]; ok {
 		return os.ErrExist
@ -402,7 +402,7 @@ func (fs *fakeFS) MkdirAll(name string, perm FileMode) error {
 	for _, comp := range comps {
 		key := comp
 		if fs.insens {
-			key = UnicodeLowercase(key)
+			key = UnicodeLowercaseNormalized(key)
 		}
 		next, ok := entry.children[key]
@ -465,7 +465,7 @@ func (fs *fakeFS) OpenFile(name string, flags int, mode FileMode) (File, error)
 	}
 	if fs.insens {
-		key = UnicodeLowercase(key)
+		key = UnicodeLowercaseNormalized(key)
 	}
 	if flags&os.O_EXCL != 0 {
 		if _, ok := entry.children[key]; ok {
@ -508,7 +508,7 @@ func (fs *fakeFS) Remove(name string) error {
 	time.Sleep(fs.latency)
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 	entry := fs.entryForName(name)
@ -531,7 +531,7 @@ func (fs *fakeFS) RemoveAll(name string) error {
 	time.Sleep(fs.latency)
 	if fs.insens {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 	entry := fs.entryForName(filepath.Dir(name))
@ -555,8 +555,8 @@ func (fs *fakeFS) Rename(oldname, newname string) error {
 	newKey := filepath.Base(newname)
 	if fs.insens {
-		oldKey = UnicodeLowercase(oldKey)
+		oldKey = UnicodeLowercaseNormalized(oldKey)
-		newKey = UnicodeLowercase(newKey)
+		newKey = UnicodeLowercaseNormalized(newKey)
 	}
 	p0 := fs.entryForName(filepath.Dir(oldname))
@ -651,7 +651,7 @@ func (fs *fakeFS) SameFile(fi1, fi2 FileInfo) bool {
 	// where ModTime is not that precise
 	var ok bool
 	if fs.insens {
-		ok = UnicodeLowercase(fi1.Name()) == UnicodeLowercase(fi2.Name())
+		ok = UnicodeLowercaseNormalized(fi1.Name()) == UnicodeLowercaseNormalized(fi2.Name())
 	} else {
 		ok = fi1.Name() == fi2.Name()
 	}
--- a/lib/fs/folding.go
+++ b/lib/fs/folding.go
@ -10,9 +10,13 @@ import (
 	"strings"
 	"unicode"
 	"unicode/utf8"
 	"golang.org/x/text/unicode/norm"
 )
-func UnicodeLowercase(s string) string {
+// UnicodeLowercaseNormalized returns the Unicode lower case variant of s,
 // having also normalized it to normalization form C.
 func UnicodeLowercaseNormalized(s string) string {
 	i := firstCaseChange(s)
 	if i == -1 {
 		// NOTE(review): this early return hands back s untouched, i.e.
 		// without the norm.NFC.String call that the final return applies.
 		// An input that needs no case folding but is not in NFC (for
 		// example "a\xCC\x88") therefore comes back un-normalized, which
 		// contradicts the doc comment above. Presumably this should be
 		// norm.NFC.String(s) - confirm and add a test case.
 		return s
@ -28,7 +32,7 @@ func UnicodeLowercase(s string) string {
 	// Fold each remaining rune through upper case and back to lower case,
 	// the same mapping firstCaseChange is documented to test for.
 	for _, r := range s[i:] {
 		rs.WriteRune(unicode.ToLower(unicode.ToUpper(r)))
 	}
-	return rs.String()
+	return norm.NFC.String(rs.String())
 }
 // Byte index of the first rune r s.t. lower(upper(r)) != r.
--- a/lib/fs/folding_test.go
+++ b/lib/fs/folding_test.go
@ -44,13 +44,15 @@ var caseCases = [][2]string{
 	{"チャーハン", "チャーハン"},
 	// Some special Unicode characters, however, are folded by OSes.
 	{"\u212A", "k"},
 	// Folding renormalizes to NFC
 	{"A\xCC\x88", "\xC3\xA4"}, // ä
 }
 // The test below walks caseCases, where each entry is an {input, expected}
 // pair, and requires the folded input to equal the expected string exactly.
-func TestUnicodeLowercase(t *testing.T) {
+func TestUnicodeLowercaseNormalized(t *testing.T) {
 	for _, tc := range caseCases {
-		res := UnicodeLowercase(tc[0])
+		res := UnicodeLowercaseNormalized(tc[0])
 		if res != tc[1] {
 			// %q keeps combining marks and escapes visible in the failure output.
-			t.Errorf("UnicodeLowercase(%q) => %q, expected %q", tc[0], res, tc[1])
+			t.Errorf("UnicodeLowercaseNormalized(%q) => %q, expected %q", tc[0], res, tc[1])
 		}
 	}
 }
@ -60,7 +62,7 @@ func BenchmarkUnicodeLowercaseMaybeChange(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		for _, s := range caseCases {
-			UnicodeLowercase(s[0])
+			UnicodeLowercaseNormalized(s[0])
 		}
 	}
 }
@ -70,7 +72,7 @@ func BenchmarkUnicodeLowercaseNoChange(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		for _, s := range caseCases {
-			UnicodeLowercase(s[1])
+			UnicodeLowercaseNormalized(s[1])
 		}
 	}
 }
--- a/lib/fs/mtimefs.go
+++ b/lib/fs/mtimefs.go
@ -157,7 +157,7 @@ func (f *mtimeFS) wrapperType() filesystemWrapperType {
 func (f *mtimeFS) save(name string, real, virtual time.Time) {
 	if f.caseInsensitive {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 	if real.Equal(virtual) {
@ -177,7 +177,7 @@ func (f *mtimeFS) save(name string, real, virtual time.Time) {
 func (f *mtimeFS) load(name string) (MtimeMapping, error) {
 	if f.caseInsensitive {
-		name = UnicodeLowercase(name)
+		name = UnicodeLowercaseNormalized(name)
 	}
 	data, exists, err := f.db.Bytes(name)
--- a/lib/scanner/walk_test.go
+++ b/lib/scanner/walk_test.go
@ -251,6 +251,62 @@ func TestNormalization(t *testing.T) {
 	}
 }
 // TestNormalizationDarwinCaseFS checks that entries created with NFC names
 // are reported with NFD names when walked through a case filesystem
 // wrapper. It only runs on Darwin and is skipped everywhere else.
 func TestNormalizationDarwinCaseFS(t *testing.T) {
 	if runtime.GOOS != "darwin" {
 		// Skip stops the test, so no explicit return is needed.
 		t.Skip("Normalization test not possible on non-Darwin")
 	}
 	// Wrap the outer testFs; a distinct local name avoids shadowing it.
 	caseFs := fs.NewCaseFilesystem(testFs)
 	// Best-effort cleanup of leftovers from earlier runs, and again on exit.
 	caseFs.RemoveAll("normalization")
 	defer caseFs.RemoveAll("normalization")
 	if err := caseFs.MkdirAll("normalization", 0755); err != nil {
 		t.Fatal(err)
 	}
 	const (
 		inNFC = "\xC3\x84"     // U+00C4, precomposed
 		inNFD = "\x41\xCC\x88" // U+0041 U+0308, decomposed
 	)
 	// Create dir in NFC
 	if err := caseFs.Mkdir(filepath.Join("normalization", "dir-"+inNFC), 0755); err != nil {
 		t.Fatal(err)
 	}
 	// Create file in NFC
 	fd, err := caseFs.Create(filepath.Join("normalization", "dir-"+inNFC, "file-"+inNFC))
 	if err != nil {
 		t.Fatal(err)
 	}
 	if err := fd.Close(); err != nil {
 		t.Fatal(err)
 	}
 	// Walk twice: the result of the first walk is discarded and only the
 	// second one is inspected. Presumably the first pass is what performs
 	// the normalization - confirm against walkDir.
 	walkDir(caseFs, "normalization", nil, nil, 0)
 	tmp := walkDir(caseFs, "normalization", nil, nil, 0)
 	if len(tmp) != 3 {
 		t.Errorf("Expected 3 scanned entries, got %d", len(tmp))
 	}
 	// Verify we see the normalized (NFD) entries in the result
 	wantDir := filepath.Join("normalization", "dir-"+inNFD)
 	wantFile := filepath.Join("normalization", "dir-"+inNFD, "file-"+inNFD)
 	foundDir, foundFile := false, false
 	for _, f := range tmp {
 		switch f.Name {
 		case wantDir:
 			foundDir = true
 		case wantFile:
 			foundFile = true
 		}
 	}
 	// Report the two expectations separately so a failure says which entry
 	// was missing.
 	if !foundDir {
 		t.Errorf("Didn't find directory in expected normalization form: %q", wantDir)
 	}
 	if !foundFile {
 		t.Errorf("Didn't find file in expected normalization form: %q", wantFile)
 	}
 }
 func TestIssue1507(t *testing.T) {
 	w := &walker{}
 	w.Matcher = ignore.New(w.Filesystem)