lib/fs: Ignore normalization differences in case insensitive lookup (fixes #7677) (#7678)

This commit is contained in:
Jakob Borg 2021-05-17 12:35:03 +02:00 committed by GitHub
parent 5b90a98650
commit 97437cad64
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 93 additions and 27 deletions

View File

@ -157,9 +157,9 @@ func (f *BasicFilesystem) Roots() ([]string, error) {
// pathseparator. // pathseparator.
func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (string, error) { func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (string, error) {
absPath = f.resolveWin83(absPath) absPath = f.resolveWin83(absPath)
lowerAbsPath := UnicodeLowercase(absPath) lowerAbsPath := UnicodeLowercaseNormalized(absPath)
for _, root := range roots { for _, root := range roots {
lowerRoot := UnicodeLowercase(root) lowerRoot := UnicodeLowercaseNormalized(root)
if lowerAbsPath+string(PathSeparator) == lowerRoot { if lowerAbsPath+string(PathSeparator) == lowerRoot {
return ".", nil return ".", nil
} }
@ -171,7 +171,7 @@ func (f *BasicFilesystem) unrootedChecked(absPath string, roots []string) (strin
} }
func rel(path, prefix string) string { func rel(path, prefix string) string {
lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercase(path), UnicodeLowercase(prefix)), string(PathSeparator)) lowerRel := strings.TrimPrefix(strings.TrimPrefix(UnicodeLowercaseNormalized(path), UnicodeLowercaseNormalized(prefix)), string(PathSeparator))
return path[len(path)-len(lowerRel):] return path[len(path)-len(lowerRel):]
} }
@ -193,8 +193,8 @@ func (f *BasicFilesystem) resolveWin83(absPath string) string {
} }
// Failed getting the long path. Return the part of the path which is // Failed getting the long path. Return the part of the path which is
// already a long path. // already a long path.
lowerRoot := UnicodeLowercase(f.root) lowerRoot := UnicodeLowercaseNormalized(f.root)
for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercase(absPath), lowerRoot); absPath = filepath.Dir(absPath) { for absPath = filepath.Dir(absPath); strings.HasPrefix(UnicodeLowercaseNormalized(absPath), lowerRoot); absPath = filepath.Dir(absPath) {
if !isMaybeWin83(absPath) { if !isMaybeWin83(absPath) {
return absPath return absPath
} }

View File

@ -15,6 +15,7 @@ import (
"time" "time"
lru "github.com/hashicorp/golang-lru" lru "github.com/hashicorp/golang-lru"
"golang.org/x/text/unicode/norm"
) )
const ( const (
@ -375,7 +376,10 @@ func (f *caseFilesystem) checkCaseExisting(name string) error {
if err != nil { if err != nil {
return err return err
} }
if realName != name { // We normalize the normalization (hah!) of the strings before
// comparing, as we don't want to treat a normalization difference as a
// case conflict.
if norm.NFC.String(realName) != norm.NFC.String(name) {
return &ErrCaseConflict{name, realName} return &ErrCaseConflict{name, realName}
} }
return nil return nil
@ -424,7 +428,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
lastLower := "" lastLower := ""
for _, n := range dirNames { for _, n := range dirNames {
node.children[n] = struct{}{} node.children[n] = struct{}{}
lower := UnicodeLowercase(n) lower := UnicodeLowercaseNormalized(n)
if lower != lastLower { if lower != lastLower {
node.lowerToReal[lower] = n node.lowerToReal[lower] = n
lastLower = n lastLower = n
@ -437,7 +441,7 @@ func (r *defaultRealCaser) realCase(name string) (string, error) {
// Try to find a direct or case match // Try to find a direct or case match
if _, ok := node.children[comp]; !ok { if _, ok := node.children[comp]; !ok {
comp, ok = node.lowerToReal[UnicodeLowercase(comp)] comp, ok = node.lowerToReal[UnicodeLowercaseNormalized(comp)]
if !ok { if !ok {
return "", ErrNotExist return "", ErrNotExist
} }

View File

@ -186,7 +186,7 @@ type fakeEntry struct {
func (fs *fakeFS) entryForName(name string) *fakeEntry { func (fs *fakeFS) entryForName(name string) *fakeEntry {
// bug: lookup doesn't work through symlinks. // bug: lookup doesn't work through symlinks.
if fs.insens { if fs.insens {
name = UnicodeLowercase(name) name = UnicodeLowercaseNormalized(name)
} }
name = filepath.ToSlash(name) name = filepath.ToSlash(name)
@ -285,7 +285,7 @@ func (fs *fakeFS) create(name string) (*fakeEntry, error) {
} }
if fs.insens { if fs.insens {
base = UnicodeLowercase(base) base = UnicodeLowercaseNormalized(base)
} }
if fs.withContent { if fs.withContent {
@ -373,7 +373,7 @@ func (fs *fakeFS) Mkdir(name string, perm FileMode) error {
return os.ErrExist return os.ErrExist
} }
if fs.insens { if fs.insens {
key = UnicodeLowercase(key) key = UnicodeLowercaseNormalized(key)
} }
if _, ok := entry.children[key]; ok { if _, ok := entry.children[key]; ok {
return os.ErrExist return os.ErrExist
@ -402,7 +402,7 @@ func (fs *fakeFS) MkdirAll(name string, perm FileMode) error {
for _, comp := range comps { for _, comp := range comps {
key := comp key := comp
if fs.insens { if fs.insens {
key = UnicodeLowercase(key) key = UnicodeLowercaseNormalized(key)
} }
next, ok := entry.children[key] next, ok := entry.children[key]
@ -465,7 +465,7 @@ func (fs *fakeFS) OpenFile(name string, flags int, mode FileMode) (File, error)
} }
if fs.insens { if fs.insens {
key = UnicodeLowercase(key) key = UnicodeLowercaseNormalized(key)
} }
if flags&os.O_EXCL != 0 { if flags&os.O_EXCL != 0 {
if _, ok := entry.children[key]; ok { if _, ok := entry.children[key]; ok {
@ -508,7 +508,7 @@ func (fs *fakeFS) Remove(name string) error {
time.Sleep(fs.latency) time.Sleep(fs.latency)
if fs.insens { if fs.insens {
name = UnicodeLowercase(name) name = UnicodeLowercaseNormalized(name)
} }
entry := fs.entryForName(name) entry := fs.entryForName(name)
@ -531,7 +531,7 @@ func (fs *fakeFS) RemoveAll(name string) error {
time.Sleep(fs.latency) time.Sleep(fs.latency)
if fs.insens { if fs.insens {
name = UnicodeLowercase(name) name = UnicodeLowercaseNormalized(name)
} }
entry := fs.entryForName(filepath.Dir(name)) entry := fs.entryForName(filepath.Dir(name))
@ -555,8 +555,8 @@ func (fs *fakeFS) Rename(oldname, newname string) error {
newKey := filepath.Base(newname) newKey := filepath.Base(newname)
if fs.insens { if fs.insens {
oldKey = UnicodeLowercase(oldKey) oldKey = UnicodeLowercaseNormalized(oldKey)
newKey = UnicodeLowercase(newKey) newKey = UnicodeLowercaseNormalized(newKey)
} }
p0 := fs.entryForName(filepath.Dir(oldname)) p0 := fs.entryForName(filepath.Dir(oldname))
@ -651,7 +651,7 @@ func (fs *fakeFS) SameFile(fi1, fi2 FileInfo) bool {
// where ModTime is not that precise // where ModTime is not that precise
var ok bool var ok bool
if fs.insens { if fs.insens {
ok = UnicodeLowercase(fi1.Name()) == UnicodeLowercase(fi2.Name()) ok = UnicodeLowercaseNormalized(fi1.Name()) == UnicodeLowercaseNormalized(fi2.Name())
} else { } else {
ok = fi1.Name() == fi2.Name() ok = fi1.Name() == fi2.Name()
} }

View File

@ -10,9 +10,13 @@ import (
"strings" "strings"
"unicode" "unicode"
"unicode/utf8" "unicode/utf8"
"golang.org/x/text/unicode/norm"
) )
func UnicodeLowercase(s string) string { // UnicodeLowercaseNormalized returns the Unicode lower case variant of s,
// having also normalized it to normalization form C.
func UnicodeLowercaseNormalized(s string) string {
i := firstCaseChange(s) i := firstCaseChange(s)
if i == -1 { if i == -1 {
return s return s
@ -28,7 +32,7 @@ func UnicodeLowercase(s string) string {
for _, r := range s[i:] { for _, r := range s[i:] {
rs.WriteRune(unicode.ToLower(unicode.ToUpper(r))) rs.WriteRune(unicode.ToLower(unicode.ToUpper(r)))
} }
return rs.String() return norm.NFC.String(rs.String())
} }
// Byte index of the first rune r s.t. lower(upper(r)) != r. // Byte index of the first rune r s.t. lower(upper(r)) != r.

View File

@ -44,13 +44,15 @@ var caseCases = [][2]string{
{"チャーハン", "チャーハン"}, {"チャーハン", "チャーハン"},
// Some special Unicode characters, however, are folded by OSes. // Some special Unicode characters, however, are folded by OSes.
{"\u212A", "k"}, {"\u212A", "k"},
// Folding renormalizes to NFC
{"A\xCC\x88", "\xC3\xA4"}, // ä
} }
func TestUnicodeLowercase(t *testing.T) { func TestUnicodeLowercaseNormalized(t *testing.T) {
for _, tc := range caseCases { for _, tc := range caseCases {
res := UnicodeLowercase(tc[0]) res := UnicodeLowercaseNormalized(tc[0])
if res != tc[1] { if res != tc[1] {
t.Errorf("UnicodeLowercase(%q) => %q, expected %q", tc[0], res, tc[1]) t.Errorf("UnicodeLowercaseNormalized(%q) => %q, expected %q", tc[0], res, tc[1])
} }
} }
} }
@ -60,7 +62,7 @@ func BenchmarkUnicodeLowercaseMaybeChange(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
for _, s := range caseCases { for _, s := range caseCases {
UnicodeLowercase(s[0]) UnicodeLowercaseNormalized(s[0])
} }
} }
} }
@ -70,7 +72,7 @@ func BenchmarkUnicodeLowercaseNoChange(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
for _, s := range caseCases { for _, s := range caseCases {
UnicodeLowercase(s[1]) UnicodeLowercaseNormalized(s[1])
} }
} }
} }

View File

@ -157,7 +157,7 @@ func (f *mtimeFS) wrapperType() filesystemWrapperType {
func (f *mtimeFS) save(name string, real, virtual time.Time) { func (f *mtimeFS) save(name string, real, virtual time.Time) {
if f.caseInsensitive { if f.caseInsensitive {
name = UnicodeLowercase(name) name = UnicodeLowercaseNormalized(name)
} }
if real.Equal(virtual) { if real.Equal(virtual) {
@ -177,7 +177,7 @@ func (f *mtimeFS) save(name string, real, virtual time.Time) {
func (f *mtimeFS) load(name string) (MtimeMapping, error) { func (f *mtimeFS) load(name string) (MtimeMapping, error) {
if f.caseInsensitive { if f.caseInsensitive {
name = UnicodeLowercase(name) name = UnicodeLowercaseNormalized(name)
} }
data, exists, err := f.db.Bytes(name) data, exists, err := f.db.Bytes(name)

View File

@ -251,6 +251,62 @@ func TestNormalization(t *testing.T) {
} }
} }
// TestNormalizationDarwinCaseFS checks that, on Darwin, walking a tree
// through a case filesystem wrapper reports entries that were created with
// NFC names under their NFD names.
func TestNormalizationDarwinCaseFS(t *testing.T) {
	if runtime.GOOS != "darwin" {
		// t.Skip ends the test on its own; no explicit return is needed.
		t.Skip("Normalization test not possible on non-Darwin")
	}

	// Wrap the outer testFs under a distinct name instead of shadowing it.
	caseFs := fs.NewCaseFilesystem(testFs)

	// Cleanup before and after is best effort, so the results are
	// intentionally not checked.
	caseFs.RemoveAll("normalization")
	defer caseFs.RemoveAll("normalization")

	if err := caseFs.MkdirAll("normalization", 0755); err != nil {
		t.Fatal(err)
	}

	const (
		inNFC = "\xC3\x84"     // U+00C4, precomposed
		inNFD = "\x41\xCC\x88" // U+0041 followed by U+0308, decomposed
	)

	dirNFC := filepath.Join("normalization", "dir-"+inNFC)
	fileNFC := filepath.Join(dirNFC, "file-"+inNFC)

	// Create dir in NFC
	if err := caseFs.Mkdir(dirNFC, 0755); err != nil {
		t.Fatal(err)
	}

	// Create file in NFC
	fd, err := caseFs.Create(fileNFC)
	if err != nil {
		t.Fatal(err)
	}
	if err := fd.Close(); err != nil {
		t.Fatal(err)
	}

	// Walk twice. The first result is discarded.
	// NOTE(review): presumably the first walk performs the normalization and
	// the second one observes the outcome -- confirm against walkDir.
	walkDir(caseFs, "normalization", nil, nil, 0)
	tmp := walkDir(caseFs, "normalization", nil, nil, 0)
	if len(tmp) != 3 {
		t.Errorf("Expected 3 entries scanned, got %d", len(tmp))
	}

	// Verify we see the normalized (NFD) entries in the result
	wantDir := filepath.Join("normalization", "dir-"+inNFD)
	wantFile := filepath.Join("normalization", "dir-"+inNFD, "file-"+inNFD)
	foundDir, foundFile := false, false
	for _, f := range tmp {
		switch f.Name {
		case wantDir:
			foundDir = true
		case wantFile:
			foundFile = true
		}
	}
	if !foundFile || !foundDir {
		t.Error("Didn't find expected normalization form")
	}
}
func TestIssue1507(t *testing.T) { func TestIssue1507(t *testing.T) {
w := &walker{} w := &walker{}
w.Matcher = ignore.New(w.Filesystem) w.Matcher = ignore.New(w.Filesystem)