mirror of
https://github.com/octoleo/syncthing.git
synced 2024-11-17 10:35:11 +00:00
In ignores, normalize the input when parsing it. When scanning, normalize earlier such that the path is already normalized when checking ignores. This requires splitting normalization of the string from normalization of the file, as we don't want to attempt the latter if the file is ignored. Closes #9598 --------- Co-authored-by: Jakob Borg <jakob@kastelo.net>
This commit is contained in:
parent
3c476542d2
commit
605fd6d726
@ -18,7 +18,9 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/gobwas/glob"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
|
||||
"github.com/syncthing/syncthing/lib/build"
|
||||
"github.com/syncthing/syncthing/lib/fs"
|
||||
"github.com/syncthing/syncthing/lib/ignore/ignoreresult"
|
||||
"github.com/syncthing/syncthing/lib/osutil"
|
||||
@ -212,6 +214,11 @@ func (m *Matcher) parseLocked(r io.Reader, file string) error {
|
||||
}
|
||||
|
||||
// Match matches the patterns plus temporary and internal files.
|
||||
//
|
||||
// The "file" parameter must be in the OS' native unicode format (NFD on macos,
|
||||
// NFC everywhere else). This is always the case in real usage in syncthing, as
|
||||
// we ensure native unicode normalisation on all entry points (scanning and from
|
||||
// protocol) - so no need to normalize when calling this, except e.g. in tests.
|
||||
func (m *Matcher) Match(file string) (result ignoreresult.R) {
|
||||
switch {
|
||||
case fs.IsTemporary(file):
|
||||
@ -387,6 +394,10 @@ func loadParseIncludeFile(filesystem fs.Filesystem, file string, cd ChangeDetect
|
||||
}
|
||||
|
||||
func parseLine(line string) ([]Pattern, error) {
|
||||
// We use native normalization internally, thus the patterns must match
|
||||
// that to avoid false negative matches.
|
||||
line = nativeUnicodeNorm(line)
|
||||
|
||||
pattern := Pattern{
|
||||
result: ignoreresult.Ignored,
|
||||
}
|
||||
@ -464,6 +475,13 @@ func parseLine(line string) ([]Pattern, error) {
|
||||
return patterns, nil
|
||||
}
|
||||
|
||||
func nativeUnicodeNorm(s string) string {
|
||||
if build.IsDarwin || build.IsIOS {
|
||||
return norm.NFD.String(s)
|
||||
}
|
||||
return norm.NFC.String(s)
|
||||
}
|
||||
|
||||
func parseIgnoreFile(fs fs.Filesystem, fd io.Reader, currentFile string, cd ChangeDetector, linesSeen map[string]struct{}) ([]string, []Pattern, error) {
|
||||
var patterns []Pattern
|
||||
|
||||
|
@ -805,7 +805,10 @@ func TestIssue3174(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if !pats.Match("åäö").IsIgnored() {
|
||||
// The pattern above is normalized when parsing, and in order for this
|
||||
// string to match the pattern, it needs to use the same normalization. And
|
||||
// Go always uses NFC regardless of OS, while we use NFD on macos.
|
||||
if !pats.Match(nativeUnicodeNorm("åäö")).IsIgnored() {
|
||||
t.Error("Should match")
|
||||
}
|
||||
}
|
||||
|
@ -293,6 +293,11 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
|
||||
return skip
|
||||
}
|
||||
|
||||
// Just in case the filesystem doesn't produce the normalization the OS
|
||||
// uses, and we use internally.
|
||||
nonNormPath := path
|
||||
path = normalizePath(path)
|
||||
|
||||
if m := w.Matcher.Match(path); m.IsIgnored() {
|
||||
l.Debugln(w, "ignored (patterns):", path)
|
||||
// Only descend if matcher says so and the current file is not a symlink.
|
||||
@ -319,9 +324,23 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
|
||||
return nil
|
||||
}
|
||||
|
||||
if path != nonNormPath {
|
||||
if !w.AutoNormalize {
|
||||
// We're not authorized to do anything about it, so complain and skip.
|
||||
handleError(ctx, "normalizing path", nonNormPath, errUTF8Normalization, finishedChan)
|
||||
return skip
|
||||
}
|
||||
|
||||
path, err = w.applyNormalization(nonNormPath, path, info)
|
||||
if err != nil {
|
||||
handleError(ctx, "normalizing path", nonNormPath, err, finishedChan)
|
||||
return skip
|
||||
}
|
||||
}
|
||||
|
||||
if ignoredParent == "" {
|
||||
// parent isn't ignored, nothing special
|
||||
return w.handleItem(ctx, path, info, toHashChan, finishedChan, skip)
|
||||
return w.handleItem(ctx, path, info, toHashChan, finishedChan)
|
||||
}
|
||||
|
||||
// Part of current path below the ignored (potential) parent
|
||||
@ -330,7 +349,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
|
||||
// ignored path isn't actually a parent of the current path
|
||||
if rel == path {
|
||||
ignoredParent = ""
|
||||
return w.handleItem(ctx, path, info, toHashChan, finishedChan, skip)
|
||||
return w.handleItem(ctx, path, info, toHashChan, finishedChan)
|
||||
}
|
||||
|
||||
// The previously ignored parent directories of the current, not
|
||||
@ -345,7 +364,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
|
||||
handleError(ctx, "scan", ignoredParent, err, finishedChan)
|
||||
return skip
|
||||
}
|
||||
if err = w.handleItem(ctx, ignoredParent, info, toHashChan, finishedChan, skip); err != nil {
|
||||
if err = w.handleItem(ctx, ignoredParent, info, toHashChan, finishedChan); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -355,14 +374,7 @@ func (w *walker) walkAndHashFiles(ctx context.Context, toHashChan chan<- protoco
|
||||
}
|
||||
}
|
||||
|
||||
func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo, finishedChan chan<- ScanResult, skip error) error {
|
||||
oldPath := path
|
||||
path, err := w.normalizePath(path, info)
|
||||
if err != nil {
|
||||
handleError(ctx, "normalizing path", oldPath, err, finishedChan)
|
||||
return skip
|
||||
}
|
||||
|
||||
func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo, finishedChan chan<- ScanResult) error {
|
||||
switch {
|
||||
case info.IsSymlink():
|
||||
if err := w.walkSymlink(ctx, path, info, finishedChan); err != nil {
|
||||
@ -375,13 +387,13 @@ func (w *walker) handleItem(ctx context.Context, path string, info fs.FileInfo,
|
||||
return nil
|
||||
|
||||
case info.IsDir():
|
||||
err = w.walkDir(ctx, path, info, finishedChan)
|
||||
return w.walkDir(ctx, path, info, finishedChan)
|
||||
|
||||
case info.IsRegular():
|
||||
err = w.walkRegular(ctx, path, info, toHashChan)
|
||||
return w.walkRegular(ctx, path, info, toHashChan)
|
||||
}
|
||||
|
||||
return err
|
||||
return fmt.Errorf("bug: file info for %v is neither symlink, dir nor regular", path)
|
||||
}
|
||||
|
||||
func (w *walker) walkRegular(ctx context.Context, relPath string, info fs.FileInfo, toHashChan chan<- protocol.FileInfo) error {
|
||||
@ -550,30 +562,21 @@ func (w *walker) walkSymlink(ctx context.Context, relPath string, info fs.FileIn
|
||||
return nil
|
||||
}
|
||||
|
||||
// normalizePath returns the normalized relative path (possibly after fixing
|
||||
// it on disk), or skip is true.
|
||||
func (w *walker) normalizePath(path string, info fs.FileInfo) (normPath string, err error) {
|
||||
func normalizePath(path string) string {
|
||||
if build.IsDarwin || build.IsIOS {
|
||||
// Mac OS X file names should always be NFD normalized.
|
||||
normPath = norm.NFD.String(path)
|
||||
} else {
|
||||
// Every other OS in the known universe uses NFC or just plain
|
||||
// doesn't bother to define an encoding. In our case *we* do care,
|
||||
// so we enforce NFC regardless.
|
||||
normPath = norm.NFC.String(path)
|
||||
}
|
||||
|
||||
if path == normPath {
|
||||
// The file name is already normalized: nothing to do
|
||||
return path, nil
|
||||
}
|
||||
|
||||
if !w.AutoNormalize {
|
||||
// We're not authorized to do anything about it, so complain and skip.
|
||||
|
||||
return "", errUTF8Normalization
|
||||
return norm.NFD.String(path)
|
||||
}
|
||||
// Every other OS in the known universe uses NFC or just plain
|
||||
// doesn't bother to define an encoding. In our case *we* do care,
|
||||
// so we enforce NFC regardless.
|
||||
return norm.NFC.String(path)
|
||||
}
|
||||
|
||||
// applyNormalization fixes the normalization of the file on disk, i.e. ensures
|
||||
// the file at path ends up named normPath. It shouldn't but may happen that the
|
||||
// file ends up with a different name, in which case that one should be scanned.
|
||||
func (w *walker) applyNormalization(path, normPath string, info fs.FileInfo) (string, error) {
|
||||
// We will attempt to normalize it.
|
||||
normInfo, err := w.Filesystem.Lstat(normPath)
|
||||
if fs.IsNotExist(err) {
|
||||
|
Loading…
Reference in New Issue
Block a user