From 9818e2b550319ae31d8e364ac68c73ca9f5a4760 Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Tue, 26 Aug 2014 10:11:25 +0200 Subject: [PATCH] Use more fnmatch-like matcher in .stignore (fixes #426) --- fnmatch/fnmatch.go | 60 +++++++++++++++++++ fnmatch/fnmatch_test.go | 74 +++++++++++++++++++++++ integration/test-folders.sh | 1 + integration/test-http.sh | 1 + integration/test-merge.sh | 1 + model/model.go | 2 +- scanner/walk.go | 116 ++++++++++++++++++++++++++---------- scanner/walk_test.go | 60 +++++++++++-------- 8 files changed, 260 insertions(+), 55 deletions(-) create mode 100644 fnmatch/fnmatch.go create mode 100644 fnmatch/fnmatch_test.go diff --git a/fnmatch/fnmatch.go b/fnmatch/fnmatch.go new file mode 100644 index 000000000..c6011f484 --- /dev/null +++ b/fnmatch/fnmatch.go @@ -0,0 +1,60 @@ +// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file). +// All rights reserved. Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file. + +package fnmatch + +import ( + "path/filepath" + "regexp" + "runtime" + "strings" +) + +const ( + FNM_NOESCAPE = (1 << iota) + FNM_PATHNAME + FNM_CASEFOLD +) + +func Convert(pattern string, flags int) (*regexp.Regexp, error) { + if runtime.GOOS == "windows" { + flags |= FNM_NOESCAPE + pattern = filepath.FromSlash(pattern) + } + + any := "." + if flags&FNM_PATHNAME != 0 { + any = "[^/]" + } + if flags&FNM_NOESCAPE != 0 { + pattern = strings.Replace(pattern, "\\", "\\\\", -1) + } else { + pattern = strings.Replace(pattern, "\\*", "[:escapedstar:]", -1) + pattern = strings.Replace(pattern, "\\?", "[:escapedques:]", -1) + pattern = strings.Replace(pattern, "\\.", "[:escapeddot:]", -1) + } + pattern = strings.Replace(pattern, ".", "\\.", -1) + pattern = strings.Replace(pattern, "**", "[:doublestar:]", -1) + pattern = strings.Replace(pattern, "*", any+"*", -1) + pattern = strings.Replace(pattern, "[:doublestar:]", ".*", -1) + pattern = strings.Replace(pattern, "?", any, -1) + pattern = strings.Replace(pattern, "[:escapedstar:]", "\\*", -1) + pattern = strings.Replace(pattern, "[:escapedques:]", "\\?", -1) + pattern = strings.Replace(pattern, "[:escapeddot:]", "\\.", -1) + pattern = "^" + pattern + "$" + if flags&FNM_CASEFOLD != 0 { + pattern = "(?i)" + pattern + } + return regexp.Compile(pattern) +} + +// Matches the pattern against the string, with the given flags, +// and returns true if the match is successful. +func Match(pattern, s string, flags int) (bool, error) { + exp, err := Convert(pattern, flags) + if err != nil { + return false, err + } + return exp.MatchString(s), nil +} diff --git a/fnmatch/fnmatch_test.go b/fnmatch/fnmatch_test.go new file mode 100644 index 000000000..8b7855634 --- /dev/null +++ b/fnmatch/fnmatch_test.go @@ -0,0 +1,74 @@ +// Copyright (C) 2014 Jakob Borg and Contributors (see the CONTRIBUTORS file). +// All rights reserved. Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file. + +package fnmatch + +import ( + "testing" +) + +var testCases = []struct { + pat string + name string + flags int + match bool +}{ + {"", "", 0, true}, + {"*", "", 0, true}, + {"*", "foo", 0, true}, + {"*", "bar", 0, true}, + {"*", "*", 0, true}, + {"**", "f", 0, true}, + {"**", "foo.txt", 0, true}, + {"*.*", "foo.txt", 0, true}, + {"foo*.txt", "foobar.txt", 0, true}, + {"foo.txt", "foo.txt", 0, true}, + {"foo\\.txt", "foo.txt", 0, true}, + {"foo\\*.txt", "foo*.txt", 0, true}, + {"foo\\.txt", "foo.txt", FNM_NOESCAPE, false}, + + {"foo.txt", "bar/foo.txt", 0, false}, + {"*/foo.txt", "bar/foo.txt", 0, true}, + {"f?o.txt", "foo.txt", 0, true}, + {"f?o.txt", "fooo.txt", 0, false}, + {"f[ab]o.txt", "foo.txt", 0, false}, + {"f[ab]o.txt", "fao.txt", 0, true}, + {"f[ab]o.txt", "fbo.txt", 0, true}, + {"f[ab]o.txt", "fco.txt", 0, false}, + {"f[ab]o.txt", "fabo.txt", 0, false}, + {"f[ab]o.txt", "f[ab]o.txt", 0, false}, + {"f\\[ab\\]o.txt", "f[ab]o.txt", 0, true}, + {"f\\[ab\\]o.txt", "f[ab]o.txt", FNM_NOESCAPE, false}, + {"f\\\\\\[ab\\\\\\]o.txt", "f\\[ab\\]o.txt", 0, true}, + + {"*foo.txt", "bar/foo.txt", 0, true}, + {"*foo.txt", "bar/foo.txt", FNM_PATHNAME, false}, + {"*/foo.txt", "bar/foo.txt", 0, true}, + {"*/foo.txt", "bar/foo.txt", FNM_PATHNAME, true}, + {"*/foo.txt", "bar/baz/foo.txt", 0, true}, + {"*/foo.txt", "bar/baz/foo.txt", FNM_PATHNAME, false}, + {"**/foo.txt", "bar/baz/foo.txt", 0, true}, + {"**/foo.txt", "bar/baz/foo.txt", FNM_PATHNAME, true}, + + {"foo.txt", "foo.TXT", 0, false}, + {"foo.txt", "foo.TXT", FNM_CASEFOLD, true}, +} + +func TestMatch(t *testing.T) { + for _, tc := range testCases { + if m, err := Match(tc.pat, tc.name, tc.flags); m != tc.match { + if err != nil { + t.Error(err) + } else { + t.Errorf("Match(%q, %q, %d) != %v", tc.pat, tc.name, tc.flags, tc.match) + } + } + } +} + +func TestInvalid(t *testing.T) { + if _, err := Match("foo[bar", "...", 0); err == nil { + t.Error("Unexpected nil error") + } +} diff --git a/integration/test-folders.sh b/integration/test-folders.sh index 57d0766b1..2ff26fcc3 100755 --- a/integration/test-folders.sh +++ b/integration/test-folders.sh @@ -75,6 +75,7 @@ testConvergence() { fi } +chmod -R +w s? s??-? rm -rf s? s??-? rm -rf f?/*.idx.gz f?/index diff --git a/integration/test-http.sh b/integration/test-http.sh index 883790e70..20095667b 100755 --- a/integration/test-http.sh +++ b/integration/test-http.sh @@ -19,6 +19,7 @@ echo Building go build http.go echo Starting +chmod -R +w s1 s2 rm -rf s1 s2 h1/index h2/index syncthing -home h1 > 1.out 2>&1 & syncthing -home h2 > 2.out 2>&1 & diff --git a/integration/test-merge.sh b/integration/test-merge.sh index f5395ce27..e5c0b17ce 100755 --- a/integration/test-merge.sh +++ b/integration/test-merge.sh @@ -114,6 +114,7 @@ alterFiles() { } rm -rf h?/*.idx.gz h?/index +chmod -R +w s? s??-? s4d rm -rf s? s??-? s4d echo "Setting up files..." diff --git a/model/model.go b/model/model.go index 4c5b41d86..b4f947280 100644 --- a/model/model.go +++ b/model/model.go @@ -800,7 +800,7 @@ func (m *Model) ScanRepoSub(repo, sub string) error { } m.setState(repo, RepoScanning) - fchan, _, err := w.Walk() + fchan, err := w.Walk() if err != nil { return err diff --git a/scanner/walk.go b/scanner/walk.go index 2541cbb69..8cb189725 100644 --- a/scanner/walk.go +++ b/scanner/walk.go @@ -5,17 +5,19 @@ package scanner import ( - "bytes" + "bufio" "errors" - "fmt" - "io/ioutil" + "io" "os" + "path" "path/filepath" + "regexp" "runtime" "strings" "code.google.com/p/go.text/unicode/norm" + "github.com/syncthing/syncthing/fnmatch" "github.com/syncthing/syncthing/lamport" "github.com/syncthing/syncthing/protocol" ) @@ -53,29 +55,30 @@ type CurrentFiler interface { // Walk returns the list of files found in the local repository by scanning the // file system. Files are blockwise hashed. -func (w *Walker) Walk() (chan protocol.FileInfo, map[string][]string, error) { +func (w *Walker) Walk() (chan protocol.FileInfo, error) { if debug { l.Debugln("Walk", w.Dir, w.Sub, w.BlockSize, w.IgnoreFile) } err := checkDir(w.Dir) if err != nil { - return nil, nil, err + return nil, err } - ignore := make(map[string][]string) files := make(chan protocol.FileInfo) hashedFiles := make(chan protocol.FileInfo) newParallelHasher(w.Dir, w.BlockSize, runtime.NumCPU(), hashedFiles, files) - hashFiles := w.walkAndHashFiles(files, ignore) + var ignores []*regexp.Regexp go func() { - filepath.Walk(w.Dir, w.loadIgnoreFiles(w.Dir, ignore)) + filepath.Walk(w.Dir, w.loadIgnoreFiles(w.Dir, &ignores)) + + hashFiles := w.walkAndHashFiles(files, ignores) filepath.Walk(filepath.Join(w.Dir, w.Sub), hashFiles) close(files) }() - return hashedFiles, ignore, nil + return hashedFiles, nil } // CleanTempFiles removes all files that match the temporary filename pattern. @@ -83,7 +86,7 @@ func (w *Walker) CleanTempFiles() { filepath.Walk(w.Dir, w.cleanTempFile) } -func (w *Walker) loadIgnoreFiles(dir string, ign map[string][]string) filepath.WalkFunc { +func (w *Walker) loadIgnoreFiles(dir string, ignores *[]*regexp.Regexp) filepath.WalkFunc { return func(p string, info os.FileInfo, err error) error { if err != nil { return nil @@ -96,23 +99,78 @@ func (w *Walker) loadIgnoreFiles(dir string, ign map[string][]string) filepath.W if pn, sn := filepath.Split(rn); sn == w.IgnoreFile { pn := filepath.Clean(pn) - bs, _ := ioutil.ReadFile(p) - lines := bytes.Split(bs, []byte("\n")) - var patterns []string - for _, line := range lines { - lineStr := strings.TrimSpace(string(line)) - if len(lineStr) > 0 { - patterns = append(patterns, lineStr) - } - } - ign[pn] = patterns + dirIgnores := loadIgnoreFile(p, pn) + *ignores = append(*ignores, dirIgnores...) } return nil } } -func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo, ign map[string][]string) filepath.WalkFunc { +func loadIgnoreFile(ignFile, base string) []*regexp.Regexp { + fd, err := os.Open(ignFile) + if err != nil { + return nil + } + defer fd.Close() + return parseIgnoreFile(fd, base) +} + +func parseIgnoreFile(fd io.Reader, base string) []*regexp.Regexp { + var exps []*regexp.Regexp + scanner := bufio.NewScanner(fd) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line == "" { + continue + } + + if strings.HasPrefix(line, "/") { + // Pattern is rooted in the current dir only + exp, err := fnmatch.Convert(path.Join(base, line[1:]), fnmatch.FNM_PATHNAME) + if err != nil { + l.Warnf("Invalid pattern %q in ignore file", line) + continue + } + exps = append(exps, exp) + } else if strings.HasPrefix(line, "**/") { + // Add the pattern as is, and without **/ so it matches in current dir + exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME) + if err != nil { + l.Warnf("Invalid pattern %q in ignore file", line) + continue + } + exps = append(exps, exp) + + exp, err = fnmatch.Convert(path.Join(base, line[3:]), fnmatch.FNM_PATHNAME) + if err != nil { + l.Warnf("Invalid pattern %q in ignore file", line) + continue + } + exps = append(exps, exp) + } else { + // Path name or pattern, add it so it matches files both in + // current directory and subdirs. + exp, err := fnmatch.Convert(path.Join(base, line), fnmatch.FNM_PATHNAME) + if err != nil { + l.Warnf("Invalid pattern %q in ignore file", line) + continue + } + exps = append(exps, exp) + + exp, err = fnmatch.Convert(path.Join(base, "**", line), fnmatch.FNM_PATHNAME) + if err != nil { + l.Warnf("Invalid pattern %q in ignore file", line) + continue + } + exps = append(exps, exp) + } + } + + return exps +} + +func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo, ignores []*regexp.Regexp) filepath.WalkFunc { return func(p string, info os.FileInfo, err error) error { if err != nil { if debug { @@ -141,7 +199,7 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo, ign map[string][ return nil } - if sn := filepath.Base(rn); sn == w.IgnoreFile || sn == ".stversions" || w.ignoreFile(ign, rn) { + if sn := filepath.Base(rn); sn == w.IgnoreFile || sn == ".stversions" || w.ignoreFile(ignores, rn) { // An ignored file if debug { l.Debugln("ignored:", rn) @@ -225,15 +283,13 @@ func (w *Walker) cleanTempFile(path string, info os.FileInfo, err error) error { return nil } -func (w *Walker) ignoreFile(patterns map[string][]string, file string) bool { - first, last := filepath.Split(file) - for prefix, pats := range patterns { - if prefix == "." || prefix == first || strings.HasPrefix(first, fmt.Sprintf("%s%c", prefix, os.PathSeparator)) { - for _, pattern := range pats { - if match, _ := filepath.Match(pattern, last); match || pattern == last { - return true - } +func (w *Walker) ignoreFile(patterns []*regexp.Regexp, file string) bool { + for _, pattern := range patterns { + if pattern.MatchString(file) { + if debug { + l.Debugf("%q matches %v", file, pattern) } + return true } } return false diff --git a/scanner/walk_test.go b/scanner/walk_test.go index 77f2ef42d..943843b2a 100644 --- a/scanner/walk_test.go +++ b/scanner/walk_test.go @@ -5,10 +5,9 @@ package scanner import ( + "bytes" "fmt" "path/filepath" - "reflect" - "runtime" "sort" "testing" "time" @@ -38,7 +37,7 @@ func TestWalkSub(t *testing.T) { BlockSize: 128 * 1024, IgnoreFile: ".stignore", } - fchan, _, err := w.Walk() + fchan, err := w.Walk() var files []protocol.FileInfo for f := range fchan { files = append(files, f) @@ -61,7 +60,7 @@ func TestWalk(t *testing.T) { BlockSize: 128 * 1024, IgnoreFile: ".stignore", } - fchan, ignores, err := w.Walk() + fchan, err := w.Walk() var files []protocol.FileInfo for f := range fchan { files = append(files, f) @@ -95,10 +94,6 @@ func TestWalk(t *testing.T) { t.Errorf("Unrealistic modtime %d for test %d", mt, i) } } - - if !reflect.DeepEqual(ignores, correctIgnores) { - t.Errorf("Incorrect ignores\n %v\n %v", correctIgnores, ignores) - } } func TestWalkError(t *testing.T) { @@ -107,7 +102,7 @@ func TestWalkError(t *testing.T) { BlockSize: 128 * 1024, IgnoreFile: ".stignore", } - _, _, err := w.Walk() + _, err := w.Walk() if err == nil { t.Error("no error from missing directory") @@ -118,7 +113,7 @@ func TestWalkError(t *testing.T) { BlockSize: 128 * 1024, IgnoreFile: ".stignore", } - _, _, err = w.Walk() + _, err = w.Walk() if err == nil { t.Error("no error from non-directory") @@ -126,29 +121,41 @@ func TestWalkError(t *testing.T) { } func TestIgnore(t *testing.T) { - pattern := "q\\[abc\\]y" - // On Windows, escaping is disabled. - // Instead, '\\' is treated as path separator. - if runtime.GOOS == "windows" { - pattern = "q[abc]y" - } + patStr := bytes.NewBufferString(` + t2 + /t3 + sub/dir/* + */other/test + **/deep + `) + patterns := parseIgnoreFile(patStr, "") + + patStr = bytes.NewBufferString(` + bar + z* + q[abc]x + `) + patterns = append(patterns, parseIgnoreFile(patStr, "foo")...) + + patStr = bytes.NewBufferString(` + quux + .* + `) + patterns = append(patterns, parseIgnoreFile(patStr, "foo/baz")...) - var patterns = map[string][]string{ - ".": {"t2"}, - "foo": {"bar", "z*", "q[abc]x", pattern}, - filepath.Join("foo", "baz"): {"quux", ".*"}, - } var tests = []struct { f string r bool }{ {filepath.Join("foo", "bar"), true}, + {filepath.Join("t3"), true}, {filepath.Join("foofoo"), false}, {filepath.Join("foo", "quux"), false}, {filepath.Join("foo", "zuux"), true}, {filepath.Join("foo", "qzuux"), false}, {filepath.Join("foo", "baz", "t1"), false}, {filepath.Join("foo", "baz", "t2"), true}, + {filepath.Join("foo", "baz", "t3"), false}, {filepath.Join("foo", "baz", "bar"), true}, {filepath.Join("foo", "baz", "quuxa"), false}, {filepath.Join("foo", "baz", "aquux"), false}, @@ -156,9 +163,14 @@ func TestIgnore(t *testing.T) { {filepath.Join("foo", "baz", "zquux"), true}, {filepath.Join("foo", "baz", "quux"), true}, {filepath.Join("foo", "bazz", "quux"), false}, - {filepath.Join("foo", "bazz", "q[abc]x"), true}, - {filepath.Join("foo", "bazz", "qax"), true}, - {filepath.Join("foo", "bazz", "q[abc]y"), true}, + {filepath.Join("sub", "dir", "hej"), true}, + {filepath.Join("deeper", "sub", "dir", "hej"), true}, + {filepath.Join("other", "test"), false}, + {filepath.Join("sub", "other", "test"), true}, + {filepath.Join("deeper", "sub", "other", "test"), true}, + {filepath.Join("deep"), true}, + {filepath.Join("deeper", "deep"), true}, + {filepath.Join("deeper", "deeper", "deep"), true}, } w := Walker{}