From 2c89f04be769a357732a201124c6f16790d498cb Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Tue, 23 Dec 2014 10:05:08 +0100 Subject: [PATCH] Refactor ignore handling (...) This uses persistent Matcher objects that can reload their content and provide a hash string that can be used to check if it's changed. The cache is local to each Matcher object instead of kept globally. --- internal/ignore/cache.go | 36 +------ internal/ignore/ignore.go | 173 +++++++++++++++++++++------------ internal/ignore/ignore_test.go | 137 ++++++++++++++++++++++---- internal/model/model.go | 7 +- internal/scanner/walk_test.go | 6 +- 5 files changed, 239 insertions(+), 120 deletions(-) diff --git a/internal/ignore/cache.go b/internal/ignore/cache.go index 3667bcf45..115275b19 100644 --- a/internal/ignore/cache.go +++ b/internal/ignore/cache.go @@ -15,26 +15,11 @@ package ignore -import ( - "sync" - "time" -) - -var ( - caches = make(map[string]*cache) - cacheMut sync.Mutex -) - -func init() { - // Periodically go through the cache and remove cache entries that have - // not been touched in the last two hours. - go cleanIgnoreCaches(2 * time.Hour) -} +import "time" type cache struct { patterns []Pattern entries map[string]cacheEntry - mut sync.Mutex } type cacheEntry struct { @@ -50,46 +35,27 @@ func newCache(patterns []Pattern) *cache { } func (c *cache) clean(d time.Duration) { - c.mut.Lock() for k, v := range c.entries { if time.Since(v.access) > d { delete(c.entries, k) } } - c.mut.Unlock() } func (c *cache) get(key string) (result, ok bool) { - c.mut.Lock() res, ok := c.entries[key] if ok { res.access = time.Now() c.entries[key] = res } - c.mut.Unlock() return res.value, ok } func (c *cache) set(key string, val bool) { - c.mut.Lock() c.entries[key] = cacheEntry{val, time.Now()} - c.mut.Unlock() } func (c *cache) len() int { - c.mut.Lock() l := len(c.entries) - c.mut.Unlock() return l } - -func cleanIgnoreCaches(dur time.Duration) { - for { - time.Sleep(dur) - cacheMut.Lock() - for _, v := range caches { - v.clean(dur) - } - cacheMut.Unlock() - } -} diff --git a/internal/ignore/ignore.go b/internal/ignore/ignore.go index e74c7ad21..d1b68c73c 100644 --- a/internal/ignore/ignore.go +++ b/internal/ignore/ignore.go @@ -17,6 +17,8 @@ package ignore import ( "bufio" + "bytes" + "crypto/md5" "fmt" "io" "os" @@ -24,6 +26,7 @@ import ( "regexp" "strings" "sync" + "time" "github.com/syncthing/syncthing/internal/fnmatch" ) @@ -33,51 +36,76 @@ type Pattern struct { include bool } +func (p Pattern) String() string { + if p.include { + return p.match.String() + } else { + return "(?exclude)" + p.match.String() + } +} + type Matcher struct { - patterns []Pattern - matches *cache - mut sync.Mutex + patterns []Pattern + withCache bool + matches *cache + curHash string + stop chan struct{} + mut sync.Mutex } -func Load(file string, cache bool) (*Matcher, error) { - seen := make(map[string]bool) - matcher, err := loadIgnoreFile(file, seen) - if !cache || err != nil { - return matcher, err +func New(withCache bool) *Matcher { + m := &Matcher{ + withCache: withCache, + stop: make(chan struct{}), } - - cacheMut.Lock() - defer cacheMut.Unlock() - - // Get the current cache object for the given file - cached, ok := caches[file] - if !ok || !patternsEqual(cached.patterns, matcher.patterns) { - // Nothing in cache or a cache mismatch, create a new cache which will - // store matches for the given set of patterns. - // Initialize oldMatches to indicate that we are interested in - // caching. - cached = newCache(matcher.patterns) - matcher.matches = cached - caches[file] = cached - return matcher, nil + if withCache { + go m.clean(2 * time.Hour) } - - // Patterns haven't changed, so we can reuse the old matches, create a new - // matches map and update the pointer. (This prevents matches map from - // growing indefinately, as we only cache whatever we've matched in the last - // iteration, rather than through runtime history) - matcher.matches = cached - return matcher, nil + return m } -func Parse(r io.Reader, file string) (*Matcher, error) { - seen := map[string]bool{ - file: true, +func (m *Matcher) Load(file string) error { + // No locking, Parse() does the locking + + fd, err := os.Open(file) + if err != nil { + // We do a parse with empty patterns to clear out the hash, cache etc. + m.Parse(&bytes.Buffer{}, file) + return err } - return parseIgnoreFile(r, file, seen) + defer fd.Close() + + return m.Parse(fd, file) +} + +func (m *Matcher) Parse(r io.Reader, file string) error { + m.mut.Lock() + defer m.mut.Unlock() + + seen := map[string]bool{file: true} + patterns, err := parseIgnoreFile(r, file, seen) + // Error is saved and returned at the end. We process the patterns + // (possibly blank) anyway. + + newHash := hashPatterns(patterns) + if newHash == m.curHash { + // We've already loaded exactly these patterns. + return err + } + + m.curHash = newHash + m.patterns = patterns + if m.withCache { + m.matches = newCache(patterns) + } + + return err } func (m *Matcher) Match(file string) (result bool) { + m.mut.Lock() + defer m.mut.Unlock() + if len(m.patterns) == 0 { return false } @@ -108,18 +136,53 @@ func (m *Matcher) Match(file string) (result bool) { // Patterns return a list of the loaded regexp patterns, as strings func (m *Matcher) Patterns() []string { + m.mut.Lock() + defer m.mut.Unlock() + patterns := make([]string, len(m.patterns)) for i, pat := range m.patterns { - if pat.include { - patterns[i] = pat.match.String() - } else { - patterns[i] = "(?exclude)" + pat.match.String() - } + patterns[i] = pat.String() } return patterns } -func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) { +func (m *Matcher) Hash() string { + m.mut.Lock() + defer m.mut.Unlock() + return m.curHash +} + +func (m *Matcher) Stop() { + close(m.stop) +} + +func (m *Matcher) clean(d time.Duration) { + t := time.NewTimer(d / 2) + for { + select { + case <-m.stop: + return + case <-t.C: + m.mut.Lock() + if m.matches != nil { + m.matches.clean(d) + } + t.Reset(d / 2) + m.mut.Unlock() + } + } +} + +func hashPatterns(patterns []Pattern) string { + h := md5.New() + for _, pat := range patterns { + h.Write([]byte(pat.String())) + h.Write([]byte("\n")) + } + return fmt.Sprintf("%x", h.Sum(nil)) +} + +func loadIgnoreFile(file string, seen map[string]bool) ([]Pattern, error) { if seen[file] { return nil, fmt.Errorf("Multiple include of ignore file %q", file) } @@ -134,8 +197,8 @@ func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) { return parseIgnoreFile(fd, file, seen) } -func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*Matcher, error) { - var exps Matcher +func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) ([]Pattern, error) { + var patterns []Pattern addPattern := func(line string) error { include := true @@ -150,27 +213,27 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) } else if strings.HasPrefix(line, "**/") { // Add the pattern as is, and without **/ so it matches in current dir exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) } else if strings.HasPrefix(line, "#include ") { includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):]) includes, err := loadIgnoreFile(includeFile, seen) if err != nil { return err } - exps.patterns = append(exps.patterns, includes.patterns...) + patterns = append(patterns, includes...) } else { // Path name or pattern, add it so it matches files both in // current directory and subdirs. @@ -178,13 +241,13 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) } return nil } @@ -218,17 +281,5 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M } } - return &exps, nil -} - -func patternsEqual(a, b []Pattern) bool { - if len(a) != len(b) { - return false - } - for i := range a { - if a[i].include != b[i].include || a[i].match.String() != b[i].match.String() { - return false - } - } - return true + return patterns, nil } diff --git a/internal/ignore/ignore_test.go b/internal/ignore/ignore_test.go index 27a4c3c77..20c36e905 100644 --- a/internal/ignore/ignore_test.go +++ b/internal/ignore/ignore_test.go @@ -25,7 +25,8 @@ import ( ) func TestIgnore(t *testing.T) { - pats, err := Load("testdata/.stignore", true) + pats := New(true) + err := pats.Load("testdata/.stignore") if err != nil { t.Fatal(err) } @@ -74,7 +75,8 @@ func TestExcludes(t *testing.T) { i*2 !ign2 ` - pats, err := Parse(bytes.NewBufferString(stignore), ".stignore") + pats := New(true) + err := pats.Parse(bytes.NewBufferString(stignore), ".stignore") if err != nil { t.Fatal(err) } @@ -114,15 +116,19 @@ func TestBadPatterns(t *testing.T) { } for _, pat := range badPatterns { - parsed, err := Parse(bytes.NewBufferString(pat), ".stignore") + err := New(true).Parse(bytes.NewBufferString(pat), ".stignore") if err == nil { - t.Errorf("No error for pattern %q: %v", pat, parsed) + t.Errorf("No error for pattern %q", pat) } } } func TestCaseSensitivity(t *testing.T) { - ign, _ := Parse(bytes.NewBufferString("test"), ".stignore") + ign := New(true) + err := ign.Parse(bytes.NewBufferString("test"), ".stignore") + if err != nil { + t.Error(err) + } match := []string{"test"} dontMatch := []string{"foo"} @@ -170,7 +176,8 @@ func TestCaching(t *testing.T) { fd2.WriteString("/y/\n") - pats, err := Load(fd1.Name(), true) + pats := New(true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -193,9 +200,9 @@ func TestCaching(t *testing.T) { t.Fatal("Expected 4 cached results") } - // Reload file, expect old outcomes to be provided + // Reload file, expect old outcomes to be preserved - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -207,7 +214,7 @@ func TestCaching(t *testing.T) { fd2.WriteString("/z/\n") - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -222,9 +229,9 @@ func TestCaching(t *testing.T) { pats.Match(letter) } - // Verify that outcomes provided on next laod + // Verify that outcomes preserved on next laod - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -236,7 +243,7 @@ func TestCaching(t *testing.T) { fd1.WriteString("/a/\n") - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -252,7 +259,7 @@ func TestCaching(t *testing.T) { // Verify that outcomes provided on next laod - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -273,7 +280,11 @@ func TestCommentsAndBlankLines(t *testing.T) { ` - pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore") + pats := New(true) + err := pats.Parse(bytes.NewBufferString(stignore), ".stignore") + if err != nil { + t.Error(err) + } if len(pats.patterns) > 0 { t.Errorf("Expected no patterns") } @@ -297,7 +308,11 @@ flamingo *.crow *.crow ` - pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore") + pats := New(false) + err := pats.Parse(bytes.NewBufferString(stignore), ".stignore") + if err != nil { + b.Error(err) + } b.ResetTimer() for i := 0; i < b.N; i++ { @@ -335,7 +350,8 @@ flamingo } // Load the patterns - pats, err := Load(fd.Name(), true) + pats := New(true) + err = pats.Load(fd.Name()) if err != nil { b.Fatal(err) } @@ -344,7 +360,7 @@ flamingo // This load should now load the cached outcomes as the set of patterns // has not changed. - pats, err = Load(fd.Name(), true) + err = pats.Load(fd.Name()) if err != nil { b.Fatal(err) } @@ -370,7 +386,8 @@ func TestCacheReload(t *testing.T) { t.Fatal(err) } - pats, err := Load(fd.Name(), true) + pats := New(true) + err = pats.Load(fd.Name()) if err != nil { t.Fatal(err) } @@ -402,7 +419,7 @@ func TestCacheReload(t *testing.T) { t.Fatal(err) } - pats, err = Load(fd.Name(), true) + err = pats.Load(fd.Name()) if err != nil { t.Fatal(err) } @@ -419,3 +436,85 @@ func TestCacheReload(t *testing.T) { t.Error("Unexpected non-match for f3") } } + +func TestHash(t *testing.T) { + p1 := New(true) + err := p1.Load("testdata/.stignore") + if err != nil { + t.Fatal(err) + } + + // Same list of patterns as testdata/.stignore, after expansion + stignore := ` + dir2/dfile + dir3 + bfile + dir1/cfile + **/efile + /ffile + lost+found + ` + p2 := New(true) + err = p2.Parse(bytes.NewBufferString(stignore), ".stignore") + if err != nil { + t.Fatal(err) + } + + // Not same list of patterns + stignore = ` + dir2/dfile + dir3 + bfile + dir1/cfile + /ffile + lost+found + ` + p3 := New(true) + err = p3.Parse(bytes.NewBufferString(stignore), ".stignore") + if err != nil { + t.Fatal(err) + } + + if p1.Hash() == "" { + t.Error("p1 hash blank") + } + if p2.Hash() == "" { + t.Error("p2 hash blank") + } + if p3.Hash() == "" { + t.Error("p3 hash blank") + } + if p1.Hash() != p2.Hash() { + t.Error("p1-p2 hashes differ") + } + if p1.Hash() == p3.Hash() { + t.Error("p1-p3 hashes same") + } +} + +func TestHashOfEmpty(t *testing.T) { + p1 := New(true) + err := p1.Load("testdata/.stignore") + if err != nil { + t.Fatal(err) + } + + firstHash := p1.Hash() + + // Reloading with a non-existent file should empty the patterns and + // recalculate the hash. d41d8cd98f00b204e9800998ecf8427e is the md5 of + // nothing. + + p1.Load("file/does/not/exist") + secondHash := p1.Hash() + + if firstHash == secondHash { + t.Error("hash did not change") + } + if secondHash != "d41d8cd98f00b204e9800998ecf8427e" { + t.Error("second hash is not hash of empty string") + } + if len(p1.patterns) != 0 { + t.Error("there are more than zero patterns") + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 9ab9a8db4..fa9f143f0 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -1040,7 +1040,8 @@ func (m *Model) AddFolder(cfg config.FolderConfiguration) { m.deviceFolders[device.DeviceID] = append(m.deviceFolders[device.DeviceID], cfg.ID) } - ignores, _ := ignore.Load(filepath.Join(cfg.Path, ".stignore"), m.cfg.Options().CacheIgnoredFiles) + ignores := ignore.New(m.cfg.Options().CacheIgnoredFiles) + _ = ignores.Load(filepath.Join(cfg.Path, ".stignore")) // Ignore error, there might not be an .stignore m.folderIgnores[cfg.ID] = ignores m.addedFolder = true @@ -1083,8 +1084,8 @@ func (m *Model) ScanFolderSub(folder, sub string) error { fs, ok := m.folderFiles[folder] dir := m.folderCfgs[folder].Path - ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"), m.cfg.Options().CacheIgnoredFiles) - m.folderIgnores[folder] = ignores + ignores := m.folderIgnores[folder] + _ = ignores.Load(filepath.Join(dir, ".stignore")) // Ignore error, there might not be an .stignore w := &scanner.Walker{ Dir: dir, diff --git a/internal/scanner/walk_test.go b/internal/scanner/walk_test.go index 859b81d6b..f01d3232c 100644 --- a/internal/scanner/walk_test.go +++ b/internal/scanner/walk_test.go @@ -58,7 +58,8 @@ func init() { } func TestWalkSub(t *testing.T) { - ignores, err := ignore.Load("testdata/.stignore", false) + ignores := ignore.New(false) + err := ignores.Load("testdata/.stignore") if err != nil { t.Fatal(err) } @@ -93,7 +94,8 @@ func TestWalkSub(t *testing.T) { } func TestWalk(t *testing.T) { - ignores, err := ignore.Load("testdata/.stignore", false) + ignores := ignore.New(false) + err := ignores.Load("testdata/.stignore") if err != nil { t.Fatal(err) }