From 2c89f04be769a357732a201124c6f16790d498cb Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Tue, 23 Dec 2014 10:05:08 +0100 Subject: [PATCH 1/2] Refactor ignore handling (...) This uses persistent Matcher objects that can reload their content and provide a hash string that can be used to check if it's changed. The cache is local to each Matcher object instead of kept globally. --- internal/ignore/cache.go | 36 +------ internal/ignore/ignore.go | 173 +++++++++++++++++++++------------ internal/ignore/ignore_test.go | 137 ++++++++++++++++++++++---- internal/model/model.go | 7 +- internal/scanner/walk_test.go | 6 +- 5 files changed, 239 insertions(+), 120 deletions(-) diff --git a/internal/ignore/cache.go b/internal/ignore/cache.go index 3667bcf45..115275b19 100644 --- a/internal/ignore/cache.go +++ b/internal/ignore/cache.go @@ -15,26 +15,11 @@ package ignore -import ( - "sync" - "time" -) - -var ( - caches = make(map[string]*cache) - cacheMut sync.Mutex -) - -func init() { - // Periodically go through the cache and remove cache entries that have - // not been touched in the last two hours. - go cleanIgnoreCaches(2 * time.Hour) -} +import "time" type cache struct { patterns []Pattern entries map[string]cacheEntry - mut sync.Mutex } type cacheEntry struct { @@ -50,46 +35,27 @@ func newCache(patterns []Pattern) *cache { } func (c *cache) clean(d time.Duration) { - c.mut.Lock() for k, v := range c.entries { if time.Since(v.access) > d { delete(c.entries, k) } } - c.mut.Unlock() } func (c *cache) get(key string) (result, ok bool) { - c.mut.Lock() res, ok := c.entries[key] if ok { res.access = time.Now() c.entries[key] = res } - c.mut.Unlock() return res.value, ok } func (c *cache) set(key string, val bool) { - c.mut.Lock() c.entries[key] = cacheEntry{val, time.Now()} - c.mut.Unlock() } func (c *cache) len() int { - c.mut.Lock() l := len(c.entries) - c.mut.Unlock() return l } - -func cleanIgnoreCaches(dur time.Duration) { - for { - time.Sleep(dur) - cacheMut.Lock() - for _, v := range caches { - v.clean(dur) - } - cacheMut.Unlock() - } -} diff --git a/internal/ignore/ignore.go b/internal/ignore/ignore.go index e74c7ad21..d1b68c73c 100644 --- a/internal/ignore/ignore.go +++ b/internal/ignore/ignore.go @@ -17,6 +17,8 @@ package ignore import ( "bufio" + "bytes" + "crypto/md5" "fmt" "io" "os" @@ -24,6 +26,7 @@ import ( "regexp" "strings" "sync" + "time" "github.com/syncthing/syncthing/internal/fnmatch" ) @@ -33,51 +36,76 @@ type Pattern struct { include bool } +func (p Pattern) String() string { + if p.include { + return p.match.String() + } else { + return "(?exclude)" + p.match.String() + } +} + type Matcher struct { - patterns []Pattern - matches *cache - mut sync.Mutex + patterns []Pattern + withCache bool + matches *cache + curHash string + stop chan struct{} + mut sync.Mutex } -func Load(file string, cache bool) (*Matcher, error) { - seen := make(map[string]bool) - matcher, err := loadIgnoreFile(file, seen) - if !cache || err != nil { - return matcher, err +func New(withCache bool) *Matcher { + m := &Matcher{ + withCache: withCache, + stop: make(chan struct{}), } - - cacheMut.Lock() - defer cacheMut.Unlock() - - // Get the current cache object for the given file - cached, ok := caches[file] - if !ok || !patternsEqual(cached.patterns, matcher.patterns) { - // Nothing in cache or a cache mismatch, create a new cache which will - // store matches for the given set of patterns. - // Initialize oldMatches to indicate that we are interested in - // caching. - cached = newCache(matcher.patterns) - matcher.matches = cached - caches[file] = cached - return matcher, nil + if withCache { + go m.clean(2 * time.Hour) } - - // Patterns haven't changed, so we can reuse the old matches, create a new - // matches map and update the pointer. (This prevents matches map from - // growing indefinately, as we only cache whatever we've matched in the last - // iteration, rather than through runtime history) - matcher.matches = cached - return matcher, nil + return m } -func Parse(r io.Reader, file string) (*Matcher, error) { - seen := map[string]bool{ - file: true, +func (m *Matcher) Load(file string) error { + // No locking, Parse() does the locking + + fd, err := os.Open(file) + if err != nil { + // We do a parse with empty patterns to clear out the hash, cache etc. + m.Parse(&bytes.Buffer{}, file) + return err } - return parseIgnoreFile(r, file, seen) + defer fd.Close() + + return m.Parse(fd, file) +} + +func (m *Matcher) Parse(r io.Reader, file string) error { + m.mut.Lock() + defer m.mut.Unlock() + + seen := map[string]bool{file: true} + patterns, err := parseIgnoreFile(r, file, seen) + // Error is saved and returned at the end. We process the patterns + // (possibly blank) anyway. + + newHash := hashPatterns(patterns) + if newHash == m.curHash { + // We've already loaded exactly these patterns. + return err + } + + m.curHash = newHash + m.patterns = patterns + if m.withCache { + m.matches = newCache(patterns) + } + + return err } func (m *Matcher) Match(file string) (result bool) { + m.mut.Lock() + defer m.mut.Unlock() + if len(m.patterns) == 0 { return false } @@ -108,18 +136,53 @@ func (m *Matcher) Match(file string) (result bool) { // Patterns return a list of the loaded regexp patterns, as strings func (m *Matcher) Patterns() []string { + m.mut.Lock() + defer m.mut.Unlock() + patterns := make([]string, len(m.patterns)) for i, pat := range m.patterns { - if pat.include { - patterns[i] = pat.match.String() - } else { - patterns[i] = "(?exclude)" + pat.match.String() - } + patterns[i] = pat.String() } return patterns } -func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) { +func (m *Matcher) Hash() string { + m.mut.Lock() + defer m.mut.Unlock() + return m.curHash +} + +func (m *Matcher) Stop() { + close(m.stop) +} + +func (m *Matcher) clean(d time.Duration) { + t := time.NewTimer(d / 2) + for { + select { + case <-m.stop: + return + case <-t.C: + m.mut.Lock() + if m.matches != nil { + m.matches.clean(d) + } + t.Reset(d / 2) + m.mut.Unlock() + } + } +} + +func hashPatterns(patterns []Pattern) string { + h := md5.New() + for _, pat := range patterns { + h.Write([]byte(pat.String())) + h.Write([]byte("\n")) + } + return fmt.Sprintf("%x", h.Sum(nil)) +} + +func loadIgnoreFile(file string, seen map[string]bool) ([]Pattern, error) { if seen[file] { return nil, fmt.Errorf("Multiple include of ignore file %q", file) } @@ -134,8 +197,8 @@ func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) { return parseIgnoreFile(fd, file, seen) } -func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*Matcher, error) { - var exps Matcher +func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) ([]Pattern, error) { + var patterns []Pattern addPattern := func(line string) error { include := true @@ -150,27 +213,27 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) } else if strings.HasPrefix(line, "**/") { // Add the pattern as is, and without **/ so it matches in current dir exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) } else if strings.HasPrefix(line, "#include ") { includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):]) includes, err := loadIgnoreFile(includeFile, seen) if err != nil { return err } - exps.patterns = append(exps.patterns, includes.patterns...) + patterns = append(patterns, includes...) } else { // Path name or pattern, add it so it matches files both in // current directory and subdirs. @@ -178,13 +241,13 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps.patterns = append(exps.patterns, Pattern{exp, include}) + patterns = append(patterns, Pattern{exp, include}) } return nil } @@ -218,17 +281,5 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M } } - return &exps, nil -} - -func patternsEqual(a, b []Pattern) bool { - if len(a) != len(b) { - return false - } - for i := range a { - if a[i].include != b[i].include || a[i].match.String() != b[i].match.String() { - return false - } - } - return true + return patterns, nil } diff --git a/internal/ignore/ignore_test.go b/internal/ignore/ignore_test.go index 27a4c3c77..20c36e905 100644 --- a/internal/ignore/ignore_test.go +++ b/internal/ignore/ignore_test.go @@ -25,7 +25,8 @@ import ( ) func TestIgnore(t *testing.T) { - pats, err := Load("testdata/.stignore", true) + pats := New(true) + err := pats.Load("testdata/.stignore") if err != nil { t.Fatal(err) } @@ -74,7 +75,8 @@ func TestExcludes(t *testing.T) { i*2 !ign2 ` - pats, err := Parse(bytes.NewBufferString(stignore), ".stignore") + pats := New(true) + err := pats.Parse(bytes.NewBufferString(stignore), ".stignore") if err != nil { t.Fatal(err) } @@ -114,15 +116,19 @@ func TestBadPatterns(t *testing.T) { } for _, pat := range badPatterns { - parsed, err := Parse(bytes.NewBufferString(pat), ".stignore") + err := New(true).Parse(bytes.NewBufferString(pat), ".stignore") if err == nil { - t.Errorf("No error for pattern %q: %v", pat, parsed) + t.Errorf("No error for pattern %q", pat) } } } func TestCaseSensitivity(t *testing.T) { - ign, _ := Parse(bytes.NewBufferString("test"), ".stignore") + ign := New(true) + err := ign.Parse(bytes.NewBufferString("test"), ".stignore") + if err != nil { + t.Error(err) + } match := []string{"test"} dontMatch := []string{"foo"} @@ -170,7 +176,8 @@ func TestCaching(t *testing.T) { fd2.WriteString("/y/\n") - pats, err := Load(fd1.Name(), true) + pats := New(true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -193,9 +200,9 @@ func TestCaching(t *testing.T) { t.Fatal("Expected 4 cached results") } - // Reload file, expect old outcomes to be provided + // Reload file, expect old outcomes to be preserved - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -207,7 +214,7 @@ func TestCaching(t *testing.T) { fd2.WriteString("/z/\n") - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -222,9 +229,9 @@ func TestCaching(t *testing.T) { pats.Match(letter) } - // Verify that outcomes provided on next laod + // Verify that outcomes preserved on next laod - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -236,7 +243,7 @@ func TestCaching(t *testing.T) { fd1.WriteString("/a/\n") - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -252,7 +259,7 @@ func TestCaching(t *testing.T) { // Verify that outcomes provided on next laod - pats, err = Load(fd1.Name(), true) + err = pats.Load(fd1.Name()) if err != nil { t.Fatal(err) } @@ -273,7 +280,11 @@ func TestCommentsAndBlankLines(t *testing.T) { ` - pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore") + pats := New(true) + err := pats.Parse(bytes.NewBufferString(stignore), ".stignore") + if err != nil { + t.Error(err) + } if len(pats.patterns) > 0 { t.Errorf("Expected no patterns") } @@ -297,7 +308,11 @@ flamingo *.crow *.crow ` - pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore") + pats := New(false) + err := pats.Parse(bytes.NewBufferString(stignore), ".stignore") + if err != nil { + b.Error(err) + } b.ResetTimer() for i := 0; i < b.N; i++ { @@ -335,7 +350,8 @@ flamingo } // Load the patterns - pats, err := Load(fd.Name(), true) + pats := New(true) + err = pats.Load(fd.Name()) if err != nil { b.Fatal(err) } @@ -344,7 +360,7 @@ flamingo // This load should now load the cached outcomes as the set of patterns // has not changed. - pats, err = Load(fd.Name(), true) + err = pats.Load(fd.Name()) if err != nil { b.Fatal(err) } @@ -370,7 +386,8 @@ func TestCacheReload(t *testing.T) { t.Fatal(err) } - pats, err := Load(fd.Name(), true) + pats := New(true) + err = pats.Load(fd.Name()) if err != nil { t.Fatal(err) } @@ -402,7 +419,7 @@ func TestCacheReload(t *testing.T) { t.Fatal(err) } - pats, err = Load(fd.Name(), true) + err = pats.Load(fd.Name()) if err != nil { t.Fatal(err) } @@ -419,3 +436,85 @@ func TestCacheReload(t *testing.T) { t.Error("Unexpected non-match for f3") } } + +func TestHash(t *testing.T) { + p1 := New(true) + err := p1.Load("testdata/.stignore") + if err != nil { + t.Fatal(err) + } + + // Same list of patterns as testdata/.stignore, after expansion + stignore := ` + dir2/dfile + dir3 + bfile + dir1/cfile + **/efile + /ffile + lost+found + ` + p2 := New(true) + err = p2.Parse(bytes.NewBufferString(stignore), ".stignore") + if err != nil { + t.Fatal(err) + } + + // Not same list of patterns + stignore = ` + dir2/dfile + dir3 + bfile + dir1/cfile + /ffile + lost+found + ` + p3 := New(true) + err = p3.Parse(bytes.NewBufferString(stignore), ".stignore") + if err != nil { + t.Fatal(err) + } + + if p1.Hash() == "" { + t.Error("p1 hash blank") + } + if p2.Hash() == "" { + t.Error("p2 hash blank") + } + if p3.Hash() == "" { + t.Error("p3 hash blank") + } + if p1.Hash() != p2.Hash() { + t.Error("p1-p2 hashes differ") + } + if p1.Hash() == p3.Hash() { + t.Error("p1-p3 hashes same") + } +} + +func TestHashOfEmpty(t *testing.T) { + p1 := New(true) + err := p1.Load("testdata/.stignore") + if err != nil { + t.Fatal(err) + } + + firstHash := p1.Hash() + + // Reloading with a non-existent file should empty the patterns and + // recalculate the hash. d41d8cd98f00b204e9800998ecf8427e is the md5 of + // nothing. + + p1.Load("file/does/not/exist") + secondHash := p1.Hash() + + if firstHash == secondHash { + t.Error("hash did not change") + } + if secondHash != "d41d8cd98f00b204e9800998ecf8427e" { + t.Error("second hash is not hash of empty string") + } + if len(p1.patterns) != 0 { + t.Error("there are more than zero patterns") + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 9ab9a8db4..fa9f143f0 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -1040,7 +1040,8 @@ func (m *Model) AddFolder(cfg config.FolderConfiguration) { m.deviceFolders[device.DeviceID] = append(m.deviceFolders[device.DeviceID], cfg.ID) } - ignores, _ := ignore.Load(filepath.Join(cfg.Path, ".stignore"), m.cfg.Options().CacheIgnoredFiles) + ignores := ignore.New(m.cfg.Options().CacheIgnoredFiles) + _ = ignores.Load(filepath.Join(cfg.Path, ".stignore")) // Ignore error, there might not be an .stignore m.folderIgnores[cfg.ID] = ignores m.addedFolder = true @@ -1083,8 +1084,8 @@ func (m *Model) ScanFolderSub(folder, sub string) error { fs, ok := m.folderFiles[folder] dir := m.folderCfgs[folder].Path - ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"), m.cfg.Options().CacheIgnoredFiles) - m.folderIgnores[folder] = ignores + ignores := m.folderIgnores[folder] + _ = ignores.Load(filepath.Join(dir, ".stignore")) // Ignore error, there might not be an .stignore w := &scanner.Walker{ Dir: dir, diff --git a/internal/scanner/walk_test.go b/internal/scanner/walk_test.go index 859b81d6b..f01d3232c 100644 --- a/internal/scanner/walk_test.go +++ b/internal/scanner/walk_test.go @@ -58,7 +58,8 @@ func init() { } func TestWalkSub(t *testing.T) { - ignores, err := ignore.Load("testdata/.stignore", false) + ignores := ignore.New(false) + err := ignores.Load("testdata/.stignore") if err != nil { t.Fatal(err) } @@ -93,7 +94,8 @@ func TestWalkSub(t *testing.T) { } func TestWalk(t *testing.T) { - ignores, err := ignore.Load("testdata/.stignore", false) + ignores := ignore.New(false) + err := ignores.Load("testdata/.stignore") if err != nil { t.Fatal(err) } From cadbb6bbce571f9d4abe274f33a407517d91eaa2 Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Tue, 23 Dec 2014 10:06:51 +0100 Subject: [PATCH 2/2] Move ignore handling from index recv to puller (fixes #1133) With this change we accept updates for ignored files from other devices, and check the ignore patterns at pull time. When we detect that the ignore patterns have changed we do a full check of files that we might now need to pull. --- internal/model/model.go | 10 ++++------ internal/model/puller.go | 27 ++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/internal/model/model.go b/internal/model/model.go index fa9f143f0..dc5b096ea 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -452,7 +452,6 @@ func (m *Model) Index(deviceID protocol.DeviceID, folder string, fs []protocol.F m.fmut.RLock() files, ok := m.folderFiles[folder] - ignores, _ := m.folderIgnores[folder] m.fmut.RUnlock() if !ok { @@ -461,9 +460,9 @@ func (m *Model) Index(deviceID protocol.DeviceID, folder string, fs []protocol.F for i := 0; i < len(fs); { lamport.Default.Tick(fs[i].Version) - if (ignores != nil && ignores.Match(fs[i].Name)) || symlinkInvalid(fs[i].IsSymlink()) { + if symlinkInvalid(fs[i].IsSymlink()) { if debug { - l.Debugln("dropping update for ignored/unsupported symlink", fs[i]) + l.Debugln("dropping update for unsupported symlink", fs[i]) } fs[i] = fs[len(fs)-1] fs = fs[:len(fs)-1] @@ -496,7 +495,6 @@ func (m *Model) IndexUpdate(deviceID protocol.DeviceID, folder string, fs []prot m.fmut.RLock() files, ok := m.folderFiles[folder] - ignores, _ := m.folderIgnores[folder] m.fmut.RUnlock() if !ok { @@ -505,9 +503,9 @@ func (m *Model) IndexUpdate(deviceID protocol.DeviceID, folder string, fs []prot for i := 0; i < len(fs); { lamport.Default.Tick(fs[i].Version) - if (ignores != nil && ignores.Match(fs[i].Name)) || symlinkInvalid(fs[i].IsSymlink()) { + if symlinkInvalid(fs[i].IsSymlink()) { if debug { - l.Debugln("dropping update for ignored/unsupported symlink", fs[i]) + l.Debugln("dropping update for unsupported symlink", fs[i]) } fs[i] = fs[len(fs)-1] fs = fs[:len(fs)-1] diff --git a/internal/model/puller.go b/internal/model/puller.go index 371c143c9..1d9fea681 100644 --- a/internal/model/puller.go +++ b/internal/model/puller.go @@ -30,6 +30,7 @@ import ( "github.com/syncthing/syncthing/internal/config" "github.com/syncthing/syncthing/internal/events" + "github.com/syncthing/syncthing/internal/ignore" "github.com/syncthing/syncthing/internal/osutil" "github.com/syncthing/syncthing/internal/protocol" "github.com/syncthing/syncthing/internal/scanner" @@ -100,6 +101,7 @@ func (p *Puller) Serve() { }() var prevVer uint64 + var prevIgnoreHash string // We don't start pulling files until a scan has been completed. initialScanCompleted := false @@ -125,6 +127,20 @@ loop: continue } + p.model.fmut.RLock() + curIgnores := p.model.folderIgnores[p.folder] + p.model.fmut.RUnlock() + + if newHash := curIgnores.Hash(); newHash != prevIgnoreHash { + // The ignore patterns have changed. We need to re-evaluate if + // there are files we need now that were ignored before. + if debug { + l.Debugln(p, "ignore patterns have changed, resetting prevVer") + } + prevVer = 0 + prevIgnoreHash = newHash + } + // RemoteLocalVersion() is a fast call, doesn't touch the database. curVer := p.model.RemoteLocalVersion(p.folder) if curVer == prevVer { @@ -149,7 +165,7 @@ loop: checksum = true } - changed := p.pullerIteration(checksum) + changed := p.pullerIteration(checksum, curIgnores) if debug { l.Debugln(p, "changed", changed) } @@ -167,7 +183,7 @@ loop: // them, but at the same time we have the local // version that includes those files in curVer. So we // catch the case that localVersion might have - // decresed here. + // decreased here. l.Debugln(p, "adjusting curVer", lv) curVer = lv } @@ -233,7 +249,7 @@ func (p *Puller) String() string { // returns the number items that should have been synced (even those that // might have failed). One puller iteration handles all files currently // flagged as needed in the folder. -func (p *Puller) pullerIteration(checksum bool) int { +func (p *Puller) pullerIteration(checksum bool, ignores *ignore.Matcher) int { pullChan := make(chan pullBlockState) copyChan := make(chan copyBlocksState) finisherChan := make(chan *sharedPullerState) @@ -298,6 +314,11 @@ func (p *Puller) pullerIteration(checksum bool) int { file := intf.(protocol.FileInfo) + if ignores.Match(file.Name) { + // This is an ignored file. Skip it, continue iteration. + return true + } + events.Default.Log(events.ItemStarted, map[string]string{ "folder": p.folder, "item": file.Name,