From 665c5992f0960a499dd4e4ebaeb115b7612b2d97 Mon Sep 17 00:00:00 2001 From: Audrius Butkevicius Date: Sun, 12 Oct 2014 22:35:15 +0100 Subject: [PATCH] Cache ignore file matches --- internal/config/config.go | 1 + internal/config/config_test.go | 2 + internal/config/testdata/overridenvalues.xml | 1 + internal/ignore/ignore.go | 109 ++++++++-- internal/ignore/ignore_test.go | 207 ++++++++++++++++++- internal/model/model.go | 10 +- internal/model/model_test.go | 2 +- internal/scanner/walk.go | 2 +- internal/scanner/walk_test.go | 4 +- 9 files changed, 302 insertions(+), 36 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 58ccc9f3c..1503bd0c1 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -148,6 +148,7 @@ type OptionsConfiguration struct { RestartOnWakeup bool `xml:"restartOnWakeup" default:"true"` AutoUpgradeIntervalH int `xml:"autoUpgradeIntervalH" default:"12"` // 0 for off KeepTemporariesH int `xml:"keepTemporariesH" default:"24"` // 0 for off + CacheIgnoredFiles bool `xml:"cacheIgnoredFiles" default:"true"` Deprecated_RescanIntervalS int `xml:"rescanIntervalS,omitempty" json:"-"` Deprecated_UREnabled bool `xml:"urEnabled,omitempty" json:"-"` diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 9c4102539..093de8b95 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -50,6 +50,7 @@ func TestDefaultValues(t *testing.T) { RestartOnWakeup: true, AutoUpgradeIntervalH: 12, KeepTemporariesH: 24, + CacheIgnoredFiles: true, } cfg := New(device1) @@ -138,6 +139,7 @@ func TestOverriddenValues(t *testing.T) { RestartOnWakeup: false, AutoUpgradeIntervalH: 24, KeepTemporariesH: 48, + CacheIgnoredFiles: false, } cfg, err := Load("testdata/overridenvalues.xml", device1) diff --git a/internal/config/testdata/overridenvalues.xml b/internal/config/testdata/overridenvalues.xml index 9d5340fbd..bf8cb2fa8 100755 --- a/internal/config/testdata/overridenvalues.xml +++ b/internal/config/testdata/overridenvalues.xml @@ -18,5 +18,6 @@ false 24 48 + false diff --git a/internal/ignore/ignore.go b/internal/ignore/ignore.go index 95a6876e7..39ab7d21e 100644 --- a/internal/ignore/ignore.go +++ b/internal/ignore/ignore.go @@ -23,31 +23,94 @@ import ( "path/filepath" "regexp" "strings" + "sync" "github.com/syncthing/syncthing/internal/fnmatch" ) +var caches = make(map[string]MatcherCache) + type Pattern struct { match *regexp.Regexp include bool } -type Patterns []Pattern +type Matcher struct { + patterns []Pattern + oldMatches map[string]bool -func Load(file string) (Patterns, error) { - seen := make(map[string]bool) - return loadIgnoreFile(file, seen) + newMatches map[string]bool + mut sync.Mutex } -func Parse(r io.Reader, file string) (Patterns, error) { +type MatcherCache struct { + patterns []Pattern + matches *map[string]bool +} + +func Load(file string, cache bool) (*Matcher, error) { + seen := make(map[string]bool) + matcher, err := loadIgnoreFile(file, seen) + if !cache || err != nil { + return matcher, err + } + + // Get the current cache object for the given file + cached, ok := caches[file] + if !ok || !patternsEqual(cached.patterns, matcher.patterns) { + // Nothing in cache or a cache mismatch, create a new cache which will + // store matches for the given set of patterns. + // Initialize oldMatches to indicate that we are interested in + // caching. + matcher.oldMatches = make(map[string]bool) + matcher.newMatches = make(map[string]bool) + caches[file] = MatcherCache{ + patterns: matcher.patterns, + matches: &matcher.newMatches, + } + return matcher, nil + } + + // Patterns haven't changed, so we can reuse the old matches, create a new + // matches map and update the pointer. (This prevents matches map from + // growing indefinately, as we only cache whatever we've matched in the last + // iteration, rather than through runtime history) + matcher.oldMatches = *cached.matches + matcher.newMatches = make(map[string]bool) + cached.matches = &matcher.newMatches + caches[file] = cached + return matcher, nil +} + +func Parse(r io.Reader, file string) (*Matcher, error) { seen := map[string]bool{ file: true, } return parseIgnoreFile(r, file, seen) } -func (l Patterns) Match(file string) bool { - for _, pattern := range l { +func (m *Matcher) Match(file string) (result bool) { + if len(m.patterns) == 0 { + return false + } + + // We have old matches map set, means we should do caching + if m.oldMatches != nil { + // Capture the result to the new matches regardless of who returns it + defer func() { + m.mut.Lock() + m.newMatches[file] = result + m.mut.Unlock() + }() + // Check perhaps we've seen this file before, and we already know + // what the outcome is going to be. + result, ok := m.oldMatches[file] + if ok { + return result + } + } + + for _, pattern := range m.patterns { if pattern.match.MatchString(file) { return pattern.include } @@ -55,7 +118,7 @@ func (l Patterns) Match(file string) bool { return false } -func loadIgnoreFile(file string, seen map[string]bool) (Patterns, error) { +func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) { if seen[file] { return nil, fmt.Errorf("Multiple include of ignore file %q", file) } @@ -70,8 +133,8 @@ func loadIgnoreFile(file string, seen map[string]bool) (Patterns, error) { return parseIgnoreFile(fd, file, seen) } -func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Patterns, error) { - var exps Patterns +func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*Matcher, error) { + var exps Matcher addPattern := func(line string) error { include := true @@ -86,27 +149,27 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps = append(exps, Pattern{exp, include}) + exps.patterns = append(exps.patterns, Pattern{exp, include}) } else if strings.HasPrefix(line, "**/") { // Add the pattern as is, and without **/ so it matches in current dir exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps = append(exps, Pattern{exp, include}) + exps.patterns = append(exps.patterns, Pattern{exp, include}) exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps = append(exps, Pattern{exp, include}) + exps.patterns = append(exps.patterns, Pattern{exp, include}) } else if strings.HasPrefix(line, "#include ") { includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):]) includes, err := loadIgnoreFile(includeFile, seen) if err != nil { return err } else { - exps = append(exps, includes...) + exps.patterns = append(exps.patterns, includes.patterns...) } } else { // Path name or pattern, add it so it matches files both in @@ -115,13 +178,13 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps = append(exps, Pattern{exp, include}) + exps.patterns = append(exps.patterns, Pattern{exp, include}) exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME) if err != nil { return fmt.Errorf("Invalid pattern %q in ignore file", line) } - exps = append(exps, Pattern{exp, include}) + exps.patterns = append(exps.patterns, Pattern{exp, include}) } return nil } @@ -155,5 +218,17 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (Pa } } - return exps, nil + return &exps, nil +} + +func patternsEqual(a, b []Pattern) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i].include != b[i].include || a[i].match.String() != b[i].match.String() { + return false + } + } + return true } diff --git a/internal/ignore/ignore_test.go b/internal/ignore/ignore_test.go index d5cace3a6..b1eb29e16 100644 --- a/internal/ignore/ignore_test.go +++ b/internal/ignore/ignore_test.go @@ -13,19 +13,19 @@ // You should have received a copy of the GNU General Public License along // with this program. If not, see . -package ignore_test +package ignore import ( "bytes" + "io/ioutil" + "os" "path/filepath" "runtime" "testing" - - "github.com/syncthing/syncthing/internal/ignore" ) func TestIgnore(t *testing.T) { - pats, err := ignore.Load("testdata/.stignore") + pats, err := Load("testdata/.stignore", true) if err != nil { t.Fatal(err) } @@ -72,7 +72,7 @@ func TestExcludes(t *testing.T) { i*2 !ign2 ` - pats, err := ignore.Parse(bytes.NewBufferString(stignore), ".stignore") + pats, err := Parse(bytes.NewBufferString(stignore), ".stignore") if err != nil { t.Fatal(err) } @@ -112,7 +112,7 @@ func TestBadPatterns(t *testing.T) { } for _, pat := range badPatterns { - parsed, err := ignore.Parse(bytes.NewBufferString(pat), ".stignore") + parsed, err := Parse(bytes.NewBufferString(pat), ".stignore") if err == nil { t.Errorf("No error for pattern %q: %v", pat, parsed) } @@ -120,7 +120,7 @@ func TestBadPatterns(t *testing.T) { } func TestCaseSensitivity(t *testing.T) { - ign, _ := ignore.Parse(bytes.NewBufferString("test"), ".stignore") + ign, _ := Parse(bytes.NewBufferString("test"), ".stignore") match := []string{"test"} dontMatch := []string{"foo"} @@ -145,6 +145,144 @@ func TestCaseSensitivity(t *testing.T) { } } +func TestCaching(t *testing.T) { + fd1, err := ioutil.TempFile("", "") + if err != nil { + t.Fatal(err) + } + + fd2, err := ioutil.TempFile("", "") + if err != nil { + t.Fatal(err) + } + + defer fd1.Close() + defer fd2.Close() + defer os.Remove(fd1.Name()) + defer os.Remove(fd2.Name()) + + _, err = fd1.WriteString("/x/\n#include " + filepath.Base(fd2.Name()) + "\n") + if err != nil { + t.Fatal(err) + } + + fd2.WriteString("/y/\n") + + pats, err := Load(fd1.Name(), true) + if err != nil { + t.Fatal(err) + } + + if pats.oldMatches == nil || len(pats.oldMatches) != 0 { + t.Fatal("Expected empty map") + } + + if pats.newMatches == nil || len(pats.newMatches) != 0 { + t.Fatal("Expected empty map") + } + + if len(pats.patterns) != 4 { + t.Fatal("Incorrect number of patterns loaded", len(pats.patterns), "!=", 4) + } + + // Cache some outcomes + + for _, letter := range []string{"a", "b", "x", "y"} { + pats.Match(letter) + } + + if len(pats.newMatches) != 4 { + t.Fatal("Expected 4 cached results") + } + + // Reload file, expect old outcomes to be provided + + pats, err = Load(fd1.Name(), true) + if err != nil { + t.Fatal(err) + } + if len(pats.oldMatches) != 4 { + t.Fatal("Expected 4 cached results") + } + + // Match less this time + + for _, letter := range []string{"b", "x", "y"} { + pats.Match(letter) + } + + if len(pats.newMatches) != 3 { + t.Fatal("Expected 3 cached results") + } + + // Reload file, expect the new outcomes to be provided + + pats, err = Load(fd1.Name(), true) + if err != nil { + t.Fatal(err) + } + if len(pats.oldMatches) != 3 { + t.Fatal("Expected 3 cached results", len(pats.oldMatches)) + } + + // Modify the include file, expect empty cache + + fd2.WriteString("/z/\n") + + pats, err = Load(fd1.Name(), true) + if err != nil { + t.Fatal(err) + } + + if len(pats.oldMatches) != 0 { + t.Fatal("Expected 0 cached results") + } + + // Cache some outcomes again + + for _, letter := range []string{"b", "x", "y"} { + pats.Match(letter) + } + + // Verify that outcomes provided on next laod + + pats, err = Load(fd1.Name(), true) + if err != nil { + t.Fatal(err) + } + if len(pats.oldMatches) != 3 { + t.Fatal("Expected 3 cached results") + } + + // Modify the root file, expect cache to be invalidated + + fd1.WriteString("/a/\n") + + pats, err = Load(fd1.Name(), true) + if err != nil { + t.Fatal(err) + } + if len(pats.oldMatches) != 0 { + t.Fatal("Expected cache invalidation") + } + + // Cache some outcomes again + + for _, letter := range []string{"b", "x", "y"} { + pats.Match(letter) + } + + // Verify that outcomes provided on next laod + + pats, err = Load(fd1.Name(), true) + if err != nil { + t.Fatal(err) + } + if len(pats.oldMatches) != 3 { + t.Fatal("Expected 3 cached results") + } +} + func TestCommentsAndBlankLines(t *testing.T) { stignore := ` // foo @@ -157,8 +295,8 @@ func TestCommentsAndBlankLines(t *testing.T) { ` - pats, _ := ignore.Parse(bytes.NewBufferString(stignore), ".stignore") - if len(pats) > 0 { + pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore") + if len(pats.patterns) > 0 { t.Errorf("Expected no patterns") } } @@ -181,10 +319,59 @@ flamingo *.crow *.crow ` - pats, _ := ignore.Parse(bytes.NewBufferString(stignore), ".stignore") + pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore") b.ResetTimer() for i := 0; i < b.N; i++ { result = pats.Match("filename") } } + +func BenchmarkMatchCached(b *testing.B) { + stignore := ` +.frog +.frog* +.frogfox +.whale +.whale/* +.dolphin +.dolphin/* +~ferret~.* +.ferret.* +flamingo.* +flamingo +*.crow +*.crow + ` + // Caches per file, hence write the patterns to a file. + fd, err := ioutil.TempFile("", "") + if err != nil { + b.Fatal(err) + } + + _, err = fd.WriteString(stignore) + defer fd.Close() + defer os.Remove(fd.Name()) + if err != nil { + b.Fatal(err) + } + + // Load the patterns + pats, err := Load(fd.Name(), true) + if err != nil { + b.Fatal(err) + } + // Cache the outcome for "filename" + pats.Match("filename") + + // This load should now load the cached outcomes as the set of patterns + // has not changed. + pats, err = Load(fd.Name(), true) + if err != nil { + b.Fatal(err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + result = pats.Match("filename") + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 4e0323396..01dd8477b 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -93,7 +93,7 @@ type Model struct { folderDevices map[string][]protocol.DeviceID // folder -> deviceIDs deviceFolders map[protocol.DeviceID][]string // deviceID -> folders deviceStatRefs map[protocol.DeviceID]*stats.DeviceStatisticsReference // deviceID -> statsRef - folderIgnores map[string]ignore.Patterns // folder -> list of ignore patterns + folderIgnores map[string]*ignore.Matcher // folder -> matcher object folderRunners map[string]service // folder -> puller or scanner fmut sync.RWMutex // protects the above @@ -130,7 +130,7 @@ func NewModel(cfg *config.ConfigWrapper, deviceName, clientName, clientVersion s folderDevices: make(map[string][]protocol.DeviceID), deviceFolders: make(map[protocol.DeviceID][]string), deviceStatRefs: make(map[protocol.DeviceID]*stats.DeviceStatisticsReference), - folderIgnores: make(map[string]ignore.Patterns), + folderIgnores: make(map[string]*ignore.Matcher), folderRunners: make(map[string]service), folderState: make(map[string]folderState), folderStateChanged: make(map[string]time.Time), @@ -829,7 +829,7 @@ func (m *Model) deviceWasSeen(deviceID protocol.DeviceID) { m.deviceStatRef(deviceID).WasSeen() } -func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores ignore.Patterns) { +func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores *ignore.Matcher) { deviceID := conn.ID() name := conn.Name() var err error @@ -854,7 +854,7 @@ func sendIndexes(conn protocol.Connection, folder string, fs *files.Set, ignores } } -func sendIndexTo(initial bool, minLocalVer uint64, conn protocol.Connection, folder string, fs *files.Set, ignores ignore.Patterns) (uint64, error) { +func sendIndexTo(initial bool, minLocalVer uint64, conn protocol.Connection, folder string, fs *files.Set, ignores *ignore.Matcher) (uint64, error) { deviceID := conn.ID() name := conn.Name() batch := make([]protocol.FileInfo, 0, indexBatchSize) @@ -1006,7 +1006,7 @@ func (m *Model) ScanFolderSub(folder, sub string) error { fs, ok := m.folderFiles[folder] dir := m.folderCfgs[folder].Path - ignores, _ := ignore.Load(filepath.Join(dir, ".stignore")) + ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"), m.cfg.Options().CacheIgnoredFiles) m.folderIgnores[folder] = ignores w := &scanner.Walker{ diff --git a/internal/model/model_test.go b/internal/model/model_test.go index 36b276b24..b37dd0bd4 100644 --- a/internal/model/model_test.go +++ b/internal/model/model_test.go @@ -390,7 +390,7 @@ func TestIgnores(t *testing.T) { } db, _ := leveldb.Open(storage.NewMemStorage(), nil) - m := NewModel(nil, "device", "syncthing", "dev", db) + m := NewModel(config.Wrap("", config.Configuration{}), "device", "syncthing", "dev", db) m.AddFolder(config.FolderConfiguration{ID: "default", Path: "testdata"}) expected := []string{ diff --git a/internal/scanner/walk.go b/internal/scanner/walk.go index 5208a13bc..1e6f958cd 100644 --- a/internal/scanner/walk.go +++ b/internal/scanner/walk.go @@ -36,7 +36,7 @@ type Walker struct { // BlockSize controls the size of the block used when hashing. BlockSize int // List of patterns to ignore - Ignores ignore.Patterns + Ignores *ignore.Matcher // If TempNamer is not nil, it is used to ignore tempory files when walking. TempNamer TempNamer // If CurrentFiler is not nil, it is queried for the current file before rescanning. diff --git a/internal/scanner/walk_test.go b/internal/scanner/walk_test.go index 7e02b46ae..084aaabd1 100644 --- a/internal/scanner/walk_test.go +++ b/internal/scanner/walk_test.go @@ -58,7 +58,7 @@ func init() { } func TestWalkSub(t *testing.T) { - ignores, err := ignore.Load("testdata/.stignore") + ignores, err := ignore.Load("testdata/.stignore", false) if err != nil { t.Fatal(err) } @@ -93,7 +93,7 @@ func TestWalkSub(t *testing.T) { } func TestWalk(t *testing.T) { - ignores, err := ignore.Load("testdata/.stignore") + ignores, err := ignore.Load("testdata/.stignore", false) if err != nil { t.Fatal(err) }