Refactor ignore handling (...)

This uses persistent Matcher objects that can reload their content and
provide a hash string that can be used to check if it's changed. The
cache is local to each Matcher object instead of kept globally.
This commit is contained in:
Jakob Borg 2014-12-23 10:05:08 +01:00
parent 597011e3a9
commit 2c89f04be7
5 changed files with 239 additions and 120 deletions

View File

@ -15,26 +15,11 @@
package ignore package ignore
import ( import "time"
"sync"
"time"
)
var (
caches = make(map[string]*cache)
cacheMut sync.Mutex
)
func init() {
// Periodically go through the cache and remove cache entries that have
// not been touched in the last two hours.
go cleanIgnoreCaches(2 * time.Hour)
}
type cache struct { type cache struct {
patterns []Pattern patterns []Pattern
entries map[string]cacheEntry entries map[string]cacheEntry
mut sync.Mutex
} }
type cacheEntry struct { type cacheEntry struct {
@ -50,46 +35,27 @@ func newCache(patterns []Pattern) *cache {
} }
func (c *cache) clean(d time.Duration) { func (c *cache) clean(d time.Duration) {
c.mut.Lock()
for k, v := range c.entries { for k, v := range c.entries {
if time.Since(v.access) > d { if time.Since(v.access) > d {
delete(c.entries, k) delete(c.entries, k)
} }
} }
c.mut.Unlock()
} }
func (c *cache) get(key string) (result, ok bool) { func (c *cache) get(key string) (result, ok bool) {
c.mut.Lock()
res, ok := c.entries[key] res, ok := c.entries[key]
if ok { if ok {
res.access = time.Now() res.access = time.Now()
c.entries[key] = res c.entries[key] = res
} }
c.mut.Unlock()
return res.value, ok return res.value, ok
} }
func (c *cache) set(key string, val bool) { func (c *cache) set(key string, val bool) {
c.mut.Lock()
c.entries[key] = cacheEntry{val, time.Now()} c.entries[key] = cacheEntry{val, time.Now()}
c.mut.Unlock()
} }
func (c *cache) len() int { func (c *cache) len() int {
c.mut.Lock()
l := len(c.entries) l := len(c.entries)
c.mut.Unlock()
return l return l
} }
func cleanIgnoreCaches(dur time.Duration) {
for {
time.Sleep(dur)
cacheMut.Lock()
for _, v := range caches {
v.clean(dur)
}
cacheMut.Unlock()
}
}

View File

@ -17,6 +17,8 @@ package ignore
import ( import (
"bufio" "bufio"
"bytes"
"crypto/md5"
"fmt" "fmt"
"io" "io"
"os" "os"
@ -24,6 +26,7 @@ import (
"regexp" "regexp"
"strings" "strings"
"sync" "sync"
"time"
"github.com/syncthing/syncthing/internal/fnmatch" "github.com/syncthing/syncthing/internal/fnmatch"
) )
@ -33,51 +36,76 @@ type Pattern struct {
include bool include bool
} }
func (p Pattern) String() string {
if p.include {
return p.match.String()
} else {
return "(?exclude)" + p.match.String()
}
}
type Matcher struct { type Matcher struct {
patterns []Pattern patterns []Pattern
withCache bool
matches *cache matches *cache
curHash string
stop chan struct{}
mut sync.Mutex mut sync.Mutex
} }
func Load(file string, cache bool) (*Matcher, error) { func New(withCache bool) *Matcher {
seen := make(map[string]bool) m := &Matcher{
matcher, err := loadIgnoreFile(file, seen) withCache: withCache,
if !cache || err != nil { stop: make(chan struct{}),
return matcher, err
} }
if withCache {
cacheMut.Lock() go m.clean(2 * time.Hour)
defer cacheMut.Unlock()
// Get the current cache object for the given file
cached, ok := caches[file]
if !ok || !patternsEqual(cached.patterns, matcher.patterns) {
// Nothing in cache or a cache mismatch, create a new cache which will
// store matches for the given set of patterns.
// Initialize oldMatches to indicate that we are interested in
// caching.
cached = newCache(matcher.patterns)
matcher.matches = cached
caches[file] = cached
return matcher, nil
} }
return m
// Patterns haven't changed, so we can reuse the old matches, create a new
// matches map and update the pointer. (This prevents matches map from
// growing indefinately, as we only cache whatever we've matched in the last
// iteration, rather than through runtime history)
matcher.matches = cached
return matcher, nil
} }
func Parse(r io.Reader, file string) (*Matcher, error) { func (m *Matcher) Load(file string) error {
seen := map[string]bool{ // No locking, Parse() does the locking
file: true,
fd, err := os.Open(file)
if err != nil {
// We do a parse with empty patterns to clear out the hash, cache etc.
m.Parse(&bytes.Buffer{}, file)
return err
} }
return parseIgnoreFile(r, file, seen) defer fd.Close()
return m.Parse(fd, file)
}
func (m *Matcher) Parse(r io.Reader, file string) error {
m.mut.Lock()
defer m.mut.Unlock()
seen := map[string]bool{file: true}
patterns, err := parseIgnoreFile(r, file, seen)
// Error is saved and returned at the end. We process the patterns
// (possibly blank) anyway.
newHash := hashPatterns(patterns)
if newHash == m.curHash {
// We've already loaded exactly these patterns.
return err
}
m.curHash = newHash
m.patterns = patterns
if m.withCache {
m.matches = newCache(patterns)
}
return err
} }
func (m *Matcher) Match(file string) (result bool) { func (m *Matcher) Match(file string) (result bool) {
m.mut.Lock()
defer m.mut.Unlock()
if len(m.patterns) == 0 { if len(m.patterns) == 0 {
return false return false
} }
@ -108,18 +136,53 @@ func (m *Matcher) Match(file string) (result bool) {
// Patterns return a list of the loaded regexp patterns, as strings // Patterns return a list of the loaded regexp patterns, as strings
func (m *Matcher) Patterns() []string { func (m *Matcher) Patterns() []string {
m.mut.Lock()
defer m.mut.Unlock()
patterns := make([]string, len(m.patterns)) patterns := make([]string, len(m.patterns))
for i, pat := range m.patterns { for i, pat := range m.patterns {
if pat.include { patterns[i] = pat.String()
patterns[i] = pat.match.String()
} else {
patterns[i] = "(?exclude)" + pat.match.String()
}
} }
return patterns return patterns
} }
func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) { func (m *Matcher) Hash() string {
m.mut.Lock()
defer m.mut.Unlock()
return m.curHash
}
func (m *Matcher) Stop() {
close(m.stop)
}
func (m *Matcher) clean(d time.Duration) {
t := time.NewTimer(d / 2)
for {
select {
case <-m.stop:
return
case <-t.C:
m.mut.Lock()
if m.matches != nil {
m.matches.clean(d)
}
t.Reset(d / 2)
m.mut.Unlock()
}
}
}
func hashPatterns(patterns []Pattern) string {
h := md5.New()
for _, pat := range patterns {
h.Write([]byte(pat.String()))
h.Write([]byte("\n"))
}
return fmt.Sprintf("%x", h.Sum(nil))
}
func loadIgnoreFile(file string, seen map[string]bool) ([]Pattern, error) {
if seen[file] { if seen[file] {
return nil, fmt.Errorf("Multiple include of ignore file %q", file) return nil, fmt.Errorf("Multiple include of ignore file %q", file)
} }
@ -134,8 +197,8 @@ func loadIgnoreFile(file string, seen map[string]bool) (*Matcher, error) {
return parseIgnoreFile(fd, file, seen) return parseIgnoreFile(fd, file, seen)
} }
func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*Matcher, error) { func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) ([]Pattern, error) {
var exps Matcher var patterns []Pattern
addPattern := func(line string) error { addPattern := func(line string) error {
include := true include := true
@ -150,27 +213,27 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
if err != nil { if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line) return fmt.Errorf("Invalid pattern %q in ignore file", line)
} }
exps.patterns = append(exps.patterns, Pattern{exp, include}) patterns = append(patterns, Pattern{exp, include})
} else if strings.HasPrefix(line, "**/") { } else if strings.HasPrefix(line, "**/") {
// Add the pattern as is, and without **/ so it matches in current dir // Add the pattern as is, and without **/ so it matches in current dir
exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME) exp, err := fnmatch.Convert(line, fnmatch.FNM_PATHNAME)
if err != nil { if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line) return fmt.Errorf("Invalid pattern %q in ignore file", line)
} }
exps.patterns = append(exps.patterns, Pattern{exp, include}) patterns = append(patterns, Pattern{exp, include})
exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME) exp, err = fnmatch.Convert(line[3:], fnmatch.FNM_PATHNAME)
if err != nil { if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line) return fmt.Errorf("Invalid pattern %q in ignore file", line)
} }
exps.patterns = append(exps.patterns, Pattern{exp, include}) patterns = append(patterns, Pattern{exp, include})
} else if strings.HasPrefix(line, "#include ") { } else if strings.HasPrefix(line, "#include ") {
includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):]) includeFile := filepath.Join(filepath.Dir(currentFile), line[len("#include "):])
includes, err := loadIgnoreFile(includeFile, seen) includes, err := loadIgnoreFile(includeFile, seen)
if err != nil { if err != nil {
return err return err
} }
exps.patterns = append(exps.patterns, includes.patterns...) patterns = append(patterns, includes...)
} else { } else {
// Path name or pattern, add it so it matches files both in // Path name or pattern, add it so it matches files both in
// current directory and subdirs. // current directory and subdirs.
@ -178,13 +241,13 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
if err != nil { if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line) return fmt.Errorf("Invalid pattern %q in ignore file", line)
} }
exps.patterns = append(exps.patterns, Pattern{exp, include}) patterns = append(patterns, Pattern{exp, include})
exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME) exp, err = fnmatch.Convert("**/"+line, fnmatch.FNM_PATHNAME)
if err != nil { if err != nil {
return fmt.Errorf("Invalid pattern %q in ignore file", line) return fmt.Errorf("Invalid pattern %q in ignore file", line)
} }
exps.patterns = append(exps.patterns, Pattern{exp, include}) patterns = append(patterns, Pattern{exp, include})
} }
return nil return nil
} }
@ -218,17 +281,5 @@ func parseIgnoreFile(fd io.Reader, currentFile string, seen map[string]bool) (*M
} }
} }
return &exps, nil return patterns, nil
}
func patternsEqual(a, b []Pattern) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i].include != b[i].include || a[i].match.String() != b[i].match.String() {
return false
}
}
return true
} }

View File

@ -25,7 +25,8 @@ import (
) )
func TestIgnore(t *testing.T) { func TestIgnore(t *testing.T) {
pats, err := Load("testdata/.stignore", true) pats := New(true)
err := pats.Load("testdata/.stignore")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -74,7 +75,8 @@ func TestExcludes(t *testing.T) {
i*2 i*2
!ign2 !ign2
` `
pats, err := Parse(bytes.NewBufferString(stignore), ".stignore") pats := New(true)
err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -114,15 +116,19 @@ func TestBadPatterns(t *testing.T) {
} }
for _, pat := range badPatterns { for _, pat := range badPatterns {
parsed, err := Parse(bytes.NewBufferString(pat), ".stignore") err := New(true).Parse(bytes.NewBufferString(pat), ".stignore")
if err == nil { if err == nil {
t.Errorf("No error for pattern %q: %v", pat, parsed) t.Errorf("No error for pattern %q", pat)
} }
} }
} }
func TestCaseSensitivity(t *testing.T) { func TestCaseSensitivity(t *testing.T) {
ign, _ := Parse(bytes.NewBufferString("test"), ".stignore") ign := New(true)
err := ign.Parse(bytes.NewBufferString("test"), ".stignore")
if err != nil {
t.Error(err)
}
match := []string{"test"} match := []string{"test"}
dontMatch := []string{"foo"} dontMatch := []string{"foo"}
@ -170,7 +176,8 @@ func TestCaching(t *testing.T) {
fd2.WriteString("/y/\n") fd2.WriteString("/y/\n")
pats, err := Load(fd1.Name(), true) pats := New(true)
err = pats.Load(fd1.Name())
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -193,9 +200,9 @@ func TestCaching(t *testing.T) {
t.Fatal("Expected 4 cached results") t.Fatal("Expected 4 cached results")
} }
// Reload file, expect old outcomes to be provided // Reload file, expect old outcomes to be preserved
pats, err = Load(fd1.Name(), true) err = pats.Load(fd1.Name())
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -207,7 +214,7 @@ func TestCaching(t *testing.T) {
fd2.WriteString("/z/\n") fd2.WriteString("/z/\n")
pats, err = Load(fd1.Name(), true) err = pats.Load(fd1.Name())
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -222,9 +229,9 @@ func TestCaching(t *testing.T) {
pats.Match(letter) pats.Match(letter)
} }
// Verify that outcomes provided on next laod // Verify that outcomes preserved on next laod
pats, err = Load(fd1.Name(), true) err = pats.Load(fd1.Name())
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -236,7 +243,7 @@ func TestCaching(t *testing.T) {
fd1.WriteString("/a/\n") fd1.WriteString("/a/\n")
pats, err = Load(fd1.Name(), true) err = pats.Load(fd1.Name())
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -252,7 +259,7 @@ func TestCaching(t *testing.T) {
// Verify that outcomes provided on next laod // Verify that outcomes provided on next laod
pats, err = Load(fd1.Name(), true) err = pats.Load(fd1.Name())
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -273,7 +280,11 @@ func TestCommentsAndBlankLines(t *testing.T) {
` `
pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore") pats := New(true)
err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
t.Error(err)
}
if len(pats.patterns) > 0 { if len(pats.patterns) > 0 {
t.Errorf("Expected no patterns") t.Errorf("Expected no patterns")
} }
@ -297,7 +308,11 @@ flamingo
*.crow *.crow
*.crow *.crow
` `
pats, _ := Parse(bytes.NewBufferString(stignore), ".stignore") pats := New(false)
err := pats.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
b.Error(err)
}
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
@ -335,7 +350,8 @@ flamingo
} }
// Load the patterns // Load the patterns
pats, err := Load(fd.Name(), true) pats := New(true)
err = pats.Load(fd.Name())
if err != nil { if err != nil {
b.Fatal(err) b.Fatal(err)
} }
@ -344,7 +360,7 @@ flamingo
// This load should now load the cached outcomes as the set of patterns // This load should now load the cached outcomes as the set of patterns
// has not changed. // has not changed.
pats, err = Load(fd.Name(), true) err = pats.Load(fd.Name())
if err != nil { if err != nil {
b.Fatal(err) b.Fatal(err)
} }
@ -370,7 +386,8 @@ func TestCacheReload(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
pats, err := Load(fd.Name(), true) pats := New(true)
err = pats.Load(fd.Name())
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -402,7 +419,7 @@ func TestCacheReload(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
pats, err = Load(fd.Name(), true) err = pats.Load(fd.Name())
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -419,3 +436,85 @@ func TestCacheReload(t *testing.T) {
t.Error("Unexpected non-match for f3") t.Error("Unexpected non-match for f3")
} }
} }
func TestHash(t *testing.T) {
p1 := New(true)
err := p1.Load("testdata/.stignore")
if err != nil {
t.Fatal(err)
}
// Same list of patterns as testdata/.stignore, after expansion
stignore := `
dir2/dfile
dir3
bfile
dir1/cfile
**/efile
/ffile
lost+found
`
p2 := New(true)
err = p2.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
t.Fatal(err)
}
// Not same list of patterns
stignore = `
dir2/dfile
dir3
bfile
dir1/cfile
/ffile
lost+found
`
p3 := New(true)
err = p3.Parse(bytes.NewBufferString(stignore), ".stignore")
if err != nil {
t.Fatal(err)
}
if p1.Hash() == "" {
t.Error("p1 hash blank")
}
if p2.Hash() == "" {
t.Error("p2 hash blank")
}
if p3.Hash() == "" {
t.Error("p3 hash blank")
}
if p1.Hash() != p2.Hash() {
t.Error("p1-p2 hashes differ")
}
if p1.Hash() == p3.Hash() {
t.Error("p1-p3 hashes same")
}
}
func TestHashOfEmpty(t *testing.T) {
p1 := New(true)
err := p1.Load("testdata/.stignore")
if err != nil {
t.Fatal(err)
}
firstHash := p1.Hash()
// Reloading with a non-existent file should empty the patterns and
// recalculate the hash. d41d8cd98f00b204e9800998ecf8427e is the md5 of
// nothing.
p1.Load("file/does/not/exist")
secondHash := p1.Hash()
if firstHash == secondHash {
t.Error("hash did not change")
}
if secondHash != "d41d8cd98f00b204e9800998ecf8427e" {
t.Error("second hash is not hash of empty string")
}
if len(p1.patterns) != 0 {
t.Error("there are more than zero patterns")
}
}

View File

@ -1040,7 +1040,8 @@ func (m *Model) AddFolder(cfg config.FolderConfiguration) {
m.deviceFolders[device.DeviceID] = append(m.deviceFolders[device.DeviceID], cfg.ID) m.deviceFolders[device.DeviceID] = append(m.deviceFolders[device.DeviceID], cfg.ID)
} }
ignores, _ := ignore.Load(filepath.Join(cfg.Path, ".stignore"), m.cfg.Options().CacheIgnoredFiles) ignores := ignore.New(m.cfg.Options().CacheIgnoredFiles)
_ = ignores.Load(filepath.Join(cfg.Path, ".stignore")) // Ignore error, there might not be an .stignore
m.folderIgnores[cfg.ID] = ignores m.folderIgnores[cfg.ID] = ignores
m.addedFolder = true m.addedFolder = true
@ -1083,8 +1084,8 @@ func (m *Model) ScanFolderSub(folder, sub string) error {
fs, ok := m.folderFiles[folder] fs, ok := m.folderFiles[folder]
dir := m.folderCfgs[folder].Path dir := m.folderCfgs[folder].Path
ignores, _ := ignore.Load(filepath.Join(dir, ".stignore"), m.cfg.Options().CacheIgnoredFiles) ignores := m.folderIgnores[folder]
m.folderIgnores[folder] = ignores _ = ignores.Load(filepath.Join(dir, ".stignore")) // Ignore error, there might not be an .stignore
w := &scanner.Walker{ w := &scanner.Walker{
Dir: dir, Dir: dir,

View File

@ -58,7 +58,8 @@ func init() {
} }
func TestWalkSub(t *testing.T) { func TestWalkSub(t *testing.T) {
ignores, err := ignore.Load("testdata/.stignore", false) ignores := ignore.New(false)
err := ignores.Load("testdata/.stignore")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -93,7 +94,8 @@ func TestWalkSub(t *testing.T) {
} }
func TestWalk(t *testing.T) { func TestWalk(t *testing.T) {
ignores, err := ignore.Load("testdata/.stignore", false) ignores := ignore.New(false)
err := ignores.Load("testdata/.stignore")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }