2
2
mirror of https://github.com/octoleo/restic.git synced 2024-12-01 17:23:57 +00:00

Merge pull request #2997 from MichaelEischer/faster-excludes

Speedup exclude/include checking
This commit is contained in:
Alexander Neumann 2020-11-02 11:07:21 +01:00 committed by GitHub
commit 6509c207f4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 246 additions and 87 deletions

View File

@ -140,13 +140,15 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error {
return nil return nil
} }
excludePatterns := filter.ParsePatterns(opts.Exclude)
insensitiveExcludePatterns := filter.ParsePatterns(opts.InsensitiveExclude)
selectExcludeFilter := func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { selectExcludeFilter := func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
matched, _, err := filter.List(opts.Exclude, item) matched, err := filter.List(excludePatterns, item)
if err != nil { if err != nil {
Warnf("error for exclude pattern: %v", err) Warnf("error for exclude pattern: %v", err)
} }
matchedInsensitive, _, err := filter.List(opts.InsensitiveExclude, strings.ToLower(item)) matchedInsensitive, err := filter.List(insensitiveExcludePatterns, strings.ToLower(item))
if err != nil { if err != nil {
Warnf("error for iexclude pattern: %v", err) Warnf("error for iexclude pattern: %v", err)
} }
@ -161,13 +163,15 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error {
return selectedForRestore, childMayBeSelected return selectedForRestore, childMayBeSelected
} }
includePatterns := filter.ParsePatterns(opts.Include)
insensitiveIncludePatterns := filter.ParsePatterns(opts.InsensitiveInclude)
selectIncludeFilter := func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { selectIncludeFilter := func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
matched, childMayMatch, err := filter.List(opts.Include, item) matched, childMayMatch, err := filter.ListWithChild(includePatterns, item)
if err != nil { if err != nil {
Warnf("error for include pattern: %v", err) Warnf("error for include pattern: %v", err)
} }
matchedInsensitive, childMayMatchInsensitive, err := filter.List(opts.InsensitiveInclude, strings.ToLower(item)) matchedInsensitive, childMayMatchInsensitive, err := filter.ListWithChild(insensitiveIncludePatterns, strings.ToLower(item))
if err != nil { if err != nil {
Warnf("error for iexclude pattern: %v", err) Warnf("error for iexclude pattern: %v", err)
} }

View File

@ -74,8 +74,9 @@ type RejectFunc func(path string, fi os.FileInfo) bool
// rejectByPattern returns a RejectByNameFunc which rejects files that match // rejectByPattern returns a RejectByNameFunc which rejects files that match
// one of the patterns. // one of the patterns.
func rejectByPattern(patterns []string) RejectByNameFunc { func rejectByPattern(patterns []string) RejectByNameFunc {
parsedPatterns := filter.ParsePatterns(patterns)
return func(item string) bool { return func(item string) bool {
matched, _, err := filter.List(patterns, item) matched, err := filter.List(parsedPatterns, item)
if err != nil { if err != nil {
Warnf("error for exclude pattern: %v", err) Warnf("error for exclude pattern: %v", err)
} }

View File

@ -11,6 +11,47 @@ import (
// second argument. // second argument.
var ErrBadString = errors.New("filter.Match: string is empty") var ErrBadString = errors.New("filter.Match: string is empty")
// patternPart is a single path component of a prepared filter pattern.
type patternPart struct {
	// pattern is the text of this component. The first component is "/" for
	// an absolute pattern, and a "**" component is stored as "".
	pattern string // First is "/" for absolute pattern; "" for "**".
	// isSimple is true when the component contains none of the characters
	// `\[]*?`, so it can be compared with plain string equality instead of
	// filepath.Match.
	isSimple bool
}
// Pattern represents a preparsed filter pattern. It holds one patternPart
// per path component of the cleaned pattern string.
type Pattern []patternPart
func prepareStr(str string) ([]string, error) {
if str == "" {
return nil, ErrBadString
}
return splitPath(str), nil
}
// preparePattern cleans pattern, splits it into path components and converts
// every component into a patternPart.
func preparePattern(pattern string) Pattern {
	components := splitPath(filepath.Clean(pattern))
	prepared := make(Pattern, 0, len(components))
	for _, component := range components {
		// Components without any glob metacharacter are flagged so that
		// they can later be compared by plain string equality.
		simple := !strings.ContainsAny(component, "\\[]*?")
		if component == "**" {
			// Store "**" as the empty string; hasDoubleWildcard then only
			// needs a cheap comparison against "" to detect it.
			component = ""
		}
		prepared = append(prepared, patternPart{pattern: component, isSimple: simple})
	}
	return prepared
}
// splitPath breaks p into its path components after converting OS-specific
// separators to "/". If p has been Cleaned, none of the components is empty.
// An absolute path is marked by "/" as its first component.
func splitPath(p string) []string {
	components := strings.Split(filepath.ToSlash(p), "/")
	// A leading separator produces an empty first element; replace it with
	// "/" so that absolute paths can be recognized by their first component.
	if len(components[0]) == 0 {
		components[0] = "/"
	}
	return components
}
// Match returns true if str matches the pattern. When the pattern is // Match returns true if str matches the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches // malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything, when str is the empty string ErrBadString is returned. // everything, when str is the empty string ErrBadString is returned.
@ -26,21 +67,13 @@ func Match(pattern, str string) (matched bool, err error) {
return true, nil return true, nil
} }
pattern = filepath.Clean(pattern) patterns := preparePattern(pattern)
strs, err := prepareStr(str)
if str == "" { if err != nil {
return false, ErrBadString return false, err
} }
// convert file path separator to '/'
if filepath.Separator != '/' {
pattern = strings.Replace(pattern, string(filepath.Separator), "/", -1)
str = strings.Replace(str, string(filepath.Separator), "/", -1)
}
patterns := strings.Split(pattern, "/")
strs := strings.Split(str, "/")
return match(patterns, strs) return match(patterns, strs)
} }
@ -59,26 +92,18 @@ func ChildMatch(pattern, str string) (matched bool, err error) {
return true, nil return true, nil
} }
pattern = filepath.Clean(pattern) patterns := preparePattern(pattern)
strs, err := prepareStr(str)
if str == "" { if err != nil {
return false, ErrBadString return false, err
} }
// convert file path separator to '/'
if filepath.Separator != '/' {
pattern = strings.Replace(pattern, string(filepath.Separator), "/", -1)
str = strings.Replace(str, string(filepath.Separator), "/", -1)
}
patterns := strings.Split(pattern, "/")
strs := strings.Split(str, "/")
return childMatch(patterns, strs) return childMatch(patterns, strs)
} }
func childMatch(patterns, strs []string) (matched bool, err error) { func childMatch(patterns Pattern, strs []string) (matched bool, err error) {
if patterns[0] != "" { if patterns[0].pattern != "/" {
// relative pattern can always be nested down // relative pattern can always be nested down
return true, nil return true, nil
} }
@ -99,9 +124,9 @@ func childMatch(patterns, strs []string) (matched bool, err error) {
return match(patterns[0:l], strs) return match(patterns[0:l], strs)
} }
func hasDoubleWildcard(list []string) (ok bool, pos int) { func hasDoubleWildcard(list Pattern) (ok bool, pos int) {
for i, item := range list { for i, item := range list {
if item == "**" { if item.pattern == "" {
return true, i return true, i
} }
} }
@ -109,14 +134,18 @@ func hasDoubleWildcard(list []string) (ok bool, pos int) {
return false, 0 return false, 0
} }
func match(patterns, strs []string) (matched bool, err error) { func match(patterns Pattern, strs []string) (matched bool, err error) {
if ok, pos := hasDoubleWildcard(patterns); ok { if ok, pos := hasDoubleWildcard(patterns); ok {
// gradually expand '**' into separate wildcards // gradually expand '**' into separate wildcards
for i := 0; i <= len(strs)-len(patterns)+1; i++ { newPat := make(Pattern, len(strs))
newPat := make([]string, pos) // copy static prefix once
copy(newPat, patterns[:pos]) copy(newPat, patterns[:pos])
for k := 0; k < i; k++ { for i := 0; i <= len(strs)-len(patterns)+1; i++ {
newPat = append(newPat, "*") // limit to static prefix and already appended '*'
newPat := newPat[:pos+i]
// in the first iteration the wildcard expands to nothing
if i > 0 {
newPat[pos+i-1] = patternPart{"*", false}
} }
newPat = append(newPat, patterns[pos+1:]...) newPat = append(newPat, patterns[pos+1:]...)
@ -138,14 +167,28 @@ func match(patterns, strs []string) (matched bool, err error) {
} }
if len(patterns) <= len(strs) { if len(patterns) <= len(strs) {
minOffset := 0
maxOffset := len(strs) - len(patterns)
// special case absolute patterns
if patterns[0].pattern == "/" {
maxOffset = 0
} else if strs[0] == "/" {
// skip absolute path marker if pattern is not rooted
minOffset = 1
}
outer: outer:
for offset := len(strs) - len(patterns); offset >= 0; offset-- { for offset := maxOffset; offset >= minOffset; offset-- {
for i := len(patterns) - 1; i >= 0; i-- { for i := len(patterns) - 1; i >= 0; i-- {
ok, err := filepath.Match(patterns[i], strs[offset+i]) var ok bool
if patterns[i].isSimple {
ok = patterns[i].pattern == strs[offset+i]
} else {
ok, err = filepath.Match(patterns[i].pattern, strs[offset+i])
if err != nil { if err != nil {
return false, errors.Wrap(err, "Match") return false, errors.Wrap(err, "Match")
} }
}
if !ok { if !ok {
continue outer continue outer
@ -159,23 +202,56 @@ func match(patterns, strs []string) (matched bool, err error) {
return false, nil return false, nil
} }
// List returns true if str matches one of the patterns. Empty patterns are // ParsePatterns prepares a list of patterns for use with List.
// ignored. func ParsePatterns(patterns []string) []Pattern {
func List(patterns []string, str string) (matched bool, childMayMatch bool, err error) { patpat := make([]Pattern, 0)
for _, pat := range patterns { for _, pat := range patterns {
if pat == "" { if pat == "" {
continue continue
} }
m, err := Match(pat, str) pats := preparePattern(pat)
patpat = append(patpat, pats)
}
return patpat
}
// List reports whether str matches at least one of the prepared patterns.
// The patterns are expected to come from ParsePatterns, which drops empty
// pattern strings. With a non-empty pattern list, an error is returned if
// str is empty or if a pattern turns out to be malformed during matching.
func List(patterns []Pattern, str string) (matched bool, err error) {
	// Only the match result is of interest here, so the child check is
	// disabled and its result discarded.
	isMatch, _, listErr := list(patterns, false, str)
	return isMatch, listErr
}
// ListWithChild reports whether str matches at least one of the prepared
// patterns. In addition, childMayMatch is true if the child-match check
// succeeds for at least one pattern. The patterns are expected to come from
// ParsePatterns, which drops empty pattern strings.
func ListWithChild(patterns []Pattern, str string) (matched bool, childMayMatch bool, err error) {
	matched, childMayMatch, err = list(patterns, true, str)
	return matched, childMayMatch, err
}
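// list returns true if str matches one of the patterns. The child-match check
// is only performed when checkChildMatches is set; otherwise every pattern is
// treated as if a child may match.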
func list(patterns []Pattern, checkChildMatches bool, str string) (matched bool, childMayMatch bool, err error) {
if len(patterns) == 0 {
return false, false, nil
}
strs, err := prepareStr(str)
if err != nil {
return false, false, err
}
for _, pat := range patterns {
m, err := match(pat, strs)
if err != nil { if err != nil {
return false, false, err return false, false, err
} }
c, err := ChildMatch(pat, str) var c bool
if checkChildMatches {
c, err = childMatch(pat, strs)
if err != nil { if err != nil {
return false, false, err return false, false, err
} }
} else {
c = true
}
matched = matched || m matched = matched || m
childMayMatch = childMayMatch || c childMayMatch = childMayMatch || c

View File

@ -243,22 +243,25 @@ var filterListTests = []struct {
patterns []string patterns []string
path string path string
match bool match bool
childMatch bool
}{ }{
{[]string{"*.go"}, "/foo/bar/test.go", true}, {[]string{}, "/foo/bar/test.go", false, false},
{[]string{"*.c"}, "/foo/bar/test.go", false}, {[]string{"*.go"}, "/foo/bar/test.go", true, true},
{[]string{"*.go", "*.c"}, "/foo/bar/test.go", true}, {[]string{"*.c"}, "/foo/bar/test.go", false, true},
{[]string{"*"}, "/foo/bar/test.go", true}, {[]string{"*.go", "*.c"}, "/foo/bar/test.go", true, true},
{[]string{"x"}, "/foo/bar/test.go", false}, {[]string{"*"}, "/foo/bar/test.go", true, true},
{[]string{"?"}, "/foo/bar/test.go", false}, {[]string{"x"}, "/foo/bar/test.go", false, true},
{[]string{"?", "x"}, "/foo/bar/x", true}, {[]string{"?"}, "/foo/bar/test.go", false, true},
{[]string{"/*/*/bar/test.*"}, "/foo/bar/test.go", false}, {[]string{"?", "x"}, "/foo/bar/x", true, true},
{[]string{"/*/*/bar/test.*", "*.go"}, "/foo/bar/test.go", true}, {[]string{"/*/*/bar/test.*"}, "/foo/bar/test.go", false, false},
{[]string{"", "*.c"}, "/foo/bar/test.go", false}, {[]string{"/*/*/bar/test.*", "*.go"}, "/foo/bar/test.go", true, true},
{[]string{"", "*.c"}, "/foo/bar/test.go", false, true},
} }
func TestList(t *testing.T) { func TestList(t *testing.T) {
for i, test := range filterListTests { for i, test := range filterListTests {
match, _, err := filter.List(test.patterns, test.path) patterns := filter.ParsePatterns(test.patterns)
match, err := filter.List(patterns, test.path)
if err != nil { if err != nil {
t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v", t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v",
i, test.patterns, err) i, test.patterns, err)
@ -266,19 +269,64 @@ func TestList(t *testing.T) {
} }
if match != test.match { if match != test.match {
t.Errorf("test %d: filter.MatchList(%q, %q): expected %v, got %v", t.Errorf("test %d: filter.List(%q, %q): expected %v, got %v",
i, test.patterns, test.path, test.match, match) i, test.patterns, test.path, test.match, match)
} }
match, childMatch, err := filter.ListWithChild(patterns, test.path)
if err != nil {
t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v",
i, test.patterns, err)
continue
}
if match != test.match || childMatch != test.childMatch {
t.Errorf("test %d: filter.ListWithChild(%q, %q): expected %v, %v, got %v, %v",
i, test.patterns, test.path, test.match, test.childMatch, match, childMatch)
}
} }
} }
func ExampleList() { func ExampleList() {
match, _, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go") patterns := filter.ParsePatterns([]string{"*.c", "*.go"})
match, _ := filter.List(patterns, "/home/user/file.go")
fmt.Printf("match: %v\n", match) fmt.Printf("match: %v\n", match)
// Output: // Output:
// match: true // match: true
} }
// TestInvalidStrs verifies that Match, ChildMatch and List all report an
// error when the path to check is the empty string.
func TestInvalidStrs(t *testing.T) {
	if _, err := filter.Match("test", ""); err == nil {
		t.Error("Match accepted invalid path")
	}

	if _, err := filter.ChildMatch("test", ""); err == nil {
		t.Error("ChildMatch accepted invalid path")
	}

	parsed := filter.ParsePatterns([]string{"test"})
	if _, err := filter.List(parsed, ""); err == nil {
		t.Error("List accepted invalid path")
	}
}
// TestInvalidPattern verifies that List reports an error for malformed
// patterns (an unclosed '['), both without and with a "**" component.
func TestInvalidPattern(t *testing.T) {
	for _, pattern := range []string{"test/[", "test/**/["} {
		parsed := filter.ParsePatterns([]string{pattern})
		if _, err := filter.List(parsed, "test/example"); err == nil {
			t.Error("List accepted invalid pattern")
		}
	}
}
func extractTestLines(t testing.TB) (lines []string) { func extractTestLines(t testing.TB) (lines []string) {
f, err := os.Open("testdata/libreoffice.txt.bz2") f, err := os.Open("testdata/libreoffice.txt.bz2")
if err != nil { if err != nil {
@ -360,19 +408,47 @@ func BenchmarkFilterLines(b *testing.B) {
} }
func BenchmarkFilterPatterns(b *testing.B) { func BenchmarkFilterPatterns(b *testing.B) {
patterns := []string{ lines := extractTestLines(b)
modlines := make([]string, 200)
for i, line := range lines {
if i >= len(modlines) {
break
}
modlines[i] = line + "-does-not-match"
}
tests := []struct {
name string
patterns []filter.Pattern
matches uint
}{
{"Relative", filter.ParsePatterns([]string{
"does-not-match",
"sdk/*", "sdk/*",
"*.html", "*.html",
}), 22185},
{"Absolute", filter.ParsePatterns([]string{
"/etc",
"/home/*/test",
"/usr/share/doc/libreoffice/sdk/docs/java",
}), 150},
{"Wildcard", filter.ParsePatterns([]string{
"/etc/**/example",
"/home/**/test",
"/usr/**/java",
}), 150},
{"ManyNoMatch", filter.ParsePatterns(modlines), 0},
} }
lines := extractTestLines(b)
var c uint
for _, test := range tests {
b.Run(test.name, func(b *testing.B) {
var c uint
b.ReportAllocs()
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
c = 0 c = 0
for _, line := range lines { for _, line := range lines {
match, _, err := filter.List(patterns, line) match, err := filter.List(test.patterns, line)
if err != nil { if err != nil {
b.Fatal(err) b.Fatal(err)
} }
@ -382,8 +458,10 @@ func BenchmarkFilterPatterns(b *testing.B) {
} }
} }
if c != 22185 { if c != test.matches {
b.Fatalf("wrong number of matches: expected 22185, got %d", c) b.Fatalf("wrong number of matches: expected %d, got %d", test.matches, c)
} }
} }
})
}
} }