From 4a36993c193f40cf5bd9788fddb85e63b56653c2 Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Fri, 16 Jun 2017 16:46:16 +0200 Subject: [PATCH] Smarter filter when children won't match This improves restore performance by several orders of magniture by not going through the whole tree recursively when we can anticipate that no match will ever occur. --- cmd/restic/cmd_backup.go | 2 +- cmd/restic/cmd_restore.go | 12 +++--- internal/filter/filter.go | 65 +++++++++++++++++++++++++--- internal/filter/filter_test.go | 77 ++++++++++++++++++++++++++++++++-- internal/restic/restorer.go | 10 ++--- 5 files changed, 145 insertions(+), 21 deletions(-) diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index ba22a07cf..292220cda 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -417,7 +417,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, args []string) error { } selectFilter := func(item string, fi os.FileInfo) bool { - matched, err := filter.List(opts.Excludes, item) + matched, _, err := filter.List(opts.Excludes, item) if err != nil { Warnf("error for exclude pattern: %v", err) } diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index 4348b1bbb..a8fed66a4 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -113,22 +113,22 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { return nil } - selectExcludeFilter := func(item string, dstpath string, node *restic.Node) bool { - matched, err := filter.List(opts.Exclude, item) + selectExcludeFilter := func(item string, dstpath string, node *restic.Node) (bool, bool) { + matched, childMayMatch, err := filter.List(opts.Exclude, item) if err != nil { Warnf("error for exclude pattern: %v", err) } - return !matched + return !matched, childMayMatch } - selectIncludeFilter := func(item string, dstpath string, node *restic.Node) bool { - matched, err := filter.List(opts.Include, item) + selectIncludeFilter := func(item string, dstpath string, node *restic.Node) (bool, bool) { + matched, childMayMatch, err := filter.List(opts.Include, item) if err != nil { Warnf("error for include pattern: %v", err) } - return matched + return matched, childMayMatch } if len(opts.Exclude) > 0 { diff --git a/internal/filter/filter.go b/internal/filter/filter.go index 8ed8d1ef7..0b25c98c4 100644 --- a/internal/filter/filter.go +++ b/internal/filter/filter.go @@ -40,6 +40,51 @@ func Match(pattern, str string) (matched bool, err error) { return match(patterns, strs) } +// ChildMatch returns true if children of str can match the pattern. When the pattern is +// malformed, filepath.ErrBadPattern is returned. The empty pattern matches +// everything, when str is the empty string ErrBadString is returned. +// +// Pattern can be a combination of patterns suitable for filepath.Match, joined +// by filepath.Separator. +func ChildMatch(pattern, str string) (matched bool, err error) { + if pattern == "" { + return true, nil + } + + pattern = filepath.Clean(pattern) + + if str == "" { + return false, ErrBadString + } + + // convert file path separator to '/' + if filepath.Separator != '/' { + pattern = strings.Replace(pattern, string(filepath.Separator), "/", -1) + str = strings.Replace(str, string(filepath.Separator), "/", -1) + } + + patterns := strings.Split(pattern, "/") + strs := strings.Split(str, "/") + + return childMatch(patterns, strs) +} + +func childMatch(patterns, strs []string) (matched bool, err error) { + if patterns[0] != "" { + // relative pattern can always be nested down + return true, nil + } + + // match path against absolute pattern prefix + l := 0 + if len(strs) > len(patterns) { + l = len(patterns) + } else { + l = len(strs) + } + return match(patterns[0:l], strs) +} + func hasDoubleWildcard(list []string) (ok bool, pos int) { for i, item := range list { if item == "**" { @@ -102,21 +147,29 @@ func match(patterns, strs []string) (matched bool, err error) { // List returns true if str matches one of the patterns. Empty patterns are // ignored. -func List(patterns []string, str string) (matched bool, err error) { +func List(patterns []string, str string) (matched bool, childMayMatch bool, err error) { for _, pat := range patterns { if pat == "" { continue } - matched, err = Match(pat, str) + m, err := Match(pat, str) if err != nil { - return false, err + return false, false, err } - if matched { - return true, nil + c, err := ChildMatch(pat, str) + if err != nil { + return false, false, err + } + + matched = matched || m + childMayMatch = childMayMatch || c + + if matched && childMayMatch { + return true, true, nil } } - return false, nil + return matched, childMayMatch, nil } diff --git a/internal/filter/filter_test.go b/internal/filter/filter_test.go index 26d2a272c..5e940369c 100644 --- a/internal/filter/filter_test.go +++ b/internal/filter/filter_test.go @@ -124,6 +124,77 @@ func TestMatch(t *testing.T) { } } +var childMatchTests = []struct { + pattern string + path string + match bool +}{ + {"", "", true}, + {"", "/foo", true}, + {"", "/x/y/z/foo", true}, + {"foo/bar", "/foo", true}, + {"baz/bar", "/foo", true}, + {"foo", "/foo/bar", true}, + {"bar", "/foo", true}, + {"baz", "/foo/bar", true}, + {"*", "/foo", true}, + {"*", "/foo/bar", true}, + {"/foo/bar", "/foo", true}, + {"/foo/bar/baz", "/foo", true}, + {"/foo/bar/baz", "/foo/bar", true}, + {"/foo/bar/baz", "/foo/baz", false}, + {"/foo/**/baz", "/foo/bar/baz", true}, + {"/foo/**/qux", "/foo/bar/baz/qux", true}, + {"/baz/bar", "/foo", false}, + {"/foo", "/foo/bar", true}, + {"/*", "/foo", true}, + {"/*", "/foo/bar", true}, + {"/foo", "/foo/bar", true}, + {"/**", "/foo", true}, + {"/*/**", "/foo", true}, + {"/*/**", "/foo/bar", true}, + {"/*/bar", "/foo", true}, + {"/bar/*", "/foo", false}, + {"/foo/*/baz", "/foo/bar", true}, + {"/foo/*/baz", "/foo/baz", true}, + {"/foo/*/baz", "/bar/baz", false}, + {"/**/*", "/foo", true}, + {"/**/bar", "/foo/bar", true}, +} + +func testchildpattern(t *testing.T, pattern, path string, shouldMatch bool) { + match, err := filter.ChildMatch(pattern, path) + if err != nil { + t.Errorf("test child pattern %q failed: expected no error for path %q, but error returned: %v", + pattern, path, err) + } + + if match != shouldMatch { + t.Errorf("test: filter.ChildMatch(%q, %q): expected %v, got %v", + pattern, path, shouldMatch, match) + } +} + +func TestChildMatch(t *testing.T) { + for _, test := range childMatchTests { + testchildpattern(t, test.pattern, test.path, test.match) + + // Test with native path separator + if filepath.Separator != '/' { + // Test with pattern as native + pattern := strings.Replace(test.pattern, "/", string(filepath.Separator), -1) + testchildpattern(t, pattern, test.path, test.match) + + // Test with path as native + path := strings.Replace(test.path, "/", string(filepath.Separator), -1) + testchildpattern(t, test.pattern, path, test.match) + + // Test with both pattern and path as native + testchildpattern(t, pattern, path, test.match) + } + } +} + func ExampleMatch() { match, _ := filter.Match("*.go", "/home/user/file.go") fmt.Printf("match: %v\n", match) @@ -157,7 +228,7 @@ var filterListTests = []struct { func TestList(t *testing.T) { for i, test := range filterListTests { - match, err := filter.List(test.patterns, test.path) + match, _, err := filter.List(test.patterns, test.path) if err != nil { t.Errorf("test %d failed: expected no error for patterns %q, but error returned: %v", i, test.patterns, err) @@ -172,7 +243,7 @@ func TestList(t *testing.T) { } func ExampleList() { - match, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go") + match, _, _ := filter.List([]string{"*.c", "*.go"}, "/home/user/file.go") fmt.Printf("match: %v\n", match) // Output: // match: true @@ -271,7 +342,7 @@ func BenchmarkFilterPatterns(b *testing.B) { for i := 0; i < b.N; i++ { c = 0 for _, line := range lines { - match, err := filter.List(patterns, line) + match, _, err := filter.List(patterns, line) if err != nil { b.Fatal(err) } diff --git a/internal/restic/restorer.go b/internal/restic/restorer.go index bd5be9c5c..252a4640b 100644 --- a/internal/restic/restorer.go +++ b/internal/restic/restorer.go @@ -17,7 +17,7 @@ type Restorer struct { sn *Snapshot Error func(dir string, node *Node, err error) error - SelectFilter func(item string, dstpath string, node *Node) bool + SelectFilter func(item string, dstpath string, node *Node) (bool, bool) } var restorerAbortOnAllErrors = func(str string, node *Node, err error) error { return err } @@ -26,7 +26,7 @@ var restorerAbortOnAllErrors = func(str string, node *Node, err error) error { r func NewRestorer(repo Repository, id ID) (*Restorer, error) { r := &Restorer{ repo: repo, Error: restorerAbortOnAllErrors, - SelectFilter: func(string, string, *Node) bool { return true }, + SelectFilter: func(string, string, *Node) (bool, bool) { return true, true }, } var err error @@ -46,9 +46,9 @@ func (res *Restorer) restoreTo(ctx context.Context, dst string, dir string, tree } for _, node := range tree.Nodes { - selectedForRestore := res.SelectFilter(filepath.Join(dir, node.Name), + selectedForRestore, childMayMatch := res.SelectFilter(filepath.Join(dir, node.Name), filepath.Join(dst, dir, node.Name), node) - debug.Log("SelectForRestore returned %v", selectedForRestore) + debug.Log("SelectFilter returned %v %v", selectedForRestore, childMayMatch) if selectedForRestore { err := res.restoreNodeTo(ctx, node, dir, dst, idx) @@ -57,7 +57,7 @@ func (res *Restorer) restoreTo(ctx context.Context, dst string, dir string, tree } } - if node.Type == "dir" { + if node.Type == "dir" && childMayMatch { if node.Subtree == nil { return errors.Errorf("Dir without subtree in tree %v", treeID.Str()) }