2
2
mirror of https://github.com/octoleo/restic.git synced 2024-11-05 04:47:51 +00:00
restic/internal/filter/filter.go
Michael Eischer 0acc3c5923 filter: special case patterns without globbing characters
In case a part of a path is a simple string, we can just check for
equality without complex parsing in filepath.Match.

name                          old time/op    new time/op    delta
FilterLines-4                   34.8ms ±17%    41.2ms ±23%  +18.36%  (p=0.000 n=10+10)
FilterPatterns/Relative-4       21.7ms ± 6%    12.1ms ±23%  -44.46%  (p=0.000 n=10+10)
FilterPatterns/Absolute-4       10.0ms ± 5%     9.1ms ±11%   -9.80%  (p=0.006 n=10+9)
FilterPatterns/Wildcard-4       47.0ms ± 7%    42.2ms ± 5%  -10.19%  (p=0.000 n=9+10)
FilterPatterns/ManyNoMatch-4     190ms ± 1%     131ms ±20%  -31.47%  (p=0.000 n=8+10)

name                          old alloc/op   new alloc/op   delta
FilterPatterns/Relative-4       3.57MB ± 0%    3.57MB ± 0%     ~     (p=0.870 n=9+9)
FilterPatterns/Absolute-4       3.57MB ± 0%    3.57MB ± 0%     ~     (p=0.145 n=10+10)
FilterPatterns/Wildcard-4       14.3MB ± 0%    19.7MB ± 0%  +37.91%  (p=0.000 n=10+10)
FilterPatterns/ManyNoMatch-4    3.57MB ± 0%    3.57MB ± 0%     ~     (p=0.421 n=10+9)

name                          old allocs/op  new allocs/op  delta
FilterPatterns/Relative-4        22.2k ± 0%     22.2k ± 0%     ~     (all equal)
FilterPatterns/Absolute-4        22.2k ± 0%     22.2k ± 0%     ~     (all equal)
FilterPatterns/Wildcard-4        88.7k ± 0%     88.7k ± 0%     ~     (all equal)
FilterPatterns/ManyNoMatch-4     22.2k ± 0%     22.2k ± 0%     ~     (all equal)
2020-10-07 20:55:43 +02:00

260 lines
6.1 KiB
Go

package filter
import (
"path/filepath"
"strings"
"github.com/restic/restic/internal/errors"
)
// ErrBadString is the error reported for an empty path string. It is produced
// by prepareStr and therefore surfaces from Match, ChildMatch, List and
// ListWithChild whenever they get as far as splitting an empty str.
var ErrBadString = errors.New("filter.Match: string is empty")
// patternPart is one '/'-separated component of a prepared filter pattern.
type patternPart struct {
// pattern is the text of this component.
pattern string
// isSimple is set by preparePattern when the component contains none of the
// characters `\[]*?`; match then compares such a component with == instead
// of calling filepath.Match.
isSimple bool
}
// Pattern represents a preparsed filter pattern: the pattern string split at
// '/' into its parts, as built by preparePattern.
type Pattern []patternPart
// prepareStr splits the path str into its components. Every
// filepath.Separator is converted to '/' first, then the string is cut at
// each '/'. The empty string is rejected with ErrBadString.
func prepareStr(str string) ([]string, error) {
	if len(str) == 0 {
		return nil, ErrBadString
	}

	// normalise to '/' so the split below behaves the same on every platform
	return strings.Split(filepath.ToSlash(str), "/"), nil
}
// preparePattern converts pattern into its preparsed form. The pattern is
// normalised with filepath.Clean, its separators are turned into '/', and
// every '/'-separated part is stored together with a flag that records
// whether the part is free of the characters `\[]*?`.
func preparePattern(pattern string) Pattern {
	cleaned := filepath.ToSlash(filepath.Clean(pattern))
	segments := strings.Split(cleaned, "/")

	prepared := make(Pattern, 0, len(segments))
	for _, seg := range segments {
		prepared = append(prepared, patternPart{
			pattern:  seg,
			isSimple: !strings.ContainsAny(seg, "\\[]*?"),
		})
	}
	return prepared
}
// Match returns true if str matches the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything, when str is the empty string ErrBadString is returned.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator.
//
// In addition patterns suitable for filepath.Match, pattern accepts a
// recursive wildcard '**', which greedily matches an arbitrary number of
// intermediate directories.
func Match(pattern, str string) (matched bool, err error) {
if pattern == "" {
return true, nil
}
patterns := preparePattern(pattern)
strs, err := prepareStr(str)
if err != nil {
return false, err
}
return match(patterns, strs)
}
// ChildMatch returns true if children of str can match the pattern. When the pattern is
// malformed, filepath.ErrBadPattern is returned. The empty pattern matches
// everything, when str is the empty string ErrBadString is returned.
//
// Pattern can be a combination of patterns suitable for filepath.Match, joined
// by filepath.Separator.
//
// In addition patterns suitable for filepath.Match, pattern accepts a
// recursive wildcard '**', which greedily matches an arbitrary number of
// intermediate directories.
func ChildMatch(pattern, str string) (matched bool, err error) {
if pattern == "" {
return true, nil
}
patterns := preparePattern(pattern)
strs, err := prepareStr(str)
if err != nil {
return false, err
}
return childMatch(patterns, strs)
}
// childMatch reports whether something below the path strs could still match
// patterns.
//
// A pattern whose first part is not empty (a relative pattern) can be nested
// at any depth, so the answer is always true. For a pattern with an empty
// first part (an absolute pattern) the path is matched against the leading
// parts of the pattern only; the first '**' and everything after it are left
// out of the comparison.
func childMatch(patterns Pattern, strs []string) (matched bool, err error) {
	if patterns[0].pattern != "" {
		// relative pattern can always be nested down
		return true, nil
	}

	// cut the path off where the double wildcard starts
	if found, idx := hasDoubleWildcard(patterns); found && idx <= len(strs) {
		strs = strs[:idx]
	}

	// compare against as many leading pattern parts as the path has components
	n := len(patterns)
	if len(strs) < n {
		n = len(strs)
	}
	return match(patterns[:n], strs)
}
// hasDoubleWildcard looks for the first part of list that is exactly "**".
// It returns true and the index of that part, or false and 0 when list
// contains no such part.
func hasDoubleWildcard(list Pattern) (ok bool, pos int) {
	for pos = 0; pos < len(list); pos++ {
		if list[pos].pattern == "**" {
			return true, pos
		}
	}
	return false, 0
}
// match reports whether the path components strs match the prepared pattern
// patterns.
//
// If patterns contains "**", its first occurrence is replaced in turn by
// zero, one, two, ... "*" parts and every expansion is matched recursively;
// the first expansion that matches wins. A pattern without "**" is compared
// part by part with a window of strs of the same length: a pattern whose
// first part is empty (an absolute pattern) is only tried at the start of
// strs, any other pattern is tried at every offset. Parts flagged isSimple
// are compared with ==, all other parts with filepath.Match.
//
// An empty pattern matches only an empty strs. An error reported by
// filepath.Match is returned after passing it through errors.Wrap.
func match(patterns Pattern, strs []string) (matched bool, err error) {
	if ok, pos := hasDoubleWildcard(patterns); ok {
		// gradually expand '**' into separate wildcards
		buf := make(Pattern, len(strs))
		// copy static prefix once
		copy(buf, patterns[:pos])
		for i := 0; i <= len(strs)-len(patterns)+1; i++ {
			// limit to static prefix and already appended '*'
			expanded := buf[:pos+i]
			// in the first iteration the wildcard expands to nothing
			if i > 0 {
				expanded[pos+i-1] = patternPart{"*", false}
			}
			expanded = append(expanded, patterns[pos+1:]...)

			hit, merr := match(expanded, strs)
			if merr != nil {
				return false, merr
			}
			if hit {
				return true, nil
			}
		}
		return false, nil
	}

	// An empty pattern matches nothing but an empty path. This case is
	// reached when a pattern consisting only of '**' expands to nothing, and
	// must be handled before patterns[0] is inspected below.
	if len(patterns) == 0 {
		return len(strs) == 0, nil
	}

	// a pattern with more parts than the path has components cannot match
	if len(patterns) > len(strs) {
		return false, nil
	}

	maxOffset := len(strs) - len(patterns)
	// special case absolute patterns
	if patterns[0].pattern == "" {
		maxOffset = 0
	}

outer:
	for offset := maxOffset; offset >= 0; offset-- {
		// compare back to front, giving up on this offset at the first mismatch
		for i := len(patterns) - 1; i >= 0; i-- {
			var ok bool
			if patterns[i].isSimple {
				ok = patterns[i].pattern == strs[offset+i]
			} else {
				ok, err = filepath.Match(patterns[i].pattern, strs[offset+i])
				if err != nil {
					return false, errors.Wrap(err, "Match")
				}
			}
			if !ok {
				continue outer
			}
		}
		return true, nil
	}

	return false, nil
}
// ParsePatterns prepares a list of patterns for use with List. Empty strings
// in patterns are skipped; the returned slice is never nil.
func ParsePatterns(patterns []string) []Pattern {
	parsed := []Pattern{}
	for i := range patterns {
		if patterns[i] != "" {
			parsed = append(parsed, preparePattern(patterns[i]))
		}
	}
	return parsed
}
// List reports whether str matches at least one of patterns. An empty
// patterns list never matches. With a non-empty list, an empty str yields
// ErrBadString.
func List(patterns []Pattern, str string) (matched bool, err error) {
	// child matching is not requested, so the second result carries no information
	hit, _, err := list(patterns, false, str)
	return hit, err
}
// ListWithChild reports in matched whether str matches at least one of
// patterns, and in childMayMatch whether childMatch holds for at least one of
// them, i.e. whether children of str may match. An empty patterns list yields
// false for both. With a non-empty list, an empty str yields ErrBadString.
func ListWithChild(patterns []Pattern, str string) (matched bool, childMayMatch bool, err error) {
	matched, childMayMatch, err = list(patterns, true, str)
	return matched, childMayMatch, err
}
// list is the shared implementation of List and ListWithChild. It splits str
// once and checks it against every pattern in turn. matched is set as soon as
// one pattern matches. childMayMatch is set as soon as childMatch succeeds
// for one pattern; when checkChildMatches is false childMatch is not called
// and childMayMatch is simply reported as true. The loop ends early once both
// results are true.
//
// An empty patterns list yields false, false without looking at str. The
// first error from prepareStr, match or childMatch aborts the scan.
func list(patterns []Pattern, checkChildMatches bool, str string) (matched bool, childMayMatch bool, err error) {
	if len(patterns) == 0 {
		return false, false, nil
	}

	parts, err := prepareStr(str)
	if err != nil {
		return false, false, err
	}

	for i := range patterns {
		hit, merr := match(patterns[i], parts)
		if merr != nil {
			return false, false, merr
		}

		// without child checks every pattern counts as "children may match"
		mayNest := !checkChildMatches
		if checkChildMatches {
			if mayNest, merr = childMatch(patterns[i], parts); merr != nil {
				return false, false, merr
			}
		}

		if hit {
			matched = true
		}
		if mayNest {
			childMayMatch = true
		}

		// nothing left to learn from the remaining patterns
		if matched && childMayMatch {
			return true, true, nil
		}
	}

	return matched, childMayMatch, nil
}