walker: Remove ignoreTrees functionality

It was only used in two places:
- stats: apparently as a minor performance optimization, which is
  unlikely to be important
- find: filtered directories would be ignored. However, this
  optimization missed that it is possible that two directories have the
  exact same content. Such directories would be incorrectly ignored too.
  Example:
```
mkdir test test/a test/b
restic backup test
restic find latest test/b
-> incorrectly does not return anything
```

Thus, remove the functionality as it's apparently too complex to use
correctly.
This commit is contained in:
Michael Eischer 2024-01-06 13:59:47 +01:00
parent 03e06d0797
commit 0b39940fdb
7 changed files with 77 additions and 281 deletions

View File

@ -0,0 +1,6 @@
Bugfix: `find` ignored directories in some cases
In some cases, the `find` command ignored empty or moved directories. This has
been fixed.
https://github.com/restic/restic/pull/4615

View File

@ -244,13 +244,12 @@ func (s *statefulOutput) Finish() {
// Finder bundles information needed to find a file or directory.
type Finder struct {
repo restic.Repository
pat findPattern
out statefulOutput
ignoreTrees restic.IDSet
blobIDs map[string]struct{}
treeIDs map[string]struct{}
itemsFound int
repo restic.Repository
pat findPattern
out statefulOutput
blobIDs map[string]struct{}
treeIDs map[string]struct{}
itemsFound int
}
func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error {
@ -261,17 +260,17 @@ func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error
}
f.out.newsn = sn
return walker.Walk(ctx, f.repo, *sn.Tree, f.ignoreTrees, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
return walker.Walk(ctx, f.repo, *sn.Tree, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) error {
if err != nil {
debug.Log("Error loading tree %v: %v", parentTreeID, err)
Printf("Unable to load tree %s\n ... which belongs to snapshot %s\n", parentTreeID, sn.ID())
return false, walker.ErrSkipNode
return walker.ErrSkipNode
}
if node == nil {
return false, nil
return nil
}
normalizedNodepath := nodepath
@ -284,7 +283,7 @@ func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error
for _, pat := range f.pat.pattern {
found, err := filter.Match(pat, normalizedNodepath)
if err != nil {
return false, err
return err
}
if found {
foundMatch = true
@ -292,16 +291,13 @@ func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error
}
}
var (
ignoreIfNoMatch = true
errIfNoMatch error
)
var errIfNoMatch error
if node.Type == "dir" {
var childMayMatch bool
for _, pat := range f.pat.pattern {
mayMatch, err := filter.ChildMatch(pat, normalizedNodepath)
if err != nil {
return false, err
return err
}
if mayMatch {
childMayMatch = true
@ -310,30 +306,27 @@ func (f *Finder) findInSnapshot(ctx context.Context, sn *restic.Snapshot) error
}
if !childMayMatch {
ignoreIfNoMatch = true
errIfNoMatch = walker.ErrSkipNode
} else {
ignoreIfNoMatch = false
}
}
if !foundMatch {
return ignoreIfNoMatch, errIfNoMatch
return errIfNoMatch
}
if !f.pat.oldest.IsZero() && node.ModTime.Before(f.pat.oldest) {
debug.Log(" ModTime is older than %s\n", f.pat.oldest)
return ignoreIfNoMatch, errIfNoMatch
return errIfNoMatch
}
if !f.pat.newest.IsZero() && node.ModTime.After(f.pat.newest) {
debug.Log(" ModTime is newer than %s\n", f.pat.newest)
return ignoreIfNoMatch, errIfNoMatch
return errIfNoMatch
}
debug.Log(" found match\n")
f.out.PrintPattern(nodepath, node)
return false, nil
return nil
})
}
@ -345,17 +338,17 @@ func (f *Finder) findIDs(ctx context.Context, sn *restic.Snapshot) error {
}
f.out.newsn = sn
return walker.Walk(ctx, f.repo, *sn.Tree, f.ignoreTrees, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
return walker.Walk(ctx, f.repo, *sn.Tree, func(parentTreeID restic.ID, nodepath string, node *restic.Node, err error) error {
if err != nil {
debug.Log("Error loading tree %v: %v", parentTreeID, err)
Printf("Unable to load tree %s\n ... which belongs to snapshot %s\n", parentTreeID, sn.ID())
return false, walker.ErrSkipNode
return walker.ErrSkipNode
}
if node == nil {
return false, nil
return nil
}
if node.Type == "dir" && f.treeIDs != nil {
@ -373,7 +366,7 @@ func (f *Finder) findIDs(ctx context.Context, sn *restic.Snapshot) error {
// looking for blobs)
if f.itemsFound >= len(f.treeIDs) && f.blobIDs == nil {
// Return an error to terminate the Walk
return true, errors.New("OK")
return errors.New("OK")
}
}
}
@ -394,7 +387,7 @@ func (f *Finder) findIDs(ctx context.Context, sn *restic.Snapshot) error {
}
}
return false, nil
return nil
})
}
@ -593,10 +586,9 @@ func runFind(ctx context.Context, opts FindOptions, gopts GlobalOptions, args []
}
f := &Finder{
repo: repo,
pat: pat,
out: statefulOutput{ListLong: opts.ListLong, HumanReadable: opts.HumanReadable, JSON: gopts.JSON},
ignoreTrees: restic.NewIDSet(),
repo: repo,
pat: pat,
out: statefulOutput{ListLong: opts.ListLong, HumanReadable: opts.HumanReadable, JSON: gopts.JSON},
}
if opts.BlobID {

View File

@ -230,12 +230,12 @@ func runLs(ctx context.Context, opts LsOptions, gopts GlobalOptions, args []stri
printSnapshot(sn)
err = walker.Walk(ctx, repo, *sn.Tree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
err = walker.Walk(ctx, repo, *sn.Tree, func(_ restic.ID, nodepath string, node *restic.Node, err error) error {
if err != nil {
return false, err
return err
}
if node == nil {
return false, nil
return nil
}
if withinDir(nodepath) {
@ -245,22 +245,22 @@ func runLs(ctx context.Context, opts LsOptions, gopts GlobalOptions, args []stri
// if recursive listing is requested, signal the walker that it
// should continue walking recursively
if opts.Recursive {
return false, nil
return nil
}
}
// if there's an upcoming match deeper in the tree (but we're not
// there yet), signal the walker to descend into any subdirs
if approachingMatchingTree(nodepath) {
return false, nil
return nil
}
// otherwise, signal the walker to not walk recursively into any
// subdirs
if node.Type == "dir" {
return false, walker.ErrSkipNode
return walker.ErrSkipNode
}
return false, nil
return nil
})
if err != nil {

View File

@ -203,7 +203,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
}
hardLinkIndex := restorer.NewHardlinkIndex[struct{}]()
err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, opts, stats, hardLinkIndex))
err := walker.Walk(ctx, repo, *snapshot.Tree, statsWalkTree(repo, opts, stats, hardLinkIndex))
if err != nil {
return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err)
}
@ -212,12 +212,12 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest
}
func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContainer, hardLinkIndex *restorer.HardlinkIndex[struct{}]) walker.WalkFunc {
return func(parentTreeID restic.ID, npath string, node *restic.Node, nodeErr error) (bool, error) {
return func(parentTreeID restic.ID, npath string, node *restic.Node, nodeErr error) error {
if nodeErr != nil {
return true, nodeErr
return nodeErr
}
if node == nil {
return true, nil
return nil
}
if opts.countMode == countModeUniqueFilesByContents || opts.countMode == countModeBlobsPerFile {
@ -247,7 +247,7 @@ func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContai
// is always a data blob since we're accessing it via a file's Content array
blobSize, found := repo.LookupBlobSize(blobID, restic.DataBlob)
if !found {
return true, fmt.Errorf("blob %s not found for tree %s", blobID, parentTreeID)
return fmt.Errorf("blob %s not found for tree %s", blobID, parentTreeID)
}
// count the blob's size, then add this blob by this
@ -274,11 +274,9 @@ func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContai
hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{})
stats.TotalSize += node.Size
}
return false, nil
}
return true, nil
return nil
}
}

View File

@ -70,27 +70,27 @@ func sendNodes(ctx context.Context, repo restic.Repository, root *restic.Node, c
return nil
}
err := walker.Walk(ctx, repo, *root.Subtree, nil, func(_ restic.ID, nodepath string, node *restic.Node, err error) (bool, error) {
err := walker.Walk(ctx, repo, *root.Subtree, func(_ restic.ID, nodepath string, node *restic.Node, err error) error {
if err != nil {
return false, err
return err
}
if node == nil {
return false, nil
return nil
}
node.Path = path.Join(root.Path, nodepath)
if !IsFile(node) && !IsDir(node) && !IsLink(node) {
return false, nil
return nil
}
select {
case ch <- node:
case <-ctx.Done():
return false, ctx.Err()
return ctx.Err()
}
return false, nil
return nil
})
return err

View File

@ -21,21 +21,14 @@ var ErrSkipNode = errors.New("skip this node")
// When the special value ErrSkipNode is returned and node is a dir node, it is
// not walked. When the node is not a dir node, the remaining items in this
// tree are skipped.
//
// Setting ignore to true tells Walk that it should not visit the node again.
// For tree nodes, this means that the function is not called for the
// referenced tree. If the node is not a tree, and all nodes in the current
// tree have ignore set to true, the current tree will not be visited again.
// When err is not nil and different from ErrSkipNode, the value returned for
// ignore is ignored.
type WalkFunc func(parentTreeID restic.ID, path string, node *restic.Node, nodeErr error) (ignore bool, err error)
type WalkFunc func(parentTreeID restic.ID, path string, node *restic.Node, nodeErr error) (err error)
// Walk calls walkFn recursively for each node in root. If walkFn returns an
// error, it is passed up the call stack. The trees in ignoreTrees are not
// walked. If walkFn ignores trees, these are added to the set.
func Walk(ctx context.Context, repo restic.BlobLoader, root restic.ID, ignoreTrees restic.IDSet, walkFn WalkFunc) error {
func Walk(ctx context.Context, repo restic.BlobLoader, root restic.ID, walkFn WalkFunc) error {
tree, err := restic.LoadTree(ctx, repo, root)
_, err = walkFn(root, "/", nil, err)
err = walkFn(root, "/", nil, err)
if err != nil {
if err == ErrSkipNode {
@ -44,24 +37,13 @@ func Walk(ctx context.Context, repo restic.BlobLoader, root restic.ID, ignoreTre
return err
}
if ignoreTrees == nil {
ignoreTrees = restic.NewIDSet()
}
_, err = walk(ctx, repo, "/", root, tree, ignoreTrees, walkFn)
return err
return walk(ctx, repo, "/", root, tree, walkFn)
}
// walk recursively traverses the tree, ignoring subtrees when the ID of the
// subtree is in ignoreTrees. If err is nil and ignore is true, the subtree ID
// will be added to ignoreTrees by walk.
func walk(ctx context.Context, repo restic.BlobLoader, prefix string, parentTreeID restic.ID, tree *restic.Tree, ignoreTrees restic.IDSet, walkFn WalkFunc) (ignore bool, err error) {
var allNodesIgnored = true
if len(tree.Nodes) == 0 {
allNodesIgnored = false
}
func walk(ctx context.Context, repo restic.BlobLoader, prefix string, parentTreeID restic.ID, tree *restic.Tree, walkFn WalkFunc) (err error) {
sort.Slice(tree.Nodes, func(i, j int) bool {
return tree.Nodes[i].Name < tree.Nodes[j].Name
})
@ -70,68 +52,40 @@ func walk(ctx context.Context, repo restic.BlobLoader, prefix string, parentTree
p := path.Join(prefix, node.Name)
if node.Type == "" {
return false, errors.Errorf("node type is empty for node %q", node.Name)
return errors.Errorf("node type is empty for node %q", node.Name)
}
if node.Type != "dir" {
ignore, err := walkFn(parentTreeID, p, node, nil)
err := walkFn(parentTreeID, p, node, nil)
if err != nil {
if err == ErrSkipNode {
// skip the remaining entries in this tree
return allNodesIgnored, nil
return nil
}
return false, err
}
if !ignore {
allNodesIgnored = false
return err
}
continue
}
if node.Subtree == nil {
return false, errors.Errorf("subtree for node %v in tree %v is nil", node.Name, p)
}
if ignoreTrees.Has(*node.Subtree) {
continue
return errors.Errorf("subtree for node %v in tree %v is nil", node.Name, p)
}
subtree, err := restic.LoadTree(ctx, repo, *node.Subtree)
ignore, err := walkFn(parentTreeID, p, node, err)
err = walkFn(parentTreeID, p, node, err)
if err != nil {
if err == ErrSkipNode {
if ignore {
ignoreTrees.Insert(*node.Subtree)
}
continue
}
return false, err
}
if ignore {
ignoreTrees.Insert(*node.Subtree)
}
if !ignore {
allNodesIgnored = false
}
ignore, err = walk(ctx, repo, p, *node.Subtree, subtree, ignoreTrees, walkFn)
err = walk(ctx, repo, p, *node.Subtree, subtree, walkFn)
if err != nil {
return false, err
}
if ignore {
ignoreTrees.Insert(*node.Subtree)
}
if !ignore {
allNodesIgnored = false
return err
}
}
return allNodesIgnored, nil
return nil
}

View File

@ -99,22 +99,22 @@ type checkFunc func(t testing.TB) (walker WalkFunc, final func(testing.TB))
func checkItemOrder(want []string) checkFunc {
pos := 0
return func(t testing.TB) (walker WalkFunc, final func(testing.TB)) {
walker = func(treeID restic.ID, path string, node *restic.Node, err error) (bool, error) {
walker = func(treeID restic.ID, path string, node *restic.Node, err error) error {
if err != nil {
t.Errorf("error walking %v: %v", path, err)
return false, err
return err
}
if pos >= len(want) {
t.Errorf("additional unexpected path found: %v", path)
return false, nil
return nil
}
if path != want[pos] {
t.Errorf("wrong path found, want %q, got %q", want[pos], path)
}
pos++
return false, nil
return nil
}
final = func(t testing.TB) {
@ -131,22 +131,22 @@ func checkItemOrder(want []string) checkFunc {
func checkParentTreeOrder(want []string) checkFunc {
pos := 0
return func(t testing.TB) (walker WalkFunc, final func(testing.TB)) {
walker = func(treeID restic.ID, path string, node *restic.Node, err error) (bool, error) {
walker = func(treeID restic.ID, path string, node *restic.Node, err error) error {
if err != nil {
t.Errorf("error walking %v: %v", path, err)
return false, err
return err
}
if pos >= len(want) {
t.Errorf("additional unexpected parent tree ID found: %v", treeID)
return false, nil
return nil
}
if treeID.String() != want[pos] {
t.Errorf("wrong parent tree ID found, want %q, got %q", want[pos], treeID.String())
}
pos++
return false, nil
return nil
}
final = func(t testing.TB) {
@ -165,15 +165,15 @@ func checkSkipFor(skipFor map[string]struct{}, wantPaths []string) checkFunc {
var pos int
return func(t testing.TB) (walker WalkFunc, final func(testing.TB)) {
walker = func(treeID restic.ID, path string, node *restic.Node, err error) (bool, error) {
walker = func(treeID restic.ID, path string, node *restic.Node, err error) error {
if err != nil {
t.Errorf("error walking %v: %v", path, err)
return false, err
return err
}
if pos >= len(wantPaths) {
t.Errorf("additional unexpected path found: %v", path)
return false, nil
return nil
}
if path != wantPaths[pos] {
@ -182,50 +182,10 @@ func checkSkipFor(skipFor map[string]struct{}, wantPaths []string) checkFunc {
pos++
if _, ok := skipFor[path]; ok {
return false, ErrSkipNode
return ErrSkipNode
}
return false, nil
}
final = func(t testing.TB) {
if pos != len(wantPaths) {
t.Errorf("wrong number of paths returned, want %d, got %d", len(wantPaths), pos)
}
}
return walker, final
}
}
// checkIgnore returns ErrSkipNode if path is in skipFor and sets ignore according
// to ignoreFor. It checks that the paths the walk func is called for are exactly
// the ones in wantPaths.
func checkIgnore(skipFor map[string]struct{}, ignoreFor map[string]bool, wantPaths []string) checkFunc {
var pos int
return func(t testing.TB) (walker WalkFunc, final func(testing.TB)) {
walker = func(treeID restic.ID, path string, node *restic.Node, err error) (bool, error) {
if err != nil {
t.Errorf("error walking %v: %v", path, err)
return false, err
}
if pos >= len(wantPaths) {
t.Errorf("additional unexpected path found: %v", path)
return ignoreFor[path], nil
}
if path != wantPaths[pos] {
t.Errorf("wrong path found, want %q, got %q", wantPaths[pos], path)
}
pos++
if _, ok := skipFor[path]; ok {
return ignoreFor[path], ErrSkipNode
}
return ignoreFor[path], nil
return nil
}
final = func(t testing.TB) {
@ -272,16 +232,6 @@ func TestWalker(t *testing.T) {
"/subdir",
},
),
checkIgnore(
map[string]struct{}{}, map[string]bool{
"/subdir": true,
}, []string{
"/",
"/foo",
"/subdir",
"/subdir/subfile",
},
),
},
},
{
@ -409,81 +359,6 @@ func TestWalker(t *testing.T) {
"57ee8960c7a86859b090a76e5d013f83d10c0ce11d5460076ca8468706f784ab", // tree /subdir3
"c2efeff7f217a4dfa12a16e8bb3cefedd37c00873605c29e5271c6061030672f", // tree /
}),
checkIgnore(
map[string]struct{}{
"/subdir1": {},
}, map[string]bool{
"/subdir1": true,
}, []string{
"/",
"/foo",
"/subdir1",
"/zzz other",
},
),
checkIgnore(
map[string]struct{}{}, map[string]bool{
"/subdir1": true,
}, []string{
"/",
"/foo",
"/subdir1",
"/subdir1/subfile1",
"/subdir1/subfile2",
"/subdir1/subfile3",
"/zzz other",
},
),
checkIgnore(
map[string]struct{}{
"/subdir2": {},
}, map[string]bool{
"/subdir2": true,
}, []string{
"/",
"/foo",
"/subdir1",
"/subdir1/subfile1",
"/subdir1/subfile2",
"/subdir1/subfile3",
"/subdir2",
"/zzz other",
},
),
checkIgnore(
map[string]struct{}{}, map[string]bool{
"/subdir1/subfile1": true,
"/subdir1/subfile2": true,
"/subdir1/subfile3": true,
}, []string{
"/",
"/foo",
"/subdir1",
"/subdir1/subfile1",
"/subdir1/subfile2",
"/subdir1/subfile3",
"/zzz other",
},
),
checkIgnore(
map[string]struct{}{}, map[string]bool{
"/subdir2/subfile1": true,
"/subdir2/subfile2": true,
"/subdir2/subfile3": true,
}, []string{
"/",
"/foo",
"/subdir1",
"/subdir1/subfile1",
"/subdir1/subfile2",
"/subdir1/subfile3",
"/subdir2",
"/subdir2/subfile1",
"/subdir2/subfile2",
"/subdir2/subfile3",
"/zzz other",
},
),
},
},
{
@ -513,35 +388,6 @@ func TestWalker(t *testing.T) {
}),
},
},
{
tree: TestTree{
"subdir1": TestTree{},
"subdir2": TestTree{},
"subdir3": TestTree{
"file": TestFile{},
},
"subdir4": TestTree{},
"subdir5": TestTree{
"file": TestFile{},
},
"subdir6": TestTree{},
},
checks: []checkFunc{
checkIgnore(
map[string]struct{}{}, map[string]bool{
"/subdir2": true,
}, []string{
"/",
"/subdir1",
"/subdir2",
"/subdir3",
"/subdir3/file",
"/subdir5",
"/subdir5/file",
},
),
},
},
}
for _, test := range tests {
@ -553,7 +399,7 @@ func TestWalker(t *testing.T) {
defer cancel()
fn, last := check(t)
err := Walk(ctx, repo, root, restic.NewIDSet(), fn)
err := Walk(ctx, repo, root, fn)
if err != nil {
t.Error(err)
}