From 1bd1f3008ddf74f6f702c1c9c2b7c86d938e83f6 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Wed, 28 Dec 2022 11:34:55 +0100 Subject: [PATCH] walker: extend TreeRewriter to support snapshot repairing This adds support for caching already rewritten trees, handling of load errors and disabling the check that the serialization doesn't lead to data loss. --- cmd/restic/cmd_rewrite.go | 1 + internal/walker/rewriter.go | 58 +++++++++++++--- internal/walker/rewriter_test.go | 114 ++++++++++++++++++++++++++++++- 3 files changed, 161 insertions(+), 12 deletions(-) diff --git a/cmd/restic/cmd_rewrite.go b/cmd/restic/cmd_rewrite.go index a60fdc8fc..e5c65850d 100644 --- a/cmd/restic/cmd_rewrite.go +++ b/cmd/restic/cmd_rewrite.go @@ -95,6 +95,7 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti Verbosef(fmt.Sprintf("excluding %s\n", path)) return nil }, + DisableNodeCache: true, }) return filterAndReplaceSnapshot(ctx, repo, sn, diff --git a/internal/walker/rewriter.go b/internal/walker/rewriter.go index 48f16a53a..649857032 100644 --- a/internal/walker/rewriter.go +++ b/internal/walker/rewriter.go @@ -10,26 +10,45 @@ import ( ) type NodeRewriteFunc func(node *restic.Node, path string) *restic.Node +type FailedTreeRewriteFunc func(nodeID restic.ID, path string, err error) (restic.ID, error) type RewriteOpts struct { // return nil to remove the node RewriteNode NodeRewriteFunc + // decide what to do with a tree that could not be loaded. Return nil to remove the node. By default the load error is returned which causes the operation to fail. + RewriteFailedTree FailedTreeRewriteFunc + + AllowUnstableSerialization bool + DisableNodeCache bool } +type idMap map[restic.ID]restic.ID + type TreeRewriter struct { opts RewriteOpts + + replaces idMap } func NewTreeRewriter(opts RewriteOpts) *TreeRewriter { rw := &TreeRewriter{ opts: opts, } + if !opts.DisableNodeCache { + rw.replaces = make(idMap) + } // setup default implementations if rw.opts.RewriteNode == nil { rw.opts.RewriteNode = func(node *restic.Node, path string) *restic.Node { return node } } + if rw.opts.RewriteFailedTree == nil { + // fail with error by default + rw.opts.RewriteFailedTree = func(nodeID restic.ID, path string, err error) (restic.ID, error) { + return restic.ID{}, err + } + } return rw } @@ -39,20 +58,29 @@ type BlobLoadSaver interface { } func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, nodepath string, nodeID restic.ID) (newNodeID restic.ID, err error) { - curTree, err := restic.LoadTree(ctx, repo, nodeID) - if err != nil { - return restic.ID{}, err + // check if tree was already changed + newID, ok := t.replaces[nodeID] + if ok { + return newID, nil } - // check that we can properly encode this tree without losing information - // The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use - // a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144 - testID, err := restic.SaveTree(ctx, repo, curTree) + // a nil nodeID will lead to a load error + curTree, err := restic.LoadTree(ctx, repo, nodeID) if err != nil { - return restic.ID{}, err + return t.opts.RewriteFailedTree(nodeID, nodepath, err) } - if nodeID != testID { - return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath) + + if !t.opts.AllowUnstableSerialization { + // check that we can properly encode this tree without losing information + // The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use + // a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144 + testID, err := restic.SaveTree(ctx, repo, curTree) + if err != nil { + return restic.ID{}, err + } + if nodeID != testID { + return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath) + } } debug.Log("filterTree: %s, nodeId: %s\n", nodepath, nodeID.Str()) @@ -72,7 +100,12 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, node } continue } - newID, err := t.RewriteTree(ctx, repo, path, *node.Subtree) + // treat nil as null id + var subtree restic.ID + if node.Subtree != nil { + subtree = *node.Subtree + } + newID, err := t.RewriteTree(ctx, repo, path, subtree) if err != nil { return restic.ID{}, err } @@ -90,6 +123,9 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, node // Save new tree newTreeID, _, _, err := repo.SaveBlob(ctx, restic.TreeBlob, tree, restic.ID{}, false) + if t.replaces != nil { + t.replaces[nodeID] = newTreeID + } if !newTreeID.Equal(nodeID) { debug.Log("filterTree: save new tree for %s as %v\n", nodepath, newTreeID) } diff --git a/internal/walker/rewriter_test.go b/internal/walker/rewriter_test.go index 8f99fe9bd..07ce5f72f 100644 --- a/internal/walker/rewriter_test.go +++ b/internal/walker/rewriter_test.go @@ -7,6 +7,7 @@ import ( "github.com/pkg/errors" "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/test" ) // WritableTreeMap also support saving @@ -69,7 +70,7 @@ func checkRewriteItemOrder(want []string) checkRewriteFunc { } // checkRewriteSkips excludes nodes if path is in skipFor, it checks that rewriting proceedes in the correct order. -func checkRewriteSkips(skipFor map[string]struct{}, want []string) checkRewriteFunc { +func checkRewriteSkips(skipFor map[string]struct{}, want []string, disableCache bool) checkRewriteFunc { var pos int return func(t testing.TB) (rewriter *TreeRewriter, final func(testing.TB)) { @@ -91,6 +92,7 @@ func checkRewriteSkips(skipFor map[string]struct{}, want []string) checkRewriteF } return node }, + DisableNodeCache: disableCache, }) final = func(t testing.TB) { @@ -160,6 +162,7 @@ func TestRewriter(t *testing.T) { "/subdir", "/subdir/subfile", }, + false, ), }, { // exclude dir @@ -180,6 +183,7 @@ func TestRewriter(t *testing.T) { "/foo", "/subdir", }, + false, ), }, { // modify node @@ -197,6 +201,75 @@ func TestRewriter(t *testing.T) { }, check: checkIncreaseNodeSize(21), }, + { // test cache + tree: TestTree{ + // both subdirs are identical + "subdir1": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + "subdir2": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + }, + newTree: TestTree{ + "subdir1": TestTree{ + "subfile2": TestFile{}, + }, + "subdir2": TestTree{ + "subfile2": TestFile{}, + }, + }, + check: checkRewriteSkips( + map[string]struct{}{ + "/subdir1/subfile": {}, + }, + []string{ + "/subdir1", + "/subdir1/subfile", + "/subdir1/subfile2", + "/subdir2", + }, + false, + ), + }, + { // test disabled cache + tree: TestTree{ + // both subdirs are identical + "subdir1": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + "subdir2": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + }, + newTree: TestTree{ + "subdir1": TestTree{ + "subfile2": TestFile{}, + }, + "subdir2": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + }, + check: checkRewriteSkips( + map[string]struct{}{ + "/subdir1/subfile": {}, + }, + []string{ + "/subdir1", + "/subdir1/subfile", + "/subdir1/subfile2", + "/subdir2", + "/subdir2/subfile", + "/subdir2/subfile2", + }, + true, + ), + }, } for _, test := range tests { @@ -251,4 +324,43 @@ func TestRewriterFailOnUnknownFields(t *testing.T) { if err == nil { t.Error("missing error on unknown field") } + + // check that the serialization check can be disabled + rewriter = NewTreeRewriter(RewriteOpts{ + AllowUnstableSerialization: true, + }) + root, err := rewriter.RewriteTree(ctx, tm, "/", id) + test.OK(t, err) + _, expRoot := BuildTreeMap(TestTree{ + "subfile": TestFile{}, + }) + test.Assert(t, root == expRoot, "mismatched trees") +} + +func TestRewriterTreeLoadError(t *testing.T) { + tm := WritableTreeMap{TreeMap{}} + id := restic.NewRandomID() + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + // also check that load error by default cause the operation to fail + rewriter := NewTreeRewriter(RewriteOpts{}) + _, err := rewriter.RewriteTree(ctx, tm, "/", id) + if err == nil { + t.Fatal("missing error on unloadable tree") + } + + replacementID := restic.NewRandomID() + rewriter = NewTreeRewriter(RewriteOpts{ + RewriteFailedTree: func(nodeID restic.ID, path string, err error) (restic.ID, error) { + if nodeID != id || path != "/" { + t.Fail() + } + return replacementID, nil + }, + }) + newRoot, err := rewriter.RewriteTree(ctx, tm, "/", id) + test.OK(t, err) + test.Equals(t, replacementID, newRoot) }