walker: extend TreeRewriter to support snapshot repairing

This adds support for caching already rewritten trees, handling tree load
errors, and disabling the check that re-serializing a tree does not lead to
data loss.
This commit is contained in:
Michael Eischer 2022-12-28 11:34:55 +01:00
parent 38dac78180
commit 1bd1f3008d
3 changed files with 161 additions and 12 deletions

View File

@ -95,6 +95,7 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti
Verbosef(fmt.Sprintf("excluding %s\n", path)) Verbosef(fmt.Sprintf("excluding %s\n", path))
return nil return nil
}, },
DisableNodeCache: true,
}) })
return filterAndReplaceSnapshot(ctx, repo, sn, return filterAndReplaceSnapshot(ctx, repo, sn,

View File

@ -10,26 +10,45 @@ import (
) )
type NodeRewriteFunc func(node *restic.Node, path string) *restic.Node type NodeRewriteFunc func(node *restic.Node, path string) *restic.Node
// FailedTreeRewriteFunc is called by TreeRewriter.RewriteTree when the tree
// nodeID at path could not be loaded; err is the load error. The returned ID
// and error are passed on unchanged as the result of RewriteTree for that tree.
type FailedTreeRewriteFunc func(nodeID restic.ID, path string, err error) (restic.ID, error)
type RewriteOpts struct { type RewriteOpts struct {
// return nil to remove the node // return nil to remove the node
RewriteNode NodeRewriteFunc RewriteNode NodeRewriteFunc
// decide what to do with a tree that could not be loaded. The returned ID is used in place of the unloadable tree; a non-nil error aborts the rewrite. By default the load error is returned which causes the operation to fail.
RewriteFailedTree FailedTreeRewriteFunc
AllowUnstableSerialization bool
DisableNodeCache bool
} }
// idMap maps the ID of a tree that was already rewritten to the ID of its
// rewritten replacement. TreeRewriter uses it as a cache so that identical
// subtrees are only processed once.
type idMap map[restic.ID]restic.ID
type TreeRewriter struct { type TreeRewriter struct {
opts RewriteOpts opts RewriteOpts
replaces idMap
} }
func NewTreeRewriter(opts RewriteOpts) *TreeRewriter { func NewTreeRewriter(opts RewriteOpts) *TreeRewriter {
rw := &TreeRewriter{ rw := &TreeRewriter{
opts: opts, opts: opts,
} }
if !opts.DisableNodeCache {
rw.replaces = make(idMap)
}
// setup default implementations // setup default implementations
if rw.opts.RewriteNode == nil { if rw.opts.RewriteNode == nil {
rw.opts.RewriteNode = func(node *restic.Node, path string) *restic.Node { rw.opts.RewriteNode = func(node *restic.Node, path string) *restic.Node {
return node return node
} }
} }
if rw.opts.RewriteFailedTree == nil {
// fail with error by default
rw.opts.RewriteFailedTree = func(nodeID restic.ID, path string, err error) (restic.ID, error) {
return restic.ID{}, err
}
}
return rw return rw
} }
@ -39,20 +58,29 @@ type BlobLoadSaver interface {
} }
func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, nodepath string, nodeID restic.ID) (newNodeID restic.ID, err error) { func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, nodepath string, nodeID restic.ID) (newNodeID restic.ID, err error) {
curTree, err := restic.LoadTree(ctx, repo, nodeID) // check if tree was already changed
if err != nil { newID, ok := t.replaces[nodeID]
return restic.ID{}, err if ok {
return newID, nil
} }
// check that we can properly encode this tree without losing information // a nil nodeID will lead to a load error
// The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use curTree, err := restic.LoadTree(ctx, repo, nodeID)
// a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144
testID, err := restic.SaveTree(ctx, repo, curTree)
if err != nil { if err != nil {
return restic.ID{}, err return t.opts.RewriteFailedTree(nodeID, nodepath, err)
} }
if nodeID != testID {
return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath) if !t.opts.AllowUnstableSerialization {
// check that we can properly encode this tree without losing information
// The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use
// a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144
testID, err := restic.SaveTree(ctx, repo, curTree)
if err != nil {
return restic.ID{}, err
}
if nodeID != testID {
return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath)
}
} }
debug.Log("filterTree: %s, nodeId: %s\n", nodepath, nodeID.Str()) debug.Log("filterTree: %s, nodeId: %s\n", nodepath, nodeID.Str())
@ -72,7 +100,12 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, node
} }
continue continue
} }
newID, err := t.RewriteTree(ctx, repo, path, *node.Subtree) // treat nil as null id
var subtree restic.ID
if node.Subtree != nil {
subtree = *node.Subtree
}
newID, err := t.RewriteTree(ctx, repo, path, subtree)
if err != nil { if err != nil {
return restic.ID{}, err return restic.ID{}, err
} }
@ -90,6 +123,9 @@ func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, node
// Save new tree // Save new tree
newTreeID, _, _, err := repo.SaveBlob(ctx, restic.TreeBlob, tree, restic.ID{}, false) newTreeID, _, _, err := repo.SaveBlob(ctx, restic.TreeBlob, tree, restic.ID{}, false)
if t.replaces != nil {
t.replaces[nodeID] = newTreeID
}
if !newTreeID.Equal(nodeID) { if !newTreeID.Equal(nodeID) {
debug.Log("filterTree: save new tree for %s as %v\n", nodepath, newTreeID) debug.Log("filterTree: save new tree for %s as %v\n", nodepath, newTreeID)
} }

View File

@ -7,6 +7,7 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
) )
// WritableTreeMap also supports saving // WritableTreeMap also supports saving
@ -69,7 +70,7 @@ func checkRewriteItemOrder(want []string) checkRewriteFunc {
} }
// checkRewriteSkips excludes nodes if path is in skipFor, it checks that rewriting proceeds in the correct order. // checkRewriteSkips excludes nodes if path is in skipFor, it checks that rewriting proceeds in the correct order.
func checkRewriteSkips(skipFor map[string]struct{}, want []string) checkRewriteFunc { func checkRewriteSkips(skipFor map[string]struct{}, want []string, disableCache bool) checkRewriteFunc {
var pos int var pos int
return func(t testing.TB) (rewriter *TreeRewriter, final func(testing.TB)) { return func(t testing.TB) (rewriter *TreeRewriter, final func(testing.TB)) {
@ -91,6 +92,7 @@ func checkRewriteSkips(skipFor map[string]struct{}, want []string) checkRewriteF
} }
return node return node
}, },
DisableNodeCache: disableCache,
}) })
final = func(t testing.TB) { final = func(t testing.TB) {
@ -160,6 +162,7 @@ func TestRewriter(t *testing.T) {
"/subdir", "/subdir",
"/subdir/subfile", "/subdir/subfile",
}, },
false,
), ),
}, },
{ // exclude dir { // exclude dir
@ -180,6 +183,7 @@ func TestRewriter(t *testing.T) {
"/foo", "/foo",
"/subdir", "/subdir",
}, },
false,
), ),
}, },
{ // modify node { // modify node
@ -197,6 +201,75 @@ func TestRewriter(t *testing.T) {
}, },
check: checkIncreaseNodeSize(21), check: checkIncreaseNodeSize(21),
}, },
{ // test cache
tree: TestTree{
// both subdirs are identical
"subdir1": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
"subdir2": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
},
newTree: TestTree{
"subdir1": TestTree{
"subfile2": TestFile{},
},
"subdir2": TestTree{
"subfile2": TestFile{},
},
},
check: checkRewriteSkips(
map[string]struct{}{
"/subdir1/subfile": {},
},
[]string{
"/subdir1",
"/subdir1/subfile",
"/subdir1/subfile2",
"/subdir2",
},
false,
),
},
{ // test disabled cache
tree: TestTree{
// both subdirs are identical
"subdir1": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
"subdir2": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
},
newTree: TestTree{
"subdir1": TestTree{
"subfile2": TestFile{},
},
"subdir2": TestTree{
"subfile": TestFile{},
"subfile2": TestFile{},
},
},
check: checkRewriteSkips(
map[string]struct{}{
"/subdir1/subfile": {},
},
[]string{
"/subdir1",
"/subdir1/subfile",
"/subdir1/subfile2",
"/subdir2",
"/subdir2/subfile",
"/subdir2/subfile2",
},
true,
),
},
} }
for _, test := range tests { for _, test := range tests {
@ -251,4 +324,43 @@ func TestRewriterFailOnUnknownFields(t *testing.T) {
if err == nil { if err == nil {
t.Error("missing error on unknown field") t.Error("missing error on unknown field")
} }
// check that the serialization check can be disabled
rewriter = NewTreeRewriter(RewriteOpts{
AllowUnstableSerialization: true,
})
root, err := rewriter.RewriteTree(ctx, tm, "/", id)
test.OK(t, err)
_, expRoot := BuildTreeMap(TestTree{
"subfile": TestFile{},
})
test.Assert(t, root == expRoot, "mismatched trees")
}
func TestRewriterTreeLoadError(t *testing.T) {
tm := WritableTreeMap{TreeMap{}}
id := restic.NewRandomID()
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
// also check that load errors by default cause the operation to fail
rewriter := NewTreeRewriter(RewriteOpts{})
_, err := rewriter.RewriteTree(ctx, tm, "/", id)
if err == nil {
t.Fatal("missing error on unloadable tree")
}
replacementID := restic.NewRandomID()
rewriter = NewTreeRewriter(RewriteOpts{
RewriteFailedTree: func(nodeID restic.ID, path string, err error) (restic.ID, error) {
if nodeID != id || path != "/" {
t.Fail()
}
return replacementID, nil
},
})
newRoot, err := rewriter.RewriteTree(ctx, tm, "/", id)
test.OK(t, err)
test.Equals(t, replacementID, newRoot)
} }