2
2
mirror of https://github.com/octoleo/restic.git synced 2025-01-09 09:29:51 +00:00

copy: parallelize tree walk

This commit is contained in:
Michael Eischer 2020-11-07 01:41:22 +01:00 committed by Alexander Neumann
parent f2a1b125cb
commit 0caad1e890

View File

@ -6,6 +6,7 @@ import (
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic"
"golang.org/x/sync/errgroup"
"github.com/spf13/cobra"
)
@ -184,39 +185,37 @@ type treeCloner struct {
buf []byte
}
func (t *treeCloner) copyTree(ctx context.Context, treeID restic.ID) error {
// We have already processed this tree
if t.visitedTrees.Has(treeID) {
return nil
}
func (t *treeCloner) copyTree(ctx context.Context, rootTreeID restic.ID) error {
wg, ctx := errgroup.WithContext(ctx)
tree, err := t.srcRepo.LoadTree(ctx, treeID)
if err != nil {
return fmt.Errorf("LoadTree(%v) returned error %v", treeID.Str(), err)
}
treeStream := restic.StreamTrees(ctx, wg, t.srcRepo, restic.IDs{rootTreeID}, func(treeID restic.ID) bool {
visited := t.visitedTrees.Has(treeID)
t.visitedTrees.Insert(treeID)
return visited
})
wg.Go(func() error {
for tree := range treeStream {
if tree.Error != nil {
return fmt.Errorf("LoadTree(%v) returned error %v", tree.ID.Str(), tree.Error)
}
// Do we already have this tree blob?
if !t.dstRepo.Index().Has(restic.BlobHandle{ID: treeID, Type: restic.TreeBlob}) {
newTreeID, err := t.dstRepo.SaveTree(ctx, tree)
if !t.dstRepo.Index().Has(restic.BlobHandle{ID: tree.ID, Type: restic.TreeBlob}) {
newTreeID, err := t.dstRepo.SaveTree(ctx, tree.Tree)
if err != nil {
return fmt.Errorf("SaveTree(%v) returned error %v", treeID.Str(), err)
return fmt.Errorf("SaveTree(%v) returned error %v", tree.ID.Str(), err)
}
// Assurance only.
if newTreeID != treeID {
return fmt.Errorf("SaveTree(%v) returned unexpected id %s", treeID.Str(), newTreeID.Str())
if newTreeID != tree.ID {
return fmt.Errorf("SaveTree(%v) returned unexpected id %s", tree.ID.Str(), newTreeID.Str())
}
}
// TODO: parallelize this stuff, likely only needed inside a tree.
// TODO: parallelize blob down/upload
for _, entry := range tree.Nodes {
// If it is a directory, recurse
if entry.Type == "dir" && entry.Subtree != nil {
if err := t.copyTree(ctx, *entry.Subtree); err != nil {
return err
}
}
// Recursion into directories is handled by StreamTrees
// Copy the blobs for this file.
for _, blobID := range entry.Content {
// Do we already have this data blob?
@ -224,6 +223,7 @@ func (t *treeCloner) copyTree(ctx context.Context, treeID restic.ID) error {
continue
}
debug.Log("Copying blob %s\n", blobID.Str())
var err error
t.buf, err = t.srcRepo.LoadBlob(ctx, restic.DataBlob, blobID, t.buf)
if err != nil {
return fmt.Errorf("LoadBlob(%v) returned error %v", blobID, err)
@ -236,5 +236,8 @@ func (t *treeCloner) copyTree(ctx context.Context, treeID restic.ID) error {
}
}
}
return nil
})
return wg.Wait()
}