2
2
mirror of https://github.com/octoleo/restic.git synced 2024-06-05 18:40:49 +00:00
restic/internal/restic/find.go
Michael Eischer 258ce0c1e5 parallel: report progress for StreamTrees
This assigns an id to each tree root and then keeps track of how many
tree loads (i.e. trees referenced for the first time) are pending per
tree root. Once a tree root and its subtrees were fully processed there
are no more pending tree loads and the tree root is reported as
processed.
2021-01-28 11:08:43 +01:00

53 lines
1.2 KiB
Go

package restic
import (
"context"
"sync"
"golang.org/x/sync/errgroup"
)
// TreeLoader loads a tree from a repository.
type TreeLoader interface {
LoadTree(context.Context, ID) (*Tree, error)
}
// FindUsedBlobs traverses the tree ID and adds all seen blobs (trees and data
// blobs) to the set blobs. Already seen tree blobs will not be visited again.
func FindUsedBlobs(ctx context.Context, repo TreeLoader, treeID ID, blobs BlobSet) error {
var lock sync.Mutex
wg, ctx := errgroup.WithContext(ctx)
treeStream := StreamTrees(ctx, wg, repo, IDs{treeID}, func(treeID ID) bool {
// locking is necessary the goroutine below concurrently adds data blobs
lock.Lock()
h := BlobHandle{ID: treeID, Type: TreeBlob}
blobReferenced := blobs.Has(h)
// noop if already referenced
blobs.Insert(h)
lock.Unlock()
return blobReferenced
}, nil)
wg.Go(func() error {
for tree := range treeStream {
if tree.Error != nil {
return tree.Error
}
lock.Lock()
for _, node := range tree.Nodes {
switch node.Type {
case "file":
for _, blob := range node.Content {
blobs.Insert(BlobHandle{ID: blob, Type: DataBlob})
}
}
}
lock.Unlock()
}
return nil
})
return wg.Wait()
}