check: Only track data blobs when unused blobs should be reported

This improves the memory usage of check a lot as it now only has to
track tree blobs when run using the default parameters.
This commit is contained in:
Michael Eischer 2020-11-07 00:07:32 +01:00
parent 6da66c15d8
commit 1f43cac12d
5 changed files with 34 additions and 25 deletions

View File

@ -193,7 +193,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
}
}
chkr := checker.New(repo)
chkr := checker.New(repo, opts.CheckUnused)
Verbosef("load indexes\n")
hints, errs := chkr.LoadIndex(gopts.ctx)

View File

@ -27,6 +27,7 @@ type Checker struct {
sync.Mutex
M restic.BlobSet
}
trackUnused bool
masterIndex *repository.MasterIndex
@ -34,11 +35,12 @@ type Checker struct {
}
// New returns a new checker which runs on repo.
func New(repo restic.Repository) *Checker {
func New(repo restic.Repository, trackUnused bool) *Checker {
c := &Checker{
packs: make(map[restic.ID]int64),
masterIndex: repository.NewMasterIndex(),
repo: repo,
trackUnused: trackUnused,
}
c.blobRefs.M = restic.NewBlobSet()
@ -626,8 +628,6 @@ func (c *Checker) Structure(ctx context.Context, errChan chan<- error) {
func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
debug.Log("checking tree %v", id)
var blobs []restic.ID
for _, node := range tree.Nodes {
switch node.Type {
case "file":
@ -641,7 +641,6 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
continue
}
blobs = append(blobs, blobID)
blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob)
if !found {
debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
@ -649,6 +648,21 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
}
size += uint64(blobSize)
}
if c.trackUnused {
// loop a second time to keep the locked section as short as possible
c.blobRefs.Lock()
for _, blobID := range node.Content {
if blobID.IsNull() {
continue
}
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
c.blobRefs.M.Insert(h)
debug.Log("blob %v is referenced", blobID)
}
c.blobRefs.Unlock()
}
case "dir":
if node.Subtree == nil {
errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
@ -672,19 +686,14 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
}
}
for _, blobID := range blobs {
c.blobRefs.Lock()
h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
c.blobRefs.M.Insert(h)
debug.Log("blob %v is referenced", blobID)
c.blobRefs.Unlock()
}
return errs
}
// UnusedBlobs returns all blobs that have never been referenced.
func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) {
if !c.trackUnused {
panic("only works when tracking blob references")
}
c.blobRefs.Lock()
defer c.blobRefs.Unlock()

View File

@ -61,7 +61,7 @@ func TestCheckRepo(t *testing.T) {
repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -87,7 +87,7 @@ func TestMissingPack(t *testing.T) {
}
test.OK(t, repo.Backend().Remove(context.TODO(), packHandle))
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -123,7 +123,7 @@ func TestUnreferencedPack(t *testing.T) {
}
test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle))
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -168,7 +168,7 @@ func TestUnreferencedBlobs(t *testing.T) {
sort.Sort(unusedBlobsBySnapshot)
chkr := checker.New(repo)
chkr := checker.New(repo, true)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -241,7 +241,7 @@ func TestModifiedIndex(t *testing.T) {
t.Fatal(err)
}
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) == 0 {
t.Fatalf("expected errors not found")
@ -264,7 +264,7 @@ func TestDuplicatePacksInIndex(t *testing.T) {
repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(hints) == 0 {
t.Fatalf("did not get expected checker hints for duplicate packs in indexes")
@ -336,7 +336,7 @@ func TestCheckerModifiedData(t *testing.T) {
checkRepo := repository.New(beError)
test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))
chkr := checker.New(checkRepo)
chkr := checker.New(checkRepo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
@ -398,7 +398,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) {
loadedTrees: restic.NewIDSet(),
}
chkr := checker.New(checkRepo)
chkr := checker.New(checkRepo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -509,7 +509,7 @@ func TestCheckerBlobTypeConfusion(t *testing.T) {
UnblockChannel: make(chan struct{}),
}
chkr := checker.New(delayRepo)
chkr := checker.New(delayRepo, false)
go func() {
<-ctx.Done()
@ -544,7 +544,7 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun
repo := repository.TestOpenLocal(t, repodir)
chkr := checker.New(repo)
chkr := checker.New(repo, false)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) > 0 {
defer cleanup()

View File

@ -9,7 +9,7 @@ import (
// TestCheckRepo runs the checker on repo.
func TestCheckRepo(t testing.TB, repo restic.Repository) {
chkr := New(repo)
chkr := New(repo, true)
hints, errs := chkr.LoadIndex(context.TODO())
if len(errs) != 0 {

View File

@ -360,7 +360,7 @@ func TestIndexSave(t *testing.T) {
}
}
checker := checker.New(repo)
checker := checker.New(repo, false)
hints, errs := checker.LoadIndex(context.TODO())
for _, h := range hints {
t.Logf("hint: %v\n", h)