check: Only track data blobs when unused blobs should be reported

This improves the memory usage of check a lot as it now only has to track tree blobs when run using the default parameters.
2025-04-11 00:41:50 +00:00 · 2020-11-07 00:07:32 +01:00 · 2020-11-07 00:07:32 +01:00 · 1f43cac12d
commit 1f43cac12d
parent 6da66c15d8
5 changed files with 34 additions and 25 deletions
--- a/cmd/restic/cmd_check.go
+++ b/cmd/restic/cmd_check.go
@ -193,7 +193,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
 		}
 	}

-	chkr := checker.New(repo)
+	chkr := checker.New(repo, opts.CheckUnused)

 	Verbosef("load indexes\n")
 	hints, errs := chkr.LoadIndex(gopts.ctx)
--- a/internal/checker/checker.go
+++ b/internal/checker/checker.go
@ -27,6 +27,7 @@ type Checker struct {
 		sync.Mutex
 		M restic.BlobSet
 	}
+	trackUnused bool

 	masterIndex *repository.MasterIndex

@ -34,11 +35,12 @@ type Checker struct {
 }

 // New returns a new checker which runs on repo.
-func New(repo restic.Repository) *Checker {
+func New(repo restic.Repository, trackUnused bool) *Checker {
 	c := &Checker{
 		packs:       make(map[restic.ID]int64),
 		masterIndex: repository.NewMasterIndex(),
 		repo:        repo,
+		trackUnused: trackUnused,
 	}

 	c.blobRefs.M = restic.NewBlobSet()
@ -626,8 +628,6 @@ func (c *Checker) Structure(ctx context.Context, errChan chan<- error) {
 func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
 	debug.Log("checking tree %v", id)

-	var blobs []restic.ID
-
 	for _, node := range tree.Nodes {
 		switch node.Type {
 		case "file":
@ -641,7 +641,6 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
 					errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)})
 					continue
 				}
-				blobs = append(blobs, blobID)
 				blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob)
 				if !found {
 					debug.Log("tree %v references blob %v which isn't contained in index", id, blobID)
@ -649,6 +648,21 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
 				}
 				size += uint64(blobSize)
 			}
+
+			if c.trackUnused {
+				// loop a second time to keep the locked section as short as possible
+				c.blobRefs.Lock()
+				for _, blobID := range node.Content {
+					if blobID.IsNull() {
+						continue
+					}
+					h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
+					c.blobRefs.M.Insert(h)
+					debug.Log("blob %v is referenced", blobID)
+				}
+				c.blobRefs.Unlock()
+			}
+
 		case "dir":
 			if node.Subtree == nil {
 				errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)})
@ -672,19 +686,14 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) {
 		}
 	}

-	for _, blobID := range blobs {
-		c.blobRefs.Lock()
-		h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob}
-		c.blobRefs.M.Insert(h)
-		debug.Log("blob %v is referenced", blobID)
-		c.blobRefs.Unlock()
-	}
-
 	return errs
 }

 // UnusedBlobs returns all blobs that have never been referenced.
 func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) {
+	if !c.trackUnused {
+		panic("only works when tracking blob references")
+	}
 	c.blobRefs.Lock()
 	defer c.blobRefs.Unlock()

--- a/internal/checker/checker_test.go
+++ b/internal/checker/checker_test.go
@ -61,7 +61,7 @@ func TestCheckRepo(t *testing.T) {

 	repo := repository.TestOpenLocal(t, repodir)

-	chkr := checker.New(repo)
+	chkr := checker.New(repo, false)
 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) > 0 {
 		t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -87,7 +87,7 @@ func TestMissingPack(t *testing.T) {
 	}
 	test.OK(t, repo.Backend().Remove(context.TODO(), packHandle))

-	chkr := checker.New(repo)
+	chkr := checker.New(repo, false)
 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) > 0 {
 		t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -123,7 +123,7 @@ func TestUnreferencedPack(t *testing.T) {
 	}
 	test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle))

-	chkr := checker.New(repo)
+	chkr := checker.New(repo, false)
 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) > 0 {
 		t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -168,7 +168,7 @@ func TestUnreferencedBlobs(t *testing.T) {

 	sort.Sort(unusedBlobsBySnapshot)

-	chkr := checker.New(repo)
+	chkr := checker.New(repo, true)
 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) > 0 {
 		t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -241,7 +241,7 @@ func TestModifiedIndex(t *testing.T) {
 		t.Fatal(err)
 	}

-	chkr := checker.New(repo)
+	chkr := checker.New(repo, false)
 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) == 0 {
 		t.Fatalf("expected errors not found")
@ -264,7 +264,7 @@ func TestDuplicatePacksInIndex(t *testing.T) {

 	repo := repository.TestOpenLocal(t, repodir)

-	chkr := checker.New(repo)
+	chkr := checker.New(repo, false)
 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(hints) == 0 {
 		t.Fatalf("did not get expected checker hints for duplicate packs in indexes")
@ -336,7 +336,7 @@ func TestCheckerModifiedData(t *testing.T) {
 	checkRepo := repository.New(beError)
 	test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))

-	chkr := checker.New(checkRepo)
+	chkr := checker.New(checkRepo, false)

 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) > 0 {
@ -398,7 +398,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) {
 		loadedTrees: restic.NewIDSet(),
 	}

-	chkr := checker.New(checkRepo)
+	chkr := checker.New(checkRepo, false)
 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) > 0 {
 		t.Fatalf("expected no errors, got %v: %v", len(errs), errs)
@ -509,7 +509,7 @@ func TestCheckerBlobTypeConfusion(t *testing.T) {
 		UnblockChannel: make(chan struct{}),
 	}

-	chkr := checker.New(delayRepo)
+	chkr := checker.New(delayRepo, false)

 	go func() {
 		<-ctx.Done()
@ -544,7 +544,7 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun

 	repo := repository.TestOpenLocal(t, repodir)

-	chkr := checker.New(repo)
+	chkr := checker.New(repo, false)
 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) > 0 {
 		defer cleanup()
--- a/internal/checker/testing.go
+++ b/internal/checker/testing.go
@ -9,7 +9,7 @@ import (

 // TestCheckRepo runs the checker on repo.
 func TestCheckRepo(t testing.TB, repo restic.Repository) {
-	chkr := New(repo)
+	chkr := New(repo, true)

 	hints, errs := chkr.LoadIndex(context.TODO())
 	if len(errs) != 0 {
--- a/internal/repository/master_index_test.go
+++ b/internal/repository/master_index_test.go
@ -360,7 +360,7 @@ func TestIndexSave(t *testing.T) {
 		}
 	}

-	checker := checker.New(repo)
+	checker := checker.New(repo, false)
 	hints, errs := checker.LoadIndex(context.TODO())
 	for _, h := range hints {
 		t.Logf("hint: %v\n", h)