From 1f43cac12d3b3184a41535b291cefb868ccc63e1 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 7 Nov 2020 00:07:32 +0100 Subject: [PATCH] check: Only track data blobs when unused blobs should be reported This improves the memory usage of check a lot as it now only has to track tree blobs when run using the default parameters. --- cmd/restic/cmd_check.go | 2 +- internal/checker/checker.go | 33 +++++++++++++++--------- internal/checker/checker_test.go | 20 +++++++------- internal/checker/testing.go | 2 +- internal/repository/master_index_test.go | 2 +- 5 files changed, 34 insertions(+), 25 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 84eaa5ac4..774879490 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -193,7 +193,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { } } - chkr := checker.New(repo) + chkr := checker.New(repo, opts.CheckUnused) Verbosef("load indexes\n") hints, errs := chkr.LoadIndex(gopts.ctx) diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 19501b4e6..1e31ba986 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -27,6 +27,7 @@ type Checker struct { sync.Mutex M restic.BlobSet } + trackUnused bool masterIndex *repository.MasterIndex @@ -34,11 +35,12 @@ type Checker struct { } // New returns a new checker which runs on repo. -func New(repo restic.Repository) *Checker { +func New(repo restic.Repository, trackUnused bool) *Checker { c := &Checker{ packs: make(map[restic.ID]int64), masterIndex: repository.NewMasterIndex(), repo: repo, + trackUnused: trackUnused, } c.blobRefs.M = restic.NewBlobSet() @@ -626,8 +628,6 @@ func (c *Checker) Structure(ctx context.Context, errChan chan<- error) { func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) { debug.Log("checking tree %v", id) - var blobs []restic.ID - for _, node := range tree.Nodes { switch node.Type { case "file": @@ -641,7 +641,6 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) { errs = append(errs, Error{TreeID: id, Err: errors.Errorf("file %q blob %d has null ID", node.Name, b)}) continue } - blobs = append(blobs, blobID) blobSize, found := c.repo.LookupBlobSize(blobID, restic.DataBlob) if !found { debug.Log("tree %v references blob %v which isn't contained in index", id, blobID) @@ -649,6 +648,21 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) { } size += uint64(blobSize) } + + if c.trackUnused { + // loop a second time to keep the locked section as short as possible + c.blobRefs.Lock() + for _, blobID := range node.Content { + if blobID.IsNull() { + continue + } + h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob} + c.blobRefs.M.Insert(h) + debug.Log("blob %v is referenced", blobID) + } + c.blobRefs.Unlock() + } + case "dir": if node.Subtree == nil { errs = append(errs, Error{TreeID: id, Err: errors.Errorf("dir node %q has no subtree", node.Name)}) @@ -672,19 +686,14 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) { } } - for _, blobID := range blobs { - c.blobRefs.Lock() - h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob} - c.blobRefs.M.Insert(h) - debug.Log("blob %v is referenced", blobID) - c.blobRefs.Unlock() - } - return errs } // UnusedBlobs returns all blobs that have never been referenced. func (c *Checker) UnusedBlobs(ctx context.Context) (blobs restic.BlobHandles) { + if !c.trackUnused { + panic("only works when tracking blob references") + } c.blobRefs.Lock() defer c.blobRefs.Unlock() diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index 333e5573b..f8efd05e8 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -61,7 +61,7 @@ func TestCheckRepo(t *testing.T) { repo := repository.TestOpenLocal(t, repodir) - chkr := checker.New(repo) + chkr := checker.New(repo, false) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) @@ -87,7 +87,7 @@ func TestMissingPack(t *testing.T) { } test.OK(t, repo.Backend().Remove(context.TODO(), packHandle)) - chkr := checker.New(repo) + chkr := checker.New(repo, false) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) @@ -123,7 +123,7 @@ func TestUnreferencedPack(t *testing.T) { } test.OK(t, repo.Backend().Remove(context.TODO(), indexHandle)) - chkr := checker.New(repo) + chkr := checker.New(repo, false) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) @@ -168,7 +168,7 @@ func TestUnreferencedBlobs(t *testing.T) { sort.Sort(unusedBlobsBySnapshot) - chkr := checker.New(repo) + chkr := checker.New(repo, true) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) @@ -241,7 +241,7 @@ func TestModifiedIndex(t *testing.T) { t.Fatal(err) } - chkr := checker.New(repo) + chkr := checker.New(repo, false) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) == 0 { t.Fatalf("expected errors not found") @@ -264,7 +264,7 @@ func TestDuplicatePacksInIndex(t *testing.T) { repo := repository.TestOpenLocal(t, repodir) - chkr := checker.New(repo) + chkr := checker.New(repo, false) hints, errs := chkr.LoadIndex(context.TODO()) if len(hints) == 0 { t.Fatalf("did not get expected checker hints for duplicate packs in indexes") @@ -336,7 +336,7 @@ func TestCheckerModifiedData(t *testing.T) { checkRepo := repository.New(beError) test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, "")) - chkr := checker.New(checkRepo) + chkr := checker.New(checkRepo, false) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) > 0 { @@ -398,7 +398,7 @@ func TestCheckerNoDuplicateTreeDecodes(t *testing.T) { loadedTrees: restic.NewIDSet(), } - chkr := checker.New(checkRepo) + chkr := checker.New(checkRepo, false) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) > 0 { t.Fatalf("expected no errors, got %v: %v", len(errs), errs) @@ -509,7 +509,7 @@ func TestCheckerBlobTypeConfusion(t *testing.T) { UnblockChannel: make(chan struct{}), } - chkr := checker.New(delayRepo) + chkr := checker.New(delayRepo, false) go func() { <-ctx.Done() @@ -544,7 +544,7 @@ func loadBenchRepository(t *testing.B) (*checker.Checker, restic.Repository, fun repo := repository.TestOpenLocal(t, repodir) - chkr := checker.New(repo) + chkr := checker.New(repo, false) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) > 0 { defer cleanup() diff --git a/internal/checker/testing.go b/internal/checker/testing.go index c6d9a08ef..6c5be84e2 100644 --- a/internal/checker/testing.go +++ b/internal/checker/testing.go @@ -9,7 +9,7 @@ import ( // TestCheckRepo runs the checker on repo. func TestCheckRepo(t testing.TB, repo restic.Repository) { - chkr := New(repo) + chkr := New(repo, true) hints, errs := chkr.LoadIndex(context.TODO()) if len(errs) != 0 { diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go index 729f088ac..0d56cd097 100644 --- a/internal/repository/master_index_test.go +++ b/internal/repository/master_index_test.go @@ -360,7 +360,7 @@ func TestIndexSave(t *testing.T) { } } - checker := checker.New(repo) + checker := checker.New(repo, false) hints, errs := checker.LoadIndex(context.TODO()) for _, h := range hints { t.Logf("hint: %v\n", h)