From ddf0b8cd0b10577ec25e63bd83a0bbc5356d07f1 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 18 Apr 2020 19:46:33 +0200 Subject: [PATCH] checker: Properly distinguish between data and tree blobs If a data blob and a tree blob with the same ID (= same content) exist, then the checker did not report a data or tree blob as unused when the blob of the other type was still in use. --- cmd/restic/cmd_check.go | 2 +- internal/checker/checker.go | 29 ++++++++++++++++------------- internal/checker/checker_test.go | 14 +++++++------- internal/restic/testing.go | 5 +++++ 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 35729af6d..fdd76dd3e 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -259,7 +259,7 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { if opts.CheckUnused { for _, id := range chkr.UnusedBlobs() { - Verbosef("unused blob %v\n", id.Str()) + Verbosef("unused blob %v\n", id) errorsFound = true } } diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 987eff8ca..95a9f6de8 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -25,7 +25,7 @@ type Checker struct { blobRefs struct { sync.Mutex // see flags below - M map[restic.ID]blobStatus + M map[restic.BlobHandle]blobStatus } masterIndex *repository.MasterIndex @@ -36,9 +36,8 @@ type Checker struct { type blobStatus uint8 const ( - blobExists blobStatus = 1 << iota - blobReferenced - treeProcessed + blobStatusExists blobStatus = 1 << iota + blobStatusReferenced ) // New returns a new checker which runs on repo. @@ -49,7 +48,7 @@ func New(repo restic.Repository) *Checker { repo: repo, } - c.blobRefs.M = make(map[restic.ID]blobStatus) + c.blobRefs.M = make(map[restic.BlobHandle]blobStatus) return c } @@ -163,7 +162,8 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) { cnt := 0 for blob := range res.Index.Each(ctx) { c.packs.Insert(blob.PackID) - c.blobRefs.M[blob.ID] = blobExists + h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} + c.blobRefs.M[h] = blobStatusExists cnt++ if _, ok := packToIndex[blob.PackID]; !ok { @@ -500,9 +500,10 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha // use a separate flag for processed trees to ensure that check still processes trees // even when a file references a tree blob c.blobRefs.Lock() - blobFlags := c.blobRefs.M[nextTreeID] + h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob} + status := c.blobRefs.M[h] c.blobRefs.Unlock() - if (blobFlags & treeProcessed) != 0 { + if (status & blobStatusReferenced) != 0 { continue } @@ -522,7 +523,8 @@ func (c *Checker) filterTrees(ctx context.Context, backlog restic.IDs, loaderCha outstandingLoadTreeJobs++ loadCh = nil c.blobRefs.Lock() - c.blobRefs.M[nextTreeID] |= treeProcessed | blobReferenced + h := restic.BlobHandle{ID: nextTreeID, Type: restic.TreeBlob} + c.blobRefs.M[h] |= blobStatusReferenced c.blobRefs.Unlock() case j, ok := <-inCh: @@ -655,11 +657,12 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) { for _, blobID := range blobs { c.blobRefs.Lock() - if (c.blobRefs.M[blobID] & blobExists) == 0 { + h := restic.BlobHandle{ID: blobID, Type: restic.DataBlob} + if (c.blobRefs.M[h] & blobStatusExists) == 0 { debug.Log("tree %v references blob %v which isn't contained in index", id, blobID) errs = append(errs, Error{TreeID: id, BlobID: blobID, Err: errors.New("not found in index")}) } - c.blobRefs.M[blobID] |= blobReferenced + c.blobRefs.M[h] |= blobStatusReferenced debug.Log("blob %v is referenced", blobID) c.blobRefs.Unlock() } @@ -668,13 +671,13 @@ func (c *Checker) checkTree(id restic.ID, tree *restic.Tree) (errs []error) { } // UnusedBlobs returns all blobs that have never been referenced. -func (c *Checker) UnusedBlobs() (blobs restic.IDs) { +func (c *Checker) UnusedBlobs() (blobs restic.BlobHandles) { c.blobRefs.Lock() defer c.blobRefs.Unlock() debug.Log("checking %d blobs", len(c.blobRefs.M)) for id, flags := range c.blobRefs.M { - if (flags & blobReferenced) == 0 { + if (flags & blobStatusReferenced) == 0 { debug.Log("blob %v not referenced", id) blobs = append(blobs, id) } diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index e19982a25..b571361ec 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -157,13 +157,13 @@ func TestUnreferencedBlobs(t *testing.T) { } test.OK(t, repo.Backend().Remove(context.TODO(), snapshotHandle)) - unusedBlobsBySnapshot := restic.IDs{ - restic.TestParseID("58c748bbe2929fdf30c73262bd8313fe828f8925b05d1d4a87fe109082acb849"), - restic.TestParseID("988a272ab9768182abfd1fe7d7a7b68967825f0b861d3b36156795832c772235"), - restic.TestParseID("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8"), - restic.TestParseID("bec3a53d7dc737f9a9bee68b107ec9e8ad722019f649b34d474b9982c3a3fec7"), - restic.TestParseID("2a6f01e5e92d8343c4c6b78b51c5a4dc9c39d42c04e26088c7614b13d8d0559d"), - restic.TestParseID("18b51b327df9391732ba7aaf841a4885f350d8a557b2da8352c9acf8898e3f10"), + unusedBlobsBySnapshot := restic.BlobHandles{ + restic.TestParseHandle("58c748bbe2929fdf30c73262bd8313fe828f8925b05d1d4a87fe109082acb849", restic.DataBlob), + restic.TestParseHandle("988a272ab9768182abfd1fe7d7a7b68967825f0b861d3b36156795832c772235", restic.DataBlob), + restic.TestParseHandle("c01952de4d91da1b1b80bc6e06eaa4ec21523f4853b69dc8231708b9b7ec62d8", restic.TreeBlob), + restic.TestParseHandle("bec3a53d7dc737f9a9bee68b107ec9e8ad722019f649b34d474b9982c3a3fec7", restic.TreeBlob), + restic.TestParseHandle("2a6f01e5e92d8343c4c6b78b51c5a4dc9c39d42c04e26088c7614b13d8d0559d", restic.TreeBlob), + restic.TestParseHandle("18b51b327df9391732ba7aaf841a4885f350d8a557b2da8352c9acf8898e3f10", restic.DataBlob), } sort.Sort(unusedBlobsBySnapshot) diff --git a/internal/restic/testing.go b/internal/restic/testing.go index b3a42fd45..bcb3db155 100644 --- a/internal/restic/testing.go +++ b/internal/restic/testing.go @@ -200,3 +200,8 @@ func TestParseID(s string) ID { return id } + +// TestParseHandle parses s as a ID, panics if that fails and creates a BlobHandle with t. +func TestParseHandle(s string, t BlobType) BlobHandle { + return BlobHandle{ID: TestParseID(s), Type: t} +}