From 3ba19869be71c232ab47abea8abf8b10bd098f57 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 14:33:32 +0200 Subject: [PATCH 01/10] prune: Abort if any used blobs are missing The previous check only approximately verified whether all required blobs were found. However, after forgetting a few snapshots the repository contains lots of unused blobs whose number can be sufficient to make up for missing packs. When coupled with a malfunctioning backend that temporarily returns broken data this could cause restic to regard the corresponding packs as invalid and thereby delete data that's still in use. This change lets restic play it safe and refuse to delete anything if data is missing. --- cmd/restic/cmd_prune.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 569fed523..0cda6da15 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -191,10 +191,12 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error { return err } - if len(usedBlobs) > stats.blobs { - return errors.Fatalf("number of used blobs is larger than number of available blobs!\n" + - "Please report this error (along with the output of the 'prune' run) at\n" + - "https://github.com/restic/restic/issues/new") + for h := range usedBlobs { + if _, ok := blobCount[h]; !ok { + return errors.Fatalf("At least one data blob seems to be missing, aborting prune to prevent further data loss!\n" + + "Please report this error (along with the output of the 'prune' run) at\n" + + "https://github.com/restic/restic/issues/new") + } } Verbosef("found %d of %d data blobs still in use, removing %d blobs\n", From 744a15247db3c940d4dcb87e0a5b79a4268a9a3c Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 14:42:45 +0200 Subject: [PATCH 02/10] prune/rebuild-index: Fail if an old index cannot be removed The old behavior was problematic in the context of rebuild-index as it could leave old, possibly invalid index files behind without returning a fatal error. Prune calls rebuildIndex before removing any data from the repository. For this use case failing to delete an old index MUST be treated as a fatal error. Otherwise the index could still contain an old index file that refers to blobs/packs that were later on deleted by prune. Later backup runs will assume that the affected blobs already exist in the repository which results in a backup which misses data. --- cmd/restic/cmd_rebuild_index.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/restic/cmd_rebuild_index.go b/cmd/restic/cmd_rebuild_index.go index bfa121338..b5f23aad7 100644 --- a/cmd/restic/cmd_rebuild_index.go +++ b/cmd/restic/cmd_rebuild_index.go @@ -92,7 +92,10 @@ func rebuildIndex(ctx context.Context, repo restic.Repository, ignorePacks resti Verbosef("saved new indexes as %v\n", ids) Verbosef("remove %d old index files\n", len(supersedes)) - DeleteFiles(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile) + err = DeleteFilesChecked(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile) + if err != nil { + return errors.Fatalf("unable to remove an old index: %v\n", err) + } return nil } From 7042bafea5febb54bb9590c7cd9d8869899cfa71 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 14:50:30 +0200 Subject: [PATCH 03/10] prune: Abort repacking when a pack contains a wrong blob If a blob in a pack file can be decrypted successfully but contains data that results in a different hash than stated in the header pack, then abort repacking. As both the pack header and the blob are cryptographically verified this either means than a malicious entity tampered with the backup or indicates hardware problems on the client. prune should fail with an error in both cases. --- internal/repository/repack.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/internal/repository/repack.go b/internal/repository/repack.go index 8fde5dd72..1d8426e48 100644 --- a/internal/repository/repack.go +++ b/internal/repository/repack.go @@ -2,8 +2,6 @@ package repository import ( "context" - "fmt" - "os" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" @@ -85,7 +83,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee if !id.Equal(entry.ID) { debug.Log("read blob %v/%v from %v: wrong data returned, hash is %v", h.Type, h.ID, tempfile.Name(), id) - fmt.Fprintf(os.Stderr, "read blob %v from %v: wrong data returned, hash is %v", + return nil, errors.Errorf("read blob %v from %v: wrong data returned, hash is %v", h, tempfile.Name(), id) } From 367449dede18a74d52cdf97c3c755c7d422caccc Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 14:58:48 +0200 Subject: [PATCH 04/10] prune: Reduce memory allocations while repacking The slicing operator `slice[low:high]` default to 0 for the lower bound and len(slice) for the upper bound when either or both are not specified. Fix the code to use `cap(slice)` to check for the slice capacity. --- internal/repository/repack.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/repository/repack.go b/internal/repository/repack.go index 1d8426e48..1a579d00b 100644 --- a/internal/repository/repack.go +++ b/internal/repository/repack.go @@ -57,8 +57,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee debug.Log(" process blob %v", h) - buf = buf[:] - if uint(len(buf)) < entry.Length { + if uint(cap(buf)) < entry.Length { buf = make([]byte, entry.Length) } buf = buf[:entry.Length] From f4b9544ab2750b35fcf7c66f2e0cfcf2095adbb3 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 17:04:48 +0200 Subject: [PATCH 05/10] prune: Add test that repack aborts on wrong blob --- internal/repository/repack_test.go | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go index 5e75011c9..519037b0d 100644 --- a/internal/repository/repack_test.go +++ b/internal/repository/repack_test.go @@ -54,6 +54,24 @@ func createRandomBlobs(t testing.TB, repo restic.Repository, blobs int, pData fl } } +func createRandomWrongBlob(t testing.TB, repo restic.Repository) { + length := randomSize(10*1024, 1024*1024) // 10KiB to 1MiB of data + buf := make([]byte, length) + rand.Read(buf) + id := restic.Hash(buf) + // invert first data byte + buf[0] ^= 0xff + + _, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, buf, id, false) + if err != nil { + t.Fatalf("SaveFrom() error %v", err) + } + + if err := repo.Flush(context.Background()); err != nil { + t.Fatalf("repo.Flush() returned error %v", err) + } +} + // selectBlobs splits the list of all blobs randomly into two lists. A blob // will be contained in the firstone ith probability p. func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2 restic.BlobSet) { @@ -239,3 +257,25 @@ func TestRepack(t *testing.T) { } } } + +func TestRepackWrongBlob(t *testing.T) { + repo, cleanup := repository.TestRepository(t) + defer cleanup() + + seed := rand.Int63() + rand.Seed(seed) + t.Logf("rand seed is %v", seed) + + createRandomBlobs(t, repo, 5, 0.7) + createRandomWrongBlob(t, repo) + + // just keep all blobs, but also rewrite every pack + _, keepBlobs := selectBlobs(t, repo, 0) + rewritePacks := findPacksForBlobs(t, repo, keepBlobs) + + _, err := repository.Repack(context.TODO(), repo, rewritePacks, keepBlobs, nil) + if err == nil { + t.Fatal("expected repack to fail but got no error") + } + t.Log(err) +} From 8f811642c3daa76bb67d45683749557c8e70bf55 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 19:09:01 +0200 Subject: [PATCH 06/10] Add support for integration tests to wrap the backend --- cmd/restic/global.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmd/restic/global.go b/cmd/restic/global.go index 10334b3e7..d3b264b3d 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -44,6 +44,8 @@ var version = "0.9.6-dev (compiled manually)" // TimeFormat is the format used for all timestamps printed by restic. const TimeFormat = "2006-01-02 15:04:05" +type backendWrapper func(r restic.Backend) (restic.Backend, error) + // GlobalOptions hold all global options for restic. type GlobalOptions struct { Repo string @@ -68,6 +70,8 @@ type GlobalOptions struct { stdout io.Writer stderr io.Writer + backendTestHook backendWrapper + // verbosity is set as follows: // 0 means: don't print any messages except errors, this is used when --quiet is specified // 1 is the default: print essential messages @@ -395,6 +399,14 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) { Warnf("%v returned error, retrying after %v: %v\n", msg, d, err) }) + // wrap backend if a test specified a hook + if opts.backendTestHook != nil { + be, err = opts.backendTestHook(be) + if err != nil { + return nil, err + } + } + s := repository.New(be) passwordTriesLeft := 1 From 575ed9a47e5179ca00448407924a05ebb4c97800 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 17:34:46 +0200 Subject: [PATCH 07/10] Test that rebuild-index errors when old index cannot be removed --- cmd/restic/integration_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index f3e02a37b..1333bd3aa 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -1132,6 +1132,37 @@ func TestRebuildIndexAlwaysFull(t *testing.T) { TestRebuildIndex(t) } +type appendOnlyBackend struct { + restic.Backend +} + +// called via repo.Backend().Remove() +func (b *appendOnlyBackend) Remove(ctx context.Context, h restic.Handle) error { + return errors.Errorf("Failed to remove %v", h) +} + +func TestRebuildIndexFailsOnAppendOnly(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + datafile := filepath.Join("..", "..", "internal", "checker", "testdata", "duplicate-packs-in-index-test-repo.tar.gz") + rtest.SetupTarTestFixture(t, env.base, datafile) + + globalOptions.stdout = ioutil.Discard + defer func() { + globalOptions.stdout = os.Stdout + }() + + env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) { + return &appendOnlyBackend{r}, nil + } + err := runRebuildIndex(env.gopts) + if err == nil { + t.Error("expected rebuildIndex to fail") + } + t.Log(err) +} + func TestCheckRestoreNoLock(t *testing.T) { env, cleanup := withTestEnvironment(t) defer cleanup() From 1c84aceb39572b9defc7675a0eb75416bddcc2b6 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 19:10:14 +0200 Subject: [PATCH 08/10] prune: Test for abort on damaged repositories --- cmd/restic/integration_test.go | 97 +++++++++++++++++++++++++++------- 1 file changed, 77 insertions(+), 20 deletions(-) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 1333bd3aa..2fdd2f4ec 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -359,6 +359,28 @@ func TestBackupNonExistingFile(t *testing.T) { testRunBackup(t, "", dirs, opts, env.gopts) } +func removeDataPacksExcept(gopts GlobalOptions, t *testing.T, keep restic.IDSet) { + r, err := OpenRepository(gopts) + rtest.OK(t, err) + + // Get all tree packs + rtest.OK(t, r.LoadIndex(gopts.ctx)) + treePacks := restic.NewIDSet() + for _, idx := range r.Index().(*repository.MasterIndex).All() { + for _, id := range idx.TreePacks() { + treePacks.Insert(id) + } + } + + // remove all packs containing data blobs + rtest.OK(t, r.List(gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error { + if treePacks.Has(id) || keep.Has(id) { + return nil + } + return r.Backend().Remove(gopts.ctx, restic.Handle{Type: restic.PackFile, Name: id.String()}) + })) +} + func TestBackupSelfHealing(t *testing.T) { env, cleanup := withTestEnvironment(t) defer cleanup() @@ -374,25 +396,8 @@ func TestBackupSelfHealing(t *testing.T) { testRunBackup(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts) testRunCheck(t, env.gopts) - r, err := OpenRepository(env.gopts) - rtest.OK(t, err) - - // Get all tree packs - rtest.OK(t, r.LoadIndex(env.gopts.ctx)) - treePacks := restic.NewIDSet() - for _, idx := range r.Index().(*repository.MasterIndex).All() { - for _, id := range idx.TreePacks() { - treePacks.Insert(id) - } - } - - // remove all packs containing data blobs - rtest.OK(t, r.List(env.gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error { - if treePacks.Has(id) { - return nil - } - return r.Backend().Remove(env.gopts.ctx, restic.Handle{Type: restic.PackFile, Name: id.String()}) - })) + // remove all data packs + removeDataPacksExcept(env.gopts, t, restic.NewIDSet()) testRunRebuildIndex(t, env.gopts) // now the repo is also missing the data blob in the index; check should report this @@ -400,7 +405,7 @@ func TestBackupSelfHealing(t *testing.T) { "check should have reported an error") // second backup should report an error but "heal" this situation - err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts) + err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts) rtest.Assert(t, err != nil, "backup should have reported an error") testRunCheck(t, env.gopts) @@ -1226,6 +1231,58 @@ func TestPrune(t *testing.T) { testRunCheck(t, env.gopts) } +func listPacks(gopts GlobalOptions, t *testing.T) restic.IDSet { + r, err := OpenRepository(gopts) + rtest.OK(t, err) + + packs := restic.NewIDSet() + + rtest.OK(t, r.List(gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error { + packs.Insert(id) + return nil + })) + return packs +} + +func TestPruneWithDamagedRepository(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + datafile := filepath.Join("testdata", "backup-data.tar.gz") + testRunInit(t, env.gopts) + + rtest.SetupTarTestFixture(t, env.testdata, datafile) + opts := BackupOptions{} + + // create and delete snapshot to create unused blobs + testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "2")}, opts, env.gopts) + firstSnapshot := testRunList(t, "snapshots", env.gopts) + rtest.Assert(t, len(firstSnapshot) == 1, + "expected one snapshot, got %v", firstSnapshot) + testRunForget(t, env.gopts, firstSnapshot[0].String()) + + oldPacks := listPacks(env.gopts, t) + + // create new snapshot, but lose all data + testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "3")}, opts, env.gopts) + snapshotIDs := testRunList(t, "snapshots", env.gopts) + + removeDataPacksExcept(env.gopts, t, oldPacks) + + rtest.Assert(t, len(snapshotIDs) == 1, + "expected one snapshot, got %v", snapshotIDs) + + // prune should fail + err := runPrune(env.gopts) + if err == nil { + t.Fatalf("expected prune to fail") + } + if !strings.Contains(err.Error(), "blob seems to be missing") { + t.Fatalf("did not find hint for missing blobs") + } + t.Log(err) +} + func TestHardLink(t *testing.T) { // this test assumes a test set with a single directory containing hard linked files env, cleanup := withTestEnvironment(t) From d8b80e98623e608694169f116dbd115bdf16a753 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 21:50:47 +0200 Subject: [PATCH 09/10] Add changelog --- changelog/unreleased/pull-2674 | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 changelog/unreleased/pull-2674 diff --git a/changelog/unreleased/pull-2674 b/changelog/unreleased/pull-2674 new file mode 100644 index 000000000..3a8cc0d9b --- /dev/null +++ b/changelog/unreleased/pull-2674 @@ -0,0 +1,8 @@ +Bugfix: Add stricter prune error checks + +Additional checks were added to the prune command in order to improve +resiliency to backend, hardware and/or networking issues. The checks now +detect a few more cases where such outside factors could potentially cause +data loss. + +https://github.com/restic/restic/pull/2674 From 08d24ff99e257fae8853b006ce686ff24aa34702 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 31 Mar 2020 23:31:33 +0200 Subject: [PATCH 10/10] prune: Include ID of all missing blobs in error message --- cmd/restic/cmd_prune.go | 11 ++++++++--- cmd/restic/integration_test.go | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 0cda6da15..fcddc76b1 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -191,13 +191,18 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error { return err } + var missingBlobs []restic.BlobHandle for h := range usedBlobs { if _, ok := blobCount[h]; !ok { - return errors.Fatalf("At least one data blob seems to be missing, aborting prune to prevent further data loss!\n" + - "Please report this error (along with the output of the 'prune' run) at\n" + - "https://github.com/restic/restic/issues/new") + missingBlobs = append(missingBlobs, h) } } + if len(missingBlobs) > 0 { + return errors.Fatalf("%v not found in the new index\n"+ + "Data blobs seem to be missing, aborting prune to prevent further data loss!\n"+ + "Please report this error (along with the output of the 'prune' run) at\n"+ + "https://github.com/restic/restic/issues/new/choose", missingBlobs) + } Verbosef("found %d of %d data blobs still in use, removing %d blobs\n", len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs)) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 2fdd2f4ec..1267db0a0 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -1277,7 +1277,7 @@ func TestPruneWithDamagedRepository(t *testing.T) { if err == nil { t.Fatalf("expected prune to fail") } - if !strings.Contains(err.Error(), "blob seems to be missing") { + if !strings.Contains(err.Error(), "blobs seem to be missing") { t.Fatalf("did not find hint for missing blobs") } t.Log(err)