diff --git a/changelog/unreleased/pull-2674 b/changelog/unreleased/pull-2674
new file mode 100644
index 000000000..3a8cc0d9b
--- /dev/null
+++ b/changelog/unreleased/pull-2674
@@ -0,0 +1,8 @@
+Bugfix: Add stricter prune error checks
+
+Additional checks were added to the prune command in order to improve
+resiliency to backend, hardware and/or networking issues. The checks now
+detect a few more cases where such outside factors could potentially cause
+data loss.
+
+https://github.com/restic/restic/pull/2674
diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go
index 569fed523..fcddc76b1 100644
--- a/cmd/restic/cmd_prune.go
+++ b/cmd/restic/cmd_prune.go
@@ -191,10 +191,17 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
 		return err
 	}
 
-	if len(usedBlobs) > stats.blobs {
-		return errors.Fatalf("number of used blobs is larger than number of available blobs!\n" +
-			"Please report this error (along with the output of the 'prune' run) at\n" +
-			"https://github.com/restic/restic/issues/new")
+	var missingBlobs []restic.BlobHandle
+	for h := range usedBlobs {
+		if _, ok := blobCount[h]; !ok {
+			missingBlobs = append(missingBlobs, h)
+		}
+	}
+	if len(missingBlobs) > 0 {
+		return errors.Fatalf("%v not found in the new index\n"+
+			"Data blobs seem to be missing, aborting prune to prevent further data loss!\n"+
+			"Please report this error (along with the output of the 'prune' run) at\n"+
+			"https://github.com/restic/restic/issues/new/choose", missingBlobs)
 	}
 
 	Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
diff --git a/cmd/restic/cmd_rebuild_index.go b/cmd/restic/cmd_rebuild_index.go
index bfa121338..b5f23aad7 100644
--- a/cmd/restic/cmd_rebuild_index.go
+++ b/cmd/restic/cmd_rebuild_index.go
@@ -92,7 +92,10 @@ func rebuildIndex(ctx context.Context, repo restic.Repository, ignorePacks resti
 	Verbosef("saved new indexes as %v\n", ids)
 
 	Verbosef("remove %d old index files\n", len(supersedes))
-	DeleteFiles(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile)
+	err = DeleteFilesChecked(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile)
+	if err != nil {
+		return errors.Fatalf("unable to remove an old index: %v\n", err)
+	}
 
 	return nil
 }
diff --git a/cmd/restic/global.go b/cmd/restic/global.go
index 10334b3e7..d3b264b3d 100644
--- a/cmd/restic/global.go
+++ b/cmd/restic/global.go
@@ -44,6 +44,8 @@ var version = "0.9.6-dev (compiled manually)"
 // TimeFormat is the format used for all timestamps printed by restic.
 const TimeFormat = "2006-01-02 15:04:05"
 
+type backendWrapper func(r restic.Backend) (restic.Backend, error)
+
 // GlobalOptions hold all global options for restic.
 type GlobalOptions struct {
 	Repo string
@@ -68,6 +70,8 @@ type GlobalOptions struct {
 	stdout io.Writer
 	stderr io.Writer
 
+	backendTestHook backendWrapper
+
 	// verbosity is set as follows:
 	// 0 means: don't print any messages except errors, this is used when --quiet is specified
 	// 1 is the default: print essential messages
@@ -395,6 +399,14 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
 		Warnf("%v returned error, retrying after %v: %v\n", msg, d, err)
 	})
 
+	// wrap backend if a test specified a hook
+	if opts.backendTestHook != nil {
+		be, err = opts.backendTestHook(be)
+		if err != nil {
+			return nil, err
+		}
+	}
+
 	s := repository.New(be)
 
 	passwordTriesLeft := 1
diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go
index f3e02a37b..1267db0a0 100644
--- a/cmd/restic/integration_test.go
+++ b/cmd/restic/integration_test.go
@@ -359,6 +359,28 @@ func TestBackupNonExistingFile(t *testing.T) {
 	testRunBackup(t, "", dirs, opts, env.gopts)
 }
 
+func removeDataPacksExcept(gopts GlobalOptions, t *testing.T, keep restic.IDSet) {
+	r, err := OpenRepository(gopts)
+	rtest.OK(t, err)
+
+	// Get all tree packs
+	rtest.OK(t, r.LoadIndex(gopts.ctx))
+	treePacks := restic.NewIDSet()
+	for _, idx := range r.Index().(*repository.MasterIndex).All() {
+		for _, id := range idx.TreePacks() {
+			treePacks.Insert(id)
+		}
+	}
+
+	// remove all packs containing data blobs
+	rtest.OK(t, r.List(gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error {
+		if treePacks.Has(id) || keep.Has(id) {
+			return nil
+		}
+		return r.Backend().Remove(gopts.ctx, restic.Handle{Type: restic.PackFile, Name: id.String()})
+	}))
+}
+
 func TestBackupSelfHealing(t *testing.T) {
 	env, cleanup := withTestEnvironment(t)
 	defer cleanup()
@@ -374,25 +396,8 @@ func TestBackupSelfHealing(t *testing.T) {
 	testRunBackup(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts)
 	testRunCheck(t, env.gopts)
 
-	r, err := OpenRepository(env.gopts)
-	rtest.OK(t, err)
-
-	// Get all tree packs
-	rtest.OK(t, r.LoadIndex(env.gopts.ctx))
-	treePacks := restic.NewIDSet()
-	for _, idx := range r.Index().(*repository.MasterIndex).All() {
-		for _, id := range idx.TreePacks() {
-			treePacks.Insert(id)
-		}
-	}
-
-	// remove all packs containing data blobs
-	rtest.OK(t, r.List(env.gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error {
-		if treePacks.Has(id) {
-			return nil
-		}
-		return r.Backend().Remove(env.gopts.ctx, restic.Handle{Type: restic.PackFile, Name: id.String()})
-	}))
+	// remove all data packs
+	removeDataPacksExcept(env.gopts, t, restic.NewIDSet())
 
 	testRunRebuildIndex(t, env.gopts)
 	// now the repo is also missing the data blob in the index; check should report this
@@ -400,7 +405,7 @@ func TestBackupSelfHealing(t *testing.T) {
 		"check should have reported an error")
 
 	// second backup should report an error but "heal" this situation
-	err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts)
+	err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts)
 	rtest.Assert(t, err != nil,
 		"backup should have reported an error")
 	testRunCheck(t, env.gopts)
@@ -1132,6 +1137,37 @@ func TestRebuildIndexAlwaysFull(t *testing.T) {
 	TestRebuildIndex(t)
 }
 
+type appendOnlyBackend struct {
+	restic.Backend
+}
+
+// called via repo.Backend().Remove()
+func (b *appendOnlyBackend) Remove(ctx context.Context, h restic.Handle) error {
+	return errors.Errorf("Failed to remove %v", h)
+}
+
+func TestRebuildIndexFailsOnAppendOnly(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+
+	datafile := filepath.Join("..", "..", "internal", "checker", "testdata", "duplicate-packs-in-index-test-repo.tar.gz")
+	rtest.SetupTarTestFixture(t, env.base, datafile)
+
+	globalOptions.stdout = ioutil.Discard
+	defer func() {
+		globalOptions.stdout = os.Stdout
+	}()
+
+	env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) {
+		return &appendOnlyBackend{r}, nil
+	}
+	err := runRebuildIndex(env.gopts)
+	if err == nil {
+		t.Error("expected rebuildIndex to fail")
+	}
+	t.Log(err)
+}
+
 func TestCheckRestoreNoLock(t *testing.T) {
 	env, cleanup := withTestEnvironment(t)
 	defer cleanup()
@@ -1195,6 +1231,58 @@ func TestPrune(t *testing.T) {
 	testRunCheck(t, env.gopts)
 }
 
+func listPacks(gopts GlobalOptions, t *testing.T) restic.IDSet {
+	r, err := OpenRepository(gopts)
+	rtest.OK(t, err)
+
+	packs := restic.NewIDSet()
+
+	rtest.OK(t, r.List(gopts.ctx, restic.PackFile, func(id restic.ID, size int64) error {
+		packs.Insert(id)
+		return nil
+	}))
+	return packs
+}
+
+func TestPruneWithDamagedRepository(t *testing.T) {
+	env, cleanup := withTestEnvironment(t)
+	defer cleanup()
+
+	datafile := filepath.Join("testdata", "backup-data.tar.gz")
+	testRunInit(t, env.gopts)
+
+	rtest.SetupTarTestFixture(t, env.testdata, datafile)
+	opts := BackupOptions{}
+
+	// create and delete snapshot to create unused blobs
+	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "2")}, opts, env.gopts)
+	firstSnapshot := testRunList(t, "snapshots", env.gopts)
+	rtest.Assert(t, len(firstSnapshot) == 1,
+		"expected one snapshot, got %v", firstSnapshot)
+	testRunForget(t, env.gopts, firstSnapshot[0].String())
+
+	oldPacks := listPacks(env.gopts, t)
+
+	// create new snapshot, but lose all data
+	testRunBackup(t, "", []string{filepath.Join(env.testdata, "0", "0", "9", "3")}, opts, env.gopts)
+	snapshotIDs := testRunList(t, "snapshots", env.gopts)
+
+	removeDataPacksExcept(env.gopts, t, oldPacks)
+
+	rtest.Assert(t, len(snapshotIDs) == 1,
+		"expected one snapshot, got %v", snapshotIDs)
+
+	// prune should fail
+	err := runPrune(env.gopts)
+	if err == nil {
+		t.Fatalf("expected prune to fail")
+	}
+	if !strings.Contains(err.Error(), "blobs seem to be missing") {
+		t.Fatalf("did not find hint for missing blobs")
+	}
+	t.Log(err)
+}
+
 func TestHardLink(t *testing.T) {
 	// this test assumes a test set with a single directory containing hard linked files
 	env, cleanup := withTestEnvironment(t)
diff --git a/internal/repository/repack.go b/internal/repository/repack.go
index 8fde5dd72..1a579d00b 100644
--- a/internal/repository/repack.go
+++ b/internal/repository/repack.go
@@ -2,8 +2,6 @@ package repository
 
 import (
 	"context"
-	"fmt"
-	"os"
 
 	"github.com/restic/restic/internal/debug"
 	"github.com/restic/restic/internal/errors"
@@ -59,8 +57,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
 
 			debug.Log(" process blob %v", h)
 
-			buf = buf[:]
-			if uint(len(buf)) < entry.Length {
+			if uint(cap(buf)) < entry.Length {
 				buf = make([]byte, entry.Length)
 			}
 			buf = buf[:entry.Length]
@@ -85,7 +82,7 @@ func Repack(ctx context.Context, repo restic.Repository, packs restic.IDSet, kee
 			if !id.Equal(entry.ID) {
 				debug.Log("read blob %v/%v from %v: wrong data returned, hash is %v",
 					h.Type, h.ID, tempfile.Name(), id)
-				fmt.Fprintf(os.Stderr, "read blob %v from %v: wrong data returned, hash is %v",
+				return nil, errors.Errorf("read blob %v from %v: wrong data returned, hash is %v",
 					h, tempfile.Name(), id)
 			}
 
diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go
index 5e75011c9..519037b0d 100644
--- a/internal/repository/repack_test.go
+++ b/internal/repository/repack_test.go
@@ -54,6 +54,24 @@ func createRandomBlobs(t testing.TB, repo restic.Repository, blobs int, pData fl
 	}
 }
 
+func createRandomWrongBlob(t testing.TB, repo restic.Repository) {
+	length := randomSize(10*1024, 1024*1024) // 10KiB to 1MiB of data
+	buf := make([]byte, length)
+	rand.Read(buf)
+	id := restic.Hash(buf)
+	// invert first data byte
+	buf[0] ^= 0xff
+
+	_, _, err := repo.SaveBlob(context.TODO(), restic.DataBlob, buf, id, false)
+	if err != nil {
+		t.Fatalf("SaveFrom() error %v", err)
+	}
+
+	if err := repo.Flush(context.Background()); err != nil {
+		t.Fatalf("repo.Flush() returned error %v", err)
+	}
+}
+
 // selectBlobs splits the list of all blobs randomly into two lists. A blob
 // will be contained in the first one with probability p.
 func selectBlobs(t *testing.T, repo restic.Repository, p float32) (list1, list2 restic.BlobSet) {
@@ -239,3 +257,25 @@ func TestRepack(t *testing.T) {
 		}
 	}
 }
+
+func TestRepackWrongBlob(t *testing.T) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	seed := rand.Int63()
+	rand.Seed(seed)
+	t.Logf("rand seed is %v", seed)
+
+	createRandomBlobs(t, repo, 5, 0.7)
+	createRandomWrongBlob(t, repo)
+
+	// just keep all blobs, but also rewrite every pack
+	_, keepBlobs := selectBlobs(t, repo, 0)
+	rewritePacks := findPacksForBlobs(t, repo, keepBlobs)
+
+	_, err := repository.Repack(context.TODO(), repo, rewritePacks, keepBlobs, nil)
+	if err == nil {
+		t.Fatal("expected repack to fail but got no error")
+	}
+	t.Log(err)
+}