diff --git a/changelog/unreleased/issue-1759 b/changelog/unreleased/issue-1759 new file mode 100644 index 000000000..1b698f845 --- /dev/null +++ b/changelog/unreleased/issue-1759 @@ -0,0 +1,20 @@ +Enhancement: Add `repair index` and `repair snapshots` commands + +The `rebuild-index` command has been renamed to `repair index`. The old name +will still work, but is deprecated. + +When a snapshot was damaged, the only option up to now was to completely forget +the snapshot, even if only some unimportant file was damaged. + +We've added a `repair snapshots` command, which can repair snapshots by removing +damaged directories and missing files contents. Note that using this command +can lead to data loss! Please see the "Troubleshooting" section in the documentation +for more details. + +https://github.com/restic/restic/issues/1759 +https://github.com/restic/restic/issues/1714 +https://github.com/restic/restic/issues/1798 +https://github.com/restic/restic/issues/2334 +https://github.com/restic/restic/pull/2876 +https://forum.restic.net/t/corrupted-repo-how-to-repair/799 +https://forum.restic.net/t/recovery-options-for-damaged-repositories/1571 diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index e5f29a7e5..b9f3199b2 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -245,7 +245,7 @@ func runCheck(ctx context.Context, opts CheckOptions, gopts GlobalOptions, args } if suggestIndexRebuild { - Printf("Duplicate packs/old indexes are non-critical, you can run `restic rebuild-index' to correct this.\n") + Printf("Duplicate packs/old indexes are non-critical, you can run `restic repair index' to correct this.\n") } if mixedFound { Printf("Mixed packs with tree and data blobs are non-critical, you can run `restic prune` to correct this.\n") diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 6104002b0..1138bb55b 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -488,7 +488,7 @@ func decidePackAction(ctx 
context.Context, opts PruneOptions, repo restic.Reposi // Pack size does not fit and pack is needed => error // If the pack is not needed, this is no error, the pack can // and will be simply removed, see below. - Warnf("pack %s: calculated size %d does not match real size %d\nRun 'restic rebuild-index'.\n", + Warnf("pack %s: calculated size %d does not match real size %d\nRun 'restic repair index'.\n", id.Str(), p.unusedSize+p.usedSize, packSize) return errorSizeNotMatching } diff --git a/cmd/restic/cmd_repair.go b/cmd/restic/cmd_repair.go new file mode 100644 index 000000000..aefe02f3c --- /dev/null +++ b/cmd/restic/cmd_repair.go @@ -0,0 +1,14 @@ +package main + +import ( + "github.com/spf13/cobra" +) + +var cmdRepair = &cobra.Command{ + Use: "repair", + Short: "Repair the repository", +} + +func init() { + cmdRoot.AddCommand(cmdRepair) +} diff --git a/cmd/restic/cmd_rebuild_index.go b/cmd/restic/cmd_repair_index.go similarity index 75% rename from cmd/restic/cmd_rebuild_index.go rename to cmd/restic/cmd_repair_index.go index 5d70a9e12..25d6b1cab 100644 --- a/cmd/restic/cmd_rebuild_index.go +++ b/cmd/restic/cmd_repair_index.go @@ -7,15 +7,15 @@ import ( "github.com/restic/restic/internal/pack" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" - "github.com/spf13/cobra" + "github.com/spf13/pflag" ) -var cmdRebuildIndex = &cobra.Command{ - Use: "rebuild-index [flags]", +var cmdRepairIndex = &cobra.Command{ + Use: "index [flags]", Short: "Build a new index", Long: ` -The "rebuild-index" command creates a new index based on the pack files in the +The "repair index" command creates a new index based on the pack files in the repository. 
EXIT STATUS @@ -25,25 +25,37 @@ Exit status is 0 if the command was successful, and non-zero if there was any er `, DisableAutoGenTag: true, RunE: func(cmd *cobra.Command, args []string) error { - return runRebuildIndex(cmd.Context(), rebuildIndexOptions, globalOptions) + return runRebuildIndex(cmd.Context(), repairIndexOptions, globalOptions) }, } -// RebuildIndexOptions collects all options for the rebuild-index command. -type RebuildIndexOptions struct { +var cmdRebuildIndex = &cobra.Command{ + Use: "rebuild-index [flags]", + Short: cmdRepairIndex.Short, + Long: cmdRepairIndex.Long, + Deprecated: `Use "repair index" instead`, + DisableAutoGenTag: true, + RunE: cmdRepairIndex.RunE, +} + +// RepairIndexOptions collects all options for the repair index command. +type RepairIndexOptions struct { ReadAllPacks bool } -var rebuildIndexOptions RebuildIndexOptions +var repairIndexOptions RepairIndexOptions func init() { + cmdRepair.AddCommand(cmdRepairIndex) + // add alias for old name cmdRoot.AddCommand(cmdRebuildIndex) - f := cmdRebuildIndex.Flags() - f.BoolVar(&rebuildIndexOptions.ReadAllPacks, "read-all-packs", false, "read all pack files to generate new index from scratch") + for _, f := range []*pflag.FlagSet{cmdRepairIndex.Flags(), cmdRebuildIndex.Flags()} { + f.BoolVar(&repairIndexOptions.ReadAllPacks, "read-all-packs", false, "read all pack files to generate new index from scratch") + } } -func runRebuildIndex(ctx context.Context, opts RebuildIndexOptions, gopts GlobalOptions) error { +func runRebuildIndex(ctx context.Context, opts RepairIndexOptions, gopts GlobalOptions) error { repo, err := OpenRepository(ctx, gopts) if err != nil { return err @@ -58,7 +70,7 @@ func runRebuildIndex(ctx context.Context, opts RebuildIndexOptions, gopts Global return rebuildIndex(ctx, opts, gopts, repo, restic.NewIDSet()) } -func rebuildIndex(ctx context.Context, opts RebuildIndexOptions, gopts GlobalOptions, repo *repository.Repository, ignorePacks restic.IDSet) error { +func 
rebuildIndex(ctx context.Context, opts RepairIndexOptions, gopts GlobalOptions, repo *repository.Repository, ignorePacks restic.IDSet) error { var obsoleteIndexes restic.IDs packSizeFromList := make(map[restic.ID]int64) packSizeFromIndex := make(map[restic.ID]int64) diff --git a/cmd/restic/cmd_repair_snapshots.go b/cmd/restic/cmd_repair_snapshots.go new file mode 100644 index 000000000..5e9ec4130 --- /dev/null +++ b/cmd/restic/cmd_repair_snapshots.go @@ -0,0 +1,176 @@ +package main + +import ( + "context" + + "github.com/restic/restic/internal/backend" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/walker" + + "github.com/spf13/cobra" +) + +var cmdRepairSnapshots = &cobra.Command{ + Use: "snapshots [flags] [snapshot ID] [...]", + Short: "Repair snapshots", + Long: ` +The "repair snapshots" command repairs broken snapshots. It scans the given +snapshots and generates new ones with damaged directories and file contents +removed. If the broken snapshots are deleted, a prune run will be able to +clean up the repository. + +The command depends on a correct index, thus make sure to run "repair index" +first! + + +WARNING +======= + +Repairing and deleting broken snapshots causes data loss! It will remove broken +directories and modify broken files in the modified snapshots. + +If the contents of directories and files are still available, the better option +is to run "backup" which in that case is able to heal existing snapshots. Only +use the "repair snapshots" command if you need to recover an old and broken +snapshot! + +EXIT STATUS +=========== + +Exit status is 0 if the command was successful, and non-zero if there was any error. +`, + DisableAutoGenTag: true, + RunE: func(cmd *cobra.Command, args []string) error { + return runRepairSnapshots(cmd.Context(), globalOptions, repairSnapshotOptions, args) + }, +} + +// RepairOptions collects all options for the repair command. 
+type RepairOptions struct { + DryRun bool + Forget bool + + restic.SnapshotFilter +} + +var repairSnapshotOptions RepairOptions + +func init() { + cmdRepair.AddCommand(cmdRepairSnapshots) + flags := cmdRepairSnapshots.Flags() + + flags.BoolVarP(&repairSnapshotOptions.DryRun, "dry-run", "n", false, "do not do anything, just print what would be done") + flags.BoolVarP(&repairSnapshotOptions.Forget, "forget", "", false, "remove original snapshots after creating new ones") + + initMultiSnapshotFilter(flags, &repairSnapshotOptions.SnapshotFilter, true) +} + +func runRepairSnapshots(ctx context.Context, gopts GlobalOptions, opts RepairOptions, args []string) error { + repo, err := OpenRepository(ctx, gopts) // honor the options passed by the caller instead of the package-level globalOptions + if err != nil { + return err + } + + if !opts.DryRun { + var lock *restic.Lock + var err error + lock, ctx, err = lockRepoExclusive(ctx, repo, gopts.RetryLock, gopts.JSON) + defer unlockRepo(lock) // NOTE(review): deferred before the error check; presumably unlockRepo tolerates a nil lock - confirm + if err != nil { + return err + } + } else { + repo.SetDryRun() + } + + snapshotLister, err := backend.MemorizeList(ctx, repo.Backend(), restic.SnapshotFile) + if err != nil { + return err + } + + if err := repo.LoadIndex(ctx); err != nil { + return err + } + + // Three error cases are checked: + // - tree is a nil tree (-> will be replaced by an empty tree) + // - trees which cannot be loaded (-> the tree contents will be removed) + // - files whose contents are not fully available (-> file will be modified) + rewriter := walker.NewTreeRewriter(walker.RewriteOpts{ + RewriteNode: func(node *restic.Node, path string) *restic.Node { + if node.Type != "file" { + return node + } + + ok := true + newContent := restic.IDs{} + var newSize uint64 + // keep only the blobs that are still known to the index and recompute the file size from them + for _, id := range node.Content { + if size, found := repo.LookupBlobSize(id, restic.DataBlob); !found { + ok = false + } else { + newContent = append(newContent, id) + newSize += uint64(size) + } + } + if !ok { + Verbosef(" file %q: removed missing content\n",
path) + } else if newSize != node.Size { + Verbosef(" file %q: fixed incorrect size\n", path) + } + // no-ops if already correct + node.Content = newContent + node.Size = newSize + return node + }, + RewriteFailedTree: func(nodeID restic.ID, path string, _ error) (restic.ID, error) { + if path == "/" { + Verbosef(" dir %q: not readable\n", path) + // remove snapshots with invalid root node + return restic.ID{}, nil + } + // If a subtree fails to load, remove it + Verbosef(" dir %q: replaced with empty directory\n", path) + emptyID, err := restic.SaveTree(ctx, repo, &restic.Tree{}) + if err != nil { + return restic.ID{}, err + } + return emptyID, nil + }, + AllowUnstableSerialization: true, + }) + + changedCount := 0 + for sn := range FindFilteredSnapshots(ctx, snapshotLister, repo, &opts.SnapshotFilter, args) { + Verbosef("\nsnapshot %s of %v at %s)\n", sn.ID().Str(), sn.Paths, sn.Time) + changed, err := filterAndReplaceSnapshot(ctx, repo, sn, + func(ctx context.Context, sn *restic.Snapshot) (restic.ID, error) { + return rewriter.RewriteTree(ctx, repo, "/", *sn.Tree) + }, opts.DryRun, opts.Forget, "repaired") + if err != nil { + return errors.Fatalf("unable to rewrite snapshot ID %q: %v", sn.ID().Str(), err) + } + if changed { + changedCount++ + } + } + + Verbosef("\n") + if changedCount == 0 { + if !opts.DryRun { + Verbosef("no snapshots were modified\n") + } else { + Verbosef("no snapshots would be modified\n") + } + } else { + if !opts.DryRun { + Verbosef("modified %v snapshots\n", changedCount) + } else { + Verbosef("would modify %v snapshots\n", changedCount) + } + } + + return nil +} diff --git a/cmd/restic/cmd_rewrite.go b/cmd/restic/cmd_rewrite.go index 744686390..c08797c48 100644 --- a/cmd/restic/cmd_rewrite.go +++ b/cmd/restic/cmd_rewrite.go @@ -87,36 +87,67 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti return true } + rewriter := walker.NewTreeRewriter(walker.RewriteOpts{ + RewriteNode: func(node *restic.Node, path 
string) *restic.Node { + if selectByName(path) { + return node + } + Verbosef("%s", fmt.Sprintf("excluding %s\n", path)) // the path is data; it must never be interpreted as a format string + return nil + }, + DisableNodeCache: true, + }) + + return filterAndReplaceSnapshot(ctx, repo, sn, + func(ctx context.Context, sn *restic.Snapshot) (restic.ID, error) { + return rewriter.RewriteTree(ctx, repo, "/", *sn.Tree) + }, opts.DryRun, opts.Forget, "rewrite") +} + +func filterAndReplaceSnapshot(ctx context.Context, repo restic.Repository, sn *restic.Snapshot, filter func(ctx context.Context, sn *restic.Snapshot) (restic.ID, error), dryRun bool, forget bool, addTag string) (bool, error) { + wg, wgCtx := errgroup.WithContext(ctx) repo.StartPackUploader(wgCtx, wg) var filteredTree restic.ID wg.Go(func() error { - filteredTree, err = walker.FilterTree(wgCtx, repo, "/", *sn.Tree, &walker.TreeFilterVisitor{ - SelectByName: selectByName, - PrintExclude: func(path string) { Verbosef(fmt.Sprintf("excluding %s\n", path)) }, - }) + var err error + filteredTree, err = filter(wgCtx, sn) // run under the errgroup context, as the replaced FilterTree call did, so the filter is cancelled together with the group if err != nil { return err } return repo.Flush(wgCtx) }) - err = wg.Wait() + err := wg.Wait() if err != nil { return false, err } + if filteredTree.IsNull() { + if dryRun { + Verbosef("would delete empty snapshot\n") + } else { + h := restic.Handle{Type: restic.SnapshotFile, Name: sn.ID().String()} + if err = repo.Backend().Remove(ctx, h); err != nil { + return false, err + } + debug.Log("removed empty snapshot %v", sn.ID()) + Verbosef("removed empty snapshot %v\n", sn.ID().Str()) + } + return true, nil + } + if filteredTree == *sn.Tree { debug.Log("Snapshot %v not modified", sn) return false, nil } debug.Log("Snapshot %v modified", sn) - if opts.DryRun { + if dryRun { Verbosef("would save new snapshot\n") - if opts.Forget { + if forget { Verbosef("would remove old snapshot\n") } @@ -125,10 +156,10 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti // Always set the original snapshot id as this essentially a new snapshot.
sn.Original = sn.ID() - *sn.Tree = filteredTree + sn.Tree = &filteredTree - if !opts.Forget { - sn.AddTags([]string{"rewrite"}) + if !forget { + sn.AddTags([]string{addTag}) } // Save the new snapshot. @@ -138,7 +169,7 @@ func rewriteSnapshot(ctx context.Context, repo *repository.Repository, sn *resti } Verbosef("saved new snapshot %v\n", id.Str()) - if opts.Forget { + if forget { h := restic.Handle{Type: restic.SnapshotFile, Name: sn.ID().String()} if err = repo.Backend().Remove(ctx, h); err != nil { return false, err diff --git a/cmd/restic/integration_repair_snapshots_test.go b/cmd/restic/integration_repair_snapshots_test.go new file mode 100644 index 000000000..04ef6ad1d --- /dev/null +++ b/cmd/restic/integration_repair_snapshots_test.go @@ -0,0 +1,135 @@ +package main + +import ( + "context" + "hash/fnv" + "io" + "math/rand" + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func testRunRepairSnapshot(t testing.TB, gopts GlobalOptions, forget bool) { + opts := RepairOptions{ + Forget: forget, + } + + rtest.OK(t, runRepairSnapshots(context.TODO(), gopts, opts, nil)) +} + +func createRandomFile(t testing.TB, env *testEnvironment, path string, size int) { + fn := filepath.Join(env.testdata, path) + rtest.OK(t, os.MkdirAll(filepath.Dir(fn), 0o755)) + + h := fnv.New64() + _, err := h.Write([]byte(path)) + rtest.OK(t, err) + r := rand.New(rand.NewSource(int64(h.Sum64()))) + + f, err := os.OpenFile(fn, os.O_CREATE|os.O_RDWR, 0o644) + rtest.OK(t, err) + _, err = io.Copy(f, io.LimitReader(r, int64(size))) + rtest.OK(t, err) + rtest.OK(t, f.Close()) +} + +func TestRepairSnapshotsWithLostData(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testRunInit(t, env.gopts) + + createRandomFile(t, env, "foo/bar/file", 512*1024) + testRunBackup(t, "", []string{env.testdata}, BackupOptions{}, env.gopts) + testListSnapshots(t, env.gopts, 1) + // damage 
repository + removePacksExcept(env.gopts, t, restic.NewIDSet(), false) + + createRandomFile(t, env, "foo/bar/file2", 256*1024) + testRunBackup(t, "", []string{env.testdata}, BackupOptions{}, env.gopts) + snapshotIDs := testListSnapshots(t, env.gopts, 2) + testRunCheckMustFail(t, env.gopts) + + // repair but keep broken snapshots + testRunRebuildIndex(t, env.gopts) + testRunRepairSnapshot(t, env.gopts, false) + testListSnapshots(t, env.gopts, 4) + testRunCheckMustFail(t, env.gopts) + + // repository must be ok after removing the broken snapshots + testRunForget(t, env.gopts, snapshotIDs[0].String(), snapshotIDs[1].String()) + testListSnapshots(t, env.gopts, 2) + _, err := testRunCheckOutput(env.gopts) + rtest.OK(t, err) +} + +func TestRepairSnapshotsWithLostTree(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testRunInit(t, env.gopts) + + createRandomFile(t, env, "foo/bar/file", 12345) + testRunBackup(t, "", []string{env.testdata}, BackupOptions{}, env.gopts) + oldSnapshot := testListSnapshots(t, env.gopts, 1) + oldPacks := testRunList(t, "packs", env.gopts) + + // keep foo/bar unchanged + createRandomFile(t, env, "foo/bar2", 1024) + testRunBackup(t, "", []string{env.testdata}, BackupOptions{}, env.gopts) + testListSnapshots(t, env.gopts, 2) + + // remove tree for foo/bar and the now completely broken first snapshot + removePacks(env.gopts, t, restic.NewIDSet(oldPacks...)) + testRunForget(t, env.gopts, oldSnapshot[0].String()) + testRunCheckMustFail(t, env.gopts) + + // repair + testRunRebuildIndex(t, env.gopts) + testRunRepairSnapshot(t, env.gopts, true) + testListSnapshots(t, env.gopts, 1) + _, err := testRunCheckOutput(env.gopts) + rtest.OK(t, err) +} + +func TestRepairSnapshotsWithLostRootTree(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testRunInit(t, env.gopts) + + createRandomFile(t, env, "foo/bar/file", 12345) + testRunBackup(t, "", []string{env.testdata}, BackupOptions{}, env.gopts) + 
testListSnapshots(t, env.gopts, 1) + oldPacks := testRunList(t, "packs", env.gopts) + + // remove all trees + removePacks(env.gopts, t, restic.NewIDSet(oldPacks...)) + testRunCheckMustFail(t, env.gopts) + + // repair + testRunRebuildIndex(t, env.gopts) + testRunRepairSnapshot(t, env.gopts, true) + testListSnapshots(t, env.gopts, 0) + _, err := testRunCheckOutput(env.gopts) + rtest.OK(t, err) +} + +func TestRepairSnapshotsIntact(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + testSetupBackupData(t, env) + testRunBackup(t, filepath.Dir(env.testdata), []string{"testdata"}, BackupOptions{}, env.gopts) + oldSnapshotIDs := testListSnapshots(t, env.gopts, 1) + + // repairing an undamaged repository must leave the existing snapshot untouched + testRunRepairSnapshot(t, env.gopts, false) + snapshotIDs := testListSnapshots(t, env.gopts, 1) + rtest.Assert(t, reflect.DeepEqual(oldSnapshotIDs, snapshotIDs), "unexpected snapshot id mismatch %v vs. %v", oldSnapshotIDs, snapshotIDs) + testRunCheck(t, env.gopts) +} diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 10ebbaf13..211089253 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -100,6 +100,13 @@ func testRunList(t testing.TB, tpe string, opts GlobalOptions) restic.IDs { return parseIDsFromReader(t, buf) } +func testListSnapshots(t testing.TB, opts GlobalOptions, expected int) restic.IDs { + t.Helper() + snapshotIDs := testRunList(t, "snapshots", opts) + rtest.Assert(t, len(snapshotIDs) == expected, "expected %v snapshot, got %v", expected, snapshotIDs) + return snapshotIDs +} + func testRunRestore(t testing.TB, opts GlobalOptions, dir string, snapshotID restic.ID) { testRunRestoreExcludes(t, opts, dir, snapshotID, nil) } @@ -164,6 +171,11 @@ func testRunCheckOutput(gopts GlobalOptions) (string, error) { return buf.String(), err } +func testRunCheckMustFail(t testing.TB, gopts GlobalOptions) { + _, err := testRunCheckOutput(gopts) + rtest.Assert(t, err != nil,
"expected non nil error after check of damaged repository") +} + func testRunDiffOutput(gopts GlobalOptions, firstSnapshotID string, secondSnapshotID string) (string, error) { buf := bytes.NewBuffer(nil) @@ -188,7 +200,7 @@ func testRunRebuildIndex(t testing.TB, gopts GlobalOptions) { globalOptions.stdout = os.Stdout }() - rtest.OK(t, runRebuildIndex(context.TODO(), RebuildIndexOptions{}, gopts)) + rtest.OK(t, runRebuildIndex(context.TODO(), RepairIndexOptions{}, gopts)) } func testRunLs(t testing.TB, gopts GlobalOptions, snapshotID string) []string { @@ -486,7 +498,16 @@ func TestBackupNonExistingFile(t *testing.T) { testRunBackup(t, "", dirs, opts, env.gopts) } -func removePacksExcept(gopts GlobalOptions, t *testing.T, keep restic.IDSet, removeTreePacks bool) { +func removePacks(gopts GlobalOptions, t testing.TB, remove restic.IDSet) { + r, err := OpenRepository(context.TODO(), gopts) + rtest.OK(t, err) + + for id := range remove { + rtest.OK(t, r.Backend().Remove(context.TODO(), restic.Handle{Type: restic.PackFile, Name: id.String()})) + } +} + +func removePacksExcept(gopts GlobalOptions, t testing.TB, keep restic.IDSet, removeTreePacks bool) { r, err := OpenRepository(context.TODO(), gopts) rtest.OK(t, err) @@ -1504,8 +1525,8 @@ func testRebuildIndex(t *testing.T, backendTestHook backendWrapper) { t.Fatalf("expected no error from checker for test repository, got %v", err) } - if !strings.Contains(out, "restic rebuild-index") { - t.Fatalf("did not find hint for rebuild-index command") + if !strings.Contains(out, "restic repair index") { + t.Fatalf("did not find hint for repair index command") } env.gopts.backendTestHook = backendTestHook @@ -1518,7 +1539,7 @@ func testRebuildIndex(t *testing.T, backendTestHook backendWrapper) { } if err != nil { - t.Fatalf("expected no error from checker after rebuild-index, got: %v", err) + t.Fatalf("expected no error from checker after repair index, got: %v", err) } } @@ -1599,7 +1620,7 @@ func 
TestRebuildIndexFailsOnAppendOnly(t *testing.T) { env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) { return &appendOnlyBackend{r}, nil } - err := runRebuildIndex(context.TODO(), RebuildIndexOptions{}, env.gopts) + err := runRebuildIndex(context.TODO(), RepairIndexOptions{}, env.gopts) if err == nil { t.Error("expected rebuildIndex to fail") } @@ -1887,8 +1908,8 @@ func TestListOnce(t *testing.T) { testRunPrune(t, env.gopts, pruneOpts) rtest.OK(t, runCheck(context.TODO(), checkOpts, env.gopts, nil)) - rtest.OK(t, runRebuildIndex(context.TODO(), RebuildIndexOptions{}, env.gopts)) - rtest.OK(t, runRebuildIndex(context.TODO(), RebuildIndexOptions{ReadAllPacks: true}, env.gopts)) + rtest.OK(t, runRebuildIndex(context.TODO(), RepairIndexOptions{}, env.gopts)) + rtest.OK(t, runRebuildIndex(context.TODO(), RepairIndexOptions{ReadAllPacks: true}, env.gopts)) } func TestHardLink(t *testing.T) { diff --git a/doc/060_forget.rst b/doc/060_forget.rst index 2353ef6a0..72c7ae97f 100644 --- a/doc/060_forget.rst +++ b/doc/060_forget.rst @@ -472,7 +472,7 @@ space. However, a **failed** ``prune`` run can cause the repository to become **temporarily unusable**. Therefore, make sure that you have a stable connection to the repository storage, before running this command. In case the command fails, it may become necessary to manually remove all files from the `index/` folder of the repository and -run `rebuild-index` afterwards. +run `repair index` afterwards. To prevent accidental usages of the ``--unsafe-recover-no-free-space`` option it is necessary to first run ``prune --unsafe-recover-no-free-space SOME-ID`` and then replace diff --git a/doc/077_troubleshooting.rst b/doc/077_troubleshooting.rst new file mode 100644 index 000000000..fe317acfc --- /dev/null +++ b/doc/077_troubleshooting.rst @@ -0,0 +1,194 @@ +.. + Normally, there are no heading levels assigned to certain characters as the structure is + determined from the succession of headings. 
However, this convention is used in Python’s + Style Guide for documenting which you may follow: + + # with overline, for parts + * for chapters + = for sections + - for subsections + ^ for subsubsections + " for paragraphs + +######################### +Troubleshooting +######################### + +The repository format used by restic is designed to be error resistant. In +particular, commands like, for example, ``backup`` or ``prune`` can be interrupted +at *any* point in time without damaging the repository. You might have to run +``unlock`` manually though, but that's it. + +However, a repository might be damaged if some of its files are damaged or lost. +This can occur due to hardware failures, accidentally removing files from the +repository or bugs in the implementation of restic. + +The following steps will help you recover a repository. This guide does not cover +all possible types of repository damage. Thus, if the steps do not work for you +or you are unsure how to proceed, then ask for help. Please always include the +check output discussed in the next section and what steps you've taken to repair +the repository so far. + +* `Forum <https://forum.restic.net/>`_ +* Our IRC channel ``#restic`` on ``irc.libera.chat`` + +Make sure that you **use the latest available restic version**. It can contain +bugfixes and improvements to simplify the repair of a repository. It might also +contain a fix for your repository problems! + + +1. Find out what is damaged +*************************** + +The first step is always to check the repository. + +..
code-block:: console + + $ restic check --read-data + + using temporary cache in /tmp/restic-check-cache-1418935501 + repository 12345678 opened (version 2, compression level auto) + created new cache in /tmp/restic-check-cache-1418935501 + create exclusive lock for repository + load indexes + check all packs + check snapshots, trees and blobs + error for tree 7ef8ebab: + id 7ef8ebabc59aadda1a237d23ca7abac487b627a9b86508aa0194690446ff71f6 not found in repository + [0:02] 100.00% 7 / 7 snapshots + read all data + [0:05] 100.00% 25 / 25 packs + Fatal: repository contains errors + +.. note:: + + This will download the whole repository. If retrieving data from the backend is + expensive, then omit the ``--read-data`` option. Keep a copy of the check output + as it might be necessary later on! + +If the output contains warnings that the ``ciphertext verification failed`` for +some blobs in the repository, then please ask for help in the forum or our IRC +channel. These errors are often caused by hardware problems which **must** be +investigated and fixed. Otherwise, the backup will be damaged again and again. + +Similarly, if a repository is repeatedly damaged, please open an `issue on Github +<https://github.com/restic/restic/issues/new/choose>`_ as this could indicate a bug +somewhere. Please include the check output and additional information that might +help locate the problem. + + +2. Backup the repository +************************ + +Create a full copy of the repository if possible. Or at the very least make a +copy of the ``index`` and ``snapshots`` folders. This will allow you to roll back +the repository if the repair procedure fails. If your repository resides in a +cloud storage, then you can for example use `rclone <https://rclone.org/>`_ to +make such a copy. + +Please disable all regular operations on the repository to prevent unexpected +changes. Especially, ``forget`` or ``prune`` must be disabled as they could +remove data unexpectedly. + +..
warning:: + + If you suspect hardware problems, then you *must* investigate those first. + Otherwise, the repository will soon be damaged again. + +Please take the time to understand what the commands described in the following +do. If you are unsure, then ask for help in the forum or our IRC channel. Search +whether your issue is already known and solved. Please take a look at the +`forum`_ and `Github issues <https://github.com/restic/restic/issues>`_. + + +3. Repair the index +******************* + +Restic relies on its index to contain correct information about what data is +stored in the repository. Thus, the first step to repair a repository is to +repair the index: + +.. code-block:: console + + $ restic repair index + + repository a14e5863 opened (version 2, compression level auto) + loading indexes... + getting pack files to read... + removing not found pack file 83ad44f59b05f6bce13376b022ac3194f24ca19e7a74926000b6e316ec6ea5a4 + rebuilding index + [0:00] 100.00% 27 / 27 packs processed + deleting obsolete index files + [0:00] 100.00% 3 / 3 files deleted + done + +This ensures that no longer existing files are removed from the index. All later +steps to repair the repository rely on a correct index. That is, you must always +repair the index first! + +Please note that it is not recommended to repair the index unless the repository +is actually damaged. + + +4. Run all backups (optional) +***************************** + +With a correct index, the ``backup`` command guarantees that newly created +snapshots can be restored successfully. It can also heal older snapshots, +if the missing data is also contained in the new snapshot. + +Therefore, it is recommended to run all your ``backup`` tasks again. In some +cases, this is enough to fully repair the repository. + + +5. Remove missing data from snapshots +************************************* + +If your repository is still missing data, then you can use the ``repair snapshots`` +command to remove all inaccessible data from the snapshots.
That is, this will +result in a limited amount of data loss. Using the ``--forget`` option, the +command will automatically remove the original, damaged snapshots. + +.. code-block:: console + + $ restic repair snapshots --forget + + snapshot 6979421e of [/home/user/restic/restic] at 2022-11-02 20:59:18.617503315 +0100 CET) + file "/restic/internal/fuse/snapshots_dir.go": removed missing content + file "/restic/internal/restorer/restorer_unix_test.go": removed missing content + file "/restic/internal/walker/walker.go": removed missing content + saved new snapshot 7b094cea + removed old snapshot 6979421e + + modified 1 snapshots + +If you did not add the ``--forget`` option, then you have to manually delete all +modified snapshots using the ``forget`` command. In the example above, you'd have +to run ``restic forget 6979421e``. + + +6. Check the repository again +***************************** + +Phew, we're almost done now. To make sure that the repository has been successfully +repaired please run ``check`` again. + +.. code-block:: console + + $ restic check --read-data + + using temporary cache in /tmp/restic-check-cache-2569290785 + repository a14e5863 opened (version 2, compression level auto) + created new cache in /tmp/restic-check-cache-2569290785 + create exclusive lock for repository + load indexes + check all packs + check snapshots, trees and blobs + [0:00] 100.00% 7 / 7 snapshots + read all data + [0:00] 100.00% 25 / 25 packs + no errors were found + +If the ``check`` command did not complete with ``no errors were found``, then +the repository is still damaged. At this point, please ask for help at the +`forum`_ or our IRC channel ``#restic`` on ``irc.libera.chat``. 
diff --git a/doc/index.rst b/doc/index.rst index 034dbda23..8b72dcf58 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -14,6 +14,7 @@ Restic Documentation 060_forget 070_encryption 075_scripting + 077_troubleshooting 080_examples 090_participating 100_references diff --git a/doc/manual_rest.rst b/doc/manual_rest.rst index f812e3a70..093144722 100644 --- a/doc/manual_rest.rst +++ b/doc/manual_rest.rst @@ -35,8 +35,8 @@ Usage help is available: migrate Apply migrations mount Mount the repository prune Remove unneeded data from the repository - rebuild-index Build a new index recover Recover data from the repository not referenced by snapshots + repair Repair the repository restore Extract the data from a snapshot rewrite Rewrite snapshots to exclude unwanted files self-update Update the restic binary diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index a56965d63..3c1cc33d0 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -207,7 +207,7 @@ func (arch *Archiver) wrapLoadTreeError(id restic.ID, err error) error { if arch.Repo.Index().Has(restic.BlobHandle{ID: id, Type: restic.TreeBlob}) { err = errors.Errorf("tree %v could not be loaded; the repository could be damaged: %v", id, err) } else { - err = errors.Errorf("tree %v is not known; the repository could be damaged, run `rebuild-index` to try to repair it", id) + err = errors.Errorf("tree %v is not known; the repository could be damaged, run `repair index` to try to repair it", id) } return err } diff --git a/internal/walker/rewriter.go b/internal/walker/rewriter.go index cd05f69f5..649857032 100644 --- a/internal/walker/rewriter.go +++ b/internal/walker/rewriter.go @@ -9,13 +9,47 @@ import ( "github.com/restic/restic/internal/restic" ) -// SelectByNameFunc returns true for all items that should be included (files and -// dirs). If false is returned, files are ignored and dirs are not even walked. 
-type SelectByNameFunc func(item string) bool +type NodeRewriteFunc func(node *restic.Node, path string) *restic.Node +type FailedTreeRewriteFunc func(nodeID restic.ID, path string, err error) (restic.ID, error) -type TreeFilterVisitor struct { - SelectByName SelectByNameFunc - PrintExclude func(string) +type RewriteOpts struct { + // return nil to remove the node + RewriteNode NodeRewriteFunc + // decide what to do with a tree that could not be loaded. The returned ID is used as the replacement for the unloadable tree. By default the load error is returned which causes the operation to fail. + RewriteFailedTree FailedTreeRewriteFunc + + AllowUnstableSerialization bool + DisableNodeCache bool +} + +type idMap map[restic.ID]restic.ID + +type TreeRewriter struct { + opts RewriteOpts + + replaces idMap +} + +func NewTreeRewriter(opts RewriteOpts) *TreeRewriter { + rw := &TreeRewriter{ + opts: opts, + } + if !opts.DisableNodeCache { + rw.replaces = make(idMap) + } + // setup default implementations + if rw.opts.RewriteNode == nil { + rw.opts.RewriteNode = func(node *restic.Node, path string) *restic.Node { + return node + } + } + if rw.opts.RewriteFailedTree == nil { + // fail with error by default + rw.opts.RewriteFailedTree = func(nodeID restic.ID, path string, err error) (restic.ID, error) { + return restic.ID{}, err + } + } + return rw } type BlobLoadSaver interface { @@ -23,51 +57,58 @@ type BlobLoadSaver interface { restic.BlobLoader } -func FilterTree(ctx context.Context, repo BlobLoadSaver, nodepath string, nodeID restic.ID, visitor *TreeFilterVisitor) (newNodeID restic.ID, err error) { - curTree, err := restic.LoadTree(ctx, repo, nodeID) - if err != nil { - return restic.ID{}, err +func (t *TreeRewriter) RewriteTree(ctx context.Context, repo BlobLoadSaver, nodepath string, nodeID restic.ID) (newNodeID restic.ID, err error) { + // check if tree was already changed + newID, ok := t.replaces[nodeID] + if ok { + return newID, nil } - // check that we can properly encode this tree without losing
information - // The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use - // a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144 - testID, err := restic.SaveTree(ctx, repo, curTree) + // a nil nodeID will lead to a load error + curTree, err := restic.LoadTree(ctx, repo, nodeID) if err != nil { - return restic.ID{}, err + return t.opts.RewriteFailedTree(nodeID, nodepath, err) } - if nodeID != testID { - return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath) + + if !t.opts.AllowUnstableSerialization { + // check that we can properly encode this tree without losing information + // The alternative of using json/Decoder.DisallowUnknownFields() doesn't work as we use + // a custom UnmarshalJSON to decode trees, see also https://github.com/golang/go/issues/41144 + testID, err := restic.SaveTree(ctx, repo, curTree) + if err != nil { + return restic.ID{}, err + } + if nodeID != testID { + return restic.ID{}, fmt.Errorf("cannot encode tree at %q without losing information", nodepath) + } } debug.Log("filterTree: %s, nodeId: %s\n", nodepath, nodeID.Str()) - changed := false tb := restic.NewTreeJSONBuilder() for _, node := range curTree.Nodes { path := path.Join(nodepath, node.Name) - if !visitor.SelectByName(path) { - if visitor.PrintExclude != nil { - visitor.PrintExclude(path) - } - changed = true + node = t.opts.RewriteNode(node, path) + if node == nil { continue } - if node.Subtree == nil { + if node.Type != "dir" { err = tb.AddNode(node) if err != nil { return restic.ID{}, err } continue } - newID, err := FilterTree(ctx, repo, path, *node.Subtree, visitor) + // treat nil as null id + var subtree restic.ID + if node.Subtree != nil { + subtree = *node.Subtree + } + newID, err := t.RewriteTree(ctx, repo, path, subtree) if err != nil { return restic.ID{}, err } - if !node.Subtree.Equal(newID) { - changed = true - } node.Subtree = &newID err = tb.AddNode(node) if err 
!= nil { @@ -75,17 +116,18 @@ func FilterTree(ctx context.Context, repo BlobLoadSaver, nodepath string, nodeID } } - if changed { - tree, err := tb.Finalize() - if err != nil { - return restic.ID{}, err - } - - // Save new tree - newTreeID, _, _, err := repo.SaveBlob(ctx, restic.TreeBlob, tree, restic.ID{}, false) - debug.Log("filterTree: save new tree for %s as %v\n", nodepath, newTreeID) - return newTreeID, err + tree, err := tb.Finalize() + if err != nil { + return restic.ID{}, err } - return nodeID, nil + // Save new tree + newTreeID, _, _, err := repo.SaveBlob(ctx, restic.TreeBlob, tree, restic.ID{}, false) + if t.replaces != nil { + t.replaces[nodeID] = newTreeID + } + if !newTreeID.Equal(nodeID) { + debug.Log("filterTree: save new tree for %s as %v\n", nodepath, newTreeID) + } + return newTreeID, err } diff --git a/internal/walker/rewriter_test.go b/internal/walker/rewriter_test.go index 3dcf0ac9e..07ce5f72f 100644 --- a/internal/walker/rewriter_test.go +++ b/internal/walker/rewriter_test.go @@ -5,9 +5,9 @@ import ( "fmt" "testing" - "github.com/google/go-cmp/cmp" "github.com/pkg/errors" "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/test" ) // WritableTreeMap also support saving @@ -38,26 +38,26 @@ func (t WritableTreeMap) Dump() { } } -type checkRewriteFunc func(t testing.TB) (visitor TreeFilterVisitor, final func(testing.TB)) +type checkRewriteFunc func(t testing.TB) (rewriter *TreeRewriter, final func(testing.TB)) // checkRewriteItemOrder ensures that the order of the 'path' arguments is the one passed in as 'want'. 
func checkRewriteItemOrder(want []string) checkRewriteFunc { pos := 0 - return func(t testing.TB) (visitor TreeFilterVisitor, final func(testing.TB)) { - vis := TreeFilterVisitor{ - SelectByName: func(path string) bool { + return func(t testing.TB) (rewriter *TreeRewriter, final func(testing.TB)) { + rewriter = NewTreeRewriter(RewriteOpts{ + RewriteNode: func(node *restic.Node, path string) *restic.Node { if pos >= len(want) { t.Errorf("additional unexpected path found: %v", path) - return false + return nil } if path != want[pos] { t.Errorf("wrong path found, want %q, got %q", want[pos], path) } pos++ - return true + return node }, - } + }) final = func(t testing.TB) { if pos != len(want) { @@ -65,21 +65,20 @@ func checkRewriteItemOrder(want []string) checkRewriteFunc { } } - return vis, final + return rewriter, final } } -// checkRewriteSkips excludes nodes if path is in skipFor, it checks that all excluded entries are printed. +// checkRewriteSkips excludes nodes if path is in skipFor, it checks that rewriting proceeds in the correct order.
+func checkRewriteSkips(skipFor map[string]struct{}, want []string, disableCache bool) checkRewriteFunc { var pos int - printed := make(map[string]struct{}) - return func(t testing.TB) (visitor TreeFilterVisitor, final func(testing.TB)) { - vis := TreeFilterVisitor{ - SelectByName: func(path string) bool { + return func(t testing.TB) (rewriter *TreeRewriter, final func(testing.TB)) { + rewriter = NewTreeRewriter(RewriteOpts{ + RewriteNode: func(node *restic.Node, path string) *restic.Node { if pos >= len(want) { t.Errorf("additional unexpected path found: %v", path) - return false + return nil } if path != want[pos] { @@ -87,27 +86,40 @@ func checkRewriteSkips(skipFor map[string]struct{}, want []string) checkRewriteF } pos++ - _, ok := skipFor[path] - return !ok - }, - PrintExclude: func(s string) { - if _, ok := printed[s]; ok { - t.Errorf("path was already printed %v", s) + _, skip := skipFor[path] + if skip { + return nil } - printed[s] = struct{}{} + return node }, - } + DisableNodeCache: disableCache, + }) final = func(t testing.TB) { - if !cmp.Equal(skipFor, printed) { - t.Errorf("unexpected paths skipped: %s", cmp.Diff(skipFor, printed)) - } if pos != len(want) { t.Errorf("not enough items returned, want %d, got %d", len(want), pos) } } - return vis, final + return rewriter, final + } +} + +// checkIncreaseNodeSize modifies each node by changing its size. 
+func checkIncreaseNodeSize(increase uint64) checkRewriteFunc { + return func(t testing.TB) (rewriter *TreeRewriter, final func(testing.TB)) { + rewriter = NewTreeRewriter(RewriteOpts{ + RewriteNode: func(node *restic.Node, path string) *restic.Node { + if node.Type == "file" { + node.Size += increase + } + return node + }, + }) + + final = func(t testing.TB) {} + + return rewriter, final } } @@ -150,6 +162,7 @@ func TestRewriter(t *testing.T) { "/subdir", "/subdir/subfile", }, + false, ), }, { // exclude dir @@ -170,6 +183,91 @@ func TestRewriter(t *testing.T) { "/foo", "/subdir", }, + false, + ), + }, + { // modify node + tree: TestTree{ + "foo": TestFile{Size: 21}, + "subdir": TestTree{ + "subfile": TestFile{Size: 21}, + }, + }, + newTree: TestTree{ + "foo": TestFile{Size: 42}, + "subdir": TestTree{ + "subfile": TestFile{Size: 42}, + }, + }, + check: checkIncreaseNodeSize(21), + }, + { // test cache + tree: TestTree{ + // both subdirs are identical + "subdir1": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + "subdir2": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + }, + newTree: TestTree{ + "subdir1": TestTree{ + "subfile2": TestFile{}, + }, + "subdir2": TestTree{ + "subfile2": TestFile{}, + }, + }, + check: checkRewriteSkips( + map[string]struct{}{ + "/subdir1/subfile": {}, + }, + []string{ + "/subdir1", + "/subdir1/subfile", + "/subdir1/subfile2", + "/subdir2", + }, + false, + ), + }, + { // test disabled cache + tree: TestTree{ + // both subdirs are identical + "subdir1": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + "subdir2": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + }, + newTree: TestTree{ + "subdir1": TestTree{ + "subfile2": TestFile{}, + }, + "subdir2": TestTree{ + "subfile": TestFile{}, + "subfile2": TestFile{}, + }, + }, + check: checkRewriteSkips( + map[string]struct{}{ + "/subdir1/subfile": {}, + }, + []string{ + "/subdir1", + "/subdir1/subfile", + 
"/subdir1/subfile2", + "/subdir2", + "/subdir2/subfile", + "/subdir2/subfile2", + }, + true, ), }, } @@ -186,8 +284,8 @@ func TestRewriter(t *testing.T) { ctx, cancel := context.WithCancel(context.TODO()) defer cancel() - vis, last := test.check(t) - newRoot, err := FilterTree(ctx, modrepo, "/", root, &vis) + rewriter, last := test.check(t) + newRoot, err := rewriter.RewriteTree(ctx, modrepo, "/", root) if err != nil { t.Error(err) } @@ -213,10 +311,56 @@ func TestRewriterFailOnUnknownFields(t *testing.T) { ctx, cancel := context.WithCancel(context.TODO()) defer cancel() - // use nil visitor to crash if the tree loading works unexpectedly - _, err := FilterTree(ctx, tm, "/", id, nil) + + rewriter := NewTreeRewriter(RewriteOpts{ + RewriteNode: func(node *restic.Node, path string) *restic.Node { + // tree loading must not succeed + t.Fail() + return node + }, + }) + _, err := rewriter.RewriteTree(ctx, tm, "/", id) if err == nil { t.Error("missing error on unknown field") } + + // check that the serialization check can be disabled + rewriter = NewTreeRewriter(RewriteOpts{ + AllowUnstableSerialization: true, + }) + root, err := rewriter.RewriteTree(ctx, tm, "/", id) + test.OK(t, err) + _, expRoot := BuildTreeMap(TestTree{ + "subfile": TestFile{}, + }) + test.Assert(t, root == expRoot, "mismatched trees") +} + +func TestRewriterTreeLoadError(t *testing.T) { + tm := WritableTreeMap{TreeMap{}} + id := restic.NewRandomID() + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + // check that load errors cause the operation to fail by default + rewriter := NewTreeRewriter(RewriteOpts{}) + _, err := rewriter.RewriteTree(ctx, tm, "/", id) + if err == nil { + t.Fatal("missing error on unloadable tree") + } + + replacementID := restic.NewRandomID() + rewriter = NewTreeRewriter(RewriteOpts{ + RewriteFailedTree: func(nodeID restic.ID, path string, err error) (restic.ID, error) { + if nodeID != id || path != "/" { + t.Fail() + } + return replacementID, nil +
}, + }) + newRoot, err := rewriter.RewriteTree(ctx, tm, "/", id) + test.OK(t, err) + test.Equals(t, replacementID, newRoot) } diff --git a/internal/walker/walker_test.go b/internal/walker/walker_test.go index 6c4fd3436..8de1a9dc4 100644 --- a/internal/walker/walker_test.go +++ b/internal/walker/walker_test.go @@ -14,7 +14,9 @@ import ( type TestTree map[string]interface{} // TestNode is used to test the walker. -type TestFile struct{} +type TestFile struct { + Size uint64 +} func BuildTreeMap(tree TestTree) (m TreeMap, root restic.ID) { m = TreeMap{} @@ -37,6 +39,7 @@ func buildTreeMap(tree TestTree, m TreeMap) restic.ID { err := tb.AddNode(&restic.Node{ Name: name, Type: "file", + Size: elem.Size, }) if err != nil { panic(err)