From dbbeac7174c4c8e98076fbd2228b1805490b1c89 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Mon, 16 Aug 2021 16:02:01 +0200 Subject: [PATCH] prune: Add unsafe option to recover from no free space The new option allows prune to operate with nearly no scratch space by only removing no longer necessary pack files and first deleting the index before rebuilding it. By first deleting the index it becomes safe to just delete no longer necessary pack files. However, as a downside there's now the risk that the repository becomes inaccessible if prune fails. To recover from that problem a user might have to manually delete the repository index and then run (a full) `rebuild-index` again. --- changelog/unreleased/issue-1153 | 9 ++++++++ cmd/restic/cmd_prune.go | 41 ++++++++++++++++++++++++++++++--- cmd/restic/integration_test.go | 25 +++++++++++++------- doc/060_forget.rst | 28 ++++++++++++++++++++++ 4 files changed, 92 insertions(+), 11 deletions(-) create mode 100644 changelog/unreleased/issue-1153 diff --git a/changelog/unreleased/issue-1153 b/changelog/unreleased/issue-1153 new file mode 100644 index 000000000..c6eaa4ad2 --- /dev/null +++ b/changelog/unreleased/issue-1153 @@ -0,0 +1,9 @@ +Enhancement: Support pruning even after running out of disk space + +When running out of disk space it was no longer possible to add or remove +data from a repository. To help with recovering from such a deadlock, the +prune command now supports an `--unsafe-recover-no-free-space` option to +recover from such situations. Make sure to read the documentation first! + +https://github.com/restic/restic/issues/1153 +https://github.com/restic/restic/pull/3481 diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index 9447f8145..a6a8d0bde 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -39,7 +39,10 @@ Exit status is 0 if the command was successful, and non-zero if there was any er // PruneOptions collects all options for the cleanup command. 
type PruneOptions struct { - DryRun bool + DryRun bool + UnsafeNoSpaceRecovery string + + unsafeRecovery bool MaxUnused string maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused @@ -56,6 +59,7 @@ func init() { cmdRoot.AddCommand(cmdPrune) f := cmdPrune.Flags() f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done") + f.StringVarP(&pruneOptions.UnsafeNoSpaceRecovery, "unsafe-recover-no-free-space", "", "", "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first.") addPruneOptions(cmdPrune) } @@ -75,6 +79,10 @@ func verifyPruneOptions(opts *PruneOptions) error { } opts.MaxRepackBytes = uint64(size) } + if opts.UnsafeNoSpaceRecovery != "" { + // prevent repacking data to make sure users cannot get stuck. + opts.MaxRepackBytes = 0 + } maxUnused := strings.TrimSpace(opts.MaxUnused) if maxUnused == "" { @@ -136,6 +144,14 @@ func runPrune(opts PruneOptions, gopts GlobalOptions) error { return errors.Fatal("prune requires a backend connection limit of at least two") } + if opts.UnsafeNoSpaceRecovery != "" { + repoID := repo.Config().ID + if opts.UnsafeNoSpaceRecovery != repoID { + return errors.Fatalf("must pass id '%s' to --unsafe-recover-no-free-space", repoID) + } + opts.unsafeRecovery = true + } + lock, err := lockRepoExclusive(gopts.ctx, repo) defer unlockRepo(lock) if err != nil { @@ -522,7 +538,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB ignorePacks.Merge(removePacks) } - if len(ignorePacks) != 0 { + if opts.unsafeRecovery { + Verbosef("deleting index files\n") + indexFiles := repo.Index().(*repository.MasterIndex).IDs() + err = DeleteFilesChecked(gopts, repo, indexFiles, restic.IndexFile) + if err != nil { + return errors.Fatalf("%s", err) + } + } else if len(ignorePacks) != 0 
{ err = rebuildIndexFiles(gopts, repo, ignorePacks, nil) if err != nil { return errors.Fatalf("%s", err) @@ -534,11 +557,18 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB DeleteFiles(gopts, repo, removePacks, restic.PackFile) } + if opts.unsafeRecovery { + _, err = writeIndexFiles(gopts, repo, ignorePacks, nil) + if err != nil { + return errors.Fatalf("%s", err) + } + } + Verbosef("done\n") return nil } -func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error { +func writeIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) (restic.IDSet, error) { Verbosef("rebuilding index\n") idx := (repo.Index()).(*repository.MasterIndex) @@ -546,6 +576,11 @@ func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks bar := newProgressMax(!gopts.Quiet, packcount, "packs processed") obsoleteIndexes, err := idx.Save(gopts.ctx, repo, removePacks, extraObsolete, bar) bar.Done() + return obsoleteIndexes, err +} + +func rebuildIndexFiles(gopts GlobalOptions, repo restic.Repository, removePacks restic.IDSet, extraObsolete restic.IDs) error { + obsoleteIndexes, err := writeIndexFiles(gopts, repo, removePacks, extraObsolete) if err != nil { return err } diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index ebf63e930..792b825c2 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -1573,26 +1573,35 @@ func TestCheckRestoreNoLock(t *testing.T) { } func TestPrune(t *testing.T) { - t.Run("0", func(t *testing.T) { - opts := PruneOptions{MaxUnused: "0%"} + testPruneVariants(t, false) + testPruneVariants(t, true) +} + +func testPruneVariants(t *testing.T, unsafeNoSpaceRecovery bool) { + suffix := "" + if unsafeNoSpaceRecovery { + suffix = "-recovery" + } + t.Run("0"+suffix, func(t *testing.T) { + opts := PruneOptions{MaxUnused: "0%", unsafeRecovery: 
unsafeNoSpaceRecovery} checkOpts := CheckOptions{ReadData: true, CheckUnused: true} testPrune(t, opts, checkOpts) }) - t.Run("50", func(t *testing.T) { - opts := PruneOptions{MaxUnused: "50%"} + t.Run("50"+suffix, func(t *testing.T) { + opts := PruneOptions{MaxUnused: "50%", unsafeRecovery: unsafeNoSpaceRecovery} checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) - t.Run("unlimited", func(t *testing.T) { - opts := PruneOptions{MaxUnused: "unlimited"} + t.Run("unlimited"+suffix, func(t *testing.T) { + opts := PruneOptions{MaxUnused: "unlimited", unsafeRecovery: unsafeNoSpaceRecovery} checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) - t.Run("CachableOnly", func(t *testing.T) { - opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true} + t.Run("CachableOnly"+suffix, func(t *testing.T) { + opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true, unsafeRecovery: unsafeNoSpaceRecovery} checkOpts := CheckOptions{ReadData: true} testPrune(t, opts, checkOpts) }) diff --git a/doc/060_forget.rst b/doc/060_forget.rst index 55acc42ea..ab67368aa 100644 --- a/doc/060_forget.rst +++ b/doc/060_forget.rst @@ -444,3 +444,31 @@ The ``prune`` command accepts the following options: - ``--dry-run`` only show what ``prune`` would do. - ``--verbose`` increased verbosity shows additional statistics for ``prune``. + + +Recovering from "no free space" errors +************************************** + +In some cases when a repository has grown large enough to fill up all disk space or the +allocated quota, then ``prune`` might fail to free space. ``prune`` works in such a way +that a repository remains usable no matter at which point the command is interrupted. +However, this also means that ``prune`` requires some scratch space to work. + +In most cases it is sufficient to instruct ``prune`` to use as little scratch space as +possible by running it as ``prune --max-repack-size 0``. 
Note that for restic versions +before 0.13.0 ``prune --max-repack-size 1`` must be used. Obviously, this can only work +if several snapshots have been removed using ``forget`` before. This then allows the +``prune`` command to actually remove data from the repository. If the command succeeds, +but there is still little free space, then remove a few more snapshots and run ``prune`` again. + +If ``prune`` fails to complete, then ``prune --unsafe-recover-no-free-space SOME-ID`` +is available as a method of last resort. It allows ``prune`` to work with little to no free +space. However, a **failed** ``prune`` run can cause the repository to become +**temporarily unusable**. Therefore, make sure that you have a stable connection to the +repository storage before running this command. In case the command fails, it may become +necessary to manually remove all files from the ``index/`` folder of the repository and +run ``rebuild-index`` afterwards. + +To prevent accidental use of the ``--unsafe-recover-no-free-space`` option it is +necessary to first run ``prune --unsafe-recover-no-free-space SOME-ID`` and then replace +``SOME-ID`` with the requested ID.