Mirror of https://github.com/octoleo/restic.git, synced 2024-11-10 15:21:03 +00:00
Merge pull request #2718 from aawsome/new-cleanup-command
Reimplementation of prune
This commit is contained in: commit 5144141321
Changed file shown below: changelog/unreleased/pull-2718 | 22 lines added (new file)
changelog/unreleased/pull-2718 (new file)
@@ -0,0 +1,22 @@
+Enhancement: Improve pruning performance and make pruning more customizable
+
+The `prune` command is now much faster. This is especially the case for remote
+repositories or repositories with not much data to remove.
+Also the memory usage of the `prune` command is now reduced.
+
+By default, the `prune` command no longer removes all unused data. This
+behavior can be fine-tuned by new options, like the acceptable amount of unused space or
+the maximum size of data to reorganize. For more details, see
+https://restic.readthedocs.io/en/stable/060_forget.html
+
+Moreover, `prune` now accepts the `--dry-run` option and `forget --dry-run --prune`
+also shows what `prune` would do.
+
+Fixes several open issues, e.g.:
+https://github.com/restic/restic/issues/1140
+https://github.com/restic/restic/issues/1985
+https://github.com/restic/restic/issues/2112
+https://github.com/restic/restic/issues/2227
+https://github.com/restic/restic/issues/2305
+
+https://github.com/restic/restic/pull/2718
cmd/restic/cmd_forget.go
@@ -80,9 +80,15 @@ func init() {
 	f.BoolVar(&forgetOptions.Prune, "prune", false, "automatically run the 'prune' command if snapshots have been removed")

 	f.SortFlags = false
+	addPruneOptions(cmdForget)
 }

 func runForget(opts ForgetOptions, gopts GlobalOptions, args []string) error {
+	err := verifyPruneOptions(&pruneOptions)
+	if err != nil {
+		return err
+	}
+
 	repo, err := OpenRepository(gopts)
 	if err != nil {
 		return err
@@ -205,7 +211,11 @@ func runForget(opts ForgetOptions, gopts GlobalOptions, args []string) error {
 	}

 	if len(removeSnIDs) > 0 && opts.Prune && !opts.DryRun {
-		return pruneRepository(gopts, repo)
+		if !gopts.JSON {
+			Verbosef("%d snapshots have been removed, running prune\n", len(removeSnIDs))
+		}
+		pruneOptions.DryRun = opts.DryRun
+		return runPruneWithRepo(pruneOptions, gopts, repo, removeSnIDs)
 	}

 	return nil
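The `addPruneOptions(cmdForget)` call above is the whole wiring trick: one global options struct is bound to flags on both commands, so `forget --prune` accepts the same pruning flags as `prune` itself. A minimal, self-contained sketch of this cobra pattern (command and flag names here are illustrative, not restic's actual definitions):

    package main

    import (
        "fmt"

        "github.com/spf13/cobra"
    )

    // one options struct, bound to flags on more than one command
    type pruneOpts struct {
        MaxUnused string
    }

    var opts pruneOpts

    // addPruneFlags registers the shared flags on any command that triggers pruning.
    func addPruneFlags(c *cobra.Command) {
        c.Flags().StringVar(&opts.MaxUnused, "max-unused", "5%", "tolerated unused data")
    }

    func main() {
        prune := &cobra.Command{
            Use: "prune",
            RunE: func(cmd *cobra.Command, args []string) error {
                fmt.Println("prune, max-unused =", opts.MaxUnused)
                return nil
            },
        }
        forget := &cobra.Command{
            Use: "forget",
            RunE: func(cmd *cobra.Command, args []string) error {
                // forget can hand the very same opts to the prune implementation
                fmt.Println("forget, then prune, max-unused =", opts.MaxUnused)
                return nil
            },
        }
        addPruneFlags(prune)
        addPruneFlags(forget) // both commands now accept --max-unused
        root := &cobra.Command{Use: "app"}
        root.AddCommand(prune, forget)
        _ = root.Execute()
    }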
cmd/restic/cmd_prune.go
@@ -1,15 +1,24 @@
 package main

 import (
+	"math"
+	"sort"
+	"strconv"
+	"strings"
+
 	"github.com/restic/restic/internal/debug"
 	"github.com/restic/restic/internal/errors"
-	"github.com/restic/restic/internal/index"
+	"github.com/restic/restic/internal/pack"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"

 	"github.com/spf13/cobra"
 )

+var errorIndexIncomplete = errors.Fatal("index is not complete")
+var errorPacksMissing = errors.Fatal("packs from index missing in repo")
+var errorSizeNotMatching = errors.Fatal("pack size does not match calculated size from index")
+
 var cmdPrune = &cobra.Command{
 	Use:   "prune [flags]",
 	Short: "Remove unneeded data from the repository",
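The three package-level errors introduced here act as sentinels: a fixed error value is returned, and callers compare with `==` (the updated TestPruneWithDamagedRepository further below asserts `runPrune(...) == errorPacksMissing`). A minimal sketch of the pattern, using the standard errors package in place of restic's `errors.Fatal`:

    package main

    import (
        "errors"
        "fmt"
    )

    // sentinel error: a fixed value callers can test for identity
    var errIndexIncomplete = errors.New("index is not complete")

    func checkIndex(complete bool) error {
        if !complete {
            return errIndexIncomplete
        }
        return nil
    }

    func main() {
        err := checkIndex(false)
        // identity comparison works because the same value is always returned
        if err == errIndexIncomplete {
            fmt.Println("got the expected sentinel:", err)
        }
        // errors.Is is the more general form, also matching wrapped errors
        fmt.Println(errors.Is(err, errIndexIncomplete))
    }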
@@ -24,12 +33,91 @@ Exit status is 0 if the command was successful, and non-zero if there was any error.
 `,
 	DisableAutoGenTag: true,
 	RunE: func(cmd *cobra.Command, args []string) error {
-		return runPrune(globalOptions)
+		return runPrune(pruneOptions, globalOptions)
 	},
 }

+// PruneOptions collects all options for the cleanup command.
+type PruneOptions struct {
+	DryRun bool
+
+	MaxUnused      string
+	maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
+
+	MaxRepackSize  string
+	MaxRepackBytes uint64
+
+	RepackCachableOnly bool
+}
+
+var pruneOptions PruneOptions
+
 func init() {
 	cmdRoot.AddCommand(cmdPrune)
+	f := cmdPrune.Flags()
+	f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
+	addPruneOptions(cmdPrune)
+}
+
+func addPruneOptions(c *cobra.Command) {
+	f := c.Flags()
+	f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')")
+	f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)")
+	f.BoolVar(&pruneOptions.RepackCachableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable")
+}
+
+func verifyPruneOptions(opts *PruneOptions) error {
+	if len(opts.MaxRepackSize) > 0 {
+		size, err := parseSizeStr(opts.MaxRepackSize)
+		if err != nil {
+			return err
+		}
+		opts.MaxRepackBytes = uint64(size)
+	}
+
+	maxUnused := strings.TrimSpace(opts.MaxUnused)
+	if maxUnused == "" {
+		return errors.Fatalf("invalid value for --max-unused: %q", opts.MaxUnused)
+	}
+
+	// parse MaxUnused either as unlimited, a percentage, or an absolute number of bytes
+	switch {
+	case maxUnused == "unlimited":
+		opts.maxUnusedBytes = func(used uint64) uint64 {
+			return math.MaxUint64
+		}
+
+	case strings.HasSuffix(maxUnused, "%"):
+		maxUnused = strings.TrimSuffix(maxUnused, "%")
+		p, err := strconv.ParseFloat(maxUnused, 64)
+		if err != nil {
+			return errors.Fatalf("invalid percentage %q passed for --max-unused: %v", opts.MaxUnused, err)
+		}
+
+		if p < 0 {
+			return errors.Fatal("percentage for --max-unused must be positive")
+		}
+
+		if p >= 100 {
+			return errors.Fatal("percentage for --max-unused must be below 100%")
+		}
+
+		opts.maxUnusedBytes = func(used uint64) uint64 {
+			return uint64(p / (100 - p) * float64(used))
+		}
+
+	default:
+		size, err := parseSizeStr(maxUnused)
+		if err != nil {
+			return errors.Fatalf("invalid number of bytes %q for --max-unused: %v", opts.MaxUnused, err)
+		}
+
+		opts.maxUnusedBytes = func(used uint64) uint64 {
+			return uint64(size)
+		}
+	}
+
+	return nil
 }

 func shortenStatus(maxLength int, s string) string {
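The percentage branch above encodes a small piece of algebra: if at most p% of the repository may be unused after pruning, then unused/(used+unused) <= p/100, which rearranges to unused <= p/(100-p) * used — exactly the bound returned by the `maxUnusedBytes` closure. A runnable sketch of the same calculation (standalone; only the formula is taken from the code above):

    package main

    import "fmt"

    // maxUnusedBytes returns the largest tolerated number of unused bytes for a
    // repository containing `used` bytes of live data, such that
    // unused/(used+unused) stays below p percent.
    func maxUnusedBytes(p float64) func(used uint64) uint64 {
        return func(used uint64) uint64 {
            return uint64(p / (100 - p) * float64(used))
        }
    }

    func main() {
        limit := maxUnusedBytes(10) // --max-unused 10%
        used := uint64(450 << 20)   // 450 MiB of used data
        unused := limit(used)       // 50 MiB, since 50/(450+50) = 10%
        fmt.Printf("tolerated unused: %d MiB\n", unused>>20)
    }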
@@ -44,7 +132,12 @@ func shortenStatus(maxLength int, s string) string {
 	return s[:maxLength-3] + "..."
 }

-func runPrune(gopts GlobalOptions) error {
+func runPrune(opts PruneOptions, gopts GlobalOptions) error {
+	err := verifyPruneOptions(&opts)
+	if err != nil {
+		return err
+	}
+
 	repo, err := OpenRepository(gopts)
 	if err != nil {
 		return err
@@ -56,203 +149,345 @@ func runPrune(gopts GlobalOptions) error {
 		return err
 	}

+	return runPruneWithRepo(opts, gopts, repo, restic.NewIDSet())
+}
+
+func runPruneWithRepo(opts PruneOptions, gopts GlobalOptions, repo *repository.Repository, ignoreSnapshots restic.IDSet) error {
 	// we do not need index updates while pruning!
 	repo.DisableAutoIndexUpdate()

-	return pruneRepository(gopts, repo)
-}
-
-func mixedBlobs(list []restic.Blob) bool {
-	var tree, data bool
-
-	for _, pb := range list {
-		switch pb.Type {
-		case restic.TreeBlob:
-			tree = true
-		case restic.DataBlob:
-			data = true
-		}
-
-		if tree && data {
-			return true
-		}
-	}
-
-	return false
-}
-
-func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
-	ctx := gopts.ctx
-
-	err := repo.LoadIndex(ctx)
+	Verbosef("loading all snapshots...\n")
+	snapshots, err := restic.LoadAllSnapshots(gopts.ctx, repo, ignoreSnapshots)
 	if err != nil {
 		return err
 	}

-	var stats struct {
-		blobs     int
-		packs     int
-		snapshots int
-		bytes     int64
-	}
-
-	Verbosef("counting files in repo\n")
-	err = repo.List(ctx, restic.PackFile, func(restic.ID, int64) error {
-		stats.packs++
-		return nil
-	})
+	Verbosef("loading indexes...\n")
+	err = repo.LoadIndex(gopts.ctx)
 	if err != nil {
 		return err
 	}

-	Verbosef("building new index for repo\n")
-
-	bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
-	idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar)
-	if err != nil {
-		return err
-	}
-
-	for _, id := range invalidFiles {
-		Warnf("incomplete pack file (will be removed): %v\n", id)
-	}
-
-	blobs := 0
-	for _, pack := range idx.Packs {
-		stats.bytes += pack.Size
-		blobs += len(pack.Entries)
-	}
-	Verbosef("repository contains %v packs (%v blobs) with %v\n",
-		len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
-
-	blobCount := make(map[restic.BlobHandle]int)
-	var duplicateBlobs uint64
-	var duplicateBytes uint64
-
-	// find duplicate blobs
-	for _, p := range idx.Packs {
-		for _, entry := range p.Entries {
-			stats.blobs++
-			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
-			blobCount[h]++
-
-			if blobCount[h] > 1 {
-				duplicateBlobs++
-				duplicateBytes += uint64(entry.Length)
-			}
-		}
-	}
-
-	Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n",
-		stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
-	Verbosef("load all snapshots\n")
-
-	// find referenced blobs
-	snapshots, err := restic.LoadAllSnapshots(ctx, repo)
-	if err != nil {
-		return err
-	}
-
-	stats.snapshots = len(snapshots)
-
 	usedBlobs, err := getUsedBlobs(gopts, repo, snapshots)
 	if err != nil {
 		return err
 	}

-	var missingBlobs []restic.BlobHandle
-	for h := range usedBlobs {
-		if _, ok := blobCount[h]; !ok {
-			missingBlobs = append(missingBlobs, h)
-		}
-	}
+	return prune(opts, gopts, repo, usedBlobs)
+}
+
+type packInfo struct {
+	usedBlobs      uint
+	unusedBlobs    uint
+	duplicateBlobs uint
+	usedSize       uint64
+	unusedSize     uint64
+	tpe            restic.BlobType
+}
+
+type packInfoWithID struct {
+	ID restic.ID
+	packInfo
+}
+
+// prune selects which files to rewrite and then does that. The map usedBlobs is
+// modified in the process.
+func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedBlobs restic.BlobSet) error {
+	ctx := gopts.ctx
+
+	var stats struct {
+		blobs struct {
+			used      uint
+			duplicate uint
+			unused    uint
+			remove    uint
+			repack    uint
+			repackrm  uint
+		}
+		size struct {
+			used      uint64
+			duplicate uint64
+			unused    uint64
+			remove    uint64
+			repack    uint64
+			repackrm  uint64
+			unref     uint64
+		}
+		packs struct {
+			used       uint
+			unused     uint
+			partlyUsed uint
+			keep       uint
+		}
+	}

-	if len(missingBlobs) > 0 {
-		return errors.Fatalf("%v not found in the new index\n"+
+	Verbosef("searching used packs...\n")
+
+	keepBlobs := restic.NewBlobSet()
+	duplicateBlobs := restic.NewBlobSet()
+
+	// iterate over all blobs in index to find out which blobs are duplicates
+	for blob := range repo.Index().Each(ctx) {
+		bh := blob.Handle()
+		switch {
+		case usedBlobs.Has(bh): // used blob, move to keepBlobs
+			usedBlobs.Delete(bh)
+			keepBlobs.Insert(bh)
+		case keepBlobs.Has(bh): // duplicate blob
+			duplicateBlobs.Insert(bh)
+		}
+	}
+
+	// Check if all used blobs have been found in index
+	if len(usedBlobs) != 0 {
+		Warnf("%v not found in the new index\n"+
 			"Data blobs seem to be missing, aborting prune to prevent further data loss!\n"+
 			"Please report this error (along with the output of the 'prune' run) at\n"+
-			"https://github.com/restic/restic/issues/new/choose", missingBlobs)
+			"https://github.com/restic/restic/issues/new/choose", usedBlobs)
+		return errorIndexIncomplete
 	}

-	Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
-		len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))
+	indexPack := make(map[restic.ID]packInfo)

-	// find packs that need a rewrite
-	rewritePacks := restic.NewIDSet()
-	for _, pack := range idx.Packs {
-		if mixedBlobs(pack.Entries) {
-			rewritePacks.Insert(pack.ID)
-			continue
+	// iterate over all blobs in index to generate packInfo
+	for blob := range repo.Index().Each(ctx) {
+		ip, ok := indexPack[blob.PackID]
+		if !ok {
+			ip = packInfo{tpe: blob.Type, usedSize: pack.HeaderSize}
+		}
+		// mark mixed packs with "Invalid blob type"
+		if ip.tpe != blob.Type {
+			ip.tpe = restic.InvalidBlob
 		}

-		for _, blob := range pack.Entries {
-			h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
-			if !usedBlobs.Has(h) {
-				rewritePacks.Insert(pack.ID)
-				continue
-			}
-
-			if blobCount[h] > 1 {
-				rewritePacks.Insert(pack.ID)
-			}
-		}
-	}
-
-	removeBytes := duplicateBytes
-
-	// find packs that are unneeded
+		bh := blob.Handle()
+		size := uint64(pack.PackedSizeOfBlob(blob.Length))
+		switch {
+		case duplicateBlobs.Has(bh): // duplicate blob
+			ip.usedSize += size
+			ip.duplicateBlobs++
+			stats.size.duplicate += size
+			stats.blobs.duplicate++
+		case keepBlobs.Has(bh): // used blob, not duplicate
+			ip.usedSize += size
+			ip.usedBlobs++
+			stats.size.used += size
+			stats.blobs.used++
+		default: // unused blob
+			ip.unusedSize += size
+			ip.unusedBlobs++
+			stats.size.unused += size
+			stats.blobs.unused++
+		}
+		// update indexPack
+		indexPack[blob.PackID] = ip
+	}
+
+	Verbosef("collecting packs for deletion and repacking\n")
+	removePacksFirst := restic.NewIDSet()
 	removePacks := restic.NewIDSet()
+	repackPacks := restic.NewIDSet()

-	Verbosef("will remove %d invalid files\n", len(invalidFiles))
-	for _, id := range invalidFiles {
+	var repackCandidates []packInfoWithID
+
+	// loop over all packs and decide what to do
+	bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed")
+	bar.Start()
+	err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
+		p, ok := indexPack[id]
+		if !ok {
+			// Pack was not referenced in index and is not used => immediately remove!
+			Verboseff("will remove pack %v as it is unused and not indexed\n", id.Str())
+			removePacksFirst.Insert(id)
+			stats.size.unref += uint64(packSize)
+			return nil
+		}
+
+		if p.unusedSize+p.usedSize != uint64(packSize) {
+			Warnf("pack %s: calculated size %d does not match real size %d\nRun 'restic rebuild-index'.",
+				id.Str(), p.unusedSize+p.usedSize, packSize)
+			return errorSizeNotMatching
+		}
+
+		// statistics
+		switch {
+		case p.usedBlobs == 0 && p.duplicateBlobs == 0:
+			stats.packs.unused++
+		case p.unusedBlobs == 0:
+			stats.packs.used++
+		default:
+			stats.packs.partlyUsed++
+		}
+
+		// decide what to do
+		switch {
+		case p.usedBlobs == 0 && p.duplicateBlobs == 0:
+			// All blobs in pack are no longer used => remove pack!
 			removePacks.Insert(id)
+			stats.blobs.remove += p.unusedBlobs
+			stats.size.remove += p.unusedSize
+
+		case opts.RepackCachableOnly && p.tpe == restic.DataBlob:
+			// if this is a data pack and --repack-cacheable-only is set => keep pack!
+			stats.packs.keep++
+
+		case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob:
+			// All blobs in pack are used and not duplicates/mixed => keep pack!
+			stats.packs.keep++
+
+		default:
+			// all other packs are candidates for repacking
+			repackCandidates = append(repackCandidates, packInfoWithID{ID: id, packInfo: p})
 		}

-	for packID, p := range idx.Packs {
-		hasActiveBlob := false
-		for _, blob := range p.Entries {
-			h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
-			if usedBlobs.Has(h) {
-				hasActiveBlob = true
-				continue
-			}
-
-			removeBytes += uint64(blob.Length)
-		}
-
-		if hasActiveBlob {
-			continue
-		}
-
-		removePacks.Insert(packID)
-
-		if !rewritePacks.Has(packID) {
-			return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
-		}
-
-		rewritePacks.Delete(packID)
-	}
-
-	Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
-		len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))
-
-	var obsoletePacks restic.IDSet
-	if len(rewritePacks) != 0 {
-		bar := newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten")
-		obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar)
+		delete(indexPack, id)
+		bar.Report(restic.Stat{Blobs: 1})
+		return nil
+	})
+	bar.Done()
 	if err != nil {
 		return err
 	}
+
+	if len(indexPack) != 0 {
+		Warnf("The index references pack files which are missing from the repository: %v\n", indexPack)
+		return errorPacksMissing
 	}

-	removePacks.Merge(obsoletePacks)
+	repackAllPacksWithDuplicates := true
+
+	// calculate limit for number of unused bytes in the repo after repacking
+	maxUnusedSizeAfter := opts.maxUnusedBytes(stats.size.used)
+
+	// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.
+	// This is equivalent to sorting by unused / total space.
+	// Instead of unused[i] / used[i] > unused[j] / used[j] we use
+	// unused[i] * used[j] > unused[j] * used[i] as uint32*uint32 < uint64
+	// Morover duplicates and mixed are sorted to the beginning
+	sort.Slice(repackCandidates, func(i, j int) bool {
+		pi := repackCandidates[i].packInfo
+		pj := repackCandidates[j].packInfo
+		switch {
+		case pi.duplicateBlobs > 0 && pj.duplicateBlobs == 0:
+			return true
+		case pj.duplicateBlobs > 0 && pi.duplicateBlobs == 0:
+			return false
+		case pi.tpe == restic.InvalidBlob && pj.tpe != restic.InvalidBlob:
+			return true
+		case pj.tpe == restic.InvalidBlob && pi.tpe != restic.InvalidBlob:
+			return false
+		}
+		return pi.unusedSize*pj.usedSize > pj.unusedSize*pi.usedSize
+	})
+
+	repack := func(id restic.ID, p packInfo) {
+		repackPacks.Insert(id)
+		stats.blobs.repack += p.unusedBlobs + p.duplicateBlobs + p.usedBlobs
+		stats.size.repack += p.unusedSize + p.usedSize
+		stats.blobs.repackrm += p.unusedBlobs
+		stats.size.repackrm += p.unusedSize
+	}
+
+	for _, p := range repackCandidates {
+		reachedUnusedSizeAfter := (stats.size.unused-stats.size.remove-stats.size.repackrm < maxUnusedSizeAfter)
+
+		reachedRepackSize := false
+		if opts.MaxRepackBytes > 0 {
+			reachedRepackSize = stats.size.repack+p.unusedSize+p.usedSize > opts.MaxRepackBytes
+		}
+
+		switch {
+		case !reachedRepackSize && (p.duplicateBlobs > 0 || p.tpe == restic.InvalidBlob):
+			// repacking duplicates/mixed is only limited by repackSize
+			repack(p.ID, p.packInfo)
+
+		case reachedUnusedSizeAfter, reachedRepackSize:
+			// for all other packs stop repacking if tolerated unused size is reached.
+			stats.packs.keep++
+			if p.duplicateBlobs > 0 {
+				repackAllPacksWithDuplicates = false
+			}
+
+		default:
+			repack(p.ID, p.packInfo)
+		}
+	}
+
+	// if all duplicates are repacked, print out correct statistics
+	if repackAllPacksWithDuplicates {
+		stats.blobs.repackrm += stats.blobs.duplicate
+		stats.size.repackrm += stats.size.duplicate
+	}
+
+	Verboseff("\nused: %10d blobs / %s\n", stats.blobs.used, formatBytes(stats.size.used))
+	if stats.blobs.duplicate > 0 {
+		Verboseff("duplicates: %10d blobs / %s\n", stats.blobs.duplicate, formatBytes(stats.size.duplicate))
+	}
+	Verboseff("unused: %10d blobs / %s\n", stats.blobs.unused, formatBytes(stats.size.unused))
+	if stats.size.unref > 0 {
+		Verboseff("unreferenced: %s\n", formatBytes(stats.size.unref))
+	}
+	totalBlobs := stats.blobs.used + stats.blobs.unused + stats.blobs.duplicate
+	totalSize := stats.size.used + stats.size.duplicate + stats.size.unused + stats.size.unref
+	Verboseff("total: %10d blobs / %s\n", totalBlobs, formatBytes(totalSize))
+	Verboseff("unused size: %s of total size\n", formatPercent(stats.size.unused, totalSize))
+
+	Verbosef("\nto repack: %10d blobs / %s\n", stats.blobs.repack, formatBytes(stats.size.repack))
+	Verbosef("this removes %10d blobs / %s\n", stats.blobs.repackrm, formatBytes(stats.size.repackrm))
+	Verbosef("to delete: %10d blobs / %s\n", stats.blobs.remove, formatBytes(stats.size.remove+stats.size.unref))
+	totalPruneSize := stats.size.remove + stats.size.repackrm + stats.size.unref
+	Verbosef("total prune: %10d blobs / %s\n", stats.blobs.remove+stats.blobs.repackrm, formatBytes(totalPruneSize))
+	Verbosef("remaining: %10d blobs / %s\n", totalBlobs-(stats.blobs.remove+stats.blobs.repackrm), formatBytes(totalSize-totalPruneSize))
+	unusedAfter := stats.size.unused - stats.size.remove - stats.size.repackrm
+	Verbosef("unused size after prune: %s (%s of remaining size)\n",
+		formatBytes(unusedAfter), formatPercent(unusedAfter, totalSize-totalPruneSize))
+	Verbosef("\n")
+	Verboseff("totally used packs: %10d\n", stats.packs.used)
+	Verboseff("partly used packs: %10d\n", stats.packs.partlyUsed)
+	Verboseff("unused packs: %10d\n\n", stats.packs.unused)
+
+	Verboseff("to keep: %10d packs\n", stats.packs.keep)
+	Verboseff("to repack: %10d packs\n", len(repackPacks))
+	Verboseff("to delete: %10d packs\n", len(removePacks))
+	if len(removePacksFirst) > 0 {
+		Verboseff("to delete: %10d unreferenced packs\n\n", len(removePacksFirst))
+	}
+
+	if opts.DryRun {
+		if !gopts.JSON && gopts.verbosity >= 2 {
+			if len(removePacksFirst) > 0 {
+				Printf("Would have removed the following unreferenced packs:\n%v\n\n", removePacksFirst)
+			}
+			Printf("Would have repacked and removed the following packs:\n%v\n\n", repackPacks)
+			Printf("Would have removed the following no longer used packs:\n%v\n\n", removePacks)
+		}
+		// Always quit here if DryRun was set!
+		return nil
+	}
+
+	// unreferenced packs can be safely deleted first
+	if len(removePacksFirst) != 0 {
+		Verbosef("deleting unreferenced packs\n")
+		DeleteFiles(gopts, repo, removePacksFirst, restic.PackFile)
+	}
+
+	if len(repackPacks) != 0 {
+		Verbosef("repacking packs\n")
+		bar := newProgressMax(!gopts.Quiet, uint64(len(repackPacks)), "packs repacked")
+		_, err := repository.Repack(ctx, repo, repackPacks, keepBlobs, bar)
+		if err != nil {
+			return err
+		}
+		// Also remove repacked packs
+		removePacks.Merge(repackPacks)
+	}
+
+	if len(removePacks) != 0 {
 		if err = rebuildIndex(ctx, repo, removePacks); err != nil {
 			return err
 		}

-	if len(removePacks) != 0 {
-		Verbosef("remove %d old packs\n", len(removePacks))
+		Verbosef("removing %d old packs\n", len(removePacks))
 		DeleteFiles(gopts, repo, removePacks, restic.PackFile)
 	}

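A note on the sort.Slice comparator in the hunk above: it ranks repack candidates by the ratio unused/used without dividing, by cross-multiplying — unused_i*used_j > unused_j*used_i — which is exact in integer arithmetic and stays well-defined when a pack's used size is zero (the code's comment also notes the products fit in a uint64). A small self-contained demonstration of the same trick:

    package main

    import (
        "fmt"
        "sort"
    )

    type packStat struct{ unused, used uint64 }

    func main() {
        packs := []packStat{
            {unused: 10, used: 90},
            {unused: 80, used: 20},
            {unused: 0, used: 100},
            {unused: 50, used: 0}, // fully unused: a ratio would divide by zero
        }
        // Sort by descending unused/used ratio without any division:
        // unused_i/used_i > unused_j/used_j  <=>  unused_i*used_j > unused_j*used_i
        // for non-negative sizes, and the cross-product form handles used == 0.
        sort.Slice(packs, func(i, j int) bool {
            return packs[i].unused*packs[j].used > packs[j].unused*packs[i].used
        })
        // packs with the most reclaimable space come first
        fmt.Println(packs) // [{50 0} {80 20} {10 90} {0 100}]
    }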
@@ -263,7 +498,7 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
 func getUsedBlobs(gopts GlobalOptions, repo restic.Repository, snapshots []*restic.Snapshot) (usedBlobs restic.BlobSet, err error) {
 	ctx := gopts.ctx

-	Verbosef("find data that is still in use for %d snapshots\n", len(snapshots))
+	Verbosef("finding data that is still in use for %d snapshots\n", len(snapshots))

 	usedBlobs = restic.NewBlobSet()
cmd/restic/global.go
@@ -231,6 +231,13 @@ func Verbosef(format string, args ...interface{}) {
 	}
 }

+// Verboseff calls Printf to write the message when the verbosity is >= 2
+func Verboseff(format string, args ...interface{}) {
+	if globalOptions.verbosity >= 2 {
+		Printf(format, args...)
+	}
+}
+
 // PrintProgress wraps fmt.Printf to handle the difference in writing progress
 // information to terminals and non-terminal stdout
 func PrintProgress(format string, args ...interface{}) {
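Verboseff completes a small verbosity ladder next to Verbosef: messages gated at level >= 2 instead of >= 1, so the detailed prune statistics only appear with extra --verbose. A minimal sketch of the gating pattern (the names here are stand-ins, not restic's actual globals):

    package main

    import "fmt"

    var verbosity = 1 // 0 = quiet, 1 = --verbose, 2 = --verbose=2

    func verbosef(format string, args ...interface{}) {
        if verbosity >= 1 {
            fmt.Printf(format, args...)
        }
    }

    func verboseff(format string, args ...interface{}) {
        if verbosity >= 2 {
            fmt.Printf(format, args...)
        }
    }

    func main() {
        verbosef("summary line, shown at verbosity 1\n")
        verboseff("detailed statistics, shown only at verbosity 2\n")
    }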
cmd/restic/integration_test.go
@@ -270,8 +270,8 @@ func testRunForgetJSON(t testing.TB, gopts GlobalOptions, args ...string) {
 		"Expected 2 snapshots to be removed, got %v", len(forgets[0].Remove))
 }

-func testRunPrune(t testing.TB, gopts GlobalOptions) {
-	rtest.OK(t, runPrune(gopts))
+func testRunPrune(t testing.TB, gopts GlobalOptions, opts PruneOptions) {
+	rtest.OK(t, runPrune(opts, gopts))
 }

 func testSetupBackupData(t testing.TB, env *testEnvironment) string {
@@ -1386,6 +1386,32 @@ func TestCheckRestoreNoLock(t *testing.T) {
 }

 func TestPrune(t *testing.T) {
+	t.Run("0", func(t *testing.T) {
+		opts := PruneOptions{MaxUnused: "0%"}
+		checkOpts := CheckOptions{ReadData: true, CheckUnused: true}
+		testPrune(t, opts, checkOpts)
+	})
+
+	t.Run("50", func(t *testing.T) {
+		opts := PruneOptions{MaxUnused: "50%"}
+		checkOpts := CheckOptions{ReadData: true}
+		testPrune(t, opts, checkOpts)
+	})
+
+	t.Run("unlimited", func(t *testing.T) {
+		opts := PruneOptions{MaxUnused: "unlimited"}
+		checkOpts := CheckOptions{ReadData: true}
+		testPrune(t, opts, checkOpts)
+	})
+
+	t.Run("CachableOnly", func(t *testing.T) {
+		opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true}
+		checkOpts := CheckOptions{ReadData: true}
+		testPrune(t, opts, checkOpts)
+	})
+}
+
+func testPrune(t *testing.T, pruneOpts PruneOptions, checkOpts CheckOptions) {
 	env, cleanup := withTestEnvironment(t)
 	defer cleanup()

@@ -1406,10 +1432,12 @@ func TestPrune(t *testing.T) {

 	testRunForgetJSON(t, env.gopts)
 	testRunForget(t, env.gopts, firstSnapshot[0].String())
-	testRunPrune(t, env.gopts)
-	testRunCheck(t, env.gopts)
+	testRunPrune(t, env.gopts, pruneOpts)
+	rtest.OK(t, runCheck(checkOpts, env.gopts, nil))
 }

+var pruneDefaultOptions = PruneOptions{MaxUnused: "5%"}
+
 func listPacks(gopts GlobalOptions, t *testing.T) restic.IDSet {
 	r, err := OpenRepository(gopts)
 	rtest.OK(t, err)
@@ -1452,14 +1480,8 @@ func TestPruneWithDamagedRepository(t *testing.T) {
 		"expected one snapshot, got %v", snapshotIDs)

 	// prune should fail
-	err := runPrune(env.gopts)
-	if err == nil {
-		t.Fatalf("expected prune to fail")
-	}
-	if !strings.Contains(err.Error(), "blobs seem to be missing") {
-		t.Fatalf("did not find hint for missing blobs")
-	}
-	t.Log(err)
+	rtest.Assert(t, runPrune(pruneDefaultOptions, env.gopts) == errorPacksMissing,
+		"prune should have reported index not complete error")
 }

 // Test repos for edge cases
@@ -1469,37 +1491,37 @@ func TestEdgeCaseRepos(t *testing.T) {
 	// repo where index is completely missing
 	// => check and prune should fail
 	t.Run("no-index", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-index-missing.tar.gz", opts, false, false)
+		testEdgeCaseRepo(t, "repo-index-missing.tar.gz", opts, pruneDefaultOptions, false, false)
 	})

 	// repo where an existing and used blob is missing from the index
-	// => check should fail, prune should heal this
+	// => check and prune should fail
 	t.Run("index-missing-blob", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-index-missing-blob.tar.gz", opts, false, true)
+		testEdgeCaseRepo(t, "repo-index-missing-blob.tar.gz", opts, pruneDefaultOptions, false, false)
 	})

 	// repo where a blob is missing
 	// => check and prune should fail
 	t.Run("no-data", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-data-missing.tar.gz", opts, false, false)
+		testEdgeCaseRepo(t, "repo-data-missing.tar.gz", opts, pruneDefaultOptions, false, false)
 	})

 	// repo where data exists that is not referenced
 	// => check and prune should fully work
 	t.Run("unreferenced-data", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-unreferenced-data.tar.gz", opts, true, true)
+		testEdgeCaseRepo(t, "repo-unreferenced-data.tar.gz", opts, pruneDefaultOptions, true, true)
 	})

 	// repo where an obsolete index still exists
 	// => check and prune should fully work
 	t.Run("obsolete-index", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-obsolete-index.tar.gz", opts, true, true)
+		testEdgeCaseRepo(t, "repo-obsolete-index.tar.gz", opts, pruneDefaultOptions, true, true)
 	})

 	// repo which contains mixed (data/tree) packs
 	// => check and prune should fully work
 	t.Run("mixed-packs", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-mixed.tar.gz", opts, true, true)
+		testEdgeCaseRepo(t, "repo-mixed.tar.gz", opts, pruneDefaultOptions, true, true)
 	})

 	// repo which contains duplicate blobs
@@ -1510,11 +1532,11 @@ func TestEdgeCaseRepos(t *testing.T) {
 		CheckUnused: true,
 	}
 	t.Run("duplicates", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-duplicates.tar.gz", opts, false, true)
+		testEdgeCaseRepo(t, "repo-duplicates.tar.gz", opts, pruneDefaultOptions, false, true)
 	})
 }

-func testEdgeCaseRepo(t *testing.T, tarfile string, options CheckOptions, checkOK, pruneOK bool) {
+func testEdgeCaseRepo(t *testing.T, tarfile string, optionsCheck CheckOptions, optionsPrune PruneOptions, checkOK, pruneOK bool) {
 	env, cleanup := withTestEnvironment(t)
 	defer cleanup()

@@ -1524,15 +1546,15 @@ func testEdgeCaseRepo(t *testing.T, tarfile string, options CheckOptions, checkO
 	if checkOK {
 		testRunCheck(t, env.gopts)
 	} else {
-		rtest.Assert(t, runCheck(options, env.gopts, nil) != nil,
+		rtest.Assert(t, runCheck(optionsCheck, env.gopts, nil) != nil,
 			"check should have reported an error")
 	}

 	if pruneOK {
-		testRunPrune(t, env.gopts)
+		testRunPrune(t, env.gopts, optionsPrune)
 		testRunCheck(t, env.gopts)
 	} else {
-		rtest.Assert(t, runPrune(env.gopts) != nil,
+		rtest.Assert(t, runPrune(optionsPrune, env.gopts) != nil,
 			"prune should have reported an error")
 	}
 }
doc/060_forget.rst
@@ -23,12 +23,11 @@ data that was referenced by the snapshot from the repository. This can
 be automated with the ``--prune`` option of the ``forget`` command,
 which runs ``prune`` automatically if snapshots have been removed.

 .. Warning::

-    Pruning snapshots can be a very time-consuming process, taking nearly
-    as long as backups themselves. During a prune operation, the index is
-    locked and backups cannot be completed. Performance improvements are
-    planned for this feature.
+    Pruning snapshots can be a time-consuming process, depending on the
+    amount of snapshots and data to process. During a prune operation, the
+    repository is locked and backups cannot be completed. Please plan your
+    pruning so that there's time to complete it and it doesn't interfere with
+    regular backup runs.

 It is advisable to run ``restic check`` after pruning, to make sure
 you are alerted, should the internal data structures of the repository
|
|||||||
|
|
||||||
$ restic -r /srv/restic-repo prune
|
$ restic -r /srv/restic-repo prune
|
||||||
enter password for repository:
|
enter password for repository:
|
||||||
|
repository 33002c5e opened successfully, password is correct
|
||||||
|
loading all snapshots...
|
||||||
|
loading indexes...
|
||||||
|
finding data that is still in use for 4 snapshots
|
||||||
|
[0:00] 100.00% 4 / 4 snapshots
|
||||||
|
searching used packs...
|
||||||
|
collecting packs for deletion and repacking
|
||||||
|
[0:00] 100.00% 5 / 5 packs processed
|
||||||
|
|
||||||
|
to repack: 69 blobs / 1.078 MiB
|
||||||
|
this removes 67 blobs / 1.047 MiB
|
||||||
|
to delete: 7 blobs / 25.726 KiB
|
||||||
|
total prune: 74 blobs / 1.072 MiB
|
||||||
|
remaining: 16 blobs / 38.003 KiB
|
||||||
|
unused size after prune: 0 B (0.00% of remaining size)
|
||||||
|
|
||||||
|
repacking packs
|
||||||
|
[0:00] 100.00% 2 / 2 packs repacked
|
||||||
counting files in repo
|
counting files in repo
|
||||||
building new index for repo
|
[0:00] 100.00% 3 / 3 packs
|
||||||
[0:00] 100.00% 22 / 22 files
|
finding old index files
|
||||||
repository contains 22 packs (8512 blobs) with 100.092 MiB bytes
|
saved new indexes as [59270b3a]
|
||||||
processed 8512 blobs: 0 duplicate blobs, 0B duplicate
|
remove 4 old index files
|
||||||
load all snapshots
|
[0:00] 100.00% 4 / 4 files deleted
|
||||||
find data that is still in use for 1 snapshots
|
removing 3 old packs
|
||||||
[0:00] 100.00% 1 / 1 snapshots
|
[0:00] 100.00% 3 / 3 files deleted
|
||||||
found 8433 of 8512 data blobs still in use
|
|
||||||
will rewrite 3 packs
|
|
||||||
creating new index
|
|
||||||
[0:00] 86.36% 19 / 22 files
|
|
||||||
saved new index as 544a5084
|
|
||||||
done
|
done
|
||||||
|
|
||||||
Afterwards the repository is smaller.
|
Afterwards the repository is smaller.
|
||||||
@ -119,19 +130,31 @@ to ``forget``:
|
|||||||
8c02b94b 2017-02-21 10:48:33 mopped /home/user/work
|
8c02b94b 2017-02-21 10:48:33 mopped /home/user/work
|
||||||
|
|
||||||
1 snapshots have been removed, running prune
|
1 snapshots have been removed, running prune
|
||||||
counting files in repo
|
loading all snapshots...
|
||||||
building new index for repo
|
loading indexes...
|
||||||
[0:00] 100.00% 37 / 37 packs
|
finding data that is still in use for 1 snapshots
|
||||||
repository contains 37 packs (5521 blobs) with 151.012 MiB bytes
|
|
||||||
processed 5521 blobs: 0 duplicate blobs, 0B duplicate
|
|
||||||
load all snapshots
|
|
||||||
find data that is still in use for 1 snapshots
|
|
||||||
[0:00] 100.00% 1 / 1 snapshots
|
[0:00] 100.00% 1 / 1 snapshots
|
||||||
found 5323 of 5521 data blobs still in use, removing 198 blobs
|
searching used packs...
|
||||||
will delete 0 packs and rewrite 27 packs, this frees 22.106 MiB
|
collecting packs for deletion and repacking
|
||||||
creating new index
|
[0:00] 100.00% 5 / 5 packs processed
|
||||||
[0:00] 100.00% 30 / 30 packs
|
|
||||||
saved new index as b49f3e68
|
to repack: 69 blobs / 1.078 MiB
|
||||||
|
this removes 67 blobs / 1.047 MiB
|
||||||
|
to delete: 7 blobs / 25.726 KiB
|
||||||
|
total prune: 74 blobs / 1.072 MiB
|
||||||
|
remaining: 16 blobs / 38.003 KiB
|
||||||
|
unused size after prune: 0 B (0.00% of remaining size)
|
||||||
|
|
||||||
|
repacking packs
|
||||||
|
[0:00] 100.00% 2 / 2 packs repacked
|
||||||
|
counting files in repo
|
||||||
|
[0:00] 100.00% 3 / 3 packs
|
||||||
|
finding old index files
|
||||||
|
saved new indexes as [59270b3a]
|
||||||
|
remove 4 old index files
|
||||||
|
[0:00] 100.00% 4 / 4 files deleted
|
||||||
|
removing 3 old packs
|
||||||
|
[0:00] 100.00% 3 / 3 files deleted
|
||||||
done
|
done
|
||||||
|
|
||||||
Removing snapshots according to a policy
|
Removing snapshots according to a policy
|
||||||
@@ -282,3 +305,59 @@ last-day-of-the-months (11 or 12 depends if the 5 weeklies cross a month).
 And finally 75 last-day-of-the-year snapshots. All other snapshots are
 removed.

+Customize pruning
+*****************
+
+To understand the custom options, we first explain how the pruning process works:
+
+1. All snapshots and directories within snapshots are scanned to determine
+   which data is still in use.
+2. For all files in the repository, restic finds out if the file is fully
+   used, partly used or completely unused.
+3. Completely unused files are marked for deletion. Fully used files are kept.
+   A partially used file is either kept or marked for repacking depending on user
+   options.
+
+   Note that for repacking, restic must download the file from the repository
+   storage and re-upload the needed data in the repository. This can be very
+   time-consuming for remote repositories.
+4. After deciding what to do, ``prune`` will actually perform the repack, modify
+   the index according to the changes and delete the obsolete files.
+
+The ``prune`` command accepts the following options:
+
+- ``--max-unused limit`` allow unused data up to the specified limit within the repository.
+  This allows restic to keep partly used files instead of repacking them.
+
+  The limit can be specified in several ways:
+
+  * As an absolute size (e.g. ``200M``). If you want to minimize the space
+    used by your repository, pass ``0`` to this option.
+  * As a size relative to the total repo size (e.g. ``10%``). This means that
+    after prune, at most ``10%`` of the total data stored in the repo may be
+    unused data. If the repo after prune has a size of 500MB, then at most
+    50MB may be unused.
+  * If the string ``unlimited`` is passed, there is no limit for partly
+    unused files. This means that as long as some data is still used within
+    a file stored in the repo, restic will just leave it there. Use this if
+    you want to minimize the time and bandwidth used by the ``prune``
+    operation.
+
+  Restic tries to repack as little data as possible while still ensuring this
+  limit for unused data.
+
+- ``--max-repack-size size`` if set limits the total size of files to repack.
+  As ``prune`` first stores all repacked files and deletes the obsolete files at the end,
+  this option might be handy if you expect many files to be repacked and fear to run low
+  on storage.
+
+- ``--repack-cacheable-only`` if set to true only files which contain
+  metadata and would be stored in the cache are repacked. Other pack files are
+  not repacked if this option is set. This allows a very fast repacking
+  using only cached data. It can, however, imply that the unused data in
+  your repository exceeds the value given by ``--max-unused``.
+  The default value is false.
+
+- ``--dry-run`` only show what ``prune`` would do.
+
+- ``--verbose`` increased verbosity shows additional statistics for ``prune``.
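The relative form of ``--max-unused`` added above maps directly onto the bound the implementation enforces. Writing s for used bytes, u for unused bytes and p for the percentage limit, a short derivation:

    \frac{u}{s+u} \le \frac{p}{100}
    \quad\Longleftrightarrow\quad
    100\,u \le p\,(s+u)
    \quad\Longleftrightarrow\quad
    u \le \frac{p}{100-p}\,s

For the documented example, p = 10 and s + u = 500MB give u <= 50MB, matching the prose above and the closure built in verifyPruneOptions.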
internal/pack/pack.go
@@ -161,13 +161,16 @@ func (p *Packer) String() string {
 }

 var (
-	// size of the header-length field at the end of the file
-	headerLengthSize = binary.Size(uint32(0))
 	// we require at least one entry in the header, and one blob for a pack file
 	minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
 )

 const (
+	// size of the header-length field at the end of the file; it is a uint32
+	headerLengthSize = 4
+	// constant overhead of the header independent of #entries
+	HeaderSize = headerLengthSize + crypto.Extension
+
 	maxHeaderSize = 16 * 1024 * 1024
 	// number of header enries to download as part of header-length request
 	eagerEntries = 15
@@ -315,3 +318,8 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, err

 	return entries, nil
 }
+
+// PackedSizeOfBlob returns the size a blob actually uses when saved in a pack
+func PackedSizeOfBlob(blobLength uint) uint {
+	return blobLength + entrySize
+}
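With these two exports, the size check in the new prune code (`p.unusedSize+p.usedSize != packSize`, with usedSize seeded from `pack.HeaderSize`) becomes plain arithmetic: a pack file's size is the sum of its blobs' packed sizes plus a constant header tail. A sketch of that accounting — the concrete byte values below are assumptions for illustration only, not restic's actual definitions of entrySize or crypto.Extension:

    package main

    import "fmt"

    // Illustrative values; restic defines the real ones in internal/pack
    // and internal/crypto.
    const (
        headerLengthSize = 4  // uint32 length field at the end of the file
        cryptoExtension  = 32 // assumed per-encryption overhead
        entrySize        = 37 // assumed per-blob header entry size
        // constant header overhead, independent of the number of entries
        headerSize = headerLengthSize + cryptoExtension
    )

    // packedSizeOfBlob mirrors pack.PackedSizeOfBlob: blob data plus one header entry.
    func packedSizeOfBlob(blobLength uint) uint {
        return blobLength + entrySize
    }

    func main() {
        blobs := []uint{1000, 2000, 4096}
        var size uint = headerSize
        for _, l := range blobs {
            size += packedSizeOfBlob(l)
        }
        // this total is what prune compares against the file's real size
        fmt.Println("calculated pack size:", size, "bytes")
    }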
internal/restic/blob.go
@@ -19,6 +19,10 @@ func (b Blob) String() string {
 		b.Type, b.ID.Str(), b.Offset, b.Length)
 }

+func (b Blob) Handle() BlobHandle {
+	return BlobHandle{ID: b.ID, Type: b.Type}
+}
+
 // PackedBlob is a blob stored within a file.
 type PackedBlob struct {
 	Blob
internal/restic/snapshot.go
@@ -67,8 +67,12 @@ func LoadSnapshot(ctx context.Context, repo Repository, id ID) (*Snapshot, error
 }

 // LoadAllSnapshots returns a list of all snapshots in the repo.
-func LoadAllSnapshots(ctx context.Context, repo Repository) (snapshots []*Snapshot, err error) {
+// If a snapshot ID is in excludeIDs, it will not be included in the result.
+func LoadAllSnapshots(ctx context.Context, repo Repository, excludeIDs IDSet) (snapshots []*Snapshot, err error) {
 	err = repo.List(ctx, SnapshotFile, func(id ID, size int64) error {
+		if excludeIDs.Has(id) {
+			return nil
+		}
 		sn, err := LoadSnapshot(ctx, repo, id)
 		if err != nil {
 			return err

internal/restic/snapshot_test.go
@@ -25,7 +25,7 @@ func TestCreateSnapshot(t *testing.T) {
 		restic.TestCreateSnapshot(t, repo, testSnapshotTime.Add(time.Duration(i)*time.Second), testDepth, 0)
 	}

-	snapshots, err := restic.LoadAllSnapshots(context.TODO(), repo)
+	snapshots, err := restic.LoadAllSnapshots(context.TODO(), repo, restic.NewIDSet())
 	if err != nil {
 		t.Fatal(err)
 	}
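The new excludeIDs parameter is what lets ``forget --prune`` skip the snapshots it has just deleted: runForget passes removeSnIDs through runPruneWithRepo into LoadAllSnapshots, so the forgotten snapshots no longer count as "in use". A minimal stand-alone sketch of the exclude-set pattern, with a toy set type in place of restic.IDSet:

    package main

    import "fmt"

    // idSet is a minimal stand-in for restic.IDSet: membership drives exclusion.
    type idSet map[string]struct{}

    func (s idSet) Has(id string) bool { _, ok := s[id]; return ok }

    // loadAll mirrors the new LoadAllSnapshots contract: list everything,
    // but skip IDs present in excludeIDs.
    func loadAll(all []string, excludeIDs idSet) []string {
        var out []string
        for _, id := range all {
            if excludeIDs.Has(id) {
                continue
            }
            out = append(out, id)
        }
        return out
    }

    func main() {
        all := []string{"5144a4", "8c02b9", "79766a"}
        exclude := idSet{"8c02b9": {}} // a snapshot that forget just removed
        fmt.Println(loadAll(all, exclude)) // [5144a4 79766a]
    }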