2
2
mirror of https://github.com/octoleo/restic.git synced 2024-09-28 22:49:01 +00:00
restic/cmd/restic/cmd_prune.go
Michael Eischer 3ba19869be prune: Abort if any used blobs are missing
The previous check only approximately verified whether all required
blobs were found. However, after forgetting a few snapshots the
repository contains lots of unused blobs whose number can be sufficient
to make up for missing packs.

When coupled with a malfunctioning backend that temporarily returns broken
data this could cause restic to regard the corresponding packs as
invalid and thereby delete data that's still in use. This change lets
restic play it safe and refuse to delete anything if data is missing.
2020-08-16 11:34:01 +02:00

316 lines
6.9 KiB
Go

package main
import (
"fmt"
"time"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/index"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
"github.com/spf13/cobra"
)
var cmdPrune = &cobra.Command{
Use: "prune [flags]",
Short: "Remove unneeded data from the repository",
Long: `
The "prune" command checks the repository and removes data that is not
referenced and therefore not needed any more.
EXIT STATUS
===========
Exit status is 0 if the command was successful, and non-zero if there was any error.
`,
DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error {
return runPrune(globalOptions)
},
}
func init() {
cmdRoot.AddCommand(cmdPrune)
}
func shortenStatus(maxLength int, s string) string {
if len(s) <= maxLength {
return s
}
if maxLength < 3 {
return s[:maxLength]
}
return s[:maxLength-3] + "..."
}
// newProgressMax returns a progress that counts blobs.
func newProgressMax(show bool, max uint64, description string) *restic.Progress {
if !show {
return nil
}
p := restic.NewProgress()
p.OnUpdate = func(s restic.Stat, d time.Duration, ticker bool) {
status := fmt.Sprintf("[%s] %s %d / %d %s",
formatDuration(d),
formatPercent(s.Blobs, max),
s.Blobs, max, description)
if w := stdoutTerminalWidth(); w > 0 {
status = shortenStatus(w, status)
}
PrintProgress("%s", status)
}
p.OnDone = func(s restic.Stat, d time.Duration, ticker bool) {
fmt.Printf("\n")
}
return p
}
func runPrune(gopts GlobalOptions) error {
repo, err := OpenRepository(gopts)
if err != nil {
return err
}
lock, err := lockRepoExclusive(repo)
defer unlockRepo(lock)
if err != nil {
return err
}
// we do not need index updates while pruning!
repo.DisableAutoIndexUpdate()
return pruneRepository(gopts, repo)
}
func mixedBlobs(list []restic.Blob) bool {
var tree, data bool
for _, pb := range list {
switch pb.Type {
case restic.TreeBlob:
tree = true
case restic.DataBlob:
data = true
}
if tree && data {
return true
}
}
return false
}
func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
ctx := gopts.ctx
err := repo.LoadIndex(ctx)
if err != nil {
return err
}
var stats struct {
blobs int
packs int
snapshots int
bytes int64
}
Verbosef("counting files in repo\n")
err = repo.List(ctx, restic.PackFile, func(restic.ID, int64) error {
stats.packs++
return nil
})
if err != nil {
return err
}
Verbosef("building new index for repo\n")
bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar)
if err != nil {
return err
}
for _, id := range invalidFiles {
Warnf("incomplete pack file (will be removed): %v\n", id)
}
blobs := 0
for _, pack := range idx.Packs {
stats.bytes += pack.Size
blobs += len(pack.Entries)
}
Verbosef("repository contains %v packs (%v blobs) with %v\n",
len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
blobCount := make(map[restic.BlobHandle]int)
var duplicateBlobs uint64
var duplicateBytes uint64
// find duplicate blobs
for _, p := range idx.Packs {
for _, entry := range p.Entries {
stats.blobs++
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
blobCount[h]++
if blobCount[h] > 1 {
duplicateBlobs++
duplicateBytes += uint64(entry.Length)
}
}
}
Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n",
stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
Verbosef("load all snapshots\n")
// find referenced blobs
snapshots, err := restic.LoadAllSnapshots(ctx, repo)
if err != nil {
return err
}
stats.snapshots = len(snapshots)
usedBlobs, err := getUsedBlobs(gopts, repo, snapshots)
if err != nil {
return err
}
for h := range usedBlobs {
if _, ok := blobCount[h]; !ok {
return errors.Fatalf("At least one data blob seems to be missing, aborting prune to prevent further data loss!\n" +
"Please report this error (along with the output of the 'prune' run) at\n" +
"https://github.com/restic/restic/issues/new")
}
}
Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))
// find packs that need a rewrite
rewritePacks := restic.NewIDSet()
for _, pack := range idx.Packs {
if mixedBlobs(pack.Entries) {
rewritePacks.Insert(pack.ID)
continue
}
for _, blob := range pack.Entries {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if !usedBlobs.Has(h) {
rewritePacks.Insert(pack.ID)
continue
}
if blobCount[h] > 1 {
rewritePacks.Insert(pack.ID)
}
}
}
removeBytes := duplicateBytes
// find packs that are unneeded
removePacks := restic.NewIDSet()
Verbosef("will remove %d invalid files\n", len(invalidFiles))
for _, id := range invalidFiles {
removePacks.Insert(id)
}
for packID, p := range idx.Packs {
hasActiveBlob := false
for _, blob := range p.Entries {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if usedBlobs.Has(h) {
hasActiveBlob = true
continue
}
removeBytes += uint64(blob.Length)
}
if hasActiveBlob {
continue
}
removePacks.Insert(packID)
if !rewritePacks.Has(packID) {
return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
}
rewritePacks.Delete(packID)
}
Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))
var obsoletePacks restic.IDSet
if len(rewritePacks) != 0 {
bar := newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten")
obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar)
if err != nil {
return err
}
}
removePacks.Merge(obsoletePacks)
if err = rebuildIndex(ctx, repo, removePacks); err != nil {
return err
}
if len(removePacks) != 0 {
Verbosef("remove %d old packs\n", len(removePacks))
DeleteFiles(gopts, repo, removePacks, restic.PackFile)
}
Verbosef("done\n")
return nil
}
func getUsedBlobs(gopts GlobalOptions, repo restic.Repository, snapshots []*restic.Snapshot) (usedBlobs restic.BlobSet, err error) {
ctx := gopts.ctx
Verbosef("find data that is still in use for %d snapshots\n", len(snapshots))
usedBlobs = restic.NewBlobSet()
bar := newProgressMax(!gopts.Quiet, uint64(len(snapshots)), "snapshots")
bar.Start()
defer bar.Done()
for _, sn := range snapshots {
debug.Log("process snapshot %v", sn.ID())
err = restic.FindUsedBlobs(ctx, repo, *sn.Tree, usedBlobs)
if err != nil {
if repo.Backend().IsNotExist(err) {
return nil, errors.Fatal("unable to load a tree from the repo: " + err.Error())
}
return nil, err
}
debug.Log("processed snapshot %v", sn.ID())
bar.Report(restic.Stat{Blobs: 1})
}
return usedBlobs, nil
}