Mirror of https://github.com/octoleo/restic.git, synced 2024-11-10 15:21:03 +00:00
Merge pull request #2718 from aawsome/new-cleanup-command
Reimplementation of prune
This commit is contained in: commit 5144141321
Changed file shown below: changelog/unreleased/pull-2718 | 22 lines added (new file)
changelog/unreleased/pull-2718 (new file)
@@ -0,0 +1,22 @@
+Enhancement: Improve pruning performance and make pruning more customizable
+
+The `prune` command is now much faster. This is especially the case for remote
+repositories or repositories with not much data to remove.
+Also the memory usage of the `prune` command is now reduced.
+
+By default, the `prune` command no longer removes all unused data. This
+behavior can be fine-tuned by new options, like the acceptable amount of unused space or
+the maximum size of data to reorganize. For more details, see
+https://restic.readthedocs.io/en/stable/060_forget.html
+
+Moreover, `prune` now accepts the `--dry-run` option and `forget --dry-run --prune`
+also shows what `prune` would do.
+
+Fixes several open issues, e.g.:
+https://github.com/restic/restic/issues/1140
+https://github.com/restic/restic/issues/1985
+https://github.com/restic/restic/issues/2112
+https://github.com/restic/restic/issues/2227
+https://github.com/restic/restic/issues/2305
+
+https://github.com/restic/restic/pull/2718
cmd/restic/cmd_forget.go
@@ -80,9 +80,15 @@ func init() {
 	f.BoolVar(&forgetOptions.Prune, "prune", false, "automatically run the 'prune' command if snapshots have been removed")

 	f.SortFlags = false
+	addPruneOptions(cmdForget)
 }

 func runForget(opts ForgetOptions, gopts GlobalOptions, args []string) error {
+	err := verifyPruneOptions(&pruneOptions)
+	if err != nil {
+		return err
+	}
+
 	repo, err := OpenRepository(gopts)
 	if err != nil {
 		return err
@@ -205,7 +211,11 @@ func runForget(opts ForgetOptions, gopts GlobalOptions, args []string) error {
 	}

 	if len(removeSnIDs) > 0 && opts.Prune && !opts.DryRun {
-		return pruneRepository(gopts, repo)
+		if !gopts.JSON {
+			Verbosef("%d snapshots have been removed, running prune\n", len(removeSnIDs))
+		}
+		pruneOptions.DryRun = opts.DryRun
+		return runPruneWithRepo(pruneOptions, gopts, repo, removeSnIDs)
 	}

 	return nil
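The `addPruneOptions(cmdForget)` call above is the whole wiring trick: one global options struct is bound to flags on both commands, so `forget --prune` accepts the same pruning flags as `prune` itself. A minimal, self-contained sketch of this cobra pattern (command and flag names here are illustrative, not restic's actual definitions):

    package main

    import (
        "fmt"

        "github.com/spf13/cobra"
    )

    // one options struct, bound to flags on more than one command
    type pruneOpts struct {
        MaxUnused string
    }

    var opts pruneOpts

    // addPruneFlags registers the shared flags on any command that triggers pruning.
    func addPruneFlags(c *cobra.Command) {
        c.Flags().StringVar(&opts.MaxUnused, "max-unused", "5%", "tolerated unused data")
    }

    func main() {
        prune := &cobra.Command{
            Use: "prune",
            RunE: func(cmd *cobra.Command, args []string) error {
                fmt.Println("prune, max-unused =", opts.MaxUnused)
                return nil
            },
        }
        forget := &cobra.Command{
            Use: "forget",
            RunE: func(cmd *cobra.Command, args []string) error {
                // forget can hand the very same opts to the prune implementation
                fmt.Println("forget, then prune, max-unused =", opts.MaxUnused)
                return nil
            },
        }
        addPruneFlags(prune)
        addPruneFlags(forget) // both commands now accept --max-unused
        root := &cobra.Command{Use: "app"}
        root.AddCommand(prune, forget)
        _ = root.Execute()
    }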
cmd/restic/cmd_prune.go
@@ -1,15 +1,24 @@
 package main

 import (
+	"math"
+	"sort"
+	"strconv"
+	"strings"
+
 	"github.com/restic/restic/internal/debug"
 	"github.com/restic/restic/internal/errors"
-	"github.com/restic/restic/internal/index"
+	"github.com/restic/restic/internal/pack"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"

 	"github.com/spf13/cobra"
 )

+var errorIndexIncomplete = errors.Fatal("index is not complete")
+var errorPacksMissing = errors.Fatal("packs from index missing in repo")
+var errorSizeNotMatching = errors.Fatal("pack size does not match calculated size from index")
+
 var cmdPrune = &cobra.Command{
 	Use:   "prune [flags]",
 	Short: "Remove unneeded data from the repository",
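The three package-level errors introduced here act as sentinels: a fixed error value is returned, and callers compare with `==` (the updated TestPruneWithDamagedRepository further below asserts `runPrune(...) == errorPacksMissing`). A minimal sketch of the pattern, using the standard errors package in place of restic's `errors.Fatal`:

    package main

    import (
        "errors"
        "fmt"
    )

    // sentinel error: a fixed value callers can test for identity
    var errIndexIncomplete = errors.New("index is not complete")

    func checkIndex(complete bool) error {
        if !complete {
            return errIndexIncomplete
        }
        return nil
    }

    func main() {
        err := checkIndex(false)
        // identity comparison works because the same value is always returned
        if err == errIndexIncomplete {
            fmt.Println("got the expected sentinel:", err)
        }
        // errors.Is is the more general form, also matching wrapped errors
        fmt.Println(errors.Is(err, errIndexIncomplete))
    }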
@@ -24,12 +33,91 @@ Exit status is 0 if the command was successful, and non-zero if there was any error.
 `,
 	DisableAutoGenTag: true,
 	RunE: func(cmd *cobra.Command, args []string) error {
-		return runPrune(globalOptions)
+		return runPrune(pruneOptions, globalOptions)
 	},
 }

+// PruneOptions collects all options for the cleanup command.
+type PruneOptions struct {
+	DryRun bool
+
+	MaxUnused      string
+	maxUnusedBytes func(used uint64) (unused uint64) // calculates the number of unused bytes after repacking, according to MaxUnused
+
+	MaxRepackSize  string
+	MaxRepackBytes uint64
+
+	RepackCachableOnly bool
+}
+
+var pruneOptions PruneOptions
+
 func init() {
 	cmdRoot.AddCommand(cmdPrune)
+	f := cmdPrune.Flags()
+	f.BoolVarP(&pruneOptions.DryRun, "dry-run", "n", false, "do not modify the repository, just print what would be done")
+	addPruneOptions(cmdPrune)
+}
+
+func addPruneOptions(c *cobra.Command) {
+	f := c.Flags()
+	f.StringVar(&pruneOptions.MaxUnused, "max-unused", "5%", "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')")
+	f.StringVar(&pruneOptions.MaxRepackSize, "max-repack-size", "", "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)")
+	f.BoolVar(&pruneOptions.RepackCachableOnly, "repack-cacheable-only", false, "only repack packs which are cacheable")
+}
+
+func verifyPruneOptions(opts *PruneOptions) error {
+	if len(opts.MaxRepackSize) > 0 {
+		size, err := parseSizeStr(opts.MaxRepackSize)
+		if err != nil {
+			return err
+		}
+		opts.MaxRepackBytes = uint64(size)
+	}
+
+	maxUnused := strings.TrimSpace(opts.MaxUnused)
+	if maxUnused == "" {
+		return errors.Fatalf("invalid value for --max-unused: %q", opts.MaxUnused)
+	}
+
+	// parse MaxUnused either as unlimited, a percentage, or an absolute number of bytes
+	switch {
+	case maxUnused == "unlimited":
+		opts.maxUnusedBytes = func(used uint64) uint64 {
+			return math.MaxUint64
+		}
+
+	case strings.HasSuffix(maxUnused, "%"):
+		maxUnused = strings.TrimSuffix(maxUnused, "%")
+		p, err := strconv.ParseFloat(maxUnused, 64)
+		if err != nil {
+			return errors.Fatalf("invalid percentage %q passed for --max-unused: %v", opts.MaxUnused, err)
+		}
+
+		if p < 0 {
+			return errors.Fatal("percentage for --max-unused must be positive")
+		}
+
+		if p >= 100 {
+			return errors.Fatal("percentage for --max-unused must be below 100%")
+		}
+
+		opts.maxUnusedBytes = func(used uint64) uint64 {
+			return uint64(p / (100 - p) * float64(used))
+		}
+
+	default:
+		size, err := parseSizeStr(maxUnused)
+		if err != nil {
+			return errors.Fatalf("invalid number of bytes %q for --max-unused: %v", opts.MaxUnused, err)
+		}
+
+		opts.maxUnusedBytes = func(used uint64) uint64 {
+			return uint64(size)
+		}
+	}
+
+	return nil
 }

 func shortenStatus(maxLength int, s string) string {
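The percentage branch above encodes a small piece of algebra: if at most p% of the repository may be unused after pruning, then unused/(used+unused) <= p/100, which rearranges to unused <= p/(100-p) * used — exactly the bound returned by the `maxUnusedBytes` closure. A runnable sketch of the same calculation (standalone; only the formula is taken from the code above):

    package main

    import "fmt"

    // maxUnusedBytes returns the largest tolerated number of unused bytes for a
    // repository containing `used` bytes of live data, such that
    // unused/(used+unused) stays below p percent.
    func maxUnusedBytes(p float64) func(used uint64) uint64 {
        return func(used uint64) uint64 {
            return uint64(p / (100 - p) * float64(used))
        }
    }

    func main() {
        limit := maxUnusedBytes(10) // --max-unused 10%
        used := uint64(450 << 20)   // 450 MiB of used data
        unused := limit(used)       // 50 MiB, since 50/(450+50) = 10%
        fmt.Printf("tolerated unused: %d MiB\n", unused>>20)
    }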
@@ -44,7 +132,12 @@ func shortenStatus(maxLength int, s string) string {
 	return s[:maxLength-3] + "..."
 }

-func runPrune(gopts GlobalOptions) error {
+func runPrune(opts PruneOptions, gopts GlobalOptions) error {
+	err := verifyPruneOptions(&opts)
+	if err != nil {
+		return err
+	}
+
 	repo, err := OpenRepository(gopts)
 	if err != nil {
 		return err
@@ -56,203 +149,345 @@ func runPrune(gopts GlobalOptions) error {
 		return err
 	}

+	return runPruneWithRepo(opts, gopts, repo, restic.NewIDSet())
+}
+
+func runPruneWithRepo(opts PruneOptions, gopts GlobalOptions, repo *repository.Repository, ignoreSnapshots restic.IDSet) error {
 	// we do not need index updates while pruning!
 	repo.DisableAutoIndexUpdate()

-	return pruneRepository(gopts, repo)
-}
-
-func mixedBlobs(list []restic.Blob) bool {
-	var tree, data bool
-
-	for _, pb := range list {
-		switch pb.Type {
-		case restic.TreeBlob:
-			tree = true
-		case restic.DataBlob:
-			data = true
-		}
-
-		if tree && data {
-			return true
-		}
-	}
-
-	return false
-}
-
-func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
-	ctx := gopts.ctx
-
-	err := repo.LoadIndex(ctx)
+	Verbosef("loading all snapshots...\n")
+	snapshots, err := restic.LoadAllSnapshots(gopts.ctx, repo, ignoreSnapshots)
 	if err != nil {
 		return err
 	}

-	var stats struct {
-		blobs     int
-		packs     int
-		snapshots int
-		bytes     int64
-	}
-
-	Verbosef("counting files in repo\n")
-	err = repo.List(ctx, restic.PackFile, func(restic.ID, int64) error {
-		stats.packs++
-		return nil
-	})
+	Verbosef("loading indexes...\n")
+	err = repo.LoadIndex(gopts.ctx)
 	if err != nil {
 		return err
 	}

-	Verbosef("building new index for repo\n")
-
-	bar := newProgressMax(!gopts.Quiet, uint64(stats.packs), "packs")
-	idx, invalidFiles, err := index.New(ctx, repo, restic.NewIDSet(), bar)
-	if err != nil {
-		return err
-	}
-
-	for _, id := range invalidFiles {
-		Warnf("incomplete pack file (will be removed): %v\n", id)
-	}
-
-	blobs := 0
-	for _, pack := range idx.Packs {
-		stats.bytes += pack.Size
-		blobs += len(pack.Entries)
-	}
-	Verbosef("repository contains %v packs (%v blobs) with %v\n",
-		len(idx.Packs), blobs, formatBytes(uint64(stats.bytes)))
-
-	blobCount := make(map[restic.BlobHandle]int)
-	var duplicateBlobs uint64
-	var duplicateBytes uint64
-
-	// find duplicate blobs
-	for _, p := range idx.Packs {
-		for _, entry := range p.Entries {
-			stats.blobs++
-			h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
-			blobCount[h]++
-
-			if blobCount[h] > 1 {
-				duplicateBlobs++
-				duplicateBytes += uint64(entry.Length)
-			}
-		}
-	}
-
-	Verbosef("processed %d blobs: %d duplicate blobs, %v duplicate\n",
-		stats.blobs, duplicateBlobs, formatBytes(uint64(duplicateBytes)))
-	Verbosef("load all snapshots\n")
-
-	// find referenced blobs
-	snapshots, err := restic.LoadAllSnapshots(ctx, repo)
-	if err != nil {
-		return err
-	}
-
-	stats.snapshots = len(snapshots)
-
 	usedBlobs, err := getUsedBlobs(gopts, repo, snapshots)
 	if err != nil {
 		return err
 	}

-	var missingBlobs []restic.BlobHandle
-	for h := range usedBlobs {
-		if _, ok := blobCount[h]; !ok {
-			missingBlobs = append(missingBlobs, h)
-		}
-	}
+	return prune(opts, gopts, repo, usedBlobs)
+}
+
+type packInfo struct {
+	usedBlobs      uint
+	unusedBlobs    uint
+	duplicateBlobs uint
+	usedSize       uint64
+	unusedSize     uint64
+	tpe            restic.BlobType
+}
+
+type packInfoWithID struct {
+	ID restic.ID
+	packInfo
+}
+
+// prune selects which files to rewrite and then does that. The map usedBlobs is
+// modified in the process.
+func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedBlobs restic.BlobSet) error {
+	ctx := gopts.ctx
+
+	var stats struct {
+		blobs struct {
+			used      uint
+			duplicate uint
+			unused    uint
+			remove    uint
+			repack    uint
+			repackrm  uint
+		}
+		size struct {
+			used      uint64
+			duplicate uint64
+			unused    uint64
+			remove    uint64
+			repack    uint64
+			repackrm  uint64
+			unref     uint64
+		}
+		packs struct {
+			used       uint
+			unused     uint
+			partlyUsed uint
+			keep       uint
+		}
+	}

-	if len(missingBlobs) > 0 {
-		return errors.Fatalf("%v not found in the new index\n"+
+	Verbosef("searching used packs...\n")
+
+	keepBlobs := restic.NewBlobSet()
+	duplicateBlobs := restic.NewBlobSet()
+
+	// iterate over all blobs in index to find out which blobs are duplicates
+	for blob := range repo.Index().Each(ctx) {
+		bh := blob.Handle()
+		switch {
+		case usedBlobs.Has(bh): // used blob, move to keepBlobs
+			usedBlobs.Delete(bh)
+			keepBlobs.Insert(bh)
+		case keepBlobs.Has(bh): // duplicate blob
+			duplicateBlobs.Insert(bh)
+		}
+	}
+
+	// Check if all used blobs have been found in index
+	if len(usedBlobs) != 0 {
+		Warnf("%v not found in the new index\n"+
 			"Data blobs seem to be missing, aborting prune to prevent further data loss!\n"+
 			"Please report this error (along with the output of the 'prune' run) at\n"+
-			"https://github.com/restic/restic/issues/new/choose", missingBlobs)
+			"https://github.com/restic/restic/issues/new/choose", usedBlobs)
+		return errorIndexIncomplete
 	}

-	Verbosef("found %d of %d data blobs still in use, removing %d blobs\n",
-		len(usedBlobs), stats.blobs, stats.blobs-len(usedBlobs))
+	indexPack := make(map[restic.ID]packInfo)

-	// find packs that need a rewrite
-	rewritePacks := restic.NewIDSet()
-	for _, pack := range idx.Packs {
-		if mixedBlobs(pack.Entries) {
-			rewritePacks.Insert(pack.ID)
-			continue
+	// iterate over all blobs in index to generate packInfo
+	for blob := range repo.Index().Each(ctx) {
+		ip, ok := indexPack[blob.PackID]
+		if !ok {
+			ip = packInfo{tpe: blob.Type, usedSize: pack.HeaderSize}
+		}
+		// mark mixed packs with "Invalid blob type"
+		if ip.tpe != blob.Type {
+			ip.tpe = restic.InvalidBlob
 		}

-		for _, blob := range pack.Entries {
-			h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
-			if !usedBlobs.Has(h) {
-				rewritePacks.Insert(pack.ID)
-				continue
-			}
-
-			if blobCount[h] > 1 {
-				rewritePacks.Insert(pack.ID)
-			}
-		}
-	}
-
-	removeBytes := duplicateBytes
-
-	// find packs that are unneeded
+		bh := blob.Handle()
+		size := uint64(pack.PackedSizeOfBlob(blob.Length))
+		switch {
+		case duplicateBlobs.Has(bh): // duplicate blob
+			ip.usedSize += size
+			ip.duplicateBlobs++
+			stats.size.duplicate += size
+			stats.blobs.duplicate++
+		case keepBlobs.Has(bh): // used blob, not duplicate
+			ip.usedSize += size
+			ip.usedBlobs++
+			stats.size.used += size
+			stats.blobs.used++
+		default: // unused blob
+			ip.unusedSize += size
+			ip.unusedBlobs++
+			stats.size.unused += size
+			stats.blobs.unused++
+		}
+		// update indexPack
+		indexPack[blob.PackID] = ip
+	}
+
+	Verbosef("collecting packs for deletion and repacking\n")
+	removePacksFirst := restic.NewIDSet()
 	removePacks := restic.NewIDSet()
+	repackPacks := restic.NewIDSet()

-	Verbosef("will remove %d invalid files\n", len(invalidFiles))
-	for _, id := range invalidFiles {
+	var repackCandidates []packInfoWithID
+
+	// loop over all packs and decide what to do
+	bar := newProgressMax(!gopts.Quiet, uint64(len(indexPack)), "packs processed")
+	bar.Start()
+	err := repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
+		p, ok := indexPack[id]
+		if !ok {
+			// Pack was not referenced in index and is not used => immediately remove!
+			Verboseff("will remove pack %v as it is unused and not indexed\n", id.Str())
+			removePacksFirst.Insert(id)
+			stats.size.unref += uint64(packSize)
+			return nil
+		}
+
+		if p.unusedSize+p.usedSize != uint64(packSize) {
+			Warnf("pack %s: calculated size %d does not match real size %d\nRun 'restic rebuild-index'.",
+				id.Str(), p.unusedSize+p.usedSize, packSize)
+			return errorSizeNotMatching
+		}
+
+		// statistics
+		switch {
+		case p.usedBlobs == 0 && p.duplicateBlobs == 0:
+			stats.packs.unused++
+		case p.unusedBlobs == 0:
+			stats.packs.used++
+		default:
+			stats.packs.partlyUsed++
+		}
+
+		// decide what to do
+		switch {
+		case p.usedBlobs == 0 && p.duplicateBlobs == 0:
+			// All blobs in pack are no longer used => remove pack!
 			removePacks.Insert(id)
+			stats.blobs.remove += p.unusedBlobs
+			stats.size.remove += p.unusedSize
+
+		case opts.RepackCachableOnly && p.tpe == restic.DataBlob:
+			// if this is a data pack and --repack-cacheable-only is set => keep pack!
+			stats.packs.keep++
+
+		case p.unusedBlobs == 0 && p.duplicateBlobs == 0 && p.tpe != restic.InvalidBlob:
+			// All blobs in pack are used and not duplicates/mixed => keep pack!
+			stats.packs.keep++
+
+		default:
+			// all other packs are candidates for repacking
+			repackCandidates = append(repackCandidates, packInfoWithID{ID: id, packInfo: p})
 		}

-	for packID, p := range idx.Packs {
-		hasActiveBlob := false
-		for _, blob := range p.Entries {
-			h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
-			if usedBlobs.Has(h) {
-				hasActiveBlob = true
-				continue
-			}
-
-			removeBytes += uint64(blob.Length)
-		}
-
-		if hasActiveBlob {
-			continue
-		}
-
-		removePacks.Insert(packID)
-
-		if !rewritePacks.Has(packID) {
-			return errors.Fatalf("pack %v is unneeded, but not contained in rewritePacks", packID.Str())
-		}
-
-		rewritePacks.Delete(packID)
-	}
-
-	Verbosef("will delete %d packs and rewrite %d packs, this frees %s\n",
-		len(removePacks), len(rewritePacks), formatBytes(uint64(removeBytes)))
-
-	var obsoletePacks restic.IDSet
-	if len(rewritePacks) != 0 {
-		bar := newProgressMax(!gopts.Quiet, uint64(len(rewritePacks)), "packs rewritten")
-		obsoletePacks, err = repository.Repack(ctx, repo, rewritePacks, usedBlobs, bar)
+		delete(indexPack, id)
+		bar.Report(restic.Stat{Blobs: 1})
+		return nil
+	})
+	bar.Done()
 	if err != nil {
 		return err
 	}
+
+	if len(indexPack) != 0 {
+		Warnf("The index references pack files which are missing from the repository: %v\n", indexPack)
+		return errorPacksMissing
 	}

-	removePacks.Merge(obsoletePacks)
+	repackAllPacksWithDuplicates := true
+
+	// calculate limit for number of unused bytes in the repo after repacking
+	maxUnusedSizeAfter := opts.maxUnusedBytes(stats.size.used)
+
+	// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.
+	// This is equivalent to sorting by unused / total space.
+	// Instead of unused[i] / used[i] > unused[j] / used[j] we use
+	// unused[i] * used[j] > unused[j] * used[i] as uint32*uint32 < uint64
+	// Morover duplicates and mixed are sorted to the beginning
+	sort.Slice(repackCandidates, func(i, j int) bool {
+		pi := repackCandidates[i].packInfo
+		pj := repackCandidates[j].packInfo
+		switch {
+		case pi.duplicateBlobs > 0 && pj.duplicateBlobs == 0:
+			return true
+		case pj.duplicateBlobs > 0 && pi.duplicateBlobs == 0:
+			return false
+		case pi.tpe == restic.InvalidBlob && pj.tpe != restic.InvalidBlob:
+			return true
+		case pj.tpe == restic.InvalidBlob && pi.tpe != restic.InvalidBlob:
+			return false
+		}
+		return pi.unusedSize*pj.usedSize > pj.unusedSize*pi.usedSize
+	})
+
+	repack := func(id restic.ID, p packInfo) {
+		repackPacks.Insert(id)
+		stats.blobs.repack += p.unusedBlobs + p.duplicateBlobs + p.usedBlobs
+		stats.size.repack += p.unusedSize + p.usedSize
+		stats.blobs.repackrm += p.unusedBlobs
+		stats.size.repackrm += p.unusedSize
+	}
+
+	for _, p := range repackCandidates {
+		reachedUnusedSizeAfter := (stats.size.unused-stats.size.remove-stats.size.repackrm < maxUnusedSizeAfter)
+
+		reachedRepackSize := false
+		if opts.MaxRepackBytes > 0 {
+			reachedRepackSize = stats.size.repack+p.unusedSize+p.usedSize > opts.MaxRepackBytes
+		}
+
+		switch {
+		case !reachedRepackSize && (p.duplicateBlobs > 0 || p.tpe == restic.InvalidBlob):
+			// repacking duplicates/mixed is only limited by repackSize
+			repack(p.ID, p.packInfo)
+
+		case reachedUnusedSizeAfter, reachedRepackSize:
+			// for all other packs stop repacking if tolerated unused size is reached.
+			stats.packs.keep++
+			if p.duplicateBlobs > 0 {
+				repackAllPacksWithDuplicates = false
+			}
+
+		default:
+			repack(p.ID, p.packInfo)
+		}
+	}
+
+	// if all duplicates are repacked, print out correct statistics
+	if repackAllPacksWithDuplicates {
+		stats.blobs.repackrm += stats.blobs.duplicate
+		stats.size.repackrm += stats.size.duplicate
+	}
+
+	Verboseff("\nused: %10d blobs / %s\n", stats.blobs.used, formatBytes(stats.size.used))
+	if stats.blobs.duplicate > 0 {
+		Verboseff("duplicates: %10d blobs / %s\n", stats.blobs.duplicate, formatBytes(stats.size.duplicate))
+	}
+	Verboseff("unused: %10d blobs / %s\n", stats.blobs.unused, formatBytes(stats.size.unused))
+	if stats.size.unref > 0 {
+		Verboseff("unreferenced: %s\n", formatBytes(stats.size.unref))
+	}
+	totalBlobs := stats.blobs.used + stats.blobs.unused + stats.blobs.duplicate
+	totalSize := stats.size.used + stats.size.duplicate + stats.size.unused + stats.size.unref
+	Verboseff("total: %10d blobs / %s\n", totalBlobs, formatBytes(totalSize))
+	Verboseff("unused size: %s of total size\n", formatPercent(stats.size.unused, totalSize))
+
+	Verbosef("\nto repack: %10d blobs / %s\n", stats.blobs.repack, formatBytes(stats.size.repack))
+	Verbosef("this removes %10d blobs / %s\n", stats.blobs.repackrm, formatBytes(stats.size.repackrm))
+	Verbosef("to delete: %10d blobs / %s\n", stats.blobs.remove, formatBytes(stats.size.remove+stats.size.unref))
+	totalPruneSize := stats.size.remove + stats.size.repackrm + stats.size.unref
+	Verbosef("total prune: %10d blobs / %s\n", stats.blobs.remove+stats.blobs.repackrm, formatBytes(totalPruneSize))
+	Verbosef("remaining: %10d blobs / %s\n", totalBlobs-(stats.blobs.remove+stats.blobs.repackrm), formatBytes(totalSize-totalPruneSize))
+	unusedAfter := stats.size.unused - stats.size.remove - stats.size.repackrm
+	Verbosef("unused size after prune: %s (%s of remaining size)\n",
+		formatBytes(unusedAfter), formatPercent(unusedAfter, totalSize-totalPruneSize))
+	Verbosef("\n")
+	Verboseff("totally used packs: %10d\n", stats.packs.used)
+	Verboseff("partly used packs: %10d\n", stats.packs.partlyUsed)
+	Verboseff("unused packs: %10d\n\n", stats.packs.unused)
+
+	Verboseff("to keep: %10d packs\n", stats.packs.keep)
+	Verboseff("to repack: %10d packs\n", len(repackPacks))
+	Verboseff("to delete: %10d packs\n", len(removePacks))
+	if len(removePacksFirst) > 0 {
+		Verboseff("to delete: %10d unreferenced packs\n\n", len(removePacksFirst))
+	}
+
+	if opts.DryRun {
+		if !gopts.JSON && gopts.verbosity >= 2 {
+			if len(removePacksFirst) > 0 {
+				Printf("Would have removed the following unreferenced packs:\n%v\n\n", removePacksFirst)
+			}
+			Printf("Would have repacked and removed the following packs:\n%v\n\n", repackPacks)
+			Printf("Would have removed the following no longer used packs:\n%v\n\n", removePacks)
+		}
+		// Always quit here if DryRun was set!
+		return nil
+	}
+
+	// unreferenced packs can be safely deleted first
+	if len(removePacksFirst) != 0 {
+		Verbosef("deleting unreferenced packs\n")
+		DeleteFiles(gopts, repo, removePacksFirst, restic.PackFile)
+	}
+
+	if len(repackPacks) != 0 {
+		Verbosef("repacking packs\n")
+		bar := newProgressMax(!gopts.Quiet, uint64(len(repackPacks)), "packs repacked")
+		_, err := repository.Repack(ctx, repo, repackPacks, keepBlobs, bar)
+		if err != nil {
+			return err
+		}
+		// Also remove repacked packs
+		removePacks.Merge(repackPacks)
+	}
+
+	if len(removePacks) != 0 {
 		if err = rebuildIndex(ctx, repo, removePacks); err != nil {
 			return err
 		}

-	if len(removePacks) != 0 {
-		Verbosef("remove %d old packs\n", len(removePacks))
+		Verbosef("removing %d old packs\n", len(removePacks))
 		DeleteFiles(gopts, repo, removePacks, restic.PackFile)
 	}

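A note on the sort.Slice comparator in the hunk above: it ranks repack candidates by the ratio unused/used without dividing, by cross-multiplying — unused_i*used_j > unused_j*used_i — which is exact in integer arithmetic and stays well-defined when a pack's used size is zero (the code's comment also notes the products fit in a uint64). A small self-contained demonstration of the same trick:

    package main

    import (
        "fmt"
        "sort"
    )

    type packStat struct{ unused, used uint64 }

    func main() {
        packs := []packStat{
            {unused: 10, used: 90},
            {unused: 80, used: 20},
            {unused: 0, used: 100},
            {unused: 50, used: 0}, // fully unused: a ratio would divide by zero
        }
        // Sort by descending unused/used ratio without any division:
        // unused_i/used_i > unused_j/used_j  <=>  unused_i*used_j > unused_j*used_i
        // for non-negative sizes, and the cross-product form handles used == 0.
        sort.Slice(packs, func(i, j int) bool {
            return packs[i].unused*packs[j].used > packs[j].unused*packs[i].used
        })
        // packs with the most reclaimable space come first
        fmt.Println(packs) // [{50 0} {80 20} {10 90} {0 100}]
    }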
@@ -263,7 +498,7 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
 func getUsedBlobs(gopts GlobalOptions, repo restic.Repository, snapshots []*restic.Snapshot) (usedBlobs restic.BlobSet, err error) {
 	ctx := gopts.ctx

-	Verbosef("find data that is still in use for %d snapshots\n", len(snapshots))
+	Verbosef("finding data that is still in use for %d snapshots\n", len(snapshots))

 	usedBlobs = restic.NewBlobSet()
cmd/restic/global.go
@@ -231,6 +231,13 @@ func Verbosef(format string, args ...interface{}) {
 	}
 }

+// Verboseff calls Printf to write the message when the verbosity is >= 2
+func Verboseff(format string, args ...interface{}) {
+	if globalOptions.verbosity >= 2 {
+		Printf(format, args...)
+	}
+}
+
 // PrintProgress wraps fmt.Printf to handle the difference in writing progress
 // information to terminals and non-terminal stdout
 func PrintProgress(format string, args ...interface{}) {
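Verboseff completes a small verbosity ladder next to Verbosef: messages gated at level >= 2 instead of >= 1, so the detailed prune statistics only appear with extra --verbose. A minimal sketch of the gating pattern (the names here are stand-ins, not restic's actual globals):

    package main

    import "fmt"

    var verbosity = 1 // 0 = quiet, 1 = --verbose, 2 = --verbose=2

    func verbosef(format string, args ...interface{}) {
        if verbosity >= 1 {
            fmt.Printf(format, args...)
        }
    }

    func verboseff(format string, args ...interface{}) {
        if verbosity >= 2 {
            fmt.Printf(format, args...)
        }
    }

    func main() {
        verbosef("summary line, shown at verbosity 1\n")
        verboseff("detailed statistics, shown only at verbosity 2\n")
    }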
cmd/restic/integration_test.go
@@ -270,8 +270,8 @@ func testRunForgetJSON(t testing.TB, gopts GlobalOptions, args ...string) {
 		"Expected 2 snapshots to be removed, got %v", len(forgets[0].Remove))
 }

-func testRunPrune(t testing.TB, gopts GlobalOptions) {
-	rtest.OK(t, runPrune(gopts))
+func testRunPrune(t testing.TB, gopts GlobalOptions, opts PruneOptions) {
+	rtest.OK(t, runPrune(opts, gopts))
 }

 func testSetupBackupData(t testing.TB, env *testEnvironment) string {
@@ -1386,6 +1386,32 @@ func TestCheckRestoreNoLock(t *testing.T) {
 }

 func TestPrune(t *testing.T) {
+	t.Run("0", func(t *testing.T) {
+		opts := PruneOptions{MaxUnused: "0%"}
+		checkOpts := CheckOptions{ReadData: true, CheckUnused: true}
+		testPrune(t, opts, checkOpts)
+	})
+
+	t.Run("50", func(t *testing.T) {
+		opts := PruneOptions{MaxUnused: "50%"}
+		checkOpts := CheckOptions{ReadData: true}
+		testPrune(t, opts, checkOpts)
+	})
+
+	t.Run("unlimited", func(t *testing.T) {
+		opts := PruneOptions{MaxUnused: "unlimited"}
+		checkOpts := CheckOptions{ReadData: true}
+		testPrune(t, opts, checkOpts)
+	})
+
+	t.Run("CachableOnly", func(t *testing.T) {
+		opts := PruneOptions{MaxUnused: "5%", RepackCachableOnly: true}
+		checkOpts := CheckOptions{ReadData: true}
+		testPrune(t, opts, checkOpts)
+	})
+}
+
+func testPrune(t *testing.T, pruneOpts PruneOptions, checkOpts CheckOptions) {
 	env, cleanup := withTestEnvironment(t)
 	defer cleanup()

@@ -1406,10 +1432,12 @@ func TestPrune(t *testing.T) {

 	testRunForgetJSON(t, env.gopts)
 	testRunForget(t, env.gopts, firstSnapshot[0].String())
-	testRunPrune(t, env.gopts)
-	testRunCheck(t, env.gopts)
+	testRunPrune(t, env.gopts, pruneOpts)
+	rtest.OK(t, runCheck(checkOpts, env.gopts, nil))
 }

+var pruneDefaultOptions = PruneOptions{MaxUnused: "5%"}
+
 func listPacks(gopts GlobalOptions, t *testing.T) restic.IDSet {
 	r, err := OpenRepository(gopts)
 	rtest.OK(t, err)
@@ -1452,14 +1480,8 @@ func TestPruneWithDamagedRepository(t *testing.T) {
 		"expected one snapshot, got %v", snapshotIDs)

 	// prune should fail
-	err := runPrune(env.gopts)
-	if err == nil {
-		t.Fatalf("expected prune to fail")
-	}
-	if !strings.Contains(err.Error(), "blobs seem to be missing") {
-		t.Fatalf("did not find hint for missing blobs")
-	}
-	t.Log(err)
+	rtest.Assert(t, runPrune(pruneDefaultOptions, env.gopts) == errorPacksMissing,
+		"prune should have reported index not complete error")
 }

 // Test repos for edge cases
@@ -1469,37 +1491,37 @@ func TestEdgeCaseRepos(t *testing.T) {
 	// repo where index is completely missing
 	// => check and prune should fail
 	t.Run("no-index", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-index-missing.tar.gz", opts, false, false)
+		testEdgeCaseRepo(t, "repo-index-missing.tar.gz", opts, pruneDefaultOptions, false, false)
 	})

 	// repo where an existing and used blob is missing from the index
-	// => check should fail, prune should heal this
+	// => check and prune should fail
 	t.Run("index-missing-blob", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-index-missing-blob.tar.gz", opts, false, true)
+		testEdgeCaseRepo(t, "repo-index-missing-blob.tar.gz", opts, pruneDefaultOptions, false, false)
 	})

 	// repo where a blob is missing
 	// => check and prune should fail
 	t.Run("no-data", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-data-missing.tar.gz", opts, false, false)
+		testEdgeCaseRepo(t, "repo-data-missing.tar.gz", opts, pruneDefaultOptions, false, false)
 	})

 	// repo where data exists that is not referenced
 	// => check and prune should fully work
 	t.Run("unreferenced-data", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-unreferenced-data.tar.gz", opts, true, true)
+		testEdgeCaseRepo(t, "repo-unreferenced-data.tar.gz", opts, pruneDefaultOptions, true, true)
 	})

 	// repo where an obsolete index still exists
 	// => check and prune should fully work
 	t.Run("obsolete-index", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-obsolete-index.tar.gz", opts, true, true)
+		testEdgeCaseRepo(t, "repo-obsolete-index.tar.gz", opts, pruneDefaultOptions, true, true)
 	})

 	// repo which contains mixed (data/tree) packs
 	// => check and prune should fully work
 	t.Run("mixed-packs", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-mixed.tar.gz", opts, true, true)
+		testEdgeCaseRepo(t, "repo-mixed.tar.gz", opts, pruneDefaultOptions, true, true)
 	})

 	// repo which contains duplicate blobs
@@ -1510,11 +1532,11 @@ func TestEdgeCaseRepos(t *testing.T) {
 		CheckUnused: true,
 	}
 	t.Run("duplicates", func(t *testing.T) {
-		testEdgeCaseRepo(t, "repo-duplicates.tar.gz", opts, false, true)
+		testEdgeCaseRepo(t, "repo-duplicates.tar.gz", opts, pruneDefaultOptions, false, true)
 	})
 }

-func testEdgeCaseRepo(t *testing.T, tarfile string, options CheckOptions, checkOK, pruneOK bool) {
+func testEdgeCaseRepo(t *testing.T, tarfile string, optionsCheck CheckOptions, optionsPrune PruneOptions, checkOK, pruneOK bool) {
 	env, cleanup := withTestEnvironment(t)
 	defer cleanup()

@@ -1524,15 +1546,15 @@ func testEdgeCaseRepo(t *testing.T, tarfile string, options CheckOptions, checkO
 	if checkOK {
 		testRunCheck(t, env.gopts)
 	} else {
-		rtest.Assert(t, runCheck(options, env.gopts, nil) != nil,
+		rtest.Assert(t, runCheck(optionsCheck, env.gopts, nil) != nil,
 			"check should have reported an error")
 	}

 	if pruneOK {
-		testRunPrune(t, env.gopts)
+		testRunPrune(t, env.gopts, optionsPrune)
 		testRunCheck(t, env.gopts)
 	} else {
-		rtest.Assert(t, runPrune(env.gopts) != nil,
+		rtest.Assert(t, runPrune(optionsPrune, env.gopts) != nil,
 			"prune should have reported an error")
 	}
 }
doc/060_forget.rst
@@ -23,12 +23,11 @@ data that was referenced by the snapshot from the repository. This can
 be automated with the ``--prune`` option of the ``forget`` command,
 which runs ``prune`` automatically if snapshots have been removed.

 .. Warning::

-    Pruning snapshots can be a very time-consuming process, taking nearly
-    as long as backups themselves. During a prune operation, the index is
-    locked and backups cannot be completed. Performance improvements are
-    planned for this feature.
+    Pruning snapshots can be a time-consuming process, depending on the
+    amount of snapshots and data to process. During a prune operation, the
+    repository is locked and backups cannot be completed. Please plan your
+    pruning so that there's time to complete it and it doesn't interfere with
+    regular backup runs.

 It is advisable to run ``restic check`` after pruning, to make sure
 you are alerted, should the internal data structures of the repository
|
|||||||
|
|
||||||
$ restic -r /srv/restic-repo prune
|
$ restic -r /srv/restic-repo prune
|
||||||
enter password for repository:
|
enter password for repository:
|
||||||
|
repository 33002c5e opened successfully, password is correct
|
||||||
|
loading all snapshots...
|
||||||
|
loading indexes...
|
||||||
|
finding data that is still in use for 4 snapshots
|
||||||
|
[0:00] 100.00% 4 / 4 snapshots
|
||||||
|
searching used packs...
|
||||||
|
collecting packs for deletion and repacking
|
||||||
|
[0:00] 100.00% 5 / 5 packs processed
|
||||||
|
|
||||||
|
to repack: 69 blobs / 1.078 MiB
|
||||||
|
this removes 67 blobs / 1.047 MiB
|
||||||
|
to delete: 7 blobs / 25.726 KiB
|
||||||
|
total prune: 74 blobs / 1.072 MiB
|
||||||
|
remaining: 16 blobs / 38.003 KiB
|
||||||
|
unused size after prune: 0 B (0.00% of remaining size)
|
||||||
|
|
||||||
|
repacking packs
|
||||||
|
[0:00] 100.00% 2 / 2 packs repacked
|
||||||
counting files in repo
|
counting files in repo
|
||||||
building new index for repo
|
[0:00] 100.00% 3 / 3 packs
|
||||||
[0:00] 100.00% 22 / 22 files
|
finding old index files
|
||||||
repository contains 22 packs (8512 blobs) with 100.092 MiB bytes
|
saved new indexes as [59270b3a]
|
||||||
processed 8512 blobs: 0 duplicate blobs, 0B duplicate
|
remove 4 old index files
|
||||||
load all snapshots
|
[0:00] 100.00% 4 / 4 files deleted
|
||||||
find data that is still in use for 1 snapshots
|
removing 3 old packs
|
||||||
[0:00] 100.00% 1 / 1 snapshots
|
[0:00] 100.00% 3 / 3 files deleted
|
||||||
found 8433 of 8512 data blobs still in use
|
|
||||||
will rewrite 3 packs
|
|
||||||
creating new index
|
|
||||||
[0:00] 86.36% 19 / 22 files
|
|
||||||
saved new index as 544a5084
|
|
||||||
done
|
done
|
||||||
|
|
||||||
Afterwards the repository is smaller.
|
Afterwards the repository is smaller.
|
||||||
@ -119,19 +130,31 @@ to ``forget``:
|
|||||||
8c02b94b 2017-02-21 10:48:33 mopped /home/user/work
|
8c02b94b 2017-02-21 10:48:33 mopped /home/user/work
|
||||||
|
|
||||||
1 snapshots have been removed, running prune
|
1 snapshots have been removed, running prune
|
||||||
counting files in repo
|
loading all snapshots...
|
||||||
building new index for repo
|
loading indexes...
|
||||||
[0:00] 100.00% 37 / 37 packs
|
finding data that is still in use for 1 snapshots
|
||||||
repository contains 37 packs (5521 blobs) with 151.012 MiB bytes
|
|
||||||
processed 5521 blobs: 0 duplicate blobs, 0B duplicate
|
|
||||||
load all snapshots
|
|
||||||
find data that is still in use for 1 snapshots
|
|
||||||
[0:00] 100.00% 1 / 1 snapshots
|
[0:00] 100.00% 1 / 1 snapshots
|
||||||
found 5323 of 5521 data blobs still in use, removing 198 blobs
|
searching used packs...
|
||||||
will delete 0 packs and rewrite 27 packs, this frees 22.106 MiB
|
collecting packs for deletion and repacking
|
||||||
creating new index
|
[0:00] 100.00% 5 / 5 packs processed
|
||||||
[0:00] 100.00% 30 / 30 packs
|
|
||||||
saved new index as b49f3e68
|
to repack: 69 blobs / 1.078 MiB
|
||||||
|
this removes 67 blobs / 1.047 MiB
|
||||||
|
to delete: 7 blobs / 25.726 KiB
|
||||||
|
total prune: 74 blobs / 1.072 MiB
|
||||||
|
remaining: 16 blobs / 38.003 KiB
|
||||||
|
unused size after prune: 0 B (0.00% of remaining size)
|
||||||
|
|
||||||
|
repacking packs
|
||||||
|
[0:00] 100.00% 2 / 2 packs repacked
|
||||||
|
counting files in repo
|
||||||
|
[0:00] 100.00% 3 / 3 packs
|
||||||
|
finding old index files
|
||||||
|
saved new indexes as [59270b3a]
|
||||||
|
remove 4 old index files
|
||||||
|
[0:00] 100.00% 4 / 4 files deleted
|
||||||
|
removing 3 old packs
|
||||||
|
[0:00] 100.00% 3 / 3 files deleted
|
||||||
done
|
done
|
||||||
|
|
||||||
Removing snapshots according to a policy
|
Removing snapshots according to a policy
|
||||||
@@ -282,3 +305,59 @@ last-day-of-the-months (11 or 12 depends if the 5 weeklies cross a month).
 And finally 75 last-day-of-the-year snapshots. All other snapshots are
 removed.

+Customize pruning
+*****************
+
+To understand the custom options, we first explain how the pruning process works:
+
+1. All snapshots and directories within snapshots are scanned to determine
+   which data is still in use.
+2. For all files in the repository, restic finds out if the file is fully
+   used, partly used or completely unused.
+3. Completely unused files are marked for deletion. Fully used files are kept.
+   A partially used file is either kept or marked for repacking depending on user
+   options.
+
+   Note that for repacking, restic must download the file from the repository
+   storage and re-upload the needed data in the repository. This can be very
+   time-consuming for remote repositories.
+4. After deciding what to do, ``prune`` will actually perform the repack, modify
+   the index according to the changes and delete the obsolete files.
+
+The ``prune`` command accepts the following options:
+
+- ``--max-unused limit`` allow unused data up to the specified limit within the repository.
+  This allows restic to keep partly used files instead of repacking them.
+
+  The limit can be specified in several ways:
+
+  * As an absolute size (e.g. ``200M``). If you want to minimize the space
+    used by your repository, pass ``0`` to this option.
+  * As a size relative to the total repo size (e.g. ``10%``). This means that
+    after prune, at most ``10%`` of the total data stored in the repo may be
+    unused data. If the repo after prune has a size of 500MB, then at most
+    50MB may be unused.
+  * If the string ``unlimited`` is passed, there is no limit for partly
+    unused files. This means that as long as some data is still used within
+    a file stored in the repo, restic will just leave it there. Use this if
+    you want to minimize the time and bandwidth used by the ``prune``
+    operation.
+
+  Restic tries to repack as little data as possible while still ensuring this
+  limit for unused data.
+
+- ``--max-repack-size size`` if set limits the total size of files to repack.
+  As ``prune`` first stores all repacked files and deletes the obsolete files at the end,
+  this option might be handy if you expect many files to be repacked and fear to run low
+  on storage.
+
+- ``--repack-cacheable-only`` if set to true only files which contain
+  metadata and would be stored in the cache are repacked. Other pack files are
+  not repacked if this option is set. This allows a very fast repacking
+  using only cached data. It can, however, imply that the unused data in
+  your repository exceeds the value given by ``--max-unused``.
+  The default value is false.
+
+- ``--dry-run`` only show what ``prune`` would do.
+
+- ``--verbose`` increased verbosity shows additional statistics for ``prune``.
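The relative form of ``--max-unused`` added above maps directly onto the bound the implementation enforces. Writing s for used bytes, u for unused bytes and p for the percentage limit, a short derivation:

    \frac{u}{s+u} \le \frac{p}{100}
    \quad\Longleftrightarrow\quad
    100\,u \le p\,(s+u)
    \quad\Longleftrightarrow\quad
    u \le \frac{p}{100-p}\,s

For the documented example, p = 10 and s + u = 500MB give u <= 50MB, matching the prose above and the closure built in verifyPruneOptions.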
internal/pack/pack.go
@@ -161,13 +161,16 @@ func (p *Packer) String() string {
 }

 var (
-	// size of the header-length field at the end of the file
-	headerLengthSize = binary.Size(uint32(0))
 	// we require at least one entry in the header, and one blob for a pack file
 	minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
 )

 const (
+	// size of the header-length field at the end of the file; it is a uint32
+	headerLengthSize = 4
+	// constant overhead of the header independent of #entries
+	HeaderSize = headerLengthSize + crypto.Extension
+
 	maxHeaderSize = 16 * 1024 * 1024
 	// number of header enries to download as part of header-length request
 	eagerEntries = 15
@@ -315,3 +318,8 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, err

 	return entries, nil
 }
+
+// PackedSizeOfBlob returns the size a blob actually uses when saved in a pack
+func PackedSizeOfBlob(blobLength uint) uint {
+	return blobLength + entrySize
+}
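With these two exports, the size check in the new prune code (`p.unusedSize+p.usedSize != packSize`, with usedSize seeded from `pack.HeaderSize`) becomes plain arithmetic: a pack file's size is the sum of its blobs' packed sizes plus a constant header tail. A sketch of that accounting — the concrete byte values below are assumptions for illustration only, not restic's actual definitions of entrySize or crypto.Extension:

    package main

    import "fmt"

    // Illustrative values; restic defines the real ones in internal/pack
    // and internal/crypto.
    const (
        headerLengthSize = 4  // uint32 length field at the end of the file
        cryptoExtension  = 32 // assumed per-encryption overhead
        entrySize        = 37 // assumed per-blob header entry size
        // constant header overhead, independent of the number of entries
        headerSize = headerLengthSize + cryptoExtension
    )

    // packedSizeOfBlob mirrors pack.PackedSizeOfBlob: blob data plus one header entry.
    func packedSizeOfBlob(blobLength uint) uint {
        return blobLength + entrySize
    }

    func main() {
        blobs := []uint{1000, 2000, 4096}
        var size uint = headerSize
        for _, l := range blobs {
            size += packedSizeOfBlob(l)
        }
        // this total is what prune compares against the file's real size
        fmt.Println("calculated pack size:", size, "bytes")
    }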
internal/restic/blob.go
@@ -19,6 +19,10 @@ func (b Blob) String() string {
 		b.Type, b.ID.Str(), b.Offset, b.Length)
 }

+func (b Blob) Handle() BlobHandle {
+	return BlobHandle{ID: b.ID, Type: b.Type}
+}
+
 // PackedBlob is a blob stored within a file.
 type PackedBlob struct {
 	Blob
internal/restic/snapshot.go
@@ -67,8 +67,12 @@ func LoadSnapshot(ctx context.Context, repo Repository, id ID) (*Snapshot, error
 }

 // LoadAllSnapshots returns a list of all snapshots in the repo.
-func LoadAllSnapshots(ctx context.Context, repo Repository) (snapshots []*Snapshot, err error) {
+// If a snapshot ID is in excludeIDs, it will not be included in the result.
+func LoadAllSnapshots(ctx context.Context, repo Repository, excludeIDs IDSet) (snapshots []*Snapshot, err error) {
 	err = repo.List(ctx, SnapshotFile, func(id ID, size int64) error {
+		if excludeIDs.Has(id) {
+			return nil
+		}
 		sn, err := LoadSnapshot(ctx, repo, id)
 		if err != nil {
 			return err

internal/restic/snapshot_test.go
@@ -25,7 +25,7 @@ func TestCreateSnapshot(t *testing.T) {
 		restic.TestCreateSnapshot(t, repo, testSnapshotTime.Add(time.Duration(i)*time.Second), testDepth, 0)
 	}

-	snapshots, err := restic.LoadAllSnapshots(context.TODO(), repo)
+	snapshots, err := restic.LoadAllSnapshots(context.TODO(), repo, restic.NewIDSet())
 	if err != nil {
 		t.Fatal(err)
 	}
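The new excludeIDs parameter is what lets ``forget --prune`` skip the snapshots it has just deleted: runForget passes removeSnIDs through runPruneWithRepo into LoadAllSnapshots, so the forgotten snapshots no longer count as "in use". A minimal stand-alone sketch of the exclude-set pattern, with a toy set type in place of restic.IDSet:

    package main

    import "fmt"

    // idSet is a minimal stand-in for restic.IDSet: membership drives exclusion.
    type idSet map[string]struct{}

    func (s idSet) Has(id string) bool { _, ok := s[id]; return ok }

    // loadAll mirrors the new LoadAllSnapshots contract: list everything,
    // but skip IDs present in excludeIDs.
    func loadAll(all []string, excludeIDs idSet) []string {
        var out []string
        for _, id := range all {
            if excludeIDs.Has(id) {
                continue
            }
            out = append(out, id)
        }
        return out
    }

    func main() {
        all := []string{"5144a4", "8c02b9", "79766a"}
        exclude := idSet{"8c02b9": {}} // a snapshot that forget just removed
        fmt.Println(loadAll(all, exclude)) // [5144a4 79766a]
    }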