2016-08-04 17:42:40 +00:00
package main
import (
2022-07-16 23:06:47 +00:00
"context"
2020-11-03 09:53:38 +00:00
"math"
2020-07-19 05:55:14 +00:00
"sort"
"strconv"
2020-11-03 09:53:38 +00:00
"strings"
2020-07-19 05:55:14 +00:00
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
2022-06-12 12:43:43 +00:00
"github.com/restic/restic/internal/index"
2022-02-13 13:25:38 +00:00
"github.com/restic/restic/internal/pack"
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/repository"
2017-07-24 15:42:25 +00:00
"github.com/restic/restic/internal/restic"
2022-10-21 15:34:14 +00:00
"github.com/restic/restic/internal/ui"
2017-07-23 12:21:03 +00:00
2016-09-17 10:36:05 +00:00
"github.com/spf13/cobra"
2016-08-04 17:42:40 +00:00
)
2020-07-19 05:55:14 +00:00
// Fatal errors returned while planning a prune run.
var (
	// errorIndexIncomplete is returned when blobs that are still in use
	// cannot be found in the index.
	errorIndexIncomplete = errors.Fatal("index is not complete")

	// errorPacksMissing is returned when the index references needed pack
	// files that are not present in the repository.
	errorPacksMissing = errors.Fatal("packs from index missing in repo")

	// errorSizeNotMatching is returned when the size of a needed pack file
	// differs from the size computed from the index.
	errorSizeNotMatching = errors.Fatal("pack size does not match calculated size from index")
)
2016-09-17 10:36:05 +00:00
var cmdPrune = & cobra . Command {
Use : "prune [flags]" ,
2017-09-11 16:32:44 +00:00
Short : "Remove unneeded data from the repository" ,
2016-09-17 10:36:05 +00:00
Long : `
The "prune" command checks the repository and removes data that is not
referenced and therefore not needed any more .
2019-11-05 06:03:38 +00:00
EXIT STATUS
== == == == == =
Exit status is 0 if the command was successful , and non - zero if there was any error .
2016-09-17 10:36:05 +00:00
` ,
2017-08-06 19:02:16 +00:00
DisableAutoGenTag : true ,
2016-09-17 10:36:05 +00:00
RunE : func ( cmd * cobra . Command , args [ ] string ) error {
2022-10-02 21:24:37 +00:00
return runPrune ( cmd . Context ( ) , pruneOptions , globalOptions )
2016-09-17 10:36:05 +00:00
} ,
2016-08-04 17:42:40 +00:00
}
2020-07-19 05:55:14 +00:00
// PruneOptions collects all options for the cleanup command.
//
// Exported string fields hold the raw flag values; the derived fields
// (MaxRepackBytes, maxUnusedBytes, unsafeRecovery) are filled in by
// verifyPruneOptions and runPrune.
type PruneOptions struct {
	// DryRun makes prune only report what would be done.
	DryRun bool

	// UnsafeNoSpaceRecovery is the raw value of --unsafe-recover-no-free-space.
	// runPrune requires it to equal the repository ID.
	UnsafeNoSpaceRecovery string
	// unsafeRecovery is set by runPrune once UnsafeNoSpaceRecovery was
	// checked against the repository ID.
	unsafeRecovery bool

	// MaxUnused is the raw value of --max-unused.
	MaxUnused string
	// maxUnusedBytes calculates the number of unused bytes after repacking,
	// according to MaxUnused. Its argument is the number of used bytes.
	maxUnusedBytes func(used uint64) uint64

	// MaxRepackSize is the raw value of --max-repack-size.
	MaxRepackSize string
	// MaxRepackBytes is the parsed repack limit in bytes.
	MaxRepackBytes uint64

	// RepackCachableOnly keeps data packs and only considers other packs
	// for repacking.
	RepackCachableOnly bool
	// RepackSmall raises the size below which fully used packs become
	// repack candidates to 80% of the target pack size.
	RepackSmall bool
	// RepackUncompressed requests repacking of all uncompressed data.
	RepackUncompressed bool
}

// pruneOptions receives the values of the prune command line flags.
var pruneOptions PruneOptions
2016-08-04 17:42:40 +00:00
func init ( ) {
2016-09-17 10:36:05 +00:00
cmdRoot . AddCommand ( cmdPrune )
2020-07-19 05:55:14 +00:00
f := cmdPrune . Flags ( )
f . BoolVarP ( & pruneOptions . DryRun , "dry-run" , "n" , false , "do not modify the repository, just print what would be done" )
2021-08-16 14:02:01 +00:00
f . StringVarP ( & pruneOptions . UnsafeNoSpaceRecovery , "unsafe-recover-no-free-space" , "" , "" , "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first." )
2020-07-19 05:55:14 +00:00
addPruneOptions ( cmdPrune )
}
func addPruneOptions ( c * cobra . Command ) {
f := c . Flags ( )
2020-11-03 09:53:38 +00:00
f . StringVar ( & pruneOptions . MaxUnused , "max-unused" , "5%" , "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')" )
2020-07-19 05:55:14 +00:00
f . StringVar ( & pruneOptions . MaxRepackSize , "max-repack-size" , "" , "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)" )
f . BoolVar ( & pruneOptions . RepackCachableOnly , "repack-cacheable-only" , false , "only repack packs which are cacheable" )
2022-08-07 19:21:05 +00:00
f . BoolVar ( & pruneOptions . RepackSmall , "repack-small" , false , "repack pack files below 80% of target pack size" )
2022-05-01 10:02:05 +00:00
f . BoolVar ( & pruneOptions . RepackUncompressed , "repack-uncompressed" , false , "repack all uncompressed data" )
2020-07-19 05:55:14 +00:00
}
func verifyPruneOptions ( opts * PruneOptions ) error {
2021-12-07 19:45:36 +00:00
opts . MaxRepackBytes = math . MaxUint64
2020-07-19 05:55:14 +00:00
if len ( opts . MaxRepackSize ) > 0 {
size , err := parseSizeStr ( opts . MaxRepackSize )
if err != nil {
return err
}
opts . MaxRepackBytes = uint64 ( size )
}
2021-08-16 14:02:01 +00:00
if opts . UnsafeNoSpaceRecovery != "" {
// prevent repacking data to make sure users cannot get stuck.
opts . MaxRepackBytes = 0
}
2020-07-19 05:55:14 +00:00
2020-11-03 09:53:38 +00:00
maxUnused := strings . TrimSpace ( opts . MaxUnused )
if maxUnused == "" {
return errors . Fatalf ( "invalid value for --max-unused: %q" , opts . MaxUnused )
2020-07-19 05:55:14 +00:00
}
2020-11-03 09:53:38 +00:00
// parse MaxUnused either as unlimited, a percentage, or an absolute number of bytes
switch {
case maxUnused == "unlimited" :
opts . maxUnusedBytes = func ( used uint64 ) uint64 {
return math . MaxUint64
}
case strings . HasSuffix ( maxUnused , "%" ) :
maxUnused = strings . TrimSuffix ( maxUnused , "%" )
p , err := strconv . ParseFloat ( maxUnused , 64 )
if err != nil {
return errors . Fatalf ( "invalid percentage %q passed for --max-unused: %v" , opts . MaxUnused , err )
}
if p < 0 {
return errors . Fatal ( "percentage for --max-unused must be positive" )
}
2020-07-19 05:55:14 +00:00
2020-11-03 09:53:38 +00:00
if p >= 100 {
return errors . Fatal ( "percentage for --max-unused must be below 100%" )
}
opts . maxUnusedBytes = func ( used uint64 ) uint64 {
return uint64 ( p / ( 100 - p ) * float64 ( used ) )
}
default :
size , err := parseSizeStr ( maxUnused )
if err != nil {
return errors . Fatalf ( "invalid number of bytes %q for --max-unused: %v" , opts . MaxUnused , err )
}
opts . maxUnusedBytes = func ( used uint64 ) uint64 {
return uint64 ( size )
}
2020-07-19 05:55:14 +00:00
}
return nil
2016-08-04 17:42:40 +00:00
}
2021-10-31 22:08:13 +00:00
func runPrune ( ctx context . Context , opts PruneOptions , gopts GlobalOptions ) error {
2020-07-19 05:55:14 +00:00
err := verifyPruneOptions ( & opts )
if err != nil {
return err
}
2022-05-01 10:02:05 +00:00
if opts . RepackUncompressed && gopts . Compression == repository . CompressionOff {
return errors . Fatal ( "disabled compression and `--repack-uncompressed` are mutually exclusive" )
}
2021-10-31 22:08:13 +00:00
repo , err := OpenRepository ( ctx , gopts )
2016-08-04 17:42:40 +00:00
if err != nil {
return err
}
2022-04-23 09:32:52 +00:00
if repo . Backend ( ) . Connections ( ) < 2 {
return errors . Fatal ( "prune requires a backend connection limit of at least two" )
}
2022-05-01 10:02:05 +00:00
if repo . Config ( ) . Version < 2 && opts . RepackUncompressed {
return errors . Fatal ( "compression requires at least repository format version 2" )
}
2021-08-16 14:02:01 +00:00
if opts . UnsafeNoSpaceRecovery != "" {
repoID := repo . Config ( ) . ID
if opts . UnsafeNoSpaceRecovery != repoID {
return errors . Fatalf ( "must pass id '%s' to --unsafe-recover-no-free-space" , repoID )
}
opts . unsafeRecovery = true
}
2021-10-31 22:19:27 +00:00
lock , ctx , err := lockRepoExclusive ( ctx , repo )
2016-08-04 17:42:40 +00:00
defer unlockRepo ( lock )
if err != nil {
return err
}
2021-10-31 22:08:13 +00:00
return runPruneWithRepo ( ctx , opts , gopts , repo , restic . NewIDSet ( ) )
2017-02-21 09:58:30 +00:00
}
2021-10-31 22:08:13 +00:00
func runPruneWithRepo ( ctx context . Context , opts PruneOptions , gopts GlobalOptions , repo * repository . Repository , ignoreSnapshots restic . IDSet ) error {
2020-07-19 05:55:14 +00:00
// we do not need index updates while pruning!
repo . DisableAutoIndexUpdate ( )
2017-06-04 09:16:55 +00:00
2020-11-08 19:15:58 +00:00
if repo . Cache == nil {
Print ( "warning: running prune without a cache, this may be very slow!\n" )
}
2020-07-19 05:55:14 +00:00
Verbosef ( "loading indexes...\n" )
2021-11-05 23:32:46 +00:00
// loading the index before the snapshots is ok, as we use an exclusive lock here
2021-10-31 22:08:13 +00:00
err := repo . LoadIndex ( ctx )
2018-01-21 16:25:36 +00:00
if err != nil {
return err
2016-08-15 19:10:20 +00:00
}
2022-12-03 09:47:19 +00:00
plan , stats , err := planPrune ( ctx , opts , repo , ignoreSnapshots , gopts . Quiet )
2020-11-30 15:55:14 +00:00
if err != nil {
return err
}
2022-12-30 14:01:49 +00:00
if opts . DryRun {
Verbosef ( "\nWould have made the following changes:" )
}
2022-12-03 09:47:19 +00:00
err = printPruneStats ( stats )
2020-11-30 15:55:14 +00:00
if err != nil {
return err
}
2021-10-31 22:08:13 +00:00
return doPrune ( ctx , opts , gopts , repo , plan )
2020-07-19 05:55:14 +00:00
}
2017-06-15 13:03:05 +00:00
2020-11-30 15:55:14 +00:00
// pruneStats holds the summary statistics collected while planning a prune
// run. The counters are filled by packInfoFromIndex and decidePackAction and
// printed by printPruneStats.
type pruneStats struct {
	// blobs counts blobs by their state in the index and by planned action.
	blobs struct {
		// state of the blobs found in the index
		used, duplicate, unused uint
		// planned actions; repackrm counts the unused blobs dropped by repacking
		remove, repack, repackrm uint
	}
	// size holds the byte sizes matching the blob counters.
	size struct {
		// state of the blobs found in the index
		used, duplicate, unused uint64
		// planned actions; repackrm is the size of unused blobs dropped by repacking
		remove, repack, repackrm uint64
		// unref is the size of pack files that are not contained in the index,
		// uncompressed the size of packs containing uncompressed blobs
		unref, uncompressed uint64
	}
	// packs counts pack files by usage and by planned action.
	packs struct {
		// usage of the pack files listed in the index
		used, unused, partlyUsed uint
		// unref counts pack files that are not contained in the index;
		// keep, repack and remove are the planned actions
		unref, keep, repack, remove uint
	}
}
2016-08-04 17:42:40 +00:00
2020-11-30 15:55:14 +00:00
type prunePlan struct {
2022-08-28 10:17:20 +00:00
removePacksFirst restic . IDSet // packs to remove first (unreferenced packs)
repackPacks restic . IDSet // packs to repack
keepBlobs restic . CountedBlobSet // blobs to keep during repacking
removePacks restic . IDSet // packs to remove
ignorePacks restic . IDSet // packs to ignore when rebuilding the index
2020-07-19 05:55:14 +00:00
}
2016-08-04 17:42:40 +00:00
2022-07-16 23:06:47 +00:00
// packInfo summarizes the index entries of a single pack file.
type packInfo struct {
	// number of used and unused blobs in the pack
	usedBlobs, unusedBlobs uint
	// size of the used and unused blobs; usedSize starts out as the
	// computed pack header size
	usedSize, unusedSize uint64
	// blob type of the pack; restic.NumBlobTypes means "not set yet" and
	// restic.InvalidBlob marks a pack with mixed blob types
	tpe restic.BlobType
	// set if at least one blob of the pack is not compressed
	uncompressed bool
}
type packInfoWithID struct {
ID restic . ID
packInfo
2022-12-28 17:02:08 +00:00
mustCompress bool
2022-07-16 23:06:47 +00:00
}
2020-11-30 15:55:14 +00:00
// planPrune selects which files to rewrite and which to delete and which blobs to keep.
// Also some summary statistics are returned.
2022-12-03 09:47:19 +00:00
func planPrune ( ctx context . Context , opts PruneOptions , repo restic . Repository , ignoreSnapshots restic . IDSet , quiet bool ) ( prunePlan , pruneStats , error ) {
2022-07-16 23:06:47 +00:00
var stats pruneStats
2022-12-03 09:47:19 +00:00
usedBlobs , err := getUsedBlobs ( ctx , repo , ignoreSnapshots , quiet )
2022-07-16 23:06:47 +00:00
if err != nil {
return prunePlan { } , stats , err
2020-11-30 15:55:14 +00:00
}
2020-07-19 05:55:14 +00:00
2022-07-16 23:06:47 +00:00
Verbosef ( "searching used packs...\n" )
keepBlobs , indexPack , err := packInfoFromIndex ( ctx , repo . Index ( ) , usedBlobs , & stats )
if err != nil {
return prunePlan { } , stats , err
2016-08-04 17:42:40 +00:00
}
2022-07-16 23:06:47 +00:00
Verbosef ( "collecting packs for deletion and repacking\n" )
2022-12-03 09:47:19 +00:00
plan , err := decidePackAction ( ctx , opts , repo , indexPack , & stats , quiet )
2022-07-16 23:06:47 +00:00
if err != nil {
return prunePlan { } , stats , err
}
2020-11-30 15:55:14 +00:00
2022-07-16 23:06:47 +00:00
if len ( plan . repackPacks ) != 0 {
2022-08-28 10:24:30 +00:00
blobCount := keepBlobs . Len ( )
2022-07-16 23:06:47 +00:00
// when repacking, we do not want to keep blobs which are
// already contained in kept packs, so delete them from keepBlobs
2022-08-19 18:04:39 +00:00
repo . Index ( ) . Each ( ctx , func ( blob restic . PackedBlob ) {
2022-07-16 23:06:47 +00:00
if plan . removePacks . Has ( blob . PackID ) || plan . repackPacks . Has ( blob . PackID ) {
2022-08-19 18:04:39 +00:00
return
2022-07-16 23:06:47 +00:00
}
keepBlobs . Delete ( blob . BlobHandle )
2022-08-19 18:04:39 +00:00
} )
2022-08-28 10:24:30 +00:00
if keepBlobs . Len ( ) < blobCount / 2 {
// replace with copy to shrink map to necessary size if there's a chance to benefit
keepBlobs = keepBlobs . Copy ( )
}
2022-07-16 23:06:47 +00:00
} else {
// keepBlobs is only needed if packs are repacked
keepBlobs = nil
}
plan . keepBlobs = keepBlobs
return plan , stats , nil
}
2016-08-04 17:42:40 +00:00
2022-08-28 10:17:20 +00:00
func packInfoFromIndex ( ctx context . Context , idx restic . MasterIndex , usedBlobs restic . CountedBlobSet , stats * pruneStats ) ( restic . CountedBlobSet , map [ restic . ID ] packInfo , error ) {
2020-07-19 05:55:14 +00:00
// iterate over all blobs in index to find out which blobs are duplicates
2022-08-28 10:17:20 +00:00
// The counter in usedBlobs describes how many instances of the blob exist in the repository index
// Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist
2022-08-19 18:04:39 +00:00
idx . Each ( ctx , func ( blob restic . PackedBlob ) {
2020-11-05 20:52:34 +00:00
bh := blob . BlobHandle
2022-08-28 10:17:20 +00:00
count , ok := usedBlobs [ bh ]
if ok {
if count < math . MaxUint8 {
2022-07-16 22:27:40 +00:00
// don't overflow, but saturate count at 255
// this can lead to a non-optimal pack selection, but won't cause
// problems otherwise
2021-02-19 15:57:51 +00:00
count ++
}
2022-08-28 10:17:20 +00:00
usedBlobs [ bh ] = count
2020-03-31 12:33:32 +00:00
}
2022-08-19 18:04:39 +00:00
} )
2020-07-19 05:55:14 +00:00
// Check if all used blobs have been found in index
2022-08-28 10:17:20 +00:00
missingBlobs := restic . NewBlobSet ( )
for bh , count := range usedBlobs {
if count == 0 {
// blob does not exist in any pack files
missingBlobs . Insert ( bh )
}
}
if len ( missingBlobs ) != 0 {
2021-01-28 20:48:24 +00:00
Warnf ( "%v not found in the index\n\n" +
"Integrity check failed: Data seems to be missing.\n" +
"Will not start prune to prevent (additional) data loss!\n" +
2020-03-31 21:31:33 +00:00
"Please report this error (along with the output of the 'prune' run) at\n" +
2022-08-28 10:17:20 +00:00
"https://github.com/restic/restic/issues/new/choose\n" , missingBlobs )
2022-07-16 23:06:47 +00:00
return nil , nil , errorIndexIncomplete
2020-03-31 21:31:33 +00:00
}
2017-09-19 08:50:07 +00:00
2020-07-19 05:55:14 +00:00
indexPack := make ( map [ restic . ID ] packInfo )
2016-08-04 17:42:40 +00:00
2020-11-16 04:04:13 +00:00
// save computed pack header size
2022-07-16 23:06:47 +00:00
for pid , hdrSize := range pack . Size ( ctx , idx , true ) {
2020-11-16 04:04:13 +00:00
// initialize tpe with NumBlobTypes to indicate it's not set
indexPack [ pid ] = packInfo { tpe : restic . NumBlobTypes , usedSize : uint64 ( hdrSize ) }
}
2022-08-28 10:17:20 +00:00
hasDuplicates := false
2020-07-19 05:55:14 +00:00
// iterate over all blobs in index to generate packInfo
2022-08-19 18:04:39 +00:00
idx . Each ( ctx , func ( blob restic . PackedBlob ) {
2020-11-16 04:04:13 +00:00
ip := indexPack [ blob . PackID ]
// Set blob type if not yet set
if ip . tpe == restic . NumBlobTypes {
ip . tpe = blob . Type
2020-07-19 05:55:14 +00:00
}
2020-11-16 04:04:13 +00:00
2020-07-19 05:55:14 +00:00
// mark mixed packs with "Invalid blob type"
if ip . tpe != blob . Type {
ip . tpe = restic . InvalidBlob
2017-07-20 20:22:51 +00:00
}
2020-11-05 20:52:34 +00:00
bh := blob . BlobHandle
2020-11-16 04:04:13 +00:00
size := uint64 ( blob . Length )
2022-08-28 10:17:20 +00:00
dupCount := usedBlobs [ bh ]
2020-07-19 05:55:14 +00:00
switch {
2022-08-28 10:17:20 +00:00
case dupCount >= 2 :
hasDuplicates = true
// mark as unused for now, we will later on select one copy
ip . unusedSize += size
ip . unusedBlobs ++
2022-10-22 17:10:33 +00:00
// count as duplicate, will later on change one copy to be counted as used
stats . size . duplicate += size
stats . blobs . duplicate ++
2022-08-28 10:17:20 +00:00
case dupCount == 1 : // used blob, not duplicate
2020-07-19 05:55:14 +00:00
ip . usedSize += size
ip . usedBlobs ++
2022-10-22 17:10:33 +00:00
stats . size . used += size
stats . blobs . used ++
2020-07-19 05:55:14 +00:00
default : // unused blob
ip . unusedSize += size
ip . unusedBlobs ++
2022-10-22 17:10:33 +00:00
stats . size . unused += size
stats . blobs . unused ++
2016-08-04 17:42:40 +00:00
}
2022-04-10 09:57:01 +00:00
if ! blob . IsCompressed ( ) {
ip . uncompressed = true
}
2020-07-19 05:55:14 +00:00
// update indexPack
indexPack [ blob . PackID ] = ip
2022-08-19 18:04:39 +00:00
} )
2016-08-04 17:42:40 +00:00
2021-02-19 15:57:51 +00:00
// if duplicate blobs exist, those will be set to either "used" or "unused":
2022-07-16 22:27:40 +00:00
// - mark only one occurence of duplicate blobs as used
2021-02-19 15:57:51 +00:00
// - if there are already some used blobs in a pack, possibly mark duplicates in this pack as "used"
2022-07-16 22:27:40 +00:00
// - if there are no used blobs in a pack, possibly mark duplicates as "unused"
2022-08-28 10:17:20 +00:00
if hasDuplicates {
2021-02-19 15:57:51 +00:00
// iterate again over all blobs in index (this is pretty cheap, all in-mem)
2022-08-19 18:04:39 +00:00
idx . Each ( ctx , func ( blob restic . PackedBlob ) {
2021-02-19 15:57:51 +00:00
bh := blob . BlobHandle
2022-08-28 10:17:20 +00:00
count , ok := usedBlobs [ bh ]
// skip non-duplicate, aka. normal blobs
// count == 0 is used to mark that this was a duplicate blob with only a single occurence remaining
if ! ok || count == 1 {
2022-08-19 18:04:39 +00:00
return
2021-02-19 15:57:51 +00:00
}
ip := indexPack [ blob . PackID ]
size := uint64 ( blob . Length )
switch {
2022-08-28 10:17:20 +00:00
case ip . usedBlobs > 0 , count == 0 :
// other used blobs in pack or "last" occurence -> transition to used
2021-02-19 15:57:51 +00:00
ip . usedSize += size
ip . usedBlobs ++
2022-08-28 10:17:20 +00:00
ip . unusedSize -= size
ip . unusedBlobs --
2022-10-22 17:10:33 +00:00
// same for the global statistics
stats . size . used += size
stats . blobs . used ++
stats . size . duplicate -= size
stats . blobs . duplicate --
2022-08-28 10:17:20 +00:00
// let other occurences remain marked as unused
usedBlobs [ bh ] = 1
2021-02-19 15:57:51 +00:00
default :
2022-08-28 10:17:20 +00:00
// remain unused and decrease counter
count --
if count == 1 {
// setting count to 1 would lead to forgetting that this blob had duplicates
// thus use the special value zero. This will select the last instance of the blob for keeping.
count = 0
}
usedBlobs [ bh ] = count
2021-02-19 15:57:51 +00:00
}
// update indexPack
indexPack [ blob . PackID ] = ip
2022-08-19 18:04:39 +00:00
} )
2021-02-19 15:57:51 +00:00
}
2022-08-28 10:17:20 +00:00
// Sanity check. If no duplicates exist, all blobs have value 1. After handling
// duplicates, this also applies to duplicates.
for _ , count := range usedBlobs {
if count != 1 {
panic ( "internal error during blob selection" )
}
}
return usedBlobs , indexPack , nil
2022-07-16 23:06:47 +00:00
}
2022-12-03 09:47:19 +00:00
func decidePackAction ( ctx context . Context , opts PruneOptions , repo restic . Repository , indexPack map [ restic . ID ] packInfo , stats * pruneStats , quiet bool ) ( prunePlan , error ) {
2020-07-19 05:55:14 +00:00
removePacksFirst := restic . NewIDSet ( )
2016-09-01 14:04:29 +00:00
removePacks := restic . NewIDSet ( )
2020-07-19 05:55:14 +00:00
repackPacks := restic . NewIDSet ( )
var repackCandidates [ ] packInfoWithID
2022-07-02 22:16:04 +00:00
var repackSmallCandidates [ ] packInfoWithID
2022-04-10 09:57:01 +00:00
repoVersion := repo . Config ( ) . Version
2022-07-02 22:18:44 +00:00
// only repack very small files by default
targetPackSize := repo . PackSize ( ) / 25
if opts . RepackSmall {
// consider files with at least 80% of the target size as large enough
targetPackSize = repo . PackSize ( ) / 5 * 4
}
2022-04-10 09:57:01 +00:00
2020-07-19 05:55:14 +00:00
// loop over all packs and decide what to do
2022-12-03 09:47:19 +00:00
bar := newProgressMax ( quiet , uint64 ( len ( indexPack ) ) , "packs processed" )
2022-07-16 23:06:47 +00:00
err := repo . List ( ctx , restic . PackFile , func ( id restic . ID , packSize int64 ) error {
2020-07-19 05:55:14 +00:00
p , ok := indexPack [ id ]
if ! ok {
// Pack was not referenced in index and is not used => immediately remove!
Verboseff ( "will remove pack %v as it is unused and not indexed\n" , id . Str ( ) )
removePacksFirst . Insert ( id )
stats . size . unref += uint64 ( packSize )
return nil
}
2016-09-12 12:26:47 +00:00
2022-07-17 09:41:56 +00:00
if p . unusedSize + p . usedSize != uint64 ( packSize ) && p . usedBlobs != 0 {
2020-11-30 06:25:10 +00:00
// Pack size does not fit and pack is needed => error
// If the pack is not needed, this is no error, the pack can
// and will be simply removed, see below.
2021-06-20 12:25:40 +00:00
Warnf ( "pack %s: calculated size %d does not match real size %d\nRun 'restic rebuild-index'.\n" ,
2020-07-19 05:55:14 +00:00
id . Str ( ) , p . unusedSize + p . usedSize , packSize )
return errorSizeNotMatching
}
// statistics
switch {
2021-02-19 15:57:51 +00:00
case p . usedBlobs == 0 :
2020-07-19 05:55:14 +00:00
stats . packs . unused ++
case p . unusedBlobs == 0 :
stats . packs . used ++
default :
stats . packs . partlyUsed ++
}
2022-10-22 18:07:45 +00:00
if p . uncompressed {
stats . size . uncompressed += p . unusedSize + p . usedSize
}
2022-05-01 10:02:05 +00:00
mustCompress := false
if repoVersion >= 2 {
// repo v2: always repack tree blobs if uncompressed
// compress data blobs if requested
mustCompress = ( p . tpe == restic . TreeBlob || opts . RepackUncompressed ) && p . uncompressed
}
2022-04-10 09:57:01 +00:00
2020-07-19 05:55:14 +00:00
// decide what to do
switch {
2021-02-19 15:57:51 +00:00
case p . usedBlobs == 0 :
2020-07-19 05:55:14 +00:00
// All blobs in pack are no longer used => remove pack!
removePacks . Insert ( id )
stats . blobs . remove += p . unusedBlobs
stats . size . remove += p . unusedSize
2020-11-03 11:50:47 +00:00
case opts . RepackCachableOnly && p . tpe == restic . DataBlob :
2020-07-19 05:55:14 +00:00
// if this is a data pack and --repack-cacheable-only is set => keep pack!
2022-07-17 09:41:56 +00:00
stats . packs . keep ++
2020-11-03 11:50:47 +00:00
2022-07-02 22:18:44 +00:00
case p . unusedBlobs == 0 && p . tpe != restic . InvalidBlob && ! mustCompress :
if packSize >= int64 ( targetPackSize ) {
2022-07-02 22:16:04 +00:00
// All blobs in pack are used and not mixed => keep pack!
stats . packs . keep ++
} else {
2022-12-28 17:02:08 +00:00
repackSmallCandidates = append ( repackSmallCandidates , packInfoWithID { ID : id , packInfo : p , mustCompress : mustCompress } )
2022-07-02 22:16:04 +00:00
}
2020-07-19 05:55:14 +00:00
default :
// all other packs are candidates for repacking
2022-12-28 17:02:08 +00:00
repackCandidates = append ( repackCandidates , packInfoWithID { ID : id , packInfo : p , mustCompress : mustCompress } )
2020-07-19 05:55:14 +00:00
}
delete ( indexPack , id )
2020-11-04 13:11:29 +00:00
bar . Add ( 1 )
2020-07-19 05:55:14 +00:00
return nil
} )
bar . Done ( )
if err != nil {
2022-07-16 23:06:47 +00:00
return prunePlan { } , err
2020-07-19 05:55:14 +00:00
}
2020-11-30 06:25:10 +00:00
// At this point indexPacks contains only missing packs!
// missing packs that are not needed can be ignored
ignorePacks := restic . NewIDSet ( )
for id , p := range indexPack {
2021-02-19 15:57:51 +00:00
if p . usedBlobs == 0 {
2020-11-30 06:25:10 +00:00
ignorePacks . Insert ( id )
stats . blobs . remove += p . unusedBlobs
stats . size . remove += p . unusedSize
delete ( indexPack , id )
}
}
2020-07-19 05:55:14 +00:00
if len ( indexPack ) != 0 {
2021-01-31 10:31:27 +00:00
Warnf ( "The index references %d needed pack files which are missing from the repository:\n" , len ( indexPack ) )
for id := range indexPack {
Warnf ( " %v\n" , id )
}
2022-07-16 23:06:47 +00:00
return prunePlan { } , errorPacksMissing
2020-07-19 05:55:14 +00:00
}
2020-11-30 06:25:10 +00:00
if len ( ignorePacks ) != 0 {
2021-01-28 20:42:26 +00:00
Warnf ( "Missing but unneeded pack files are referenced in the index, will be repaired\n" )
for id := range ignorePacks {
Warnf ( "will forget missing pack file %v\n" , id )
}
2020-11-30 06:25:10 +00:00
}
2020-07-19 05:55:14 +00:00
2022-07-02 22:16:04 +00:00
if len ( repackSmallCandidates ) < 10 {
// too few small files to be worth the trouble, this also prevents endlessly repacking
// if there is just a single pack file below the target size
stats . packs . keep += uint ( len ( repackSmallCandidates ) )
} else {
repackCandidates = append ( repackCandidates , repackSmallCandidates ... )
}
2020-07-19 05:55:14 +00:00
// Sort repackCandidates such that packs with highest ratio unused/used space are picked first.
// This is equivalent to sorting by unused / total space.
// Instead of unused[i] / used[i] > unused[j] / used[j] we use
// unused[i] * used[j] > unused[j] * used[i] as uint32*uint32 < uint64
2022-07-02 22:18:44 +00:00
// Moreover packs containing trees and too small packs are sorted to the beginning
2020-07-19 05:55:14 +00:00
sort . Slice ( repackCandidates , func ( i , j int ) bool {
pi := repackCandidates [ i ] . packInfo
pj := repackCandidates [ j ] . packInfo
switch {
2021-01-15 15:42:04 +00:00
case pi . tpe != restic . DataBlob && pj . tpe == restic . DataBlob :
2020-07-19 05:55:14 +00:00
return true
2021-01-15 15:42:04 +00:00
case pj . tpe != restic . DataBlob && pi . tpe == restic . DataBlob :
2020-07-19 05:55:14 +00:00
return false
2022-07-02 22:18:44 +00:00
case pi . unusedSize + pi . usedSize < uint64 ( targetPackSize ) && pj . unusedSize + pj . usedSize >= uint64 ( targetPackSize ) :
2022-06-11 21:11:02 +00:00
return true
2022-07-02 22:18:44 +00:00
case pj . unusedSize + pj . usedSize < uint64 ( targetPackSize ) && pi . unusedSize + pi . usedSize >= uint64 ( targetPackSize ) :
2022-06-11 21:11:02 +00:00
return false
2016-08-25 20:35:22 +00:00
}
2020-07-19 05:55:14 +00:00
return pi . unusedSize * pj . usedSize > pj . unusedSize * pi . usedSize
} )
2016-08-25 20:35:22 +00:00
2020-11-03 12:27:53 +00:00
repack := func ( id restic . ID , p packInfo ) {
repackPacks . Insert ( id )
2021-02-19 15:57:51 +00:00
stats . blobs . repack += p . unusedBlobs + p . usedBlobs
2020-11-03 12:27:53 +00:00
stats . size . repack += p . unusedSize + p . usedSize
stats . blobs . repackrm += p . unusedBlobs
stats . size . repackrm += p . unusedSize
2022-12-28 17:02:08 +00:00
if p . uncompressed {
stats . size . uncompressed -= p . unusedSize + p . usedSize
}
2020-11-03 12:27:53 +00:00
}
2022-07-17 10:27:52 +00:00
// calculate limit for number of unused bytes in the repo after repacking
maxUnusedSizeAfter := opts . maxUnusedBytes ( stats . size . used )
2020-07-19 05:55:14 +00:00
for _ , p := range repackCandidates {
reachedUnusedSizeAfter := ( stats . size . unused - stats . size . remove - stats . size . repackrm < maxUnusedSizeAfter )
2021-12-07 19:45:36 +00:00
reachedRepackSize := stats . size . repack + p . unusedSize + p . usedSize >= opts . MaxRepackBytes
2022-07-02 22:18:44 +00:00
packIsLargeEnough := p . unusedSize + p . usedSize >= uint64 ( targetPackSize )
2020-11-03 12:28:21 +00:00
2020-07-19 05:55:14 +00:00
switch {
2021-01-29 21:25:41 +00:00
case reachedRepackSize :
2022-07-17 09:41:56 +00:00
stats . packs . keep ++
2021-01-29 21:25:41 +00:00
2022-12-28 17:02:08 +00:00
case p . tpe != restic . DataBlob , p . mustCompress :
2021-02-19 15:57:51 +00:00
// repacking non-data packs / uncompressed-trees is only limited by repackSize
2020-07-19 05:55:14 +00:00
repack ( p . ID , p . packInfo )
2022-04-30 22:02:01 +00:00
case reachedUnusedSizeAfter && packIsLargeEnough :
2020-07-19 05:55:14 +00:00
// for all other packs stop repacking if tolerated unused size is reached.
2022-07-17 09:41:56 +00:00
stats . packs . keep ++
2016-08-25 20:35:22 +00:00
2020-07-19 05:55:14 +00:00
default :
repack ( p . ID , p . packInfo )
2016-08-25 20:35:22 +00:00
}
2020-07-19 05:55:14 +00:00
}
2016-08-25 20:35:22 +00:00
2020-11-30 15:55:14 +00:00
stats . packs . unref = uint ( len ( removePacksFirst ) )
stats . packs . repack = uint ( len ( repackPacks ) )
stats . packs . remove = uint ( len ( removePacks ) )
2022-10-22 18:07:45 +00:00
if repo . Config ( ) . Version < 2 {
// compression not supported for repository format version 1
stats . size . uncompressed = 0
}
2022-07-16 23:06:47 +00:00
return prunePlan { removePacksFirst : removePacksFirst ,
removePacks : removePacks ,
repackPacks : repackPacks ,
ignorePacks : ignorePacks ,
} , nil
2020-11-30 15:55:14 +00:00
}
// printPruneStats prints out the statistics
2022-12-03 09:47:19 +00:00
func printPruneStats ( stats pruneStats ) error {
2022-10-21 15:34:14 +00:00
Verboseff ( "\nused: %10d blobs / %s\n" , stats . blobs . used , ui . FormatBytes ( stats . size . used ) )
2020-07-19 05:55:14 +00:00
if stats . blobs . duplicate > 0 {
2022-10-21 15:34:14 +00:00
Verboseff ( "duplicates: %10d blobs / %s\n" , stats . blobs . duplicate , ui . FormatBytes ( stats . size . duplicate ) )
2020-07-19 05:55:14 +00:00
}
2022-10-21 15:34:14 +00:00
Verboseff ( "unused: %10d blobs / %s\n" , stats . blobs . unused , ui . FormatBytes ( stats . size . unused ) )
2020-07-19 05:55:14 +00:00
if stats . size . unref > 0 {
2022-10-21 15:34:14 +00:00
Verboseff ( "unreferenced: %s\n" , ui . FormatBytes ( stats . size . unref ) )
2020-07-19 05:55:14 +00:00
}
totalBlobs := stats . blobs . used + stats . blobs . unused + stats . blobs . duplicate
totalSize := stats . size . used + stats . size . duplicate + stats . size . unused + stats . size . unref
2020-11-18 21:30:22 +00:00
unusedSize := stats . size . duplicate + stats . size . unused
2022-10-21 15:34:14 +00:00
Verboseff ( "total: %10d blobs / %s\n" , totalBlobs , ui . FormatBytes ( totalSize ) )
Verboseff ( "unused size: %s of total size\n" , ui . FormatPercent ( unusedSize , totalSize ) )
2020-07-19 05:55:14 +00:00
2022-10-21 15:34:14 +00:00
Verbosef ( "\nto repack: %10d blobs / %s\n" , stats . blobs . repack , ui . FormatBytes ( stats . size . repack ) )
Verbosef ( "this removes: %10d blobs / %s\n" , stats . blobs . repackrm , ui . FormatBytes ( stats . size . repackrm ) )
Verbosef ( "to delete: %10d blobs / %s\n" , stats . blobs . remove , ui . FormatBytes ( stats . size . remove + stats . size . unref ) )
2020-07-19 05:55:14 +00:00
totalPruneSize := stats . size . remove + stats . size . repackrm + stats . size . unref
2022-10-21 15:34:14 +00:00
Verbosef ( "total prune: %10d blobs / %s\n" , stats . blobs . remove + stats . blobs . repackrm , ui . FormatBytes ( totalPruneSize ) )
2022-10-22 18:07:45 +00:00
if stats . size . uncompressed > 0 {
Verbosef ( "not yet compressed: %s\n" , ui . FormatBytes ( stats . size . uncompressed ) )
}
2022-10-21 15:34:14 +00:00
Verbosef ( "remaining: %10d blobs / %s\n" , totalBlobs - ( stats . blobs . remove + stats . blobs . repackrm ) , ui . FormatBytes ( totalSize - totalPruneSize ) )
2020-11-18 21:30:22 +00:00
unusedAfter := unusedSize - stats . size . remove - stats . size . repackrm
2020-07-19 05:55:14 +00:00
Verbosef ( "unused size after prune: %s (%s of remaining size)\n" ,
2022-10-21 15:34:14 +00:00
ui . FormatBytes ( unusedAfter ) , ui . FormatPercent ( unusedAfter , totalSize - totalPruneSize ) )
2020-07-19 05:55:14 +00:00
Verbosef ( "\n" )
Verboseff ( "totally used packs: %10d\n" , stats . packs . used )
Verboseff ( "partly used packs: %10d\n" , stats . packs . partlyUsed )
Verboseff ( "unused packs: %10d\n\n" , stats . packs . unused )
2022-01-06 21:15:15 +00:00
Verboseff ( "to keep: %10d packs\n" , stats . packs . keep )
2020-11-30 15:55:14 +00:00
Verboseff ( "to repack: %10d packs\n" , stats . packs . repack )
Verboseff ( "to delete: %10d packs\n" , stats . packs . remove )
if stats . packs . unref > 0 {
Verboseff ( "to delete: %10d unreferenced packs\n\n" , stats . packs . unref )
2020-07-19 05:55:14 +00:00
}
2020-11-30 15:55:14 +00:00
return nil
}
// doPrune does the actual pruning:
// - remove unreferenced packs first
// - repack given pack files while keeping the given blobs
// - rebuild the index while ignoring all files that will be deleted
// - delete the files
// plan.removePacks and plan.ignorePacks are modified in this function.
2021-10-31 22:08:13 +00:00
func doPrune ( ctx context . Context , opts PruneOptions , gopts GlobalOptions , repo restic . Repository , plan prunePlan ) ( err error ) {
2020-07-19 05:55:14 +00:00
if opts . DryRun {
if ! gopts . JSON && gopts . verbosity >= 2 {
2022-10-22 17:46:10 +00:00
Printf ( "Repeated prune dry-runs can report slightly different amounts of data to keep or repack. This is expected behavior.\n\n" )
2020-11-30 15:55:14 +00:00
if len ( plan . removePacksFirst ) > 0 {
Printf ( "Would have removed the following unreferenced packs:\n%v\n\n" , plan . removePacksFirst )
2020-07-19 05:55:14 +00:00
}
2020-11-30 15:55:14 +00:00
Printf ( "Would have repacked and removed the following packs:\n%v\n\n" , plan . repackPacks )
Printf ( "Would have removed the following no longer used packs:\n%v\n\n" , plan . removePacks )
2017-03-04 16:38:34 +00:00
}
2020-07-19 05:55:14 +00:00
// Always quit here if DryRun was set!
return nil
2016-08-04 17:42:40 +00:00
}
2020-07-19 05:55:14 +00:00
// unreferenced packs can be safely deleted first
2020-11-30 15:55:14 +00:00
if len ( plan . removePacksFirst ) != 0 {
2020-07-19 05:55:14 +00:00
Verbosef ( "deleting unreferenced packs\n" )
2021-10-31 22:08:13 +00:00
DeleteFiles ( ctx , gopts , repo , plan . removePacksFirst , restic . PackFile )
2020-07-19 05:55:14 +00:00
}
2017-07-18 21:07:40 +00:00
2020-11-30 15:55:14 +00:00
if len ( plan . repackPacks ) != 0 {
2020-07-19 05:55:14 +00:00
Verbosef ( "repacking packs\n" )
2020-11-30 15:55:14 +00:00
bar := newProgressMax ( ! gopts . Quiet , uint64 ( len ( plan . repackPacks ) ) , "packs repacked" )
_ , err := repository . Repack ( ctx , repo , repo , plan . repackPacks , plan . keepBlobs , bar )
2020-11-08 20:03:59 +00:00
bar . Done ( )
2020-07-19 05:55:14 +00:00
if err != nil {
2020-12-19 13:22:59 +00:00
return errors . Fatalf ( "%s" , err )
2020-07-19 05:55:14 +00:00
}
2020-11-06 10:26:35 +00:00
2020-07-19 05:55:14 +00:00
// Also remove repacked packs
2020-11-30 15:55:14 +00:00
plan . removePacks . Merge ( plan . repackPacks )
2022-07-17 10:05:04 +00:00
if len ( plan . keepBlobs ) != 0 {
Warnf ( "%v was not repacked\n\n" +
"Integrity check failed.\n" +
"Please report this error (along with the output of the 'prune' run) at\n" +
"https://github.com/restic/restic/issues/new/choose\n" , plan . keepBlobs )
return errors . Fatal ( "internal error: blobs were not repacked" )
}
2022-10-22 16:10:17 +00:00
// allow GC of the blob set
plan . keepBlobs = nil
2017-06-15 11:12:46 +00:00
}
2020-11-30 15:55:14 +00:00
if len ( plan . ignorePacks ) == 0 {
plan . ignorePacks = plan . removePacks
2020-11-30 06:25:10 +00:00
} else {
2020-11-30 15:55:14 +00:00
plan . ignorePacks . Merge ( plan . removePacks )
2020-11-30 06:25:10 +00:00
}
2021-08-16 14:02:01 +00:00
if opts . unsafeRecovery {
Verbosef ( "deleting index files\n" )
2022-06-12 12:43:43 +00:00
indexFiles := repo . Index ( ) . ( * index . MasterIndex ) . IDs ( )
2021-10-31 22:08:13 +00:00
err = DeleteFilesChecked ( ctx , gopts , repo , indexFiles , restic . IndexFile )
2021-08-16 14:02:01 +00:00
if err != nil {
return errors . Fatalf ( "%s" , err )
}
2020-11-30 15:55:14 +00:00
} else if len ( plan . ignorePacks ) != 0 {
2021-10-31 22:08:13 +00:00
err = rebuildIndexFiles ( ctx , gopts , repo , plan . ignorePacks , nil )
2020-10-10 16:54:13 +00:00
if err != nil {
2020-12-19 13:22:59 +00:00
return errors . Fatalf ( "%s" , err )
2020-07-19 05:55:14 +00:00
}
2020-11-30 06:25:10 +00:00
}
2020-07-19 05:55:14 +00:00
2020-11-30 15:55:14 +00:00
if len ( plan . removePacks ) != 0 {
Verbosef ( "removing %d old packs\n" , len ( plan . removePacks ) )
2021-10-31 22:08:13 +00:00
DeleteFiles ( ctx , gopts , repo , plan . removePacks , restic . PackFile )
2016-08-25 20:35:22 +00:00
}
2021-08-16 14:02:01 +00:00
if opts . unsafeRecovery {
2021-10-31 22:08:13 +00:00
_ , err = writeIndexFiles ( ctx , gopts , repo , plan . ignorePacks , nil )
2021-08-16 14:02:01 +00:00
if err != nil {
return errors . Fatalf ( "%s" , err )
}
}
2016-09-17 10:36:05 +00:00
Verbosef ( "done\n" )
2016-08-04 17:42:40 +00:00
return nil
}
2020-07-19 07:48:53 +00:00
2021-10-31 22:08:13 +00:00
func writeIndexFiles ( ctx context . Context , gopts GlobalOptions , repo restic . Repository , removePacks restic . IDSet , extraObsolete restic . IDs ) ( restic . IDSet , error ) {
2020-10-10 16:54:13 +00:00
Verbosef ( "rebuilding index\n" )
2022-05-26 10:49:03 +00:00
bar := newProgressMax ( ! gopts . Quiet , 0 , "packs processed" )
2021-10-31 22:08:13 +00:00
obsoleteIndexes , err := repo . Index ( ) . Save ( ctx , repo , removePacks , extraObsolete , bar )
2020-11-04 13:11:29 +00:00
bar . Done ( )
2021-08-16 14:02:01 +00:00
return obsoleteIndexes , err
}
2021-10-31 22:08:13 +00:00
func rebuildIndexFiles ( ctx context . Context , gopts GlobalOptions , repo restic . Repository , removePacks restic . IDSet , extraObsolete restic . IDs ) error {
obsoleteIndexes , err := writeIndexFiles ( ctx , gopts , repo , removePacks , extraObsolete )
2020-10-10 16:54:13 +00:00
if err != nil {
return err
}
Verbosef ( "deleting obsolete index files\n" )
2021-10-31 22:08:13 +00:00
return DeleteFilesChecked ( ctx , gopts , repo , obsoleteIndexes , restic . IndexFile )
2020-10-10 16:54:13 +00:00
}
2022-12-03 09:47:19 +00:00
func getUsedBlobs ( ctx context . Context , repo restic . Repository , ignoreSnapshots restic . IDSet , quiet bool ) ( usedBlobs restic . CountedBlobSet , err error ) {
2020-12-06 04:22:27 +00:00
var snapshotTrees restic . IDs
Verbosef ( "loading all snapshots...\n" )
2021-10-31 22:08:13 +00:00
err = restic . ForAllSnapshots ( ctx , repo . Backend ( ) , repo , ignoreSnapshots ,
2020-12-06 04:22:27 +00:00
func ( id restic . ID , sn * restic . Snapshot , err error ) error {
if err != nil {
2022-05-23 20:32:59 +00:00
debug . Log ( "failed to load snapshot %v (error %v)" , id , err )
2020-12-06 04:22:27 +00:00
return err
}
2022-05-23 20:32:59 +00:00
debug . Log ( "add snapshot %v (tree %v)" , id , * sn . Tree )
2020-12-06 04:22:27 +00:00
snapshotTrees = append ( snapshotTrees , * sn . Tree )
return nil
} )
if err != nil {
2022-05-23 20:38:45 +00:00
return nil , errors . Fatalf ( "failed loading snapshot: %v" , err )
2020-12-06 04:22:27 +00:00
}
Verbosef ( "finding data that is still in use for %d snapshots\n" , len ( snapshotTrees ) )
2020-07-19 07:48:53 +00:00
2022-08-28 10:17:20 +00:00
usedBlobs = restic . NewCountedBlobSet ( )
2020-07-19 07:48:53 +00:00
2022-12-03 09:47:19 +00:00
bar := newProgressMax ( ! quiet , uint64 ( len ( snapshotTrees ) ) , "snapshots" )
2020-08-03 17:31:49 +00:00
defer bar . Done ( )
2020-07-19 07:48:53 +00:00
2020-11-07 13:16:04 +00:00
err = restic . FindUsedBlobs ( ctx , repo , snapshotTrees , usedBlobs , bar )
if err != nil {
if repo . Backend ( ) . IsNotExist ( err ) {
2022-07-12 18:48:01 +00:00
return nil , errors . Fatal ( "unable to load a tree from the repository: " + err . Error ( ) )
2020-07-19 07:48:53 +00:00
}
2020-11-07 13:16:04 +00:00
return nil , err
2020-07-19 07:48:53 +00:00
}
return usedBlobs , nil
}