2016-08-04 17:42:40 +00:00
package main
import (
2022-07-16 23:06:47 +00:00
"context"
2020-11-03 09:53:38 +00:00
"math"
2023-06-02 19:57:40 +00:00
"runtime"
2020-07-19 05:55:14 +00:00
"strconv"
2020-11-03 09:53:38 +00:00
"strings"
2020-07-19 05:55:14 +00:00
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/repository"
2017-07-24 15:42:25 +00:00
"github.com/restic/restic/internal/restic"
2022-10-21 15:34:14 +00:00
"github.com/restic/restic/internal/ui"
2024-01-20 14:58:06 +00:00
"github.com/restic/restic/internal/ui/progress"
2024-04-05 22:51:20 +00:00
"github.com/restic/restic/internal/ui/termstatus"
2017-07-23 12:21:03 +00:00
2016-09-17 10:36:05 +00:00
"github.com/spf13/cobra"
2016-08-04 17:42:40 +00:00
)
2016-09-17 10:36:05 +00:00
var cmdPrune = & cobra . Command {
Use : "prune [flags]" ,
2017-09-11 16:32:44 +00:00
Short : "Remove unneeded data from the repository" ,
2016-09-17 10:36:05 +00:00
Long : `
The "prune" command checks the repository and removes data that is not
referenced and therefore not needed any more .
2019-11-05 06:03:38 +00:00
EXIT STATUS
== == == == == =
2024-06-30 15:52:50 +00:00
Exit status is 0 if the command was successful .
Exit status is 1 if there was any error .
2024-07-10 19:46:26 +00:00
Exit status is 10 if the repository does not exist .
2024-06-30 15:53:33 +00:00
Exit status is 11 if the repository is already locked .
2016-09-17 10:36:05 +00:00
` ,
2017-08-06 19:02:16 +00:00
DisableAutoGenTag : true ,
2024-02-10 21:58:10 +00:00
RunE : func ( cmd * cobra . Command , _ [ ] string ) error {
2024-04-05 22:51:20 +00:00
term , cancel := setupTermstatus ( )
defer cancel ( )
return runPrune ( cmd . Context ( ) , pruneOptions , globalOptions , term )
2016-09-17 10:36:05 +00:00
} ,
2016-08-04 17:42:40 +00:00
}
2020-07-19 05:55:14 +00:00
// PruneOptions collects all options for the prune command.
//
// Exported string fields hold the raw command line values. The derived
// fields (unsafeRecovery, maxUnusedBytes, MaxRepackBytes) are filled in by
// verifyPruneOptions and runPrune before the prune is planned.
type PruneOptions struct {
	// DryRun reports what would be done without modifying the repository.
	DryRun bool
	// UnsafeNoSpaceRecovery is the raw value of --unsafe-recover-no-free-space.
	// runPrune requires it to equal the repository ID.
	UnsafeNoSpaceRecovery string
	// unsafeRecovery is set by runPrune once UnsafeNoSpaceRecovery has been
	// checked against the repository ID.
	unsafeRecovery bool

	// MaxUnused is the raw value of --max-unused: "unlimited", a percentage
	// such as "5%", or an absolute size.
	MaxUnused string
	// maxUnusedBytes calculates the number of unused bytes after repacking,
	// according to MaxUnused.
	maxUnusedBytes func(used uint64) (unused uint64)

	// MaxRepackSize is the raw value of --max-repack-size.
	MaxRepackSize string
	// MaxRepackBytes is the parsed repack limit; verifyPruneOptions sets it
	// to math.MaxUint64 when MaxRepackSize is empty.
	MaxRepackBytes uint64

	// RepackCacheableOnly restricts repacking to packs which are cacheable.
	RepackCacheableOnly bool
	// RepackSmall repacks pack files below 80% of the target pack size.
	RepackSmall bool
	// RepackUncompressed repacks all uncompressed data.
	RepackUncompressed bool
}
// pruneOptions holds the flag values of the prune command. The flags are bound
// to it in init and cmdPrune passes it to runPrune.
var pruneOptions PruneOptions
2016-08-04 17:42:40 +00:00
func init ( ) {
2016-09-17 10:36:05 +00:00
cmdRoot . AddCommand ( cmdPrune )
2020-07-19 05:55:14 +00:00
f := cmdPrune . Flags ( )
f . BoolVarP ( & pruneOptions . DryRun , "dry-run" , "n" , false , "do not modify the repository, just print what would be done" )
2021-08-16 14:02:01 +00:00
f . StringVarP ( & pruneOptions . UnsafeNoSpaceRecovery , "unsafe-recover-no-free-space" , "" , "" , "UNSAFE, READ THE DOCUMENTATION BEFORE USING! Try to recover a repository stuck with no free space. Do not use without trying out 'prune --max-repack-size 0' first." )
2024-01-21 16:43:13 +00:00
addPruneOptions ( cmdPrune , & pruneOptions )
2020-07-19 05:55:14 +00:00
}
2024-01-21 16:43:13 +00:00
func addPruneOptions ( c * cobra . Command , pruneOptions * PruneOptions ) {
2020-07-19 05:55:14 +00:00
f := c . Flags ( )
2020-11-03 09:53:38 +00:00
f . StringVar ( & pruneOptions . MaxUnused , "max-unused" , "5%" , "tolerate given `limit` of unused data (absolute value in bytes with suffixes k/K, m/M, g/G, t/T, a value in % or the word 'unlimited')" )
2020-07-19 05:55:14 +00:00
f . StringVar ( & pruneOptions . MaxRepackSize , "max-repack-size" , "" , "maximum `size` to repack (allowed suffixes: k/K, m/M, g/G, t/T)" )
2024-07-01 22:45:59 +00:00
f . BoolVar ( & pruneOptions . RepackCacheableOnly , "repack-cacheable-only" , false , "only repack packs which are cacheable" )
2022-08-07 19:21:05 +00:00
f . BoolVar ( & pruneOptions . RepackSmall , "repack-small" , false , "repack pack files below 80% of target pack size" )
2022-05-01 10:02:05 +00:00
f . BoolVar ( & pruneOptions . RepackUncompressed , "repack-uncompressed" , false , "repack all uncompressed data" )
2020-07-19 05:55:14 +00:00
}
func verifyPruneOptions ( opts * PruneOptions ) error {
2021-12-07 19:45:36 +00:00
opts . MaxRepackBytes = math . MaxUint64
2020-07-19 05:55:14 +00:00
if len ( opts . MaxRepackSize ) > 0 {
2023-07-02 18:09:57 +00:00
size , err := ui . ParseBytes ( opts . MaxRepackSize )
2020-07-19 05:55:14 +00:00
if err != nil {
return err
}
opts . MaxRepackBytes = uint64 ( size )
}
2021-08-16 14:02:01 +00:00
if opts . UnsafeNoSpaceRecovery != "" {
// prevent repacking data to make sure users cannot get stuck.
opts . MaxRepackBytes = 0
}
2020-07-19 05:55:14 +00:00
2020-11-03 09:53:38 +00:00
maxUnused := strings . TrimSpace ( opts . MaxUnused )
if maxUnused == "" {
return errors . Fatalf ( "invalid value for --max-unused: %q" , opts . MaxUnused )
2020-07-19 05:55:14 +00:00
}
2020-11-03 09:53:38 +00:00
// parse MaxUnused either as unlimited, a percentage, or an absolute number of bytes
switch {
case maxUnused == "unlimited" :
2024-02-10 21:58:10 +00:00
opts . maxUnusedBytes = func ( _ uint64 ) uint64 {
2020-11-03 09:53:38 +00:00
return math . MaxUint64
}
case strings . HasSuffix ( maxUnused , "%" ) :
maxUnused = strings . TrimSuffix ( maxUnused , "%" )
p , err := strconv . ParseFloat ( maxUnused , 64 )
if err != nil {
return errors . Fatalf ( "invalid percentage %q passed for --max-unused: %v" , opts . MaxUnused , err )
}
if p < 0 {
return errors . Fatal ( "percentage for --max-unused must be positive" )
}
2020-07-19 05:55:14 +00:00
2020-11-03 09:53:38 +00:00
if p >= 100 {
return errors . Fatal ( "percentage for --max-unused must be below 100%" )
}
opts . maxUnusedBytes = func ( used uint64 ) uint64 {
return uint64 ( p / ( 100 - p ) * float64 ( used ) )
}
default :
2023-07-02 18:09:57 +00:00
size , err := ui . ParseBytes ( maxUnused )
2020-11-03 09:53:38 +00:00
if err != nil {
return errors . Fatalf ( "invalid number of bytes %q for --max-unused: %v" , opts . MaxUnused , err )
}
2024-02-10 21:58:10 +00:00
opts . maxUnusedBytes = func ( _ uint64 ) uint64 {
2020-11-03 09:53:38 +00:00
return uint64 ( size )
}
2020-07-19 05:55:14 +00:00
}
return nil
2016-08-04 17:42:40 +00:00
}
2024-04-05 22:51:20 +00:00
func runPrune ( ctx context . Context , opts PruneOptions , gopts GlobalOptions , term * termstatus . Terminal ) error {
2020-07-19 05:55:14 +00:00
err := verifyPruneOptions ( & opts )
if err != nil {
return err
}
2022-05-01 10:02:05 +00:00
if opts . RepackUncompressed && gopts . Compression == repository . CompressionOff {
return errors . Fatal ( "disabled compression and `--repack-uncompressed` are mutually exclusive" )
}
2024-02-24 14:19:02 +00:00
ctx , repo , unlock , err := openWithExclusiveLock ( ctx , gopts , false )
2016-08-04 17:42:40 +00:00
if err != nil {
return err
}
2024-02-24 14:19:02 +00:00
defer unlock ( )
2016-08-04 17:42:40 +00:00
2021-08-16 14:02:01 +00:00
if opts . UnsafeNoSpaceRecovery != "" {
repoID := repo . Config ( ) . ID
if opts . UnsafeNoSpaceRecovery != repoID {
return errors . Fatalf ( "must pass id '%s' to --unsafe-recover-no-free-space" , repoID )
}
opts . unsafeRecovery = true
}
2024-04-05 22:51:20 +00:00
return runPruneWithRepo ( ctx , opts , gopts , repo , restic . NewIDSet ( ) , term )
2017-02-21 09:58:30 +00:00
}
2024-04-05 22:51:20 +00:00
func runPruneWithRepo ( ctx context . Context , opts PruneOptions , gopts GlobalOptions , repo * repository . Repository , ignoreSnapshots restic . IDSet , term * termstatus . Terminal ) error {
2020-11-08 19:15:58 +00:00
if repo . Cache == nil {
Print ( "warning: running prune without a cache, this may be very slow!\n" )
}
2024-04-05 22:51:20 +00:00
printer := newTerminalProgressPrinter ( gopts . verbosity , term )
printer . P ( "loading indexes...\n" )
2021-11-05 23:32:46 +00:00
// loading the index before the snapshots is ok, as we use an exclusive lock here
2024-04-05 22:51:20 +00:00
bar := newIndexTerminalProgress ( gopts . Quiet , gopts . JSON , term )
2023-07-16 02:48:30 +00:00
err := repo . LoadIndex ( ctx , bar )
2018-01-21 16:25:36 +00:00
if err != nil {
return err
2016-08-15 19:10:20 +00:00
}
2024-04-06 17:21:21 +00:00
popts := repository . PruneOptions {
DryRun : opts . DryRun ,
UnsafeRecovery : opts . unsafeRecovery ,
MaxUnusedBytes : opts . maxUnusedBytes ,
MaxRepackBytes : opts . MaxRepackBytes ,
2024-07-01 22:45:59 +00:00
RepackCacheableOnly : opts . RepackCacheableOnly ,
RepackSmall : opts . RepackSmall ,
RepackUncompressed : opts . RepackUncompressed ,
2024-04-06 17:21:21 +00:00
}
2024-05-20 09:47:53 +00:00
plan , err := repository . PlanPrune ( ctx , popts , repo , func ( ctx context . Context , repo restic . Repository , usedBlobs restic . FindBlobSet ) error {
return getUsedBlobs ( ctx , repo , usedBlobs , ignoreSnapshots , printer )
2024-04-06 17:17:28 +00:00
} , printer )
2020-11-30 15:55:14 +00:00
if err != nil {
return err
}
2024-03-29 23:19:58 +00:00
if ctx . Err ( ) != nil {
return ctx . Err ( )
}
2020-11-30 15:55:14 +00:00
2024-04-06 17:21:21 +00:00
if popts . DryRun {
2024-04-05 22:51:20 +00:00
printer . P ( "\nWould have made the following changes:" )
2022-12-30 14:01:49 +00:00
}
2024-04-14 10:32:29 +00:00
err = printPruneStats ( printer , plan . Stats ( ) )
2020-11-30 15:55:14 +00:00
if err != nil {
return err
}
2023-06-02 19:57:40 +00:00
// Trigger GC to reset garbage collection threshold
runtime . GC ( )
2024-04-14 10:32:29 +00:00
return plan . Execute ( ctx , printer )
2020-11-30 15:55:14 +00:00
}
// printPruneStats prints out the statistics
2024-04-06 17:21:21 +00:00
func printPruneStats ( printer progress . Printer , stats repository . PruneStats ) error {
2024-04-06 17:17:28 +00:00
printer . V ( "\nused: %10d blobs / %s\n" , stats . Blobs . Used , ui . FormatBytes ( stats . Size . Used ) )
if stats . Blobs . Duplicate > 0 {
printer . V ( "duplicates: %10d blobs / %s\n" , stats . Blobs . Duplicate , ui . FormatBytes ( stats . Size . Duplicate ) )
}
printer . V ( "unused: %10d blobs / %s\n" , stats . Blobs . Unused , ui . FormatBytes ( stats . Size . Unused ) )
if stats . Size . Unref > 0 {
printer . V ( "unreferenced: %s\n" , ui . FormatBytes ( stats . Size . Unref ) )
}
totalBlobs := stats . Blobs . Used + stats . Blobs . Unused + stats . Blobs . Duplicate
totalSize := stats . Size . Used + stats . Size . Duplicate + stats . Size . Unused + stats . Size . Unref
unusedSize := stats . Size . Duplicate + stats . Size . Unused
2024-04-05 22:51:20 +00:00
printer . V ( "total: %10d blobs / %s\n" , totalBlobs , ui . FormatBytes ( totalSize ) )
printer . V ( "unused size: %s of total size\n" , ui . FormatPercent ( unusedSize , totalSize ) )
2020-07-19 05:55:14 +00:00
2024-04-06 17:17:28 +00:00
printer . P ( "\nto repack: %10d blobs / %s\n" , stats . Blobs . Repack , ui . FormatBytes ( stats . Size . Repack ) )
printer . P ( "this removes: %10d blobs / %s\n" , stats . Blobs . Repackrm , ui . FormatBytes ( stats . Size . Repackrm ) )
printer . P ( "to delete: %10d blobs / %s\n" , stats . Blobs . Remove , ui . FormatBytes ( stats . Size . Remove + stats . Size . Unref ) )
totalPruneSize := stats . Size . Remove + stats . Size . Repackrm + stats . Size . Unref
printer . P ( "total prune: %10d blobs / %s\n" , stats . Blobs . Remove + stats . Blobs . Repackrm , ui . FormatBytes ( totalPruneSize ) )
if stats . Size . Uncompressed > 0 {
printer . P ( "not yet compressed: %s\n" , ui . FormatBytes ( stats . Size . Uncompressed ) )
2022-10-22 18:07:45 +00:00
}
2024-04-06 17:17:28 +00:00
printer . P ( "remaining: %10d blobs / %s\n" , totalBlobs - ( stats . Blobs . Remove + stats . Blobs . Repackrm ) , ui . FormatBytes ( totalSize - totalPruneSize ) )
unusedAfter := unusedSize - stats . Size . Remove - stats . Size . Repackrm
2024-04-05 22:51:20 +00:00
printer . P ( "unused size after prune: %s (%s of remaining size)\n" ,
2022-10-21 15:34:14 +00:00
ui . FormatBytes ( unusedAfter ) , ui . FormatPercent ( unusedAfter , totalSize - totalPruneSize ) )
2024-04-05 22:51:20 +00:00
printer . P ( "\n" )
2024-04-06 17:17:28 +00:00
printer . V ( "totally used packs: %10d\n" , stats . Packs . Used )
printer . V ( "partly used packs: %10d\n" , stats . Packs . PartlyUsed )
printer . V ( "unused packs: %10d\n\n" , stats . Packs . Unused )
2024-04-05 22:51:20 +00:00
2024-04-06 17:17:28 +00:00
printer . V ( "to keep: %10d packs\n" , stats . Packs . Keep )
printer . V ( "to repack: %10d packs\n" , stats . Packs . Repack )
printer . V ( "to delete: %10d packs\n" , stats . Packs . Remove )
if stats . Packs . Unref > 0 {
printer . V ( "to delete: %10d unreferenced packs\n\n" , stats . Packs . Unref )
2020-07-19 05:55:14 +00:00
}
2020-11-30 15:55:14 +00:00
return nil
}
2024-05-20 09:47:53 +00:00
func getUsedBlobs ( ctx context . Context , repo restic . Repository , usedBlobs restic . FindBlobSet , ignoreSnapshots restic . IDSet , printer progress . Printer ) error {
2020-12-06 04:22:27 +00:00
var snapshotTrees restic . IDs
2024-04-05 22:51:20 +00:00
printer . P ( "loading all snapshots...\n" )
2024-05-20 09:47:53 +00:00
err := restic . ForAllSnapshots ( ctx , repo , repo , ignoreSnapshots ,
2020-12-06 04:22:27 +00:00
func ( id restic . ID , sn * restic . Snapshot , err error ) error {
if err != nil {
2022-05-23 20:32:59 +00:00
debug . Log ( "failed to load snapshot %v (error %v)" , id , err )
2020-12-06 04:22:27 +00:00
return err
}
2022-05-23 20:32:59 +00:00
debug . Log ( "add snapshot %v (tree %v)" , id , * sn . Tree )
2020-12-06 04:22:27 +00:00
snapshotTrees = append ( snapshotTrees , * sn . Tree )
return nil
} )
if err != nil {
2024-05-20 09:47:53 +00:00
return errors . Fatalf ( "failed loading snapshot: %v" , err )
2020-12-06 04:22:27 +00:00
}
2024-04-05 22:51:20 +00:00
printer . P ( "finding data that is still in use for %d snapshots\n" , len ( snapshotTrees ) )
2020-07-19 07:48:53 +00:00
2024-04-05 22:51:20 +00:00
bar := printer . NewCounter ( "snapshots" )
bar . SetMax ( uint64 ( len ( snapshotTrees ) ) )
2020-08-03 17:31:49 +00:00
defer bar . Done ( )
2020-07-19 07:48:53 +00:00
2024-05-20 09:47:53 +00:00
return restic . FindUsedBlobs ( ctx , repo , snapshotTrees , usedBlobs , bar )
2020-07-19 07:48:53 +00:00
}