diff --git a/changelog/unreleased/issue-2858 b/changelog/unreleased/issue-2858 new file mode 100644 index 000000000..d4301e5fc --- /dev/null +++ b/changelog/unreleased/issue-2858 @@ -0,0 +1,9 @@ +Enhancement: Support filtering snapshots by tag and path in the stats command + +We've added filtering snapshots by `--tag tagList` and by `--path path` to +the `stats` command. This includes filtering of only 'latest' snapshots or +all snapshots in a repository. + +https://github.com/restic/restic/issues/2858 +https://github.com/restic/restic/pull/2859 +https://forum.restic.net/t/stats-for-a-host-and-filtered-snapshots/3020 diff --git a/cmd/restic/cmd_stats.go b/cmd/restic/cmd_stats.go index a779447b4..f26d5a6cc 100644 --- a/cmd/restic/cmd_stats.go +++ b/cmd/restic/cmd_stats.go @@ -6,7 +6,6 @@ import ( "fmt" "path/filepath" - "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/walker" @@ -15,18 +14,19 @@ import ( ) var cmdStats = &cobra.Command{ - Use: "stats [flags] [snapshot-ID]", + Use: "stats [flags] [snapshot ID] [...]", Short: "Scan the repository and show basic statistics", Long: ` -The "stats" command walks one or all snapshots in a repository and -accumulates statistics about the data stored therein. It reports on -the number of unique files and their sizes, according to one of +The "stats" command walks one or multiple snapshots in a repository +and accumulates statistics about the data stored therein. It reports +on the number of unique files and their sizes, according to one of the counting modes as given by the --mode flag. -If no snapshot is specified, all snapshots will be considered. Some -modes make more sense over just a single snapshot, while others -are useful across all snapshots, depending on what you are trying -to calculate. +It operates on all snapshots matching the selection criteria or all +snapshots if nothing is specified. The special snapshot ID "latest" +is also supported. 
Some modes make more sense over +just a single snapshot, while others are useful across all snapshots, +depending on what you are trying to calculate. The modes are: @@ -50,11 +50,26 @@ Exit status is 0 if the command was successful, and non-zero if there was any er }, } +// StatsOptions collects all options for the stats command. +type StatsOptions struct { + // the mode of counting to perform (see consts for available modes) + countMode string + + // filter snapshots by, if given by user + Hosts []string + Tags restic.TagLists + Paths []string +} + +var statsOptions StatsOptions + func init() { cmdRoot.AddCommand(cmdStats) f := cmdStats.Flags() - f.StringVar(&countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file, or raw-data") - f.StringArrayVarP(&snapshotByHosts, "host", "H", nil, "filter latest snapshot by this hostname (can be specified multiple times)") + f.StringVar(&statsOptions.countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file or raw-data") + f.StringArrayVarP(&statsOptions.Hosts, "host", "H", nil, "only consider snapshots with the given `host` (can be specified multiple times)") + f.Var(&statsOptions.Tags, "tag", "only consider snapshots which include this `taglist` in the format `tag[,tag,...]` (can be specified multiple times)") + f.StringArrayVar(&statsOptions.Paths, "path", nil, "only consider snapshots which include this (absolute) `path` (can be specified multiple times)") } func runStats(gopts GlobalOptions, args []string) error { @@ -89,52 +104,25 @@ func runStats(gopts GlobalOptions, args []string) error { // create a container for the stats (and other needed state) stats := &statsContainer{ - uniqueFiles: make(map[fileID]struct{}), - uniqueInodes: make(map[uint64]struct{}), - fileBlobs: make(map[string]restic.IDSet), - blobs: restic.NewBlobSet(), + uniqueFiles: make(map[fileID]struct{}), + uniqueInodes: 
make(map[uint64]struct{}), + fileBlobs: make(map[string]restic.IDSet), + blobs: restic.NewBlobSet(), + snapshotsCount: 0, } - if snapshotIDString != "" { - // scan just a single snapshot - - var sID restic.ID - if snapshotIDString == "latest" { - sID, err = restic.FindLatestSnapshot(ctx, repo, []string{}, []restic.TagList{}, snapshotByHosts) - if err != nil { - return errors.Fatalf("latest snapshot for criteria not found: %v", err) - } - } else { - sID, err = restic.FindSnapshot(repo, snapshotIDString) - if err != nil { - return errors.Fatalf("error loading snapshot: %v", err) - } - } - - snapshot, err := restic.LoadSnapshot(ctx, repo, sID) - if err != nil { - return errors.Fatalf("error loading snapshot from repo: %v", err) - } - - err = statsWalkSnapshot(ctx, snapshot, repo, stats) + for sn := range FindFilteredSnapshots(ctx, repo, statsOptions.Hosts, statsOptions.Tags, statsOptions.Paths, args) { + err = statsWalkSnapshot(ctx, sn, repo, stats) if err != nil { return fmt.Errorf("error walking snapshot: %v", err) } - } else { - // iterate every snapshot in the repo - err = repo.List(ctx, restic.SnapshotFile, func(snapshotID restic.ID, size int64) error { - snapshot, err := restic.LoadSnapshot(ctx, repo, snapshotID) - if err != nil { - return fmt.Errorf("Error loading snapshot %s: %v", snapshotID.Str(), err) - } - return statsWalkSnapshot(ctx, snapshot, repo, stats) - }) } + if err != nil { return err } - if countMode == countModeRawData { + if statsOptions.countMode == countModeRawData { // the blob handles have been collected, but not yet counted for blobHandle := range stats.blobs { blobSize, found := repo.LookupBlobSize(blobHandle.ID, blobHandle.Type) @@ -154,22 +142,16 @@ func runStats(gopts GlobalOptions, args []string) error { return nil } - // inform the user what was scanned and how it was scanned - snapshotsScanned := snapshotIDString - if snapshotsScanned == "latest" { - snapshotsScanned = "the latest snapshot" - } else if snapshotsScanned == "" { - 
snapshotsScanned = "all snapshots" - } - Printf("Stats for %s in %s mode:\n", snapshotsScanned, countMode) + Printf("Stats in %s mode:\n", statsOptions.countMode) + Printf("Snapshots processed: %d\n", stats.snapshotsCount) if stats.TotalBlobCount > 0 { - Printf(" Total Blob Count: %d\n", stats.TotalBlobCount) + Printf(" Total Blob Count: %d\n", stats.TotalBlobCount) } if stats.TotalFileCount > 0 { - Printf(" Total File Count: %d\n", stats.TotalFileCount) + Printf(" Total File Count: %d\n", stats.TotalFileCount) } - Printf(" Total Size: %-5s\n", formatBytes(stats.TotalSize)) + Printf(" Total Size: %-5s\n", formatBytes(stats.TotalSize)) return nil } @@ -179,7 +161,9 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest return fmt.Errorf("snapshot %s has nil tree", snapshot.ID().Str()) } - if countMode == countModeRawData { + stats.snapshotsCount++ + + if statsOptions.countMode == countModeRawData { // count just the sizes of unique blobs; we don't need to walk the tree // ourselves in this case, since a nifty function does it for us return restic.FindUsedBlobs(ctx, repo, *snapshot.Tree, stats.blobs) @@ -189,6 +173,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest if err != nil { return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err) } + return nil } @@ -201,19 +186,19 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun return true, nil } - if countMode == countModeUniqueFilesByContents || countMode == countModeBlobsPerFile { + if statsOptions.countMode == countModeUniqueFilesByContents || statsOptions.countMode == countModeBlobsPerFile { // only count this file if we haven't visited it before fid := makeFileIDByContents(node) if _, ok := stats.uniqueFiles[fid]; !ok { // mark the file as visited stats.uniqueFiles[fid] = struct{}{} - if countMode == countModeUniqueFilesByContents { + if statsOptions.countMode == countModeUniqueFilesByContents { // simply count the 
size of each unique file (unique by contents only) stats.TotalSize += node.Size stats.TotalFileCount++ } - if countMode == countModeBlobsPerFile { + if statsOptions.countMode == countModeBlobsPerFile { // count the size of each unique blob reference, which is // by unique file (unique by contents and file path) for _, blobID := range node.Content { @@ -243,7 +228,7 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun } } - if countMode == countModeRestoreSize { + if statsOptions.countMode == countModeRestoreSize { // as this is a file in the snapshot, we can simply count its // size without worrying about uniqueness, since duplicate files // will still be restored @@ -275,23 +260,13 @@ func makeFileIDByContents(node *restic.Node) fileID { func verifyStatsInput(gopts GlobalOptions, args []string) error { // require a recognized counting mode - switch countMode { + switch statsOptions.countMode { case countModeRestoreSize: case countModeUniqueFilesByContents: case countModeBlobsPerFile: case countModeRawData: default: - return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", countMode) - } - - // ensure at most one snapshot was specified - if len(args) > 1 { - return fmt.Errorf("only one snapshot may be specified") - } - - // if a snapshot was specified, mark it as the one to scan - if len(args) == 1 { - snapshotIDString = args[0] + return fmt.Errorf("unknown counting mode: %s (use the -h flag to get a list of supported modes)", statsOptions.countMode) } return nil @@ -320,23 +295,14 @@ type statsContainer struct { // blobs is used to count individual unique blobs, // independent of references to files blobs restic.BlobSet + + // holds count of all considered snapshots + snapshotsCount int } // fileID is a 256-bit hash that distinguishes unique files. 
type fileID [32]byte -var ( - // the mode of counting to perform - countMode string - - // the snapshot to scan, as given by the user - snapshotIDString string - - // snapshotByHost is the host to filter latest - // snapshot by, if given by user - snapshotByHosts []string -) - const ( countModeRestoreSize = "restore-size" countModeUniqueFilesByContents = "files-by-contents" diff --git a/cmd/restic/find.go b/cmd/restic/find.go index ca79ec265..8d39e177f 100644 --- a/cmd/restic/find.go +++ b/cmd/restic/find.go @@ -22,10 +22,10 @@ func FindFilteredSnapshots(ctx context.Context, repo *repository.Repository, hos // Process all snapshot IDs given as arguments. for _, s := range snapshotIDs { if s == "latest" { + usedFilter = true id, err = restic.FindLatestSnapshot(ctx, repo, paths, tags, hosts) if err != nil { Warnf("Ignoring %q, no snapshot matched given filter (Paths:%v Tags:%v Hosts:%v)\n", s, paths, tags, hosts) - usedFilter = true continue } } else { diff --git a/doc/manual_rest.rst b/doc/manual_rest.rst index e65ae6957..f886475d3 100644 --- a/doc/manual_rest.rst +++ b/doc/manual_rest.rst @@ -306,6 +306,10 @@ host by using the ``--host`` flag: There we see that it would take 482 GiB of disk space to restore the latest snapshot from "myserver". +In case you have multiple backups running from the same host, you can also use +``--tag`` and ``--path`` to be more specific about which snapshots you +are looking for. + But how much space does that snapshot take on disk? In other words, how much has restic's deduplication helped? We can check: