diff --git a/cmd/restic/cmd_stats.go b/cmd/restic/cmd_stats.go index 621ff0880..970ee1291 100644 --- a/cmd/restic/cmd_stats.go +++ b/cmd/restic/cmd_stats.go @@ -21,6 +21,25 @@ The "stats" command walks one or all snapshots in a repository and accumulates statistics about the data stored therein. It reports on the number of unique files and their sizes, according to one of the counting modes as given by the --mode flag. + +If no snapshot is specified, all snapshots will be considered. Some +modes make more sense over just a single snapshot, while others +are useful across all snapshots, depending on what you are trying +to calculate. + +The modes are: + + restore-size: (default) Counts the size of the restored files. + + files-by-contents: Counts total size of files, where a file is + considered unique if it has unique contents. + + raw-data: Counts the size of blobs in the repository, regardless + of how many files reference them. + + blobs-per-file: A combination of files-by-contents and raw-data. + +Refer to the online manual for more details about each mode. `, DisableAutoGenTag: true, RunE: func(cmd *cobra.Command, args []string) error { @@ -31,7 +50,7 @@ the counting modes as given by the --mode flag. 
func init() { cmdRoot.AddCommand(cmdStats) f := cmdStats.Flags() - f.StringVar(&countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-content, blobs-per-file, or raw-data") + f.StringVar(&countMode, "mode", countModeRestoreSize, "counting mode: restore-size (default), files-by-contents, blobs-per-file, or raw-data") f.StringVar(&snapshotByHost, "host", "", "filter latest snapshot by this hostname") } @@ -163,14 +182,14 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun return true, nil } - if countMode == countModeUniqueFilesByContent || countMode == countModeBlobsPerFile { + if countMode == countModeUniqueFilesByContents || countMode == countModeBlobsPerFile { // only count this file if we haven't visited it before fid := makeFileIDByContents(node) if _, ok := stats.uniqueFiles[fid]; !ok { // mark the file as visited stats.uniqueFiles[fid] = struct{}{} - if countMode == countModeUniqueFilesByContent { + if countMode == countModeUniqueFilesByContents { // simply count the size of each unique file (unique by contents only) stats.TotalSize += node.Size stats.TotalFileCount++ @@ -197,7 +216,6 @@ func statsWalkTree(repo restic.Repository, stats *statsContainer) walker.WalkFun // file (path) so we don't double-count it stats.TotalSize += uint64(blobSize) stats.fileBlobs[nodePath].Insert(blobID) - // this mode also counts total unique blob _references_ per file stats.TotalBlobCount++ } @@ -232,7 +250,7 @@ func verifyStatsInput(gopts GlobalOptions, args []string) error { // require a recognized counting mode switch countMode { case countModeRestoreSize: - case countModeUniqueFilesByContent: + case countModeUniqueFilesByContents: case countModeBlobsPerFile: case countModeRawData: default: @@ -289,8 +307,8 @@ var ( ) const ( - countModeRestoreSize = "restore-size" - countModeUniqueFilesByContent = "files-by-content" - countModeBlobsPerFile = "blobs-per-file" - countModeRawData = "raw-data" + 
countModeRestoreSize = "restore-size" + countModeUniqueFilesByContents = "files-by-contents" + countModeBlobsPerFile = "blobs-per-file" + countModeRawData = "raw-data" ) diff --git a/doc/manual_rest.rst b/doc/manual_rest.rst index 40540e84f..94173644b 100644 --- a/doc/manual_rest.rst +++ b/doc/manual_rest.rst @@ -36,6 +36,7 @@ Usage help is available: rebuild-index Build a new index file restore Extract the data from a snapshot snapshots List all snapshots + stats Count up sizes and show information about repository data tag Modify tags on snapshots unlock Remove locks other processes created version Print version information @@ -236,6 +237,76 @@ The following metadata is handled by restic: - Subtree - ExtendedAttributes + +Getting information about repository data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use the ``stats`` command to compute statistics about the data in the repository. +There are different counting modes available using the ``--mode`` flag, +depending on what you want to calculate. The default is ``restore-size``, +the size required to restore the files. The available modes are: + +- ``restore-size`` (default) counts the size of the restored files. +- ``files-by-contents`` counts the total size of unique files as given by their + contents. This can be useful since a file is considered unique only if it has + unique contents. Keep in mind that a small change to a large file (even when the + file name/path hasn't changed) will cause the two versions to look like different files, thus + essentially causing the whole size of the file to be counted twice. +- ``raw-data`` counts the size of the blobs in the repository, regardless of how many + files reference them. This tells you how much restic has reduced all your original + data down to (either for a single snapshot or across all your backups), and compared + to the size given by the ``restore-size`` mode, can tell you how much deduplication is + helping you. 
+- ``blobs-per-file`` is a mix of the ``files-by-contents`` and ``raw-data`` modes; + it is useful for knowing how much value your backup is providing you in terms of unique + data stored by file. Like ``files-by-contents``, it is resilient to file renames/moves. + Unlike ``files-by-contents``, it does not balloon to high values when large files have + small edits, as long as the file path stays the same. Unlike ``raw-data``, this mode + DOES consider how many files point to each blob such that the more files a blob is + referenced by, the more it counts toward the size. + +For example, to calculate how much space would be +required to restore the latest snapshot (from any host that made it): + +.. code-block:: console + + $ restic stats latest + password is correct + Total File Count: 10538 + Total Size: 37.824 GiB + +If multiple hosts are backing up to the repository, the latest snapshot may not +be the one you want. You can specify the latest snapshot from only a specific +host by using the ``--host`` flag: + +.. code-block:: console + + $ restic stats --host myserver latest + password is correct + Total File Count: 21766 + Total Size: 481.783 GiB + +Here we see that it would take 482 GiB of disk space to restore the latest +snapshot from "myserver". + +But how much space does that snapshot take on disk? In other words, how much +has restic's deduplication helped? We can check: + +.. code-block:: console + + $ restic stats --host myserver --mode raw-data latest + password is correct + Total Blob Count: 340847 + Total Size: 458.663 GiB + +Comparing this size to the result of the previous command, we see that restic has saved +about 23 GiB of space with deduplication. + +Which mode you use depends on your exact use case. Some modes are more useful +across all snapshots, while others make more sense on just a single snapshot, +depending on what you're trying to calculate. + + Scripting ---------