From 0ce182f044d98c91f35605a9fa0a6d59e4b4a73f Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 10 Dec 2022 16:02:29 +0100 Subject: [PATCH] document backup --group-by --- changelog/unreleased/issue-3941 | 14 ++++++++++++++ cmd/restic/cmd_backup.go | 2 +- doc/040_backup.rst | 15 +++++++++++++-- doc/060_forget.rst | 2 ++ 4 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 changelog/unreleased/issue-3941 diff --git a/changelog/unreleased/issue-3941 b/changelog/unreleased/issue-3941 new file mode 100644 index 000000000..011cd9eaa --- /dev/null +++ b/changelog/unreleased/issue-3941 @@ -0,0 +1,14 @@ +Enhancement: Support `--group-by` for backup parent selection + +The backup command by default selected the parent snapshot based on the hostname +and the backup targets. When the backup path list changed, the backup command +was unable to determine a suitable parent snapshot and had to read all +files again. + +The new `--group-by` option for the backup command allows filtering snapshots +for the parent selection by `host`, `paths` and `tags`. It defaults to +`host,paths` which selects the latest snapshot with hostname and paths matching +those of the backup run. It should be used consistently with `forget --group-by`. 
+ +https://github.com/restic/restic/issues/3941 +https://github.com/restic/restic/pull/4081 diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 696a19b94..7c58f95c4 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -121,7 +121,7 @@ func init() { cmdRoot.AddCommand(cmdBackup) f := cmdBackup.Flags() - f.StringVar(&backupOptions.Parent, "parent", "", "use this parent `snapshot` (default: last snapshot in the repository that has the same target files/directories, and is not newer than the snapshot time)") + f.StringVar(&backupOptions.Parent, "parent", "", "use this parent `snapshot` (default: latest snapshot in the group determined by --group-by and not newer than the timestamp determined by --time)") backupOptions.GroupBy = restic.SnapshotGroupByOptions{Host: true, Path: true} f.VarP(&backupOptions.GroupBy, "group-by", "g", "`group` snapshots by host, paths and/or tags, separated by comma (disable grouping with '')") f.BoolVarP(&backupOptions.Force, "force", "f", false, `force re-reading the target files/directories (overrides the "parent" flag)`) diff --git a/doc/040_backup.rst b/doc/040_backup.rst index b9996311d..3b1a56bd6 100644 --- a/doc/040_backup.rst +++ b/doc/040_backup.rst @@ -139,13 +139,24 @@ File change detection ********************* When restic encounters a file that has already been backed up, whether in the -current backup or a previous one, it makes sure the file's contents are only +current backup or a previous one, it makes sure the file's content is only stored once in the repository. To do so, it normally has to scan the entire -contents of every file. Because this can be very expensive, restic also uses a +content of the file. Because this can be very expensive, restic also uses a change detection rule based on file metadata to determine whether a file is likely unchanged since a previous backup. If it is, the file is not scanned again. 
+The previous backup snapshot, called "parent" snapshot in restic terminology, +is determined as follows. By default restic groups snapshots by hostname and +backup paths, and then selects the latest snapshot in the group that matches +the current backup. You can change the selection criteria using the +``--group-by`` option, which defaults to ``host,paths``. To select the latest +snapshot with the same paths independent of the hostname, use ``paths``. Or, +to only consider the hostname and tags, use ``host,tags``. Alternatively, it +is possible to manually specify a specific parent snapshot using the +``--parent`` option. Finally, note that one would normally set the +``--group-by`` option for the ``forget`` command to the same value. + Change detection is only performed for regular files (not special files, symlinks or directories) that have the exact same path as they did in a previous backup of the same location. If a file or one of its containing diff --git a/doc/060_forget.rst b/doc/060_forget.rst index a4205de75..b960ddb14 100644 --- a/doc/060_forget.rst +++ b/doc/060_forget.rst @@ -219,6 +219,8 @@ paths and tags. The policy is then applied to each group of snapshots individual This is a safety feature to prevent accidental removal of unrelated backup sets. To disable grouping and apply the policy to all snapshots regardless of their host, paths and tags, use ``--group-by ''`` (that is, an empty value to ``--group-by``). +Note that one would normally set the ``--group-by`` option for the ``backup`` +command to the same value. Additionally, you can restrict the policy to only process snapshots which have a particular hostname with the ``--host`` parameter, or tags with the ``--tag``