From 07d080830e08e0a530303db3be8e2826e1d1d6aa Mon Sep 17 00:00:00 2001 From: Igor Fedorenko Date: Tue, 2 Jan 2018 00:38:14 -0500 Subject: [PATCH] Add --read-data-subset flag to check command Signed-off-by: Igor Fedorenko --- changelog/0.8.3/issue-1497 | 7 ++++ cmd/restic/cmd_check.go | 75 ++++++++++++++++++++++++++++++---- doc/045_working_with_repos.rst | 26 ++++++++++++ internal/checker/checker.go | 31 ++++++++------ 4 files changed, 120 insertions(+), 19 deletions(-) create mode 100644 changelog/0.8.3/issue-1497 diff --git a/changelog/0.8.3/issue-1497 b/changelog/0.8.3/issue-1497 new file mode 100644 index 000000000..f7aea49e3 --- /dev/null +++ b/changelog/0.8.3/issue-1497 @@ -0,0 +1,7 @@ +Enhancement: Add --read-data-subset flag to check command + +This change introduces the ability to check the integrity of a subset of +repository data packs. This can be used to spread the integrity check of larger +repositories over a period of time. + +https://github.com/restic/restic/issues/1497 diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 9be7c2ee2..ac669c6c7 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -3,6 +3,8 @@ package main import ( "fmt" "os" + "strconv" + "strings" "time" "github.com/spf13/cobra" @@ -26,13 +28,17 @@ repository and not use a local cache. RunE: func(cmd *cobra.Command, args []string) error { return runCheck(checkOptions, globalOptions, args) }, + PreRunE: func(cmd *cobra.Command, args []string) error { + return checkFlags(checkOptions) + }, } // CheckOptions bundles all options for the 'check' command. 
type CheckOptions struct { - ReadData bool - CheckUnused bool - WithCache bool + ReadData bool + ReadDataSubset string + CheckUnused bool + WithCache bool } var checkOptions CheckOptions @@ -42,10 +48,45 @@ func init() { f := cmdCheck.Flags() f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs") + f.StringVar(&checkOptions.ReadDataSubset, "read-data-subset", "", "read subset of data packs") f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs") f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache") } +func checkFlags(opts CheckOptions) error { + if opts.ReadData && opts.ReadDataSubset != "" { + return errors.Fatalf("check flags --read-data and --read-data-subset cannot be used together") + } + if opts.ReadDataSubset != "" { + dataSubset, err := stringToIntSlice(opts.ReadDataSubset) + if err != nil || len(dataSubset) != 2 { + return errors.Fatalf("check flag --read-data-subset must have two positive integer values, e.g. --read-data-subset=1/2") + } + if dataSubset[0] == 0 || dataSubset[1] == 0 || dataSubset[0] > dataSubset[1] { + return errors.Fatalf("check flag --read-data-subset=n/t values must be positive integers, and n <= t, e.g. 
--read-data-subset=1/2") + } + } + + return nil +} + +// stringToIntSlice converts string to []uint, using '/' as element separator +func stringToIntSlice(param string) (split []uint, err error) { + if param == "" { + return nil, nil + } + parts := strings.Split(param, "/") + result := make([]uint, len(parts)) + for idx, part := range parts { + uintval, err := strconv.ParseUint(part, 10, 0) + if err != nil { + return nil, err + } + result[idx] = uint(uintval) + } + return result, nil +} + func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress { if gopts.Quiet { return nil @@ -158,13 +199,25 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { } } - if opts.ReadData { - Verbosef("read all data\n") + doReadData := func(bucket, totalBuckets uint) { + packs := restic.IDSet{} + for pack := range chkr.GetPacks() { + if (uint(pack[0]) % totalBuckets) == (bucket - 1) { + packs.Insert(pack) + } + } + packCount := uint64(len(packs)) - p := newReadProgress(gopts, restic.Stat{Blobs: chkr.CountPacks()}) + if packCount < chkr.CountPacks() { + Verbosef(fmt.Sprintf("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)) + } else { + Verbosef("read all data\n") + } + + p := newReadProgress(gopts, restic.Stat{Blobs: packCount}) errChan := make(chan error) - go chkr.ReadData(gopts.ctx, p, errChan) + go chkr.ReadPacks(gopts.ctx, packs, p, errChan) for err := range errChan { errorsFound = true @@ -172,6 +225,14 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { } } + switch { + case opts.ReadData: + doReadData(1, 1) + case opts.ReadDataSubset != "": + dataSubset, _ := stringToIntSlice(opts.ReadDataSubset) + doReadData(dataSubset[0], dataSubset[1]) + } + if errorsFound { return errors.Fatal("repository contains errors") } diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 4008a9110..5ee39ea26 100644 --- 
a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -87,3 +87,29 @@ yield the same error: Load indexes ciphertext verification failed +By default, the ``check`` command does not check that repository data files +are unmodified. Use the ``--read-data`` parameter to check all repository +data files: + +.. code-block:: console + + $ restic -r /tmp/backup check --read-data + load indexes + check all packs + check snapshots, trees and blobs + read all data + +Use the ``--read-data-subset=n/t`` parameter to check a subset of repository +data files. The parameter takes two values, ``n`` and ``t``. All repository data +files are logically divided into ``t`` roughly equal groups and only files that +belong to group number ``n`` are checked. For example, the following +commands check all repository data files over 5 separate invocations: + +.. code-block:: console + + $ restic -r /tmp/backup check --read-data-subset=1/5 + $ restic -r /tmp/backup check --read-data-subset=2/5 + $ restic -r /tmp/backup check --read-data-subset=3/5 + $ restic -r /tmp/backup check --read-data-subset=4/5 + $ restic -r /tmp/backup check --read-data-subset=5/5 + diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 6e24b7b1c..81c0d8347 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -622,6 +622,11 @@ func (c *Checker) CountPacks() uint64 { return uint64(len(c.packs)) } +// GetPacks returns IDSet of packs in the repository +func (c *Checker) GetPacks() restic.IDSet { + return c.packs +} + // checkPack reads a pack and checks the integrity of all blobs. func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error { debug.Log("checking pack %v", id) @@ -697,6 +702,11 @@ func checkPack(ctx context.Context, r restic.Repository, id restic.ID) error { // ReadData loads all data from the repository and checks the integrity. 
func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan<- error) { + c.ReadPacks(ctx, c.packs, p, errChan) +} + +// ReadPacks loads data from specified packs and checks the integrity. +func (c *Checker) ReadPacks(ctx context.Context, packs restic.IDSet, p *restic.Progress, errChan chan<- error) { defer close(errChan) p.Start() @@ -705,18 +715,6 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan g, ctx := errgroup.WithContext(ctx) ch := make(chan restic.ID) - // start producer for channel ch - g.Go(func() error { - defer close(ch) - return c.repo.List(ctx, restic.DataFile, func(id restic.ID, size int64) error { - select { - case <-ctx.Done(): - case ch <- id: - } - return nil - }) - }) - // run workers for i := 0; i < defaultParallelism; i++ { g.Go(func() error { @@ -748,6 +746,15 @@ func (c *Checker) ReadData(ctx context.Context, p *restic.Progress, errChan chan }) } + // push packs to ch + for pack := range packs { + select { + case ch <- pack: + case <-ctx.Done(): + } + } + close(ch) + err := g.Wait() if err != nil { select {