From 836fbb9133cbf3fe307cadaf2e3b1e7ee32083a3 Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Mon, 1 Nov 2021 13:30:25 +0530 Subject: [PATCH] [#issue 3490] Support for specifying file size in read-data-subset --- changelog/unreleased/issue-3490 | 9 +++++++ cmd/restic/cmd_check.go | 45 ++++++++++++++++++++++++++++----- cmd/restic/cmd_check_test.go | 34 +++++++++++++++++++++++++ doc/045_working_with_repos.rst | 19 +++++++++++--- 4 files changed, 98 insertions(+), 9 deletions(-) create mode 100644 changelog/unreleased/issue-3490 diff --git a/changelog/unreleased/issue-3490 b/changelog/unreleased/issue-3490 new file mode 100644 index 000000000..e5ce23d1d --- /dev/null +++ b/changelog/unreleased/issue-3490 @@ -0,0 +1,9 @@ +Enhancement: Support for specifying file size in `check --read-data-subset` + +To check a subset of repository files, the `check --read-data-subset` command +used to support two ways to select a subset - A specific range of pack files, +or random percentage of pack files. We have added a third method to select pack +files - By specifying file size. This new option is available with the 'restic check' command. + +https://github.com/restic/restic/issues/3490 +https://github.com/restic/restic/pull/3548 \ No newline at end of file diff --git a/cmd/restic/cmd_check.go b/cmd/restic/cmd_check.go index 2b5017ead..f5b6598a5 100644 --- a/cmd/restic/cmd_check.go +++ b/cmd/restic/cmd_check.go @@ -65,7 +65,7 @@ func checkFlags(opts CheckOptions) error { } if opts.ReadDataSubset != "" { dataSubset, err := stringToIntSlice(opts.ReadDataSubset) - argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%%") + argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage or a file size, e.g. 
--read-data-subset=1/2 or --read-data-subset=2.5%% or --read-data-subset=10G") if err == nil { if len(dataSubset) != 2 { return argumentError @@ -76,7 +76,7 @@ func checkFlags(opts CheckOptions) error { if dataSubset[1] > totalBucketsMax { return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax) } - } else { + } else if strings.HasSuffix(opts.ReadDataSubset, "%") { percentage, err := parsePercentage(opts.ReadDataSubset) if err != nil { return argumentError @@ -86,6 +86,17 @@ func checkFlags(opts CheckOptions) error { return errors.Fatal( "check flag --read-data-subset=n% n must be above 0.0% and at most 100.0%") } + + } else { + fileSize, err := parseSizeStr(opts.ReadDataSubset) + if err != nil { + return argumentError + } + if fileSize <= 0.0 { + return errors.Fatal( + "check flag --read-data-subset=n n must be above 0.0") + } + } } @@ -294,10 +305,27 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error { packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets) packCount := uint64(len(packs)) Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets) + } else if strings.HasSuffix(opts.ReadDataSubset, "%") { + percentage, err := parsePercentage(opts.ReadDataSubset) + if err == nil { + packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage) + Verbosef("read %.1f%% of data packs\n", percentage) + } } else { - percentage, _ := parsePercentage(opts.ReadDataSubset) - packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage) - Verbosef("read %.1f%% of data packs\n", percentage) + repoSize := int64(0) + allPacks := chkr.GetPacks() + for _, size := range allPacks { + repoSize += size + } + if repoSize == 0 { + return errors.Fatal("Cannot read from a repository having size 0") + } + subsetSize, _ := parseSizeStr(opts.ReadDataSubset) + if subsetSize > repoSize { + subsetSize = repoSize + } + packs = 
selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize) + Verbosef("read %d bytes of data packs\n", subsetSize) } if packs == nil { return errors.Fatal("internal error: failed to select packs to check") @@ -349,6 +377,11 @@ func selectRandomPacksByPercentage(allPacks map[restic.ID]int64, percentage floa id := keys[idx[i]] packs[id] = allPacks[id] } - + return packs +} + +func selectRandomPacksByFileSize(allPacks map[restic.ID]int64, subsetSize int64, repoSize int64) map[restic.ID]int64 { + subsetPercentage := (float64(subsetSize) / float64(repoSize)) * 100.0 + packs := selectRandomPacksByPercentage(allPacks, subsetPercentage) return packs } diff --git a/cmd/restic/cmd_check_test.go b/cmd/restic/cmd_check_test.go index ef038c050..fb61f8420 100644 --- a/cmd/restic/cmd_check_test.go +++ b/cmd/restic/cmd_check_test.go @@ -129,3 +129,37 @@ func TestSelectNoRandomPacksByPercentage(t *testing.T) { selectedPacks := selectRandomPacksByPercentage(testPacks, 10.0) rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs") } + +func TestSelectRandomPacksByFileSize(t *testing.T) { + var testPacks = make(map[restic.ID]int64) + for i := 1; i <= 10; i++ { + id := restic.NewRandomID() + // ensure unique ids + id[0] = byte(i) + testPacks[id] = 0 + } + + selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500) + rtest.Assert(t, len(selectedPacks) == 1, "Expected 1 selected packs") + + selectedPacks = selectRandomPacksByFileSize(testPacks, 10240, 51200) + rtest.Assert(t, len(selectedPacks) == 2, "Expected 2 selected packs") + for pack := range selectedPacks { + _, ok := testPacks[pack] + rtest.Assert(t, ok, "Unexpected selection") + } + + selectedPacks = selectRandomPacksByFileSize(testPacks, 500, 500) + rtest.Assert(t, len(selectedPacks) == 10, "Expected 10 selected packs") + for pack := range selectedPacks { + _, ok := testPacks[pack] + rtest.Assert(t, ok, "Unexpected item in selection") + } +} + +func TestSelectNoRandomPacksByFileSize(t *testing.T) { + 
// verify that a repository without pack files works + var testPacks = make(map[restic.ID]int64) + selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500) + rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs") +} diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 75855a5fe..b21d198b9 100644 --- a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -249,9 +249,9 @@ integrity of the pack files in the repository, use the ``--read-data`` flag: and also that it takes more time than the default ``check``. Alternatively, use the ``--read-data-subset`` parameter to check only a -subset of the repository pack files at a time. It supports two ways to select a -subset. One selects a specific range of pack files, the other selects a random -percentage of pack files. +subset of the repository pack files at a time. It supports three ways to select a +subset. One selects a specific range of pack files, the second selects a random +percentage of pack files, and the third selects a random subset of pack files sized by a given total file size. Use ``--read-data-subset=n/t`` to check only a subset of the repository pack files at a time. The parameter takes two values, ``n`` and ``t``. When the check @@ -285,3 +285,16 @@ integer: .. code-block:: console $ restic -r /srv/restic-repo check --read-data-subset=10% + +Use ``--read-data-subset=NS`` to check a randomly chosen subset of the repository pack files. +It takes one parameter, ``NS``, where ``N`` is a whole number giving the file size and ``S`` is the unit +of that size (B/K/M/G/T); together they give the total size of pack files to check. Behind the scenes, the specified size is converted +to a percentage of the total repository size. After this conversion, the check command behaves +the same as with the percentage option above. For example, to check a subset selected by file size: + +.. 
code-block:: console + + $ restic -r /srv/restic-repo check --read-data-subset=50M + $ restic -r /srv/restic-repo check --read-data-subset=10G + +