[#issue 3490] Support for specifying file size in read-data-subset

This commit is contained in:
Gautam Menghani 2021-11-01 13:30:25 +05:30 committed by gum3ng
parent 829c0a67af
commit 836fbb9133
4 changed files with 98 additions and 9 deletions

View File

@ -0,0 +1,9 @@
Enhancement: Support for specifying file size in `check --read-data-subset`
To check a subset of repository files, the `check --read-data-subset` command
used to support two ways to select a subset - A specific range of pack files,
or random percentage of pack files. We have added a third method to select pack
files - By specifying file size. This new option is available with the 'restic check' command.
https://github.com/restic/restic/issues/3490
https://github.com/restic/restic/pull/3548

View File

@ -65,7 +65,7 @@ func checkFlags(opts CheckOptions) error {
}
if opts.ReadDataSubset != "" {
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%%")
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage or a file size, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%% or --read-data-subset=10G")
if err == nil {
if len(dataSubset) != 2 {
return argumentError
@ -76,7 +76,7 @@ func checkFlags(opts CheckOptions) error {
if dataSubset[1] > totalBucketsMax {
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
}
} else {
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err != nil {
return argumentError
@ -86,6 +86,17 @@ func checkFlags(opts CheckOptions) error {
return errors.Fatal(
"check flag --read-data-subset=n% n must be above 0.0% and at most 100.0%")
}
} else {
fileSize, err := parseSizeStr(opts.ReadDataSubset)
if err != nil {
return argumentError
}
if fileSize <= 0.0 {
return errors.Fatal(
"check flag --read-data-subset=n n must be above 0.0")
}
}
}
@ -294,10 +305,27 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
packCount := uint64(len(packs))
Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
percentage, err := parsePercentage(opts.ReadDataSubset)
if err == nil {
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
Verbosef("read %.1f%% of data packs\n", percentage)
}
} else {
percentage, _ := parsePercentage(opts.ReadDataSubset)
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
Verbosef("read %.1f%% of data packs\n", percentage)
repoSize := int64(0)
allPacks := chkr.GetPacks()
for _, size := range allPacks {
repoSize += size
}
if repoSize == 0 {
return errors.Fatal("Cannot read from a repository having size 0")
}
subsetSize, _ := parseSizeStr(opts.ReadDataSubset)
if subsetSize > repoSize {
subsetSize = repoSize
}
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
Verbosef("read %d bytes of data packs\n", subsetSize)
}
if packs == nil {
return errors.Fatal("internal error: failed to select packs to check")
@ -349,6 +377,11 @@ func selectRandomPacksByPercentage(allPacks map[restic.ID]int64, percentage floa
id := keys[idx[i]]
packs[id] = allPacks[id]
}
return packs
}
func selectRandomPacksByFileSize(allPacks map[restic.ID]int64, subsetSize int64, repoSize int64) map[restic.ID]int64 {
subsetPercentage := (float64(subsetSize) / float64(repoSize)) * 100.0
packs := selectRandomPacksByPercentage(allPacks, subsetPercentage)
return packs
}

View File

@ -129,3 +129,37 @@ func TestSelectNoRandomPacksByPercentage(t *testing.T) {
selectedPacks := selectRandomPacksByPercentage(testPacks, 10.0)
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
}
func TestSelectRandomPacksByFileSize(t *testing.T) {
var testPacks = make(map[restic.ID]int64)
for i := 1; i <= 10; i++ {
id := restic.NewRandomID()
// ensure unique ids
id[0] = byte(i)
testPacks[id] = 0
}
selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500)
rtest.Assert(t, len(selectedPacks) == 1, "Expected 1 selected packs")
selectedPacks = selectRandomPacksByFileSize(testPacks, 10240, 51200)
rtest.Assert(t, len(selectedPacks) == 2, "Expected 2 selected packs")
for pack := range selectedPacks {
_, ok := testPacks[pack]
rtest.Assert(t, ok, "Unexpected selection")
}
selectedPacks = selectRandomPacksByFileSize(testPacks, 500, 500)
rtest.Assert(t, len(selectedPacks) == 10, "Expected 10 selected packs")
for pack := range selectedPacks {
_, ok := testPacks[pack]
rtest.Assert(t, ok, "Unexpected item in selection")
}
}
func TestSelectNoRandomPacksByFileSize(t *testing.T) {
// that the a repository without pack files works
var testPacks = make(map[restic.ID]int64)
selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500)
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
}

View File

@ -249,9 +249,9 @@ integrity of the pack files in the repository, use the ``--read-data`` flag:
and also that it takes more time than the default ``check``.
Alternatively, use the ``--read-data-subset`` parameter to check only a
subset of the repository pack files at a time. It supports two ways to select a
subset. One selects a specific range of pack files, the other selects a random
percentage of pack files.
subset of the repository pack files at a time. It supports three ways to select a
subset. One selects a specific range of pack files, the second selects a random
percentage of pack files, and the third selects pack files of the specified size.
Use ``--read-data-subset=n/t`` to check only a subset of the repository pack
files at a time. The parameter takes two values, ``n`` and ``t``. When the check
@ -285,3 +285,16 @@ integer:
.. code-block:: console
$ restic -r /srv/restic-repo check --read-data-subset=10%
Use ``--read-data-subset=NS`` to check a randomly chosen subset of the repository pack files.
It takes one parameter, ``NS``, where 'N' is a whole number representing file size and 'S' is the unit
of file size (B/K/M/G/T) of pack files to check. Behind the scenes, the specified size will be converted
to percentage of the total repository size. The behaviour of the check command following this conversion
will be the same as the percentage option above. For a file size value the following command may be used:
.. code-block:: console
$ restic -r /srv/restic-repo check --read-data-subset=50M
$ restic -r /srv/restic-repo check --read-data-subset=10G