mirror of
https://github.com/octoleo/restic.git
synced 2024-11-10 15:21:03 +00:00
[#issue 3490] Support for specifying file size in read-data-subset
This commit is contained in:
parent
829c0a67af
commit
836fbb9133
9
changelog/unreleased/issue-3490
Normal file
9
changelog/unreleased/issue-3490
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
Enhancement: Support for specifying file size in `check --read-data-subset`
|
||||||
|
|
||||||
|
To check a subset of repository files, the `check --read-data-subset` command
|
||||||
|
used to support two ways to select a subset: a specific range of pack files,
|
||||||
|
or a random percentage of pack files. We have added a third method to select pack
|
||||||
|
files: by specifying a file size. This new option is available with the `restic check` command.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/issues/3490
|
||||||
|
https://github.com/restic/restic/pull/3548
|
@ -65,7 +65,7 @@ func checkFlags(opts CheckOptions) error {
|
|||||||
}
|
}
|
||||||
if opts.ReadDataSubset != "" {
|
if opts.ReadDataSubset != "" {
|
||||||
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
|
dataSubset, err := stringToIntSlice(opts.ReadDataSubset)
|
||||||
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%%")
|
argumentError := errors.Fatal("check flag --read-data-subset must have two positive integer values or a percentage or a file size, e.g. --read-data-subset=1/2 or --read-data-subset=2.5%% or --read-data-subset=10G")
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if len(dataSubset) != 2 {
|
if len(dataSubset) != 2 {
|
||||||
return argumentError
|
return argumentError
|
||||||
@ -76,7 +76,7 @@ func checkFlags(opts CheckOptions) error {
|
|||||||
if dataSubset[1] > totalBucketsMax {
|
if dataSubset[1] > totalBucketsMax {
|
||||||
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
|
return errors.Fatalf("check flag --read-data-subset=n/t t must be at most %d", totalBucketsMax)
|
||||||
}
|
}
|
||||||
} else {
|
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
|
||||||
percentage, err := parsePercentage(opts.ReadDataSubset)
|
percentage, err := parsePercentage(opts.ReadDataSubset)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return argumentError
|
return argumentError
|
||||||
@ -86,6 +86,17 @@ func checkFlags(opts CheckOptions) error {
|
|||||||
return errors.Fatal(
|
return errors.Fatal(
|
||||||
"check flag --read-data-subset=n% n must be above 0.0% and at most 100.0%")
|
"check flag --read-data-subset=n% n must be above 0.0% and at most 100.0%")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
fileSize, err := parseSizeStr(opts.ReadDataSubset)
|
||||||
|
if err != nil {
|
||||||
|
return argumentError
|
||||||
|
}
|
||||||
|
if fileSize <= 0.0 {
|
||||||
|
return errors.Fatal(
|
||||||
|
"check flag --read-data-subset=n n must be above 0.0")
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -294,11 +305,28 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
|
|||||||
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
|
packs = selectPacksByBucket(chkr.GetPacks(), bucket, totalBuckets)
|
||||||
packCount := uint64(len(packs))
|
packCount := uint64(len(packs))
|
||||||
Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
|
Verbosef("read group #%d of %d data packs (out of total %d packs in %d groups)\n", bucket, packCount, chkr.CountPacks(), totalBuckets)
|
||||||
} else {
|
} else if strings.HasSuffix(opts.ReadDataSubset, "%") {
|
||||||
percentage, _ := parsePercentage(opts.ReadDataSubset)
|
percentage, err := parsePercentage(opts.ReadDataSubset)
|
||||||
|
if err == nil {
|
||||||
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
|
packs = selectRandomPacksByPercentage(chkr.GetPacks(), percentage)
|
||||||
Verbosef("read %.1f%% of data packs\n", percentage)
|
Verbosef("read %.1f%% of data packs\n", percentage)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
repoSize := int64(0)
|
||||||
|
allPacks := chkr.GetPacks()
|
||||||
|
for _, size := range allPacks {
|
||||||
|
repoSize += size
|
||||||
|
}
|
||||||
|
if repoSize == 0 {
|
||||||
|
return errors.Fatal("Cannot read from a repository having size 0")
|
||||||
|
}
|
||||||
|
subsetSize, _ := parseSizeStr(opts.ReadDataSubset)
|
||||||
|
if subsetSize > repoSize {
|
||||||
|
subsetSize = repoSize
|
||||||
|
}
|
||||||
|
packs = selectRandomPacksByFileSize(chkr.GetPacks(), subsetSize, repoSize)
|
||||||
|
Verbosef("read %d bytes of data packs\n", subsetSize)
|
||||||
|
}
|
||||||
if packs == nil {
|
if packs == nil {
|
||||||
return errors.Fatal("internal error: failed to select packs to check")
|
return errors.Fatal("internal error: failed to select packs to check")
|
||||||
}
|
}
|
||||||
@ -349,6 +377,11 @@ func selectRandomPacksByPercentage(allPacks map[restic.ID]int64, percentage floa
|
|||||||
id := keys[idx[i]]
|
id := keys[idx[i]]
|
||||||
packs[id] = allPacks[id]
|
packs[id] = allPacks[id]
|
||||||
}
|
}
|
||||||
|
return packs
|
||||||
|
}
|
||||||
|
|
||||||
|
func selectRandomPacksByFileSize(allPacks map[restic.ID]int64, subsetSize int64, repoSize int64) map[restic.ID]int64 {
|
||||||
|
subsetPercentage := (float64(subsetSize) / float64(repoSize)) * 100.0
|
||||||
|
packs := selectRandomPacksByPercentage(allPacks, subsetPercentage)
|
||||||
return packs
|
return packs
|
||||||
}
|
}
|
||||||
|
@ -129,3 +129,37 @@ func TestSelectNoRandomPacksByPercentage(t *testing.T) {
|
|||||||
selectedPacks := selectRandomPacksByPercentage(testPacks, 10.0)
|
selectedPacks := selectRandomPacksByPercentage(testPacks, 10.0)
|
||||||
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
|
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSelectRandomPacksByFileSize(t *testing.T) {
|
||||||
|
var testPacks = make(map[restic.ID]int64)
|
||||||
|
for i := 1; i <= 10; i++ {
|
||||||
|
id := restic.NewRandomID()
|
||||||
|
// ensure unique ids
|
||||||
|
id[0] = byte(i)
|
||||||
|
testPacks[id] = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500)
|
||||||
|
rtest.Assert(t, len(selectedPacks) == 1, "Expected 1 selected packs")
|
||||||
|
|
||||||
|
selectedPacks = selectRandomPacksByFileSize(testPacks, 10240, 51200)
|
||||||
|
rtest.Assert(t, len(selectedPacks) == 2, "Expected 2 selected packs")
|
||||||
|
for pack := range selectedPacks {
|
||||||
|
_, ok := testPacks[pack]
|
||||||
|
rtest.Assert(t, ok, "Unexpected selection")
|
||||||
|
}
|
||||||
|
|
||||||
|
selectedPacks = selectRandomPacksByFileSize(testPacks, 500, 500)
|
||||||
|
rtest.Assert(t, len(selectedPacks) == 10, "Expected 10 selected packs")
|
||||||
|
for pack := range selectedPacks {
|
||||||
|
_, ok := testPacks[pack]
|
||||||
|
rtest.Assert(t, ok, "Unexpected item in selection")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSelectNoRandomPacksByFileSize(t *testing.T) {
|
||||||
|
// verify that a repository without pack files works
|
||||||
|
var testPacks = make(map[restic.ID]int64)
|
||||||
|
selectedPacks := selectRandomPacksByFileSize(testPacks, 10, 500)
|
||||||
|
rtest.Assert(t, len(selectedPacks) == 0, "Expected 0 selected packs")
|
||||||
|
}
|
||||||
|
@ -249,9 +249,9 @@ integrity of the pack files in the repository, use the ``--read-data`` flag:
|
|||||||
and also that it takes more time than the default ``check``.
|
and also that it takes more time than the default ``check``.
|
||||||
|
|
||||||
Alternatively, use the ``--read-data-subset`` parameter to check only a
|
Alternatively, use the ``--read-data-subset`` parameter to check only a
|
||||||
subset of the repository pack files at a time. It supports two ways to select a
|
subset of the repository pack files at a time. It supports three ways to select a
|
||||||
subset. One selects a specific range of pack files, the other selects a random
|
subset. One selects a specific range of pack files, the second selects a random
|
||||||
percentage of pack files.
|
percentage of pack files, and the third selects a random subset of pack files whose total size is approximately the specified size.
|
||||||
|
|
||||||
Use ``--read-data-subset=n/t`` to check only a subset of the repository pack
|
Use ``--read-data-subset=n/t`` to check only a subset of the repository pack
|
||||||
files at a time. The parameter takes two values, ``n`` and ``t``. When the check
|
files at a time. The parameter takes two values, ``n`` and ``t``. When the check
|
||||||
@ -285,3 +285,16 @@ integer:
|
|||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|
||||||
$ restic -r /srv/restic-repo check --read-data-subset=10%
|
$ restic -r /srv/restic-repo check --read-data-subset=10%
|
||||||
|
|
||||||
|
Use ``--read-data-subset=NS`` to check a randomly chosen subset of the repository pack files.
|
||||||
|
It takes one parameter, ``NS``, where ``N`` is a whole number representing file size and ``S`` is the unit
|
||||||
|
of file size (B/K/M/G/T) of pack files to check. Behind the scenes, the specified size will be converted
|
||||||
|
to a percentage of the total repository size. The behaviour of the check command following this conversion
|
||||||
|
will be the same as the percentage option above. For a file size value the following command may be used:
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
$ restic -r /srv/restic-repo check --read-data-subset=50M
|
||||||
|
$ restic -r /srv/restic-repo check --read-data-subset=10G
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user