mirror of
https://github.com/octoleo/restic.git
synced 2024-12-22 02:48:55 +00:00
backup: rework read concurrency
This commit is contained in:
parent
4a501d7118
commit
2e606ca70b
@ -1,5 +1,6 @@
|
|||||||
Enhancement: Make backup file read concurrency configurable
|
Enhancement: Make backup file read concurrency configurable
|
||||||
|
|
||||||
In order to tune restic for special situations we need to be able to configure `backup --file-read-concurrency`.
|
The `backup` command now supports a `--read-concurrency` flag to allowing
|
||||||
|
tuning restic for very fast storage like NVME disks.
|
||||||
|
|
||||||
https://github.com/restic/restic/pull/2750
|
https://github.com/restic/restic/pull/2750
|
||||||
|
@ -101,7 +101,7 @@ type BackupOptions struct {
|
|||||||
IgnoreCtime bool
|
IgnoreCtime bool
|
||||||
UseFsSnapshot bool
|
UseFsSnapshot bool
|
||||||
DryRun bool
|
DryRun bool
|
||||||
FileReadConcurrency uint
|
ReadConcurrency uint
|
||||||
}
|
}
|
||||||
|
|
||||||
var backupOptions BackupOptions
|
var backupOptions BackupOptions
|
||||||
@ -110,12 +110,6 @@ var backupOptions BackupOptions
|
|||||||
var ErrInvalidSourceData = errors.New("at least one source file could not be read")
|
var ErrInvalidSourceData = errors.New("at least one source file could not be read")
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
//set FileReadConcurrency to 2 if not set in env
|
|
||||||
fileReadConcurrency, err := strconv.ParseUint(os.Getenv("RESTIC_FILE_READ_CONCURRENCY"), 10, 32)
|
|
||||||
if err != nil || fileReadConcurrency < 1 {
|
|
||||||
fileReadConcurrency = 2
|
|
||||||
}
|
|
||||||
|
|
||||||
cmdRoot.AddCommand(cmdBackup)
|
cmdRoot.AddCommand(cmdBackup)
|
||||||
|
|
||||||
f := cmdBackup.Flags()
|
f := cmdBackup.Flags()
|
||||||
@ -132,10 +126,10 @@ func init() {
|
|||||||
f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
|
f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin")
|
||||||
f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
|
f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin")
|
||||||
f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)")
|
f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)")
|
||||||
f.UintVar(&backupOptions.FileReadConcurrency, "file-read-concurrency", uint(fileReadConcurrency), "set concurrency on file reads. (default: $RESTIC_FILE_READ_CONCURRENCY or 2)")
|
f.UintVar(&backupOptions.ReadConcurrency, "read-concurrency", 0, "read `n` file concurrently. (default: $RESTIC_READ_CONCURRENCY or 2)")
|
||||||
f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag")
|
f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag")
|
||||||
f.StringVar(&backupOptions.Host, "hostname", "", "set the `hostname` for the snapshot manually")
|
f.StringVar(&backupOptions.Host, "hostname", "", "set the `hostname` for the snapshot manually")
|
||||||
err = f.MarkDeprecated("hostname", "use --host")
|
err := f.MarkDeprecated("hostname", "use --host")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// MarkDeprecated only returns an error when the flag could not be found
|
// MarkDeprecated only returns an error when the flag could not be found
|
||||||
panic(err)
|
panic(err)
|
||||||
@ -152,6 +146,9 @@ func init() {
|
|||||||
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
|
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parse read concurrency from env, on error the default value will be used
|
||||||
|
readConcurrency, _ := strconv.ParseUint(os.Getenv("RESTIC_READ_CONCURRENCY"), 10, 32)
|
||||||
|
backupOptions.ReadConcurrency = uint(readConcurrency)
|
||||||
}
|
}
|
||||||
|
|
||||||
// filterExisting returns a slice of all existing items, or an error if no
|
// filterExisting returns a slice of all existing items, or an error if no
|
||||||
@ -292,10 +289,6 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if backupOptions.FileReadConcurrency == 0 {
|
|
||||||
return errors.Fatal("--file-read-concurrency must be a positive, nonzero integer")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -697,7 +690,7 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
|
|||||||
}
|
}
|
||||||
wg.Go(func() error { return sc.Scan(cancelCtx, targets) })
|
wg.Go(func() error { return sc.Scan(cancelCtx, targets) })
|
||||||
|
|
||||||
arch := archiver.New(repo, targetFS, archiver.Options{FileReadConcurrency: backupOptions.FileReadConcurrency})
|
arch := archiver.New(repo, targetFS, archiver.Options{ReadConcurrency: backupOptions.ReadConcurrency})
|
||||||
arch.SelectByName = selectByNameFilter
|
arch.SelectByName = selectByNameFilter
|
||||||
arch.Select = selectFilter
|
arch.Select = selectFilter
|
||||||
arch.WithAtime = opts.WithAtime
|
arch.WithAtime = opts.WithAtime
|
||||||
|
@ -555,6 +555,7 @@ environment variables. The following lists these environment variables:
|
|||||||
RESTIC_COMPRESSION Compression mode (only available for repository format version 2)
|
RESTIC_COMPRESSION Compression mode (only available for repository format version 2)
|
||||||
RESTIC_PROGRESS_FPS Frames per second by which the progress bar is updated
|
RESTIC_PROGRESS_FPS Frames per second by which the progress bar is updated
|
||||||
RESTIC_PACK_SIZE Target size for pack files
|
RESTIC_PACK_SIZE Target size for pack files
|
||||||
|
RESTIC_READ_CONCURRENCY Concurrency for file reads
|
||||||
|
|
||||||
TMPDIR Location for temporary files
|
TMPDIR Location for temporary files
|
||||||
|
|
||||||
|
@ -54,11 +54,11 @@ variable ``RESTIC_COMPRESSION``.
|
|||||||
File Read Concurrency
|
File Read Concurrency
|
||||||
=====================
|
=====================
|
||||||
|
|
||||||
In some instances, such as backing up traditional spinning disks, reducing the file read
|
When backing up fast storage like NVME disks, it can be beneficial to increase the read
|
||||||
concurrency is desired. This will help reduce the amount of time spent seeking around
|
concurrency. This can increase the overall performance of the backup operation by reading
|
||||||
the disk and can increase the overall performance of the backup operation. You can specify
|
more files in parallel. You can specify the concurrency of file reads with the
|
||||||
the concurrency of file reads with the ``RESTIC_FILE_READ_CONCURRENCY`` environment variable
|
``RESTIC_READ_CONCURRENCY`` environment variable or the ``--read-concurrency`` flag for
|
||||||
or the ``--file-read-concurrency`` flag for the ``backup`` subcommand.
|
the ``backup`` command.
|
||||||
|
|
||||||
|
|
||||||
Pack Size
|
Pack Size
|
||||||
|
@ -103,7 +103,6 @@ command:
|
|||||||
--files-from file read the files to backup from file (can be combined with file args; can be specified multiple times)
|
--files-from file read the files to backup from file (can be combined with file args; can be specified multiple times)
|
||||||
--files-from-raw file read the files to backup from file (can be combined with file args; can be specified multiple times)
|
--files-from-raw file read the files to backup from file (can be combined with file args; can be specified multiple times)
|
||||||
--files-from-verbatim file read the files to backup from file (can be combined with file args; can be specified multiple times)
|
--files-from-verbatim file read the files to backup from file (can be combined with file args; can be specified multiple times)
|
||||||
--file-read-concurrency uint set concurrency on file reads. (default: $RESTIC_FILE_READ_CONCURRENCY or 2)
|
|
||||||
-f, --force force re-reading the target files/directories (overrides the "parent" flag)
|
-f, --force force re-reading the target files/directories (overrides the "parent" flag)
|
||||||
-h, --help help for backup
|
-h, --help help for backup
|
||||||
-H, --host hostname set the hostname for the snapshot manually. To prevent an expensive rescan use the "parent" flag
|
-H, --host hostname set the hostname for the snapshot manually. To prevent an expensive rescan use the "parent" flag
|
||||||
@ -113,6 +112,7 @@ command:
|
|||||||
--ignore-inode ignore inode number changes when checking for modified files
|
--ignore-inode ignore inode number changes when checking for modified files
|
||||||
-x, --one-file-system exclude other file systems, don't cross filesystem boundaries and subvolumes
|
-x, --one-file-system exclude other file systems, don't cross filesystem boundaries and subvolumes
|
||||||
--parent snapshot use this parent snapshot (default: last snapshot in the repository that has the same target files/directories, and is not newer than the snapshot time)
|
--parent snapshot use this parent snapshot (default: last snapshot in the repository that has the same target files/directories, and is not newer than the snapshot time)
|
||||||
|
--read-concurrency n read n file concurrently. (default: $RESTIC_READ_CONCURRENCY or 2)
|
||||||
--stdin read backup from stdin
|
--stdin read backup from stdin
|
||||||
--stdin-filename filename filename to use when reading from stdin (default "stdin")
|
--stdin-filename filename filename to use when reading from stdin (default "stdin")
|
||||||
--tag tags add tags for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times) (default [])
|
--tag tags add tags for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times) (default [])
|
||||||
|
@ -95,10 +95,10 @@ const (
|
|||||||
|
|
||||||
// Options is used to configure the archiver.
|
// Options is used to configure the archiver.
|
||||||
type Options struct {
|
type Options struct {
|
||||||
// FileReadConcurrency sets how many files are read in concurrently. If
|
// ReadConcurrency sets how many files are read in concurrently. If
|
||||||
// it's set to zero, at most two files are read in concurrently (which
|
// it's set to zero, at most two files are read in concurrently (which
|
||||||
// turned out to be a good default for most situations).
|
// turned out to be a good default for most situations).
|
||||||
FileReadConcurrency uint
|
ReadConcurrency uint
|
||||||
|
|
||||||
// SaveBlobConcurrency sets how many blobs are hashed and saved
|
// SaveBlobConcurrency sets how many blobs are hashed and saved
|
||||||
// concurrently. If it's set to zero, the default is the number of CPUs
|
// concurrently. If it's set to zero, the default is the number of CPUs
|
||||||
@ -113,11 +113,11 @@ type Options struct {
|
|||||||
// ApplyDefaults returns a copy of o with the default options set for all unset
|
// ApplyDefaults returns a copy of o with the default options set for all unset
|
||||||
// fields.
|
// fields.
|
||||||
func (o Options) ApplyDefaults() Options {
|
func (o Options) ApplyDefaults() Options {
|
||||||
if o.FileReadConcurrency == 0 {
|
if o.ReadConcurrency == 0 {
|
||||||
// two is a sweet spot for almost all situations. We've done some
|
// two is a sweet spot for almost all situations. We've done some
|
||||||
// experiments documented here:
|
// experiments documented here:
|
||||||
// https://github.com/borgbackup/borg/issues/3500
|
// https://github.com/borgbackup/borg/issues/3500
|
||||||
o.FileReadConcurrency = 2
|
o.ReadConcurrency = 2
|
||||||
}
|
}
|
||||||
|
|
||||||
if o.SaveBlobConcurrency == 0 {
|
if o.SaveBlobConcurrency == 0 {
|
||||||
@ -132,7 +132,7 @@ func (o Options) ApplyDefaults() Options {
|
|||||||
// Also allow waiting for FileReadConcurrency files, this is the maximum of FutureFiles
|
// Also allow waiting for FileReadConcurrency files, this is the maximum of FutureFiles
|
||||||
// which currently can be in progress. The main backup loop blocks when trying to queue
|
// which currently can be in progress. The main backup loop blocks when trying to queue
|
||||||
// more files to read.
|
// more files to read.
|
||||||
o.SaveTreeConcurrency = uint(runtime.GOMAXPROCS(0)) + o.FileReadConcurrency
|
o.SaveTreeConcurrency = uint(runtime.GOMAXPROCS(0)) + o.ReadConcurrency
|
||||||
}
|
}
|
||||||
|
|
||||||
return o
|
return o
|
||||||
@ -782,7 +782,7 @@ func (arch *Archiver) runWorkers(ctx context.Context, wg *errgroup.Group) {
|
|||||||
arch.fileSaver = NewFileSaver(ctx, wg,
|
arch.fileSaver = NewFileSaver(ctx, wg,
|
||||||
arch.blobSaver.Save,
|
arch.blobSaver.Save,
|
||||||
arch.Repo.Config().ChunkerPolynomial,
|
arch.Repo.Config().ChunkerPolynomial,
|
||||||
arch.Options.FileReadConcurrency, arch.Options.SaveBlobConcurrency)
|
arch.Options.ReadConcurrency, arch.Options.SaveBlobConcurrency)
|
||||||
arch.fileSaver.CompleteBlob = arch.CompleteBlob
|
arch.fileSaver.CompleteBlob = arch.CompleteBlob
|
||||||
arch.fileSaver.NodeFromFileInfo = arch.nodeFromFileInfo
|
arch.fileSaver.NodeFromFileInfo = arch.nodeFromFileInfo
|
||||||
|
|
||||||
|
@ -2040,7 +2040,7 @@ func TestArchiverAbortEarlyOnError(t *testing.T) {
|
|||||||
|
|
||||||
// at most two files may be queued
|
// at most two files may be queued
|
||||||
arch := New(testRepo, testFS, Options{
|
arch := New(testRepo, testFS, Options{
|
||||||
FileReadConcurrency: 2,
|
ReadConcurrency: 2,
|
||||||
})
|
})
|
||||||
|
|
||||||
_, _, err := arch.Snapshot(ctx, []string{"."}, SnapshotOptions{Time: time.Now()})
|
_, _, err := arch.Snapshot(ctx, []string{"."}, SnapshotOptions{Time: time.Now()})
|
||||||
|
Loading…
Reference in New Issue
Block a user