diff --git a/changelog/unreleased/issue-4251 b/changelog/unreleased/issue-4251 new file mode 100644 index 000000000..31be52401 --- /dev/null +++ b/changelog/unreleased/issue-4251 @@ -0,0 +1,14 @@ +Enhancement: Support reading backup from a program's standard output + +When reading data from stdin, the `backup` command could not verify whether the +corresponding command completed successfully. + +The `backup` command now supports starting an arbitrary command and sourcing +the backup content from its standard output. This enables restic to verify that +the command completes with exit code zero. A non-zero exit code causes the +backup to fail. + +Example: `restic backup --stdin-from-command mysqldump [...]` + +https://github.com/restic/restic/issues/4251 +https://github.com/restic/restic/pull/4410 diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 9499701aa..a2b81a759 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -97,6 +97,7 @@ type BackupOptions struct { ExcludeLargerThan string Stdin bool StdinFilename string + StdinCommand bool Tags restic.TagLists Host string FilesFrom []string @@ -134,6 +135,7 @@ func init() { f.StringVar(&backupOptions.ExcludeLargerThan, "exclude-larger-than", "", "max `size` of the files to be backed up (allowed suffixes: k/K, m/M, g/G, t/T)") f.BoolVar(&backupOptions.Stdin, "stdin", false, "read backup from stdin") f.StringVar(&backupOptions.StdinFilename, "stdin-filename", "stdin", "`filename` to use when reading from stdin") + f.BoolVar(&backupOptions.StdinCommand, "stdin-from-command", false, "execute command and store its stdout") f.Var(&backupOptions.Tags, "tag", "add `tags` for the new snapshot in the format `tag[,tag,...]` (can be specified multiple times)") f.UintVar(&backupOptions.ReadConcurrency, "read-concurrency", 0, "read `n` files concurrently (default: $RESTIC_READ_CONCURRENCY or 2)") f.StringVarP(&backupOptions.Host, "host", "H", "", "set the `hostname` for the snapshot manually. To prevent an expensive rescan use the \"parent\" flag") @@ -287,7 +289,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error { } } - if opts.Stdin { + if opts.Stdin || opts.StdinCommand { if len(opts.FilesFrom) > 0 { return errors.Fatal("--stdin and --files-from cannot be used together") } @@ -298,7 +300,7 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error { return errors.Fatal("--stdin and --files-from-raw cannot be used together") } - if len(args) > 0 { + if len(args) > 0 && !opts.StdinCommand { return errors.Fatal("--stdin was specified and files/dirs were listed as arguments") } } @@ -366,7 +368,7 @@ func collectRejectFuncs(opts BackupOptions, targets []string) (fs []RejectFunc, // collectTargets returns a list of target files/dirs from several sources. func collectTargets(opts BackupOptions, args []string) (targets []string, err error) { - if opts.Stdin { + if opts.Stdin || opts.StdinCommand { return nil, nil } @@ -592,16 +594,24 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter defer localVss.DeleteSnapshots() targetFS = localVss } - if opts.Stdin { + + if opts.Stdin || opts.StdinCommand { if !gopts.JSON { progressPrinter.V("read data from stdin") } filename := path.Join("/", opts.StdinFilename) + var source io.ReadCloser = os.Stdin + if opts.StdinCommand { + source, err = fs.NewCommandReader(ctx, args, globalOptions.stderr) + if err != nil { + return err + } + } targetFS = &fs.Reader{ ModTime: timeStamp, Name: filename, Mode: 0644, - ReadCloser: os.Stdin, + ReadCloser: source, } targets = []string{filename} } @@ -630,7 +640,13 @@ func runBackup(ctx context.Context, opts BackupOptions, gopts GlobalOptions, ter success := true arch.Error = func(item string, err error) error { success = false - return progressReporter.Error(item, err) + reterr := progressReporter.Error(item, err) + // If we receive a fatal error during the execution of the snapshot, + // we abort the snapshot. + if reterr == nil && errors.IsFatal(err) { + reterr = err + } + return reterr } arch.CompleteItem = progressReporter.CompleteItem arch.StartFile = progressReporter.StartFile diff --git a/cmd/restic/cmd_backup_integration_test.go b/cmd/restic/cmd_backup_integration_test.go index 742b6ff6c..c60e9c543 100644 --- a/cmd/restic/cmd_backup_integration_test.go +++ b/cmd/restic/cmd_backup_integration_test.go @@ -568,3 +568,72 @@ func linkEqual(source, dest []string) bool { return true } + +func TestStdinFromCommand(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testSetupBackupData(t, env) + opts := BackupOptions{ + StdinCommand: true, + StdinFilename: "stdin", + } + + testRunBackup(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; print('something'); sys.exit(0)"}, opts, env.gopts) + testListSnapshots(t, env.gopts, 1) + + testRunCheck(t, env.gopts) +} + +func TestStdinFromCommandNoOutput(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testSetupBackupData(t, env) + opts := BackupOptions{ + StdinCommand: true, + StdinFilename: "stdin", + } + + err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; sys.exit(0)"}, opts, env.gopts) + rtest.Assert(t, err != nil && err.Error() == "at least one source file could not be read", "No data error expected") + testListSnapshots(t, env.gopts, 1) + + testRunCheck(t, env.gopts) +} + +func TestStdinFromCommandFailExitCode(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testSetupBackupData(t, env) + opts := BackupOptions{ + StdinCommand: true, + StdinFilename: "stdin", + } + + err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; print('test'); sys.exit(1)"}, opts, env.gopts) + rtest.Assert(t, err != nil, "Expected error while backing up") + + testListSnapshots(t, env.gopts, 0) + + testRunCheck(t, env.gopts) +} + +func TestStdinFromCommandFailNoOutputAndExitCode(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testSetupBackupData(t, env) + opts := BackupOptions{ + StdinCommand: true, + StdinFilename: "stdin", + } + + err := testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{"python", "-c", "import sys; sys.exit(1)"}, opts, env.gopts) + rtest.Assert(t, err != nil, "Expected error while backing up") + + testListSnapshots(t, env.gopts, 0) + + testRunCheck(t, env.gopts) +} diff --git a/doc/040_backup.rst b/doc/040_backup.rst index c917c3c29..acafe2694 100644 --- a/doc/040_backup.rst +++ b/doc/040_backup.rst @@ -489,35 +489,71 @@ particular note are:: - file ownership and ACLs on Windows - the "hidden" flag on Windows + +Reading data from a command +*************************** + +Sometimes, it can be useful to directly save the output of a program, for example, +``mysqldump`` so that the SQL can later be restored. Restic supports this mode +of operation; just supply the option ``--stdin-from-command`` when using the +``backup`` action, and write the command in place of the files/directories: + +.. code-block:: console + + $ restic -r /srv/restic-repo backup --stdin-from-command mysqldump [...] + +This command creates a new snapshot based on the standard output of ``mysqldump``. +By default, the command's standard output is saved in a file named ``stdin``. +A different name can be specified with ``--stdin-filename``: + +.. code-block:: console + + $ restic -r /srv/restic-repo backup --stdin-filename production.sql --stdin-from-command mysqldump [...] + +Restic uses the command exit code to determine whether the command succeeded. A +non-zero exit code from the command causes restic to cancel the backup. This causes +restic to fail with exit code 1. No snapshot will be created in this case. + + Reading data from stdin *********************** -Sometimes it can be nice to directly save the output of a program, e.g. -``mysqldump`` so that the SQL can later be restored. Restic supports -this mode of operation, just supply the option ``--stdin`` to the -``backup`` command like this: +.. warning:: + + Restic cannot detect if data read from stdin is complete or not. As explained + below, this can cause incomplete backup unless additional checks (outside of + restic) are configured. If possible, use ``--stdin-from-command`` instead. + +Alternatively, restic supports reading arbitrary data directly from the standard +input. Use the option ``--stdin`` of the ``backup`` command as follows: .. code-block:: console - $ set -o pipefail + # Will not notice failures, see the warning below + $ gzip bigfile.dat | restic -r /srv/restic-repo backup --stdin + +This creates a new snapshot of the content of ``bigfile.dat``. +As for ``--stdin-from-command``, the default file name is ``stdin``; a +different name can be specified with ``--stdin-filename``. + +**Important**: while it is possible to pipe a command output to restic using +``--stdin``, doing so is discouraged as it will mask errors from the +command, leading to corrupted backups. For example, in the following code +block, if ``mysqldump`` fails to connect to the MySQL database, the restic +backup will nevertheless succeed in creating an _empty_ backup: + +.. code-block:: console + + # Will not notice failures, read the warning above $ mysqldump [...] | restic -r /srv/restic-repo backup --stdin -This creates a new snapshot of the output of ``mysqldump``. You can then -use e.g. the fuse mounting option (see below) to mount the repository -and read the file. - -By default, the file name ``stdin`` is used, a different name can be -specified with ``--stdin-filename``, e.g. like this: - -.. code-block:: console - - $ mysqldump [...] | restic -r /srv/restic-repo backup --stdin --stdin-filename production.sql - -The option ``pipefail`` is highly recommended so that a non-zero exit code from -one of the programs in the pipe (e.g. ``mysqldump`` here) makes the whole chain -return a non-zero exit code. Refer to the `Use the Unofficial Bash Strict Mode -`__ for more -details on this. +A simple solution is to use ``--stdin-from-command`` (see above). If you +still need to use the ``--stdin`` flag, you must use the shell option ``set -o pipefail`` +(so that a non-zero exit code from one of the programs in the pipe makes the +whole chain return a non-zero exit code) and you must check the exit code of +the pipe and act accordingly (e.g., remove the last backup). Refer to the +`Use the Unofficial Bash Strict Mode `__ +for more details on this. Tags for backup diff --git a/internal/archiver/file_saver.go b/internal/archiver/file_saver.go index 0742c8b57..724f5e620 100644 --- a/internal/archiver/file_saver.go +++ b/internal/archiver/file_saver.go @@ -2,6 +2,7 @@ package archiver import ( "context" + "fmt" "io" "os" "sync" @@ -146,7 +147,7 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat panic("completed twice") } isCompleted = true - fnr.err = err + fnr.err = fmt.Errorf("failed to save %v: %w", target, err) fnr.node = nil fnr.stats = ItemStats{} finish(fnr) diff --git a/internal/fs/fs_reader_command.go b/internal/fs/fs_reader_command.go new file mode 100644 index 000000000..3830e5811 --- /dev/null +++ b/internal/fs/fs_reader_command.go @@ -0,0 +1,97 @@ +package fs + +import ( + "bufio" + "context" + "fmt" + "io" + "os/exec" + + "github.com/restic/restic/internal/errors" +) + +// CommandReader wrap a command such that its standard output can be read using +// a io.ReadCloser. Close() waits for the command to terminate, reporting +// any error back to the caller. +type CommandReader struct { + cmd *exec.Cmd + stdout io.ReadCloser + + // cmd.Wait() must only be called once. Prevent duplicate executions in + // Read() and Close(). + waitHandled bool + + // alreadyClosedReadErr is the error that we should return if we try to + // read the pipe again after closing. This works around a Read() call that + // is issued after a previous Read() with `io.EOF` (but some bytes were + // read in the past). + alreadyClosedReadErr error +} + +func NewCommandReader(ctx context.Context, args []string, logOutput io.Writer) (*CommandReader, error) { + // Prepare command and stdout + command := exec.CommandContext(ctx, args[0], args[1:]...) + stdout, err := command.StdoutPipe() + if err != nil { + return nil, fmt.Errorf("failed to setup stdout pipe: %w", err) + } + + // Use a Go routine to handle the stderr to avoid deadlocks + stderr, err := command.StderrPipe() + if err != nil { + return nil, fmt.Errorf("failed to setup stderr pipe: %w", err) + } + go func() { + sc := bufio.NewScanner(stderr) + for sc.Scan() { + _, _ = fmt.Fprintf(logOutput, "subprocess %v: %v\n", command.Args[0], sc.Text()) + } + }() + + if err := command.Start(); err != nil { + return nil, fmt.Errorf("failed to start command: %w", err) + } + + return &CommandReader{ + cmd: command, + stdout: stdout, + }, nil +} + +// Read populate the array with data from the process stdout. +func (fp *CommandReader) Read(p []byte) (int, error) { + if fp.alreadyClosedReadErr != nil { + return 0, fp.alreadyClosedReadErr + } + b, err := fp.stdout.Read(p) + + // If the error is io.EOF, the program terminated. We need to check the + // exit code here because, if the program terminated with no output, the + // error in `Close()` is ignored. + if errors.Is(err, io.EOF) { + fp.waitHandled = true + // check if the command terminated successfully, If not return the error. + if errw := fp.wait(); errw != nil { + err = errw + } + } + fp.alreadyClosedReadErr = err + return b, err +} + +func (fp *CommandReader) wait() error { + err := fp.cmd.Wait() + if err != nil { + // Use a fatal error to abort the snapshot. + return errors.Fatal(fmt.Errorf("command failed: %w", err).Error()) + } + return nil +} + +func (fp *CommandReader) Close() error { + if fp.waitHandled { + return nil + } + + return fp.wait() +} diff --git a/internal/fs/fs_reader_command_test.go b/internal/fs/fs_reader_command_test.go new file mode 100644 index 000000000..a9028544c --- /dev/null +++ b/internal/fs/fs_reader_command_test.go @@ -0,0 +1,48 @@ +package fs_test + +import ( + "bytes" + "context" + "io" + "strings" + "testing" + + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/test" +) + +func TestCommandReaderSuccess(t *testing.T) { + reader, err := fs.NewCommandReader(context.TODO(), []string{"true"}, io.Discard) + test.OK(t, err) + + _, err = io.Copy(io.Discard, reader) + test.OK(t, err) + + test.OK(t, reader.Close()) +} + +func TestCommandReaderFail(t *testing.T) { + reader, err := fs.NewCommandReader(context.TODO(), []string{"false"}, io.Discard) + test.OK(t, err) + + _, err = io.Copy(io.Discard, reader) + test.Assert(t, err != nil, "missing error") +} + +func TestCommandReaderInvalid(t *testing.T) { + _, err := fs.NewCommandReader(context.TODO(), []string{"w54fy098hj7fy5twijouytfrj098y645wr"}, io.Discard) + test.Assert(t, err != nil, "missing error") +} + +func TestCommandReaderOutput(t *testing.T) { + reader, err := fs.NewCommandReader(context.TODO(), []string{"echo", "hello world"}, io.Discard) + test.OK(t, err) + + var buf bytes.Buffer + + _, err = io.Copy(&buf, reader) + test.OK(t, err) + test.OK(t, reader.Close()) + + test.Equals(t, "hello world", strings.TrimSpace(buf.String())) +}