From 55bf76ba0cf5883949e9a2679172db9aa19447bc Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Fri, 20 Nov 2020 12:15:16 +0100 Subject: [PATCH] backup: Add --files-from-{verbatim,raw} options --- changelog/unreleased/issue-2944 | 12 +++ cmd/restic/cmd_backup.go | 127 +++++++++++++++++++++++--------- cmd/restic/cmd_backup_test.go | 113 ++++++++++++++++++++++++++++ doc/040_backup.rst | 54 +++++++++----- 4 files changed, 255 insertions(+), 51 deletions(-) create mode 100644 changelog/unreleased/issue-2944 create mode 100644 cmd/restic/cmd_backup_test.go diff --git a/changelog/unreleased/issue-2944 b/changelog/unreleased/issue-2944 new file mode 100644 index 000000000..ac0063a29 --- /dev/null +++ b/changelog/unreleased/issue-2944 @@ -0,0 +1,12 @@ +Enhancement: Backup options --files-from-verbatim and --files-from-raw + +The new backup options `--files-from-verbatim` and `--files-from-raw` +read a list of files to back up from a file. Unlike the existing `--files-from`, +these options do not interpret the listed filenames as glob patterns; +whitespace in filenames is preserved as-is and no pattern expansion is done. + +These new options are recommended over `--files-from` when generating the +list of files to back up from a script. Please see the documentation for specifics. 
+ +https://github.com/restic/restic/issues/2944 +https://github.com/restic/restic/issues/3013 diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 4769654bf..0f4dabc84 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -56,14 +56,6 @@ Exit status is 3 if some source data could not be read (incomplete snapshot crea }, DisableAutoGenTag: true, RunE: func(cmd *cobra.Command, args []string) error { - if backupOptions.Stdin { - for _, filename := range backupOptions.FilesFrom { - if filename == "-" { - return errors.Fatal("cannot use both `--stdin` and `--files-from -`") - } - } - } - var t tomb.Tomb term := termstatus.New(globalOptions.stdout, globalOptions.stderr, globalOptions.Quiet) t.Go(func() error { term.Run(t.Context(globalOptions.ctx)); return nil }) @@ -94,6 +86,8 @@ type BackupOptions struct { Tags restic.TagList Host string FilesFrom []string + FilesFromVerbatim []string + FilesFromRaw []string TimeStamp string WithAtime bool IgnoreInode bool @@ -127,7 +121,9 @@ func init() { f.StringVar(&backupOptions.Host, "hostname", "", "set the `hostname` for the snapshot manually") f.MarkDeprecated("hostname", "use --host") - f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args/can be specified multiple times)") + f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)") + f.StringArrayVar(&backupOptions.FilesFromVerbatim, "files-from-verbatim", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)") + f.StringArrayVar(&backupOptions.FilesFromRaw, "files-from-raw", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)") f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. 
'2012-11-01 22:08:41') (default: now)") f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories") f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files") @@ -156,11 +152,13 @@ func filterExisting(items []string) (result []string, err error) { return } -// readFromFile will read all lines from the given filename and return them as -// a string array, if filename is empty readFromFile returns and empty string -// array. If filename is a dash (-), readFromFile will read the lines from the +// readLines reads all lines from the named file and returns them as a +// string slice. +// +// If filename is empty, readLines returns an empty slice. +// If filename is a dash (-), readLines will read the lines from the // standard input. -func readLinesFromFile(filename string) ([]string, error) { +func readLines(filename string) ([]string, error) { if filename == "" { return nil, nil } @@ -184,29 +182,61 @@ func readLinesFromFile(filename string) ([]string, error) { scanner := bufio.NewScanner(bytes.NewReader(data)) for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - // ignore empty lines - if line == "" { - continue - } - // strip comments - if strings.HasPrefix(line, "#") { - continue - } - lines = append(lines, line) + lines = append(lines, scanner.Text()) } if err := scanner.Err(); err != nil { return nil, err } - return lines, nil } +// readFilenamesFromFileRaw reads a list of filenames from the given file, +// or stdin if filename is "-". Each filename is terminated by a zero byte, +// which is stripped off. 
+func readFilenamesFromFileRaw(filename string) (names []string, err error) { + f := os.Stdin + if filename != "-" { + if f, err = os.Open(filename); err != nil { + return nil, err + } + defer f.Close() + } + + return readFilenamesRaw(f) +} + +func readFilenamesRaw(r io.Reader) (names []string, err error) { + br := bufio.NewReader(r) + for { + name, err := br.ReadString(0) + switch err { + case nil: + case io.EOF: + if name == "" { + return names, nil + } + return nil, errors.Fatal("--files-from-raw: trailing zero byte missing") + default: + return nil, err + } + + name = name[:len(name)-1] + if name == "" { + // The empty filename is never valid. Handle this now to + // prevent downstream code from erroneously backing up + // filepath.Clean("") == ".". + return nil, errors.Fatal("--files-from-raw: empty filename in listing") + } + names = append(names, name) + } +} + // Check returns an error when an invalid combination of options was set. func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error { if gopts.password == "" { - for _, filename := range opts.FilesFrom { + filesFrom := append(append(opts.FilesFrom, opts.FilesFromVerbatim...), opts.FilesFromRaw...) 
+ for _, filename := range filesFrom { if filename == "-" { return errors.Fatal("unable to read password from stdin when data is to be read from stdin, use --password-file or $RESTIC_PASSWORD") } @@ -217,6 +247,12 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error { if len(opts.FilesFrom) > 0 { return errors.Fatal("--stdin and --files-from cannot be used together") } + if len(opts.FilesFromVerbatim) > 0 { + return errors.Fatal("--stdin and --files-from-verbatim cannot be used together") + } + if len(opts.FilesFromRaw) > 0 { + return errors.Fatal("--stdin and --files-from-raw cannot be used together") + } if len(args) > 0 { return errors.Fatal("--stdin was specified and files/dirs were listed as arguments") @@ -356,15 +392,19 @@ func collectTargets(opts BackupOptions, args []string) (targets []string, err er return nil, nil } - var lines []string for _, file := range opts.FilesFrom { - fromfile, err := readLinesFromFile(file) + fromfile, err := readLines(file) if err != nil { return nil, err } // expand wildcards for _, line := range fromfile { + line = strings.TrimSpace(line) + if line == "" || line[0] == '#' { // '#' marks a comment. + continue + } + var expanded []string expanded, err := filepath.Glob(line) if err != nil { @@ -373,19 +413,38 @@ func collectTargets(opts BackupOptions, args []string) (targets []string, err er if len(expanded) == 0 { Warnf("pattern %q does not match any files, skipping\n", line) } - lines = append(lines, expanded...) + targets = append(targets, expanded...) } } - // merge files from files-from into normal args so we can reuse the normal - // args checks and have the ability to use both files-from and args at the - // same time - args = append(args, lines...) 
- if len(args) == 0 && !opts.Stdin { + for _, file := range opts.FilesFromVerbatim { + fromfile, err := readLines(file) + if err != nil { + return nil, err + } + for _, line := range fromfile { + if line == "" { + continue + } + targets = append(targets, line) + } + } + + for _, file := range opts.FilesFromRaw { + fromfile, err := readFilenamesFromFileRaw(file) + if err != nil { + return nil, err + } + targets = append(targets, fromfile...) + } + + // Merge args into files-from so we can reuse the normal args checks + // and have the ability to use both files-from and args at the same time. + targets = append(targets, args...) + if len(targets) == 0 && !opts.Stdin { return nil, errors.Fatal("nothing to backup, please specify target files/dirs") } - targets = args targets, err = filterExisting(targets) if err != nil { return nil, err diff --git a/cmd/restic/cmd_backup_test.go b/cmd/restic/cmd_backup_test.go new file mode 100644 index 000000000..90e9a5a4b --- /dev/null +++ b/cmd/restic/cmd_backup_test.go @@ -0,0 +1,113 @@ +package main + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "runtime" + "sort" + "strings" + "testing" + + rtest "github.com/restic/restic/internal/test" +) + +func TestCollectTargets(t *testing.T) { + dir, cleanup := rtest.TempDir(t) + defer cleanup() + + fooSpace := "foo " + barStar := "bar*" // Must sort before the others, below. + if runtime.GOOS == "windows" { // Doesn't allow "*" or trailing space. + fooSpace = "foo" + barStar = "bar" + } + + var expect []string + for _, filename := range []string{ + barStar, "baz", "cmdline arg", fooSpace, + "fromfile", "fromfile-raw", "fromfile-verbatim", "quux", + } { + // All mentioned files must exist for collectTargets. + f, err := os.Create(filepath.Join(dir, filename)) + rtest.OK(t, err) + f.Close() + + expect = append(expect, f.Name()) + } + + f1, err := os.Create(filepath.Join(dir, "fromfile")) + rtest.OK(t, err) + // Empty lines should be ignored. A line starting with '#' is a comment. 
+ fmt.Fprintf(f1, "\n%s*\n # here's a comment\n", f1.Name()) + f1.Close() + + f2, err := os.Create(filepath.Join(dir, "fromfile-verbatim")) + rtest.OK(t, err) + for _, filename := range []string{fooSpace, barStar} { + // Empty lines should be ignored. CR+LF is allowed. + fmt.Fprintf(f2, "%s\r\n\n", filepath.Join(dir, filename)) + } + f2.Close() + + f3, err := os.Create(filepath.Join(dir, "fromfile-raw")) + rtest.OK(t, err) + for _, filename := range []string{"baz", "quux"} { + fmt.Fprintf(f3, "%s\x00", filepath.Join(dir, filename)) + } + rtest.OK(t, err) + f3.Close() + + opts := BackupOptions{ + FilesFrom: []string{f1.Name()}, + FilesFromVerbatim: []string{f2.Name()}, + FilesFromRaw: []string{f3.Name()}, + } + + targets, err := collectTargets(opts, []string{filepath.Join(dir, "cmdline arg")}) + rtest.OK(t, err) + sort.Strings(targets) + rtest.Equals(t, expect, targets) +} + +func TestReadFilenamesRaw(t *testing.T) { + // These should all be returned exactly as-is. + expected := []string{ + "\xef\xbb\xbf/utf-8-bom", + "/absolute", + "../.././relative", + "\t\t leading and trailing space \t\t", + "newline\nin filename", + "not UTF-8: \x80\xff/simple", + ` / *[]* \ `, + } + + var buf bytes.Buffer + for _, name := range expected { + buf.WriteString(name) + buf.WriteByte(0) + } + + got, err := readFilenamesRaw(&buf) + rtest.OK(t, err) + rtest.Equals(t, expected, got) + + // Empty input is ok. + got, err = readFilenamesRaw(strings.NewReader("")) + rtest.OK(t, err) + rtest.Equals(t, 0, len(got)) + + // An empty filename is an error. + _, err = readFilenamesRaw(strings.NewReader("foo\x00\x00")) + rtest.Assert(t, err != nil, "no error for zero byte") + rtest.Assert(t, strings.Contains(err.Error(), "empty filename"), + "wrong error message: %v", err.Error()) + + // No trailing NUL byte is an error, because it likely means we're + // reading a line-oriented text file (someone forgot -print0). 
+ _, err = readFilenamesRaw(strings.NewReader("simple.txt")) + rtest.Assert(t, err != nil, "no error for zero byte") + rtest.Assert(t, strings.Contains(err.Error(), "zero byte"), + "wrong error message: %v", err.Error()) +} diff --git a/doc/040_backup.rst b/doc/040_backup.rst index 914330f81..f7d803576 100644 --- a/doc/040_backup.rst +++ b/doc/040_backup.rst @@ -276,36 +276,56 @@ suffix the size value with one of ``k``/``K`` for kilobytes, ``m``/``M`` for meg Including Files *************** -By using the ``--files-from`` option you can read the files you want to back -up from one or more folders. This is especially useful if a lot of files have -to be backed up that are not in the same folder or are maybe pre-filtered by -other software. +The options ``--files-from``, ``--files-from-verbatim`` and ``--files-from-raw`` +allow you to list files that should be backed up in a file, rather than on the +command line. This is useful when a lot of files have to be backed up that are +not in the same folder. -For example maybe you want to backup files which have a name that matches a -certain pattern: +The argument passed to ``--files-from`` must be the name of a text file that +contains one pattern per line. The file must be encoded as UTF-8, or UTF-16 +with a byte-order mark. Leading and trailing whitespace is removed from the +patterns. Empty lines and lines starting with a ``#`` are ignored. +The patterns are expanded, when the file is read, by the Go function +`filepath.Glob <https://golang.org/pkg/path/filepath/#Glob>`__. + +The option ``--files-from-verbatim`` has the same behavior as ``--files-from``, +except that it contains literal filenames. It does not expand patterns; filenames +are listed verbatim. Lines starting with a ``#`` are not ignored; leading and +trailing whitespace is not trimmed off. Empty lines are still allowed, so that +files can be grouped. 
+ +``--files-from-raw`` is a third variant that requires filenames to be terminated +by a zero byte (the NUL character), so that it can even handle filenames that +contain newlines or are not encoded as UTF-8 (except on Windows, where the +listed filenames must still be encoded in UTF-8). + +This option is the safest choice when generating filename lists from a script. +Its file format is the output format generated by GNU find's ``-print0`` option. + +All three options interpret the argument ``-`` as standard input. + +In all cases, paths may be absolute or relative to ``restic backup``'s +working directory. + +For example, maybe you want to backup files which have a name that matches a +certain regular expression pattern (uses GNU find): .. code-block:: console - $ find /tmp/somefiles | grep 'PATTERN' > /tmp/files_to_backup + $ find /tmp/somefiles -regex PATTERN -print0 > /tmp/files_to_backup You can then use restic to backup the filtered files: .. code-block:: console - $ restic -r /srv/restic-repo backup --files-from /tmp/files_to_backup + $ restic -r /srv/restic-repo backup --files-from-raw /tmp/files_to_backup -Incidentally you can also combine ``--files-from`` with the normal files -args: +You can combine all three options with each other and with the normal file arguments: .. code-block:: console - $ restic -r /srv/restic-repo backup --files-from /tmp/files_to_backup /tmp/some_additional_file - -Paths in the listing file can be absolute or relative. Please note that -patterns listed in a ``--files-from`` file are treated the same way as -exclude patterns are, which means that beginning and trailing spaces are -trimmed and special characters must be escaped. See the documentation -above for more information. + $ restic backup --files-from /tmp/files_to_backup /tmp/some_additional_file + $ restic backup --files-from /tmp/glob-pattern --files-from-raw /tmp/generated-list /tmp/some_additional_file Comparing Snapshots *******************