backup: Add --files-from-{verbatim,raw} options

This commit is contained in:
greatroar 2020-11-20 12:15:16 +01:00 committed by Leo R. Lundgren
parent 2f9346a5af
commit 55bf76ba0c
4 changed files with 255 additions and 51 deletions

View File

@ -0,0 +1,12 @@
Enhancement: Backup options --files-from-verbatim and --files-from-raw
The new backup options `--files-from-verbatim` and `--files-from-raw`
read a list of files to back up from a file. Unlike the existing `--files-from`,
these options do not interpret the listed filenames as glob patterns;
whitespace in filenames is preserved as-is and no pattern expansion is done.
These new options are recommended over `--files-from` when generating the
list of files to back up from a script. Please see the documentation for specifics.
https://github.com/restic/restic/issues/2944
https://github.com/restic/restic/issues/3013

View File

@ -56,14 +56,6 @@ Exit status is 3 if some source data could not be read (incomplete snapshot crea
},
DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error {
if backupOptions.Stdin {
for _, filename := range backupOptions.FilesFrom {
if filename == "-" {
return errors.Fatal("cannot use both `--stdin` and `--files-from -`")
}
}
}
var t tomb.Tomb
term := termstatus.New(globalOptions.stdout, globalOptions.stderr, globalOptions.Quiet)
t.Go(func() error { term.Run(t.Context(globalOptions.ctx)); return nil })
@ -94,6 +86,8 @@ type BackupOptions struct {
Tags restic.TagList
Host string
FilesFrom []string
FilesFromVerbatim []string
FilesFromRaw []string
TimeStamp string
WithAtime bool
IgnoreInode bool
@ -127,7 +121,9 @@ func init() {
f.StringVar(&backupOptions.Host, "hostname", "", "set the `hostname` for the snapshot manually")
f.MarkDeprecated("hostname", "use --host")
f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args/can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFrom, "files-from", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromVerbatim, "files-from-verbatim", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringArrayVar(&backupOptions.FilesFromRaw, "files-from-raw", nil, "read the files to backup from `file` (can be combined with file args; can be specified multiple times)")
f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. '2012-11-01 22:08:41') (default: now)")
f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories")
f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files")
@ -156,11 +152,13 @@ func filterExisting(items []string) (result []string, err error) {
return
}
// readFromFile will read all lines from the given filename and return them as
// a string array, if filename is empty readFromFile returns and empty string
// array. If filename is a dash (-), readFromFile will read the lines from the
// readLines reads all lines from the named file and returns them as a
// string slice.
//
// If filename is empty, readPatternsFromFile returns an empty slice.
// If filename is a dash (-), readPatternsFromFile will read the lines from the
// standard input.
func readLinesFromFile(filename string) ([]string, error) {
func readLines(filename string) ([]string, error) {
if filename == "" {
return nil, nil
}
@ -184,29 +182,61 @@ func readLinesFromFile(filename string) ([]string, error) {
scanner := bufio.NewScanner(bytes.NewReader(data))
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
// ignore empty lines
if line == "" {
continue
}
// strip comments
if strings.HasPrefix(line, "#") {
continue
}
lines = append(lines, line)
lines = append(lines, scanner.Text())
}
if err := scanner.Err(); err != nil {
return nil, err
}
return lines, nil
}
// readFilenamesFromFileRaw reads a list of filenames from the given file,
// or stdin if filename is "-". Each filename is terminated by a zero byte,
// which is stripped off.
func readFilenamesFromFileRaw(filename string) (names []string, err error) {
f := os.Stdin
if filename != "-" {
if f, err = os.Open(filename); err != nil {
return nil, err
}
defer f.Close()
}
return readFilenamesRaw(f)
}
func readFilenamesRaw(r io.Reader) (names []string, err error) {
br := bufio.NewReader(r)
for {
name, err := br.ReadString(0)
switch err {
case nil:
case io.EOF:
if name == "" {
return names, nil
}
return nil, errors.Fatal("--files-from-raw: trailing zero byte missing")
default:
return nil, err
}
name = name[:len(name)-1]
if name == "" {
// The empty filename is never valid. Handle this now to
// prevent downstream code from erroneously backing up
// filepath.Clean("") == ".".
return nil, errors.Fatal("--files-from-raw: empty filename in listing")
}
names = append(names, name)
}
}
// Check returns an error when an invalid combination of options was set.
func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
if gopts.password == "" {
for _, filename := range opts.FilesFrom {
filesFrom := append(append(opts.FilesFrom, opts.FilesFromVerbatim...), opts.FilesFromRaw...)
for _, filename := range filesFrom {
if filename == "-" {
return errors.Fatal("unable to read password from stdin when data is to be read from stdin, use --password-file or $RESTIC_PASSWORD")
}
@ -217,6 +247,12 @@ func (opts BackupOptions) Check(gopts GlobalOptions, args []string) error {
if len(opts.FilesFrom) > 0 {
return errors.Fatal("--stdin and --files-from cannot be used together")
}
if len(opts.FilesFromVerbatim) > 0 {
return errors.Fatal("--stdin and --files-from-verbatim cannot be used together")
}
if len(opts.FilesFromRaw) > 0 {
return errors.Fatal("--stdin and --files-from-raw cannot be used together")
}
if len(args) > 0 {
return errors.Fatal("--stdin was specified and files/dirs were listed as arguments")
@ -356,15 +392,19 @@ func collectTargets(opts BackupOptions, args []string) (targets []string, err er
return nil, nil
}
var lines []string
for _, file := range opts.FilesFrom {
fromfile, err := readLinesFromFile(file)
fromfile, err := readLines(file)
if err != nil {
return nil, err
}
// expand wildcards
for _, line := range fromfile {
line = strings.TrimSpace(line)
if line == "" || line[0] == '#' { // '#' marks a comment.
continue
}
var expanded []string
expanded, err := filepath.Glob(line)
if err != nil {
@ -373,19 +413,38 @@ func collectTargets(opts BackupOptions, args []string) (targets []string, err er
if len(expanded) == 0 {
Warnf("pattern %q does not match any files, skipping\n", line)
}
lines = append(lines, expanded...)
targets = append(targets, expanded...)
}
}
// merge files from files-from into normal args so we can reuse the normal
// args checks and have the ability to use both files-from and args at the
// same time
args = append(args, lines...)
if len(args) == 0 && !opts.Stdin {
for _, file := range opts.FilesFromVerbatim {
fromfile, err := readLines(file)
if err != nil {
return nil, err
}
for _, line := range fromfile {
if line == "" {
continue
}
targets = append(targets, line)
}
}
for _, file := range opts.FilesFromRaw {
fromfile, err := readFilenamesFromFileRaw(file)
if err != nil {
return nil, err
}
targets = append(targets, fromfile...)
}
// Merge args into files-from so we can reuse the normal args checks
// and have the ability to use both files-from and args at the same time.
targets = append(targets, args...)
if len(targets) == 0 && !opts.Stdin {
return nil, errors.Fatal("nothing to backup, please specify target files/dirs")
}
targets = args
targets, err = filterExisting(targets)
if err != nil {
return nil, err

View File

@ -0,0 +1,113 @@
package main
import (
"bytes"
"fmt"
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"testing"
rtest "github.com/restic/restic/internal/test"
)
func TestCollectTargets(t *testing.T) {
dir, cleanup := rtest.TempDir(t)
defer cleanup()
fooSpace := "foo "
barStar := "bar*" // Must sort before the others, below.
if runtime.GOOS == "windows" { // Doesn't allow "*" or trailing space.
fooSpace = "foo"
barStar = "bar"
}
var expect []string
for _, filename := range []string{
barStar, "baz", "cmdline arg", fooSpace,
"fromfile", "fromfile-raw", "fromfile-verbatim", "quux",
} {
// All mentioned files must exist for collectTargets.
f, err := os.Create(filepath.Join(dir, filename))
rtest.OK(t, err)
f.Close()
expect = append(expect, f.Name())
}
f1, err := os.Create(filepath.Join(dir, "fromfile"))
rtest.OK(t, err)
// Empty lines should be ignored. A line starting with '#' is a comment.
fmt.Fprintf(f1, "\n%s*\n # here's a comment\n", f1.Name())
f1.Close()
f2, err := os.Create(filepath.Join(dir, "fromfile-verbatim"))
rtest.OK(t, err)
for _, filename := range []string{fooSpace, barStar} {
// Empty lines should be ignored. CR+LF is allowed.
fmt.Fprintf(f2, "%s\r\n\n", filepath.Join(dir, filename))
}
f2.Close()
f3, err := os.Create(filepath.Join(dir, "fromfile-raw"))
rtest.OK(t, err)
for _, filename := range []string{"baz", "quux"} {
fmt.Fprintf(f3, "%s\x00", filepath.Join(dir, filename))
}
rtest.OK(t, err)
f3.Close()
opts := BackupOptions{
FilesFrom: []string{f1.Name()},
FilesFromVerbatim: []string{f2.Name()},
FilesFromRaw: []string{f3.Name()},
}
targets, err := collectTargets(opts, []string{filepath.Join(dir, "cmdline arg")})
rtest.OK(t, err)
sort.Strings(targets)
rtest.Equals(t, expect, targets)
}
func TestReadFilenamesRaw(t *testing.T) {
// These should all be returned exactly as-is.
expected := []string{
"\xef\xbb\xbf/utf-8-bom",
"/absolute",
"../.././relative",
"\t\t leading and trailing space \t\t",
"newline\nin filename",
"not UTF-8: \x80\xff/simple",
` / *[]* \ `,
}
var buf bytes.Buffer
for _, name := range expected {
buf.WriteString(name)
buf.WriteByte(0)
}
got, err := readFilenamesRaw(&buf)
rtest.OK(t, err)
rtest.Equals(t, expected, got)
// Empty input is ok.
got, err = readFilenamesRaw(strings.NewReader(""))
rtest.OK(t, err)
rtest.Equals(t, 0, len(got))
// An empty filename is an error.
_, err = readFilenamesRaw(strings.NewReader("foo\x00\x00"))
rtest.Assert(t, err != nil, "no error for zero byte")
rtest.Assert(t, strings.Contains(err.Error(), "empty filename"),
"wrong error message: %v", err.Error())
// No trailing NUL byte is an error, because it likely means we're
// reading a line-oriented text file (someone forgot -print0).
_, err = readFilenamesRaw(strings.NewReader("simple.txt"))
rtest.Assert(t, err != nil, "no error for zero byte")
rtest.Assert(t, strings.Contains(err.Error(), "zero byte"),
"wrong error message: %v", err.Error())
}

View File

@ -276,36 +276,56 @@ suffix the size value with one of ``k``/``K`` for kilobytes, ``m``/``M`` for meg
Including Files
***************
By using the ``--files-from`` option you can read the files you want to back
up from one or more folders. This is especially useful if a lot of files have
to be backed up that are not in the same folder or are maybe pre-filtered by
other software.
The options ``--files-from``, ``--files-from-verbatim`` and ``--files-from-raw``
allow you to list files that should be backed up in a file, rather than on the
command line. This is useful when a lot of files have to be backed up that are
not in the same folder.
For example maybe you want to backup files which have a name that matches a
certain pattern:
The argument passed to ``--files-from`` must be the name of a text file that
contains one pattern per line. The file must be encoded as UTF-8, or UTF-16
with a byte-order mark. Leading and trailing whitespace is removed from the
patterns. Empty lines and lines starting with a ``#`` are ignored.
The patterns are expanded, when the file is read, by the Go function
`filepath.Glob <https://golang.org/pkg/path/filepath/#Glob>`__.
The option ``--files-from-verbatim`` has the same behavior as ``--files-from``,
except that it contains literal filenames. It does expand patterns; filenames
are listed verbatim. Lines starting with a ``#`` are not ignored; leading and
trailing whitespace is not trimmed off. Empty lines are still allowed, so that
files can be grouped.
``--files-from-raw`` is a third variant that requires filenames to be terminated
by a zero byte (the NUL character), so that it can even handle filenames that
contain newlines or are not encoded as UTF-8 (except on Windows, where the
listed filenames must still be encoded in UTF-8).
This option is the safest choice when generating filename lists from a script.
Its file format is the output format generated by GNU find's ``-print0`` option.
All three arguments interpret the argument ``-`` as standard input.
In all cases, paths may be absolute or relative to ``restic backup``'s
working directory.
For example, maybe you want to backup files which have a name that matches a
certain regular expression pattern (uses GNU find):
.. code-block:: console
$ find /tmp/somefiles | grep 'PATTERN' > /tmp/files_to_backup
$ find /tmp/somefiles -regex PATTERN -print0 > /tmp/files_to_backup
You can then use restic to backup the filtered files:
.. code-block:: console
$ restic -r /srv/restic-repo backup --files-from /tmp/files_to_backup
$ restic -r /srv/restic-repo backup --files-from-raw /tmp/files_to_backup
Incidentally you can also combine ``--files-from`` with the normal files
args:
You can combine all three options with each other and with the normal file arguments:
.. code-block:: console
$ restic -r /srv/restic-repo backup --files-from /tmp/files_to_backup /tmp/some_additional_file
Paths in the listing file can be absolute or relative. Please note that
patterns listed in a ``--files-from`` file are treated the same way as
exclude patterns are, which means that beginning and trailing spaces are
trimmed and special characters must be escaped. See the documentation
above for more information.
$ restic backup --files-from /tmp/files_to_backup /tmp/some_additional_file
$ restic backup --files-from /tmp/glob-pattern --files-from-raw /tmp/generated-list /tmp/some_additional_file
Comparing Snapshots
*******************