diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 1fdec081b..ceb7694b1 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -319,18 +319,18 @@ func collectRejectByNameFuncs(opts BackupOptions, repo *repository.Repository) ( // collectRejectFuncs returns a list of all functions which may reject data // from being saved in a snapshot based on path and file info -func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs []RejectFunc, err error) { +func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs []archiver.RejectFunc, err error) { // allowed devices - if opts.ExcludeOtherFS && !opts.Stdin { - f, err := rejectByDevice(targets, fs) + if opts.ExcludeOtherFS && !opts.Stdin && !opts.StdinCommand { + f, err := archiver.RejectByDevice(targets, fs) if err != nil { return nil, err } funcs = append(funcs, f) } - if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin { - f, err := rejectBySize(opts.ExcludeLargerThan) + if len(opts.ExcludeLargerThan) != 0 && !opts.Stdin && !opts.StdinCommand { + f, err := archiver.RejectBySize(opts.ExcludeLargerThan) if err != nil { return nil, err } @@ -342,7 +342,7 @@ func collectRejectFuncs(opts BackupOptions, targets []string, fs fs.FS) (funcs [ } for _, spec := range opts.ExcludeIfPresent { - f, err := rejectIfPresent(spec) + f, err := archiver.RejectIfPresent(spec, Warnf) if err != nil { return nil, err } diff --git a/cmd/restic/exclude.go b/cmd/restic/exclude.go index f1e1011f2..40eb93933 100644 --- a/cmd/restic/exclude.go +++ b/cmd/restic/exclude.go @@ -4,10 +4,8 @@ import ( "bufio" "bytes" "fmt" - "io" "os" "strings" - "sync" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" @@ -15,64 +13,14 @@ import ( "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/textfile" - "github.com/restic/restic/internal/ui" "github.com/spf13/pflag" ) -type 
rejectionCache struct { - m map[string]bool - mtx sync.Mutex -} - -// Lock locks the mutex in rc. -func (rc *rejectionCache) Lock() { - if rc != nil { - rc.mtx.Lock() - } -} - -// Unlock unlocks the mutex in rc. -func (rc *rejectionCache) Unlock() { - if rc != nil { - rc.mtx.Unlock() - } -} - -// Get returns the last stored value for dir and a second boolean that -// indicates whether that value was actually written to the cache. It is the -// callers responsibility to call rc.Lock and rc.Unlock before using this -// method, otherwise data races may occur. -func (rc *rejectionCache) Get(dir string) (bool, bool) { - if rc == nil || rc.m == nil { - return false, false - } - v, ok := rc.m[dir] - return v, ok -} - -// Store stores a new value for dir. It is the callers responsibility to call -// rc.Lock and rc.Unlock before using this method, otherwise data races may -// occur. -func (rc *rejectionCache) Store(dir string, rejected bool) { - if rc == nil { - return - } - if rc.m == nil { - rc.m = make(map[string]bool) - } - rc.m[dir] = rejected -} - // RejectByNameFunc is a function that takes a filename of a // file that would be included in the backup. The function returns true if it // should be excluded (rejected) from the backup. type RejectByNameFunc func(path string) bool -// RejectFunc is a function that takes a filename and os.FileInfo of a -// file that would be included in the backup. The function returns true if it -// should be excluded (rejected) from the backup. -type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool - // rejectByPattern returns a RejectByNameFunc which rejects files that match // one of the patterns. func rejectByPattern(patterns []string) RejectByNameFunc { @@ -104,239 +52,6 @@ func rejectByInsensitivePattern(patterns []string) RejectByNameFunc { } } -// rejectIfPresent returns a RejectByNameFunc which itself returns whether a path -// should be excluded. 
The RejectByNameFunc considers a file to be excluded when -// it resides in a directory with an exclusion file, that is specified by -// excludeFileSpec in the form "filename[:content]". The returned error is -// non-nil if the filename component of excludeFileSpec is empty. If rc is -// non-nil, it is going to be used in the RejectByNameFunc to expedite the evaluation -// of a directory based on previous visits. -func rejectIfPresent(excludeFileSpec string) (RejectFunc, error) { - if excludeFileSpec == "" { - return nil, errors.New("name for exclusion tagfile is empty") - } - colon := strings.Index(excludeFileSpec, ":") - if colon == 0 { - return nil, fmt.Errorf("no name for exclusion tagfile provided") - } - tf, tc := "", "" - if colon > 0 { - tf = excludeFileSpec[:colon] - tc = excludeFileSpec[colon+1:] - } else { - tf = excludeFileSpec - } - debug.Log("using %q as exclusion tagfile", tf) - rc := &rejectionCache{} - return func(filename string, _ os.FileInfo, fs fs.FS) bool { - return isExcludedByFile(filename, tf, tc, rc, fs) - }, nil -} - -// isExcludedByFile interprets filename as a path and returns true if that file -// is in an excluded directory. A directory is identified as excluded if it contains a -// tagfile which bears the name specified in tagFilename and starts with -// header. If rc is non-nil, it is used to expedite the evaluation of a -// directory based on previous visits. 
-func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS) bool { - if tagFilename == "" { - return false - } - if fs.Base(filename) == tagFilename { - return false // do not exclude the tagfile itself - } - rc.Lock() - defer rc.Unlock() - - dir := fs.Dir(filename) - rejected, visited := rc.Get(dir) - if visited { - return rejected - } - rejected = isDirExcludedByFile(dir, tagFilename, header, fs) - rc.Store(dir, rejected) - return rejected -} - -func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS) bool { - tf := fs.Join(dir, tagFilename) - _, err := fs.Lstat(tf) - if os.IsNotExist(err) { - return false - } - if err != nil { - Warnf("could not access exclusion tagfile: %v", err) - return false - } - // when no signature is given, the mere presence of tf is enough reason - // to exclude filename - if len(header) == 0 { - return true - } - // From this stage, errors mean tagFilename exists but it is malformed. - // Warnings will be generated so that the user is informed that the - // indented ignore-action is not performed. - f, err := fs.OpenFile(tf, os.O_RDONLY, 0) - if err != nil { - Warnf("could not open exclusion tagfile: %v", err) - return false - } - defer func() { - _ = f.Close() - }() - buf := make([]byte, len(header)) - _, err = io.ReadFull(f, buf) - // EOF is handled with a dedicated message, otherwise the warning were too cryptic - if err == io.EOF { - Warnf("invalid (too short) signature in exclusion tagfile %q\n", tf) - return false - } - if err != nil { - Warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err) - return false - } - if !bytes.Equal(buf, []byte(header)) { - Warnf("invalid signature in exclusion tagfile %q\n", tf) - return false - } - return true -} - -// DeviceMap is used to track allowed source devices for backup. This is used to -// check for crossing mount points during backup (for --one-file-system). It -// maps the name of a source path to its device ID. 
-type DeviceMap map[string]uint64 - -// NewDeviceMap creates a new device map from the list of source paths. -func NewDeviceMap(allowedSourcePaths []string, fs fs.FS) (DeviceMap, error) { - deviceMap := make(map[string]uint64) - - for _, item := range allowedSourcePaths { - item, err := fs.Abs(fs.Clean(item)) - if err != nil { - return nil, err - } - - fi, err := fs.Lstat(item) - if err != nil { - return nil, err - } - - id, err := fs.DeviceID(fi) - if err != nil { - return nil, err - } - - deviceMap[item] = id - } - - if len(deviceMap) == 0 { - return nil, errors.New("zero allowed devices") - } - - return deviceMap, nil -} - -// IsAllowed returns true if the path is located on an allowed device. -func (m DeviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) { - for dir := item; ; dir = fs.Dir(dir) { - debug.Log("item %v, test dir %v", item, dir) - - // find a parent directory that is on an allowed device (otherwise - // we would not traverse the directory at all) - allowedID, ok := m[dir] - if !ok { - if dir == fs.Dir(dir) { - // arrived at root, no allowed device found. this should not happen. - break - } - continue - } - - // if the item has a different device ID than the parent directory, - // we crossed a file system boundary - if allowedID != deviceID { - debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID) - return false, nil - } - - // item is on allowed device, accept it - debug.Log("item %v allowed", item) - return true, nil - } - - return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m) -} - -// rejectByDevice returns a RejectFunc that rejects files which are on a -// different file systems than the files/dirs in samples. 
-func rejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) { - deviceMap, err := NewDeviceMap(samples, filesystem) - if err != nil { - return nil, err - } - debug.Log("allowed devices: %v\n", deviceMap) - - return func(item string, fi os.FileInfo, fs fs.FS) bool { - id, err := fs.DeviceID(fi) - if err != nil { - // This should never happen because gatherDevices() would have - // errored out earlier. If it still does that's a reason to panic. - panic(err) - } - - allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs) - if err != nil { - // this should not happen - panic(fmt.Sprintf("error checking device ID of %v: %v", item, err)) - } - - if allowed { - // accept item - return false - } - - // reject everything except directories - if !fi.IsDir() { - return true - } - - // special case: make sure we keep mountpoints (directories which - // contain a mounted file system). Test this by checking if the parent - // directory would be included. - parentDir := fs.Dir(fs.Clean(item)) - - parentFI, err := fs.Lstat(parentDir) - if err != nil { - debug.Log("item %v: error running lstat() on parent directory: %v", item, err) - // if in doubt, reject - return true - } - - parentDeviceID, err := fs.DeviceID(parentFI) - if err != nil { - debug.Log("item %v: getting device ID of parent directory: %v", item, err) - // if in doubt, reject - return true - } - - parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs) - if err != nil { - debug.Log("item %v: error checking parent directory: %v", item, err) - // if in doubt, reject - return true - } - - if parentAllowed { - // we found a mount point, so accept the directory - return false - } - - // reject everything else - return true - }, nil -} - // rejectResticCache returns a RejectByNameFunc that rejects the restic cache // directory (if set). 
func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) { @@ -361,28 +76,6 @@ func rejectResticCache(repo *repository.Repository) (RejectByNameFunc, error) { }, nil } -func rejectBySize(maxSizeStr string) (RejectFunc, error) { - maxSize, err := ui.ParseBytes(maxSizeStr) - if err != nil { - return nil, err - } - - return func(item string, fi os.FileInfo, _ fs.FS) bool { - // directory will be ignored - if fi.IsDir() { - return false - } - - filesize := fi.Size() - if filesize > maxSize { - debug.Log("file %s is oversize: %d", item, filesize) - return true - } - - return false - }, nil -} - // readPatternsFromFiles reads all files and returns the list of // patterns. For each line, leading and trailing white space is removed // and comment lines are ignored. For each remaining pattern, environment diff --git a/cmd/restic/exclude_test.go b/cmd/restic/exclude_test.go index 166ee1d84..177a81df2 100644 --- a/cmd/restic/exclude_test.go +++ b/cmd/restic/exclude_test.go @@ -1,12 +1,7 @@ package main import ( - "os" - "path/filepath" "testing" - - "github.com/restic/restic/internal/fs" - "github.com/restic/restic/internal/test" ) func TestRejectByPattern(t *testing.T) { @@ -62,252 +57,3 @@ func TestRejectByInsensitivePattern(t *testing.T) { }) } } - -func TestIsExcludedByFile(t *testing.T) { - const ( - tagFilename = "CACHEDIR.TAG" - header = "Signature: 8a477f597d28d172789f06886806bc55" - ) - tests := []struct { - name string - tagFile string - content string - want bool - }{ - {"NoTagfile", "", "", false}, - {"EmptyTagfile", tagFilename, "", true}, - {"UnnamedTagFile", "", header, false}, - {"WrongTagFile", "notatagfile", header, false}, - {"IncorrectSig", tagFilename, header[1:], false}, - {"ValidSig", tagFilename, header, true}, - {"ValidPlusStuff", tagFilename, header + "foo", true}, - {"ValidPlusNewlineAndStuff", tagFilename, header + "\nbar", true}, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - tempDir := 
test.TempDir(t) - - foo := filepath.Join(tempDir, "foo") - err := os.WriteFile(foo, []byte("foo"), 0666) - if err != nil { - t.Fatalf("could not write file: %v", err) - } - if tc.tagFile != "" { - tagFile := filepath.Join(tempDir, tc.tagFile) - err = os.WriteFile(tagFile, []byte(tc.content), 0666) - if err != nil { - t.Fatalf("could not write tagfile: %v", err) - } - } - h := header - if tc.content == "" { - h = "" - } - if got := isExcludedByFile(foo, tagFilename, h, nil, &fs.Local{}); tc.want != got { - t.Fatalf("expected %v, got %v", tc.want, got) - } - }) - } -} - -// TestMultipleIsExcludedByFile is for testing that multiple instances of -// the --exclude-if-present parameter (or the shortcut --exclude-caches do not -// cancel each other out. It was initially written to demonstrate a bug in -// rejectIfPresent. -func TestMultipleIsExcludedByFile(t *testing.T) { - tempDir := test.TempDir(t) - - // Create some files in a temporary directory. - // Files in UPPERCASE will be used as exclusion triggers later on. - // We will test the inclusion later, so we add the expected value as - // a bool. - files := []struct { - path string - incl bool - }{ - {"42", true}, - - // everything in foodir except the NOFOO tagfile - // should not be included. - {"foodir/NOFOO", true}, - {"foodir/foo", false}, - {"foodir/foosub/underfoo", false}, - - // everything in bardir except the NOBAR tagfile - // should not be included. - {"bardir/NOBAR", true}, - {"bardir/bar", false}, - {"bardir/barsub/underbar", false}, - - // everything in bazdir should be included. 
- {"bazdir/baz", true}, - {"bazdir/bazsub/underbaz", true}, - } - var errs []error - for _, f := range files { - // create directories first, then the file - p := filepath.Join(tempDir, filepath.FromSlash(f.path)) - errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700)) - errs = append(errs, os.WriteFile(p, []byte(f.path), 0600)) - } - test.OKs(t, errs) // see if anything went wrong during the creation - - // create two rejection functions, one that tests for the NOFOO file - // and one for the NOBAR file - fooExclude, _ := rejectIfPresent("NOFOO") - barExclude, _ := rejectIfPresent("NOBAR") - - // To mock the archiver scanning walk, we create filepath.WalkFn - // that tests against the two rejection functions and stores - // the result in a map against we can test later. - m := make(map[string]bool) - walk := func(p string, fi os.FileInfo, err error) error { - if err != nil { - return err - } - excludedByFoo := fooExclude(p, nil, &fs.Local{}) - excludedByBar := barExclude(p, nil, &fs.Local{}) - excluded := excludedByFoo || excludedByBar - // the log message helps debugging in case the test fails - t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded) - m[p] = !excluded - if excluded { - return filepath.SkipDir - } - return nil - } - // walk through the temporary file and check the error - test.OK(t, filepath.Walk(tempDir, walk)) - - // compare whether the walk gave the expected values for the test cases - for _, f := range files { - p := filepath.Join(tempDir, filepath.FromSlash(f.path)) - if m[p] != f.incl { - t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p]) - } - } -} - -// TestIsExcludedByFileSize is for testing the instance of -// --exclude-larger-than parameters -func TestIsExcludedByFileSize(t *testing.T) { - tempDir := test.TempDir(t) - - // Max size of file is set to be 1k - maxSizeStr := "1k" - - // Create some files in a temporary directory. 
- // Files in UPPERCASE will be used as exclusion triggers later on. - // We will test the inclusion later, so we add the expected value as - // a bool. - files := []struct { - path string - size int64 - incl bool - }{ - {"42", 100, true}, - - // everything in foodir except the FOOLARGE tagfile - // should not be included. - {"foodir/FOOLARGE", 2048, false}, - {"foodir/foo", 1002, true}, - {"foodir/foosub/underfoo", 100, true}, - - // everything in bardir except the BARLARGE tagfile - // should not be included. - {"bardir/BARLARGE", 1030, false}, - {"bardir/bar", 1000, true}, - {"bardir/barsub/underbar", 500, true}, - - // everything in bazdir should be included. - {"bazdir/baz", 100, true}, - {"bazdir/bazsub/underbaz", 200, true}, - } - var errs []error - for _, f := range files { - // create directories first, then the file - p := filepath.Join(tempDir, filepath.FromSlash(f.path)) - errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700)) - file, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600) - errs = append(errs, err) - if err == nil { - // create a file with given size - errs = append(errs, file.Truncate(f.size)) - } - errs = append(errs, file.Close()) - } - test.OKs(t, errs) // see if anything went wrong during the creation - - // create rejection function - sizeExclude, _ := rejectBySize(maxSizeStr) - - // To mock the archiver scanning walk, we create filepath.WalkFn - // that tests against the two rejection functions and stores - // the result in a map against we can test later. 
- m := make(map[string]bool) - walk := func(p string, fi os.FileInfo, err error) error { - if err != nil { - return err - } - - excluded := sizeExclude(p, fi, nil) - // the log message helps debugging in case the test fails - t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded) - m[p] = !excluded - return nil - } - // walk through the temporary file and check the error - test.OK(t, filepath.Walk(tempDir, walk)) - - // compare whether the walk gave the expected values for the test cases - for _, f := range files { - p := filepath.Join(tempDir, filepath.FromSlash(f.path)) - if m[p] != f.incl { - t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p]) - } - } -} - -func TestDeviceMap(t *testing.T) { - deviceMap := DeviceMap{ - filepath.FromSlash("/"): 1, - filepath.FromSlash("/usr/local"): 5, - } - - var tests = []struct { - item string - deviceID uint64 - allowed bool - }{ - {"/root", 1, true}, - {"/usr", 1, true}, - - {"/proc", 2, false}, - {"/proc/1234", 2, false}, - - {"/usr", 3, false}, - {"/usr/share", 3, false}, - - {"/usr/local", 5, true}, - {"/usr/local/foobar", 5, true}, - - {"/usr/local/foobar/submount", 23, false}, - {"/usr/local/foobar/submount/file", 23, false}, - - {"/usr/local/foobar/outhersubmount", 1, false}, - {"/usr/local/foobar/outhersubmount/otherfile", 1, false}, - } - - for _, test := range tests { - t.Run("", func(t *testing.T) { - res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID, &fs.Local{}) - if err != nil { - t.Fatal(err) - } - - if res != test.allowed { - t.Fatalf("wrong result returned by IsAllowed(%v): want %v, got %v", test.item, test.allowed, res) - } - }) - } -} diff --git a/internal/archiver/exclude.go b/internal/archiver/exclude.go new file mode 100644 index 000000000..f4444812c --- /dev/null +++ b/internal/archiver/exclude.go @@ -0,0 +1,311 @@ +package archiver + +import ( + "bytes" + "fmt" + "io" + "os" + "strings" + "sync" + + 
"github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/ui" +) + +type rejectionCache struct { + m map[string]bool + mtx sync.Mutex +} + +func newRejectionCache() *rejectionCache { + return &rejectionCache{m: make(map[string]bool)} +} + +// Lock locks the mutex in rc. +func (rc *rejectionCache) Lock() { + rc.mtx.Lock() +} + +// Unlock unlocks the mutex in rc. +func (rc *rejectionCache) Unlock() { + rc.mtx.Unlock() +} + +// Get returns the last stored value for dir and a second boolean that +// indicates whether that value was actually written to the cache. It is the +// caller's responsibility to call rc.Lock and rc.Unlock before using this +// method, otherwise data races may occur. +func (rc *rejectionCache) Get(dir string) (bool, bool) { + v, ok := rc.m[dir] + return v, ok +} + +// Store stores a new value for dir. It is the caller's responsibility to call +// rc.Lock and rc.Unlock before using this method, otherwise data races may +// occur. +func (rc *rejectionCache) Store(dir string, rejected bool) { + rc.m[dir] = rejected +} + +// RejectFunc is a function that takes a filename and os.FileInfo of a +// file that would be included in the backup. The function returns true if it +// should be excluded (rejected) from the backup. +type RejectFunc func(path string, fi os.FileInfo, fs fs.FS) bool + +// RejectIfPresent returns a RejectFunc which itself returns whether a path +// should be excluded. The RejectFunc considers a file to be excluded when +// it resides in a directory with an exclusion file, that is specified by +// excludeFileSpec in the form "filename[:content]". The returned error is +// non-nil if the filename component of excludeFileSpec is empty. Tagfile problems +// are reported via warnf, and results are cached by the RejectFunc to expedite the evaluation +// of a directory based on previous visits.
+func RejectIfPresent(excludeFileSpec string, warnf func(msg string, args ...interface{})) (RejectFunc, error) { + if excludeFileSpec == "" { + return nil, errors.New("name for exclusion tagfile is empty") + } + colon := strings.Index(excludeFileSpec, ":") + if colon == 0 { + return nil, fmt.Errorf("no name for exclusion tagfile provided") + } + tf, tc := "", "" + if colon > 0 { + tf = excludeFileSpec[:colon] + tc = excludeFileSpec[colon+1:] + } else { + tf = excludeFileSpec + } + debug.Log("using %q as exclusion tagfile", tf) + rc := newRejectionCache() + return func(filename string, _ os.FileInfo, fs fs.FS) bool { + return isExcludedByFile(filename, tf, tc, rc, fs, warnf) + }, nil +} + +// isExcludedByFile interprets filename as a path and returns true if that file +// is in an excluded directory. A directory is identified as excluded if it contains a +// tagfile which bears the name specified in tagFilename and starts with +// header. rc must be non-nil; it is used to expedite the evaluation of a +// directory based on previous visits.
+func isExcludedByFile(filename, tagFilename, header string, rc *rejectionCache, fs fs.FS, warnf func(msg string, args ...interface{})) bool { + if tagFilename == "" { + return false + } + + if fs.Base(filename) == tagFilename { + return false // do not exclude the tagfile itself + } + rc.Lock() + defer rc.Unlock() + + dir := fs.Dir(filename) + rejected, visited := rc.Get(dir) + if visited { + return rejected + } + rejected = isDirExcludedByFile(dir, tagFilename, header, fs, warnf) + rc.Store(dir, rejected) + return rejected +} + +func isDirExcludedByFile(dir, tagFilename, header string, fs fs.FS, warnf func(msg string, args ...interface{})) bool { + tf := fs.Join(dir, tagFilename) + _, err := fs.Lstat(tf) + if os.IsNotExist(err) { + return false + } + if err != nil { + warnf("could not access exclusion tagfile: %v", err) + return false + } + // when no signature is given, the mere presence of tf is enough reason + // to exclude filename + if len(header) == 0 { + return true + } + // From this stage, errors mean tagFilename exists but it is malformed. + // Warnings will be generated so that the user is informed that the + // intended ignore-action is not performed. + f, err := fs.OpenFile(tf, os.O_RDONLY, 0) + if err != nil { + warnf("could not open exclusion tagfile: %v", err) + return false + } + defer func() { + _ = f.Close() + }() + buf := make([]byte, len(header)) + _, err = io.ReadFull(f, buf) + // EOF is handled with a dedicated message, otherwise the warning would be too cryptic + if err == io.EOF { + warnf("invalid (too short) signature in exclusion tagfile %q\n", tf) + return false + } + if err != nil { + warnf("could not read signature from exclusion tagfile %q: %v\n", tf, err) + return false + } + if !bytes.Equal(buf, []byte(header)) { + warnf("invalid signature in exclusion tagfile %q\n", tf) + return false + } + return true +} + +// deviceMap is used to track allowed source devices for backup.
This is used to +// check for crossing mount points during backup (for --one-file-system). It +// maps the name of a source path to its device ID. +type deviceMap map[string]uint64 + +// newDeviceMap creates a new device map from the list of source paths. +func newDeviceMap(allowedSourcePaths []string, fs fs.FS) (deviceMap, error) { + deviceMap := make(map[string]uint64) + + for _, item := range allowedSourcePaths { + item, err := fs.Abs(fs.Clean(item)) + if err != nil { + return nil, err + } + + fi, err := fs.Lstat(item) + if err != nil { + return nil, err + } + + id, err := fs.DeviceID(fi) + if err != nil { + return nil, err + } + + deviceMap[item] = id + } + + if len(deviceMap) == 0 { + return nil, errors.New("zero allowed devices") + } + + return deviceMap, nil +} + +// IsAllowed returns true if the path is located on an allowed device. +func (m deviceMap) IsAllowed(item string, deviceID uint64, fs fs.FS) (bool, error) { + for dir := item; ; dir = fs.Dir(dir) { + debug.Log("item %v, test dir %v", item, dir) + + // find a parent directory that is on an allowed device (otherwise + // we would not traverse the directory at all) + allowedID, ok := m[dir] + if !ok { + if dir == fs.Dir(dir) { + // arrived at root, no allowed device found. this should not happen. + break + } + continue + } + + // if the item has a different device ID than the parent directory, + // we crossed a file system boundary + if allowedID != deviceID { + debug.Log("item %v (dir %v) on disallowed device %d", item, dir, deviceID) + return false, nil + } + + // item is on allowed device, accept it + debug.Log("item %v allowed", item) + return true, nil + } + + return false, fmt.Errorf("item %v (device ID %v) not found, deviceMap: %v", item, deviceID, m) +} + +// RejectByDevice returns a RejectFunc that rejects files which are on a +// different file system than the files/dirs in samples.
+func RejectByDevice(samples []string, filesystem fs.FS) (RejectFunc, error) { + deviceMap, err := newDeviceMap(samples, filesystem) + if err != nil { + return nil, err + } + debug.Log("allowed devices: %v\n", deviceMap) + + return func(item string, fi os.FileInfo, fs fs.FS) bool { + id, err := fs.DeviceID(fi) + if err != nil { + // This should never happen because newDeviceMap() would have + // errored out earlier. If it still does, that's a reason to panic. + panic(err) + } + + allowed, err := deviceMap.IsAllowed(fs.Clean(item), id, fs) + if err != nil { + // this should not happen + panic(fmt.Sprintf("error checking device ID of %v: %v", item, err)) + } + + if allowed { + // accept item + return false + } + + // reject everything except directories + if !fi.IsDir() { + return true + } + + // special case: make sure we keep mountpoints (directories which + // contain a mounted file system). Test this by checking if the parent + // directory would be included. + parentDir := fs.Dir(fs.Clean(item)) + + parentFI, err := fs.Lstat(parentDir) + if err != nil { + debug.Log("item %v: error running lstat() on parent directory: %v", item, err) + // if in doubt, reject + return true + } + + parentDeviceID, err := fs.DeviceID(parentFI) + if err != nil { + debug.Log("item %v: getting device ID of parent directory: %v", item, err) + // if in doubt, reject + return true + } + + parentAllowed, err := deviceMap.IsAllowed(parentDir, parentDeviceID, fs) + if err != nil { + debug.Log("item %v: error checking parent directory: %v", item, err) + // if in doubt, reject + return true + } + + if parentAllowed { + // we found a mount point, so accept the directory + return false + } + + // reject everything else + return true + }, nil +} + +func RejectBySize(maxSizeStr string) (RejectFunc, error) { + maxSize, err := ui.ParseBytes(maxSizeStr) + if err != nil { + return nil, err + } + + return func(item string, fi os.FileInfo, _ fs.FS) bool { + // directory will be ignored + if fi.IsDir() {
+ return false + } + + filesize := fi.Size() + if filesize > maxSize { + debug.Log("file %s is oversize: %d", item, filesize) + return true + } + + return false + }, nil +} diff --git a/internal/archiver/exclude_test.go b/internal/archiver/exclude_test.go new file mode 100644 index 000000000..b9f1f8cdd --- /dev/null +++ b/internal/archiver/exclude_test.go @@ -0,0 +1,259 @@ +package archiver + +import ( + "os" + "path/filepath" + "testing" + + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/test" +) + +func TestIsExcludedByFile(t *testing.T) { + const ( + tagFilename = "CACHEDIR.TAG" + header = "Signature: 8a477f597d28d172789f06886806bc55" + ) + tests := []struct { + name string + tagFile string + content string + want bool + }{ + {"NoTagfile", "", "", false}, + {"EmptyTagfile", tagFilename, "", true}, + {"UnnamedTagFile", "", header, false}, + {"WrongTagFile", "notatagfile", header, false}, + {"IncorrectSig", tagFilename, header[1:], false}, + {"ValidSig", tagFilename, header, true}, + {"ValidPlusStuff", tagFilename, header + "foo", true}, + {"ValidPlusNewlineAndStuff", tagFilename, header + "\nbar", true}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + tempDir := test.TempDir(t) + + foo := filepath.Join(tempDir, "foo") + err := os.WriteFile(foo, []byte("foo"), 0666) + if err != nil { + t.Fatalf("could not write file: %v", err) + } + if tc.tagFile != "" { + tagFile := filepath.Join(tempDir, tc.tagFile) + err = os.WriteFile(tagFile, []byte(tc.content), 0666) + if err != nil { + t.Fatalf("could not write tagfile: %v", err) + } + } + h := header + if tc.content == "" { + h = "" + } + if got := isExcludedByFile(foo, tagFilename, h, newRejectionCache(), &fs.Local{}, func(msg string, args ...interface{}) { t.Logf(msg, args...) 
}); tc.want != got { + t.Fatalf("expected %v, got %v", tc.want, got) + } + }) + } +} + +// TestMultipleIsExcludedByFile is for testing that multiple instances of +// the --exclude-if-present parameter (or the shortcut --exclude-caches) do not +// cancel each other out. It was initially written to demonstrate a bug in +// RejectIfPresent. +func TestMultipleIsExcludedByFile(t *testing.T) { + tempDir := test.TempDir(t) + + // Create some files in a temporary directory. + // Files in UPPERCASE will be used as exclusion triggers later on. + // We will test the inclusion later, so we add the expected value as + // a bool. + files := []struct { + path string + incl bool + }{ + {"42", true}, + + // everything in foodir except the NOFOO tagfile + // should not be included. + {"foodir/NOFOO", true}, + {"foodir/foo", false}, + {"foodir/foosub/underfoo", false}, + + // everything in bardir except the NOBAR tagfile + // should not be included. + {"bardir/NOBAR", true}, + {"bardir/bar", false}, + {"bardir/barsub/underbar", false}, + + // everything in bazdir should be included. + {"bazdir/baz", true}, + {"bazdir/bazsub/underbaz", true}, + } + var errs []error + for _, f := range files { + // create directories first, then the file + p := filepath.Join(tempDir, filepath.FromSlash(f.path)) + errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700)) + errs = append(errs, os.WriteFile(p, []byte(f.path), 0600)) + } + test.OKs(t, errs) // see if anything went wrong during the creation + + // create two rejection functions, one that tests for the NOFOO file + // and one for the NOBAR file + fooExclude, _ := RejectIfPresent("NOFOO", nil) + barExclude, _ := RejectIfPresent("NOBAR", nil) + + // To mock the archiver scanning walk, we create a filepath.WalkFunc + // that tests against the two rejection functions and stores + // the result in a map against which we can test later.
+ m := make(map[string]bool) + walk := func(p string, fi os.FileInfo, err error) error { + if err != nil { + return err + } + excludedByFoo := fooExclude(p, nil, &fs.Local{}) + excludedByBar := barExclude(p, nil, &fs.Local{}) + excluded := excludedByFoo || excludedByBar + // the log message helps debugging in case the test fails + t.Logf("%q: %v || %v = %v", p, excludedByFoo, excludedByBar, excluded) + m[p] = !excluded + if excluded { + return filepath.SkipDir + } + return nil + } + // walk through the temporary file and check the error + test.OK(t, filepath.Walk(tempDir, walk)) + + // compare whether the walk gave the expected values for the test cases + for _, f := range files { + p := filepath.Join(tempDir, filepath.FromSlash(f.path)) + if m[p] != f.incl { + t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p]) + } + } +} + +// TestIsExcludedByFileSize is for testing the instance of +// --exclude-larger-than parameters +func TestIsExcludedByFileSize(t *testing.T) { + tempDir := test.TempDir(t) + + // Max size of file is set to be 1k + maxSizeStr := "1k" + + // Create some files in a temporary directory. + // Files in UPPERCASE will be used as exclusion triggers later on. + // We will test the inclusion later, so we add the expected value as + // a bool. + files := []struct { + path string + size int64 + incl bool + }{ + {"42", 100, true}, + + // everything in foodir except the FOOLARGE tagfile + // should not be included. + {"foodir/FOOLARGE", 2048, false}, + {"foodir/foo", 1002, true}, + {"foodir/foosub/underfoo", 100, true}, + + // everything in bardir except the BARLARGE tagfile + // should not be included. + {"bardir/BARLARGE", 1030, false}, + {"bardir/bar", 1000, true}, + {"bardir/barsub/underbar", 500, true}, + + // everything in bazdir should be included. 
+ {"bazdir/baz", 100, true}, + {"bazdir/bazsub/underbaz", 200, true}, + } + var errs []error + for _, f := range files { + // create directories first, then the file + p := filepath.Join(tempDir, filepath.FromSlash(f.path)) + errs = append(errs, os.MkdirAll(filepath.Dir(p), 0700)) + file, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0600) + errs = append(errs, err) + if err == nil { + // create a file with given size + errs = append(errs, file.Truncate(f.size)) + } + errs = append(errs, file.Close()) + } + test.OKs(t, errs) // see if anything went wrong during the creation + + // create rejection function + sizeExclude, _ := RejectBySize(maxSizeStr) + + // To mock the archiver scanning walk, we create filepath.WalkFn + // that tests against the two rejection functions and stores + // the result in a map against we can test later. + m := make(map[string]bool) + walk := func(p string, fi os.FileInfo, err error) error { + if err != nil { + return err + } + + excluded := sizeExclude(p, fi, nil) + // the log message helps debugging in case the test fails + t.Logf("%q: dir:%t; size:%d; excluded:%v", p, fi.IsDir(), fi.Size(), excluded) + m[p] = !excluded + return nil + } + // walk through the temporary file and check the error + test.OK(t, filepath.Walk(tempDir, walk)) + + // compare whether the walk gave the expected values for the test cases + for _, f := range files { + p := filepath.Join(tempDir, filepath.FromSlash(f.path)) + if m[p] != f.incl { + t.Errorf("inclusion status of %s is wrong: want %v, got %v", f.path, f.incl, m[p]) + } + } +} + +func TestDeviceMap(t *testing.T) { + deviceMap := deviceMap{ + filepath.FromSlash("/"): 1, + filepath.FromSlash("/usr/local"): 5, + } + + var tests = []struct { + item string + deviceID uint64 + allowed bool + }{ + {"/root", 1, true}, + {"/usr", 1, true}, + + {"/proc", 2, false}, + {"/proc/1234", 2, false}, + + {"/usr", 3, false}, + {"/usr/share", 3, false}, + + {"/usr/local", 5, true}, + {"/usr/local/foobar", 5, 
true}, + + {"/usr/local/foobar/submount", 23, false}, + {"/usr/local/foobar/submount/file", 23, false}, + + {"/usr/local/foobar/outhersubmount", 1, false}, + {"/usr/local/foobar/outhersubmount/otherfile", 1, false}, + } + + for _, test := range tests { + t.Run("", func(t *testing.T) { + res, err := deviceMap.IsAllowed(filepath.FromSlash(test.item), test.deviceID, &fs.Local{}) + if err != nil { + t.Fatal(err) + } + + if res != test.allowed { + t.Fatalf("wrong result returned by IsAllowed(%v): want %v, got %v", test.item, test.allowed, res) + } + }) + } +}