2
2
mirror of https://github.com/octoleo/restic.git synced 2024-11-23 05:12:10 +00:00
restic/internal/archiver/scanner.go
Alexander Neumann 5c617859ab backup/scanner: Fix total size for overlapping targets
Before, the scanner would count files twice if they were included in the
list of backup targets twice, e.g. `restic backup foo foo/bar` would
count the file `foo/bar` twice.

This commit uses the tree structure from the archiver to run the
scanner, so both parts see the same files.
2021-01-29 11:31:36 +01:00

147 lines
3.3 KiB
Go

package archiver
import (
"context"
"os"
"path/filepath"
"sort"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/fs"
)
// Scanner walks a set of targets and reports cumulative statistics about the
// files and directories it encounters. Its behavior is customized through the
// callback fields below.
type Scanner struct {
	// FS is the filesystem used to resolve absolute paths, lstat items and
	// list directories.
	FS fs.FS

	// SelectByName is called with an item's path before the item is lstat'ed;
	// when it returns false the item is skipped.
	SelectByName SelectByNameFunc

	// Select is called with an item's path and file info after a successful
	// lstat; when it returns false the item is skipped.
	Select SelectFunc

	// Error is called when lstat'ing an item or listing a directory fails.
	// The error it returns is handed back to the caller of the scan.
	Error ErrorFunc

	// Result is called with the running totals after each item has been
	// counted, and once more with an empty item name when Scan finishes.
	Result func(item string, s ScanStats)
}
// NewScanner returns a Scanner operating on fs. The callbacks start out with
// permissive defaults: both selectors accept every item, Error returns the
// error it was given unchanged, and Result does nothing.
func NewScanner(fs fs.FS) *Scanner {
	acceptName := func(item string) bool { return true }
	acceptInfo := func(item string, fi os.FileInfo) bool { return true }
	passError := func(item string, fi os.FileInfo, err error) error { return err }
	ignoreResult := func(item string, s ScanStats) {}

	sc := &Scanner{FS: fs}
	sc.SelectByName = acceptName
	sc.Select = acceptInfo
	sc.Error = passError
	sc.Result = ignoreResult
	return sc
}
// ScanStats holds the counters accumulated while scanning.
type ScanStats struct {
	Files  uint   // number of regular files counted
	Dirs   uint   // number of directories counted
	Others uint   // number of items that are neither regular files nor directories
	Bytes  uint64 // sum of the sizes of all counted regular files
}
// scanTree adds the statistics for everything below tree to stats and returns
// the updated totals. Inner nodes are descended into recursively; leaf nodes
// are resolved to an absolute path and traversed in the file system. On error
// an empty ScanStats is returned together with the error. When the context is
// cancelled the totals gathered so far are returned without an error.
func (s *Scanner) scanTree(ctx context.Context, stats ScanStats, tree Tree) (ScanStats, error) {
	if !tree.Leaf() {
		// inner node: visit the children in the order given by NodeNames
		for _, child := range tree.NodeNames() {
			updated, err := s.scanTree(ctx, stats, tree.Nodes[child])
			if err != nil {
				return ScanStats{}, err
			}
			stats = updated

			// stop descending once the context has been cancelled
			if ctx.Err() != nil {
				return stats, nil
			}
		}
		return stats, nil
	}

	// leaf node: traverse the corresponding path in the file system
	absPath, err := s.FS.Abs(tree.Path)
	if err != nil {
		return ScanStats{}, err
	}

	total, err := s.scan(ctx, stats, absPath)
	if err != nil {
		return ScanStats{}, err
	}
	return total, nil
}
// Scan traverses the targets. The function Result is called with the running
// totals for each item that is counted and, after the traversal has finished,
// one final time with an empty item name and the overall totals. Scan itself
// only returns an error; the statistics are delivered through Result.
func (s *Scanner) Scan(ctx context.Context, targets []string) error {
	debug.Log("start scan for %v", targets)

	resolved, err := resolveRelativeTargets(s.FS, targets)
	if err != nil {
		return err
	}
	debug.Log("clean targets %v", resolved)

	// build the same tree representation the archiver uses, so both see the
	// same set of files
	root, err := NewTree(s.FS, resolved)
	if err != nil {
		return err
	}

	total, err := s.scanTree(ctx, ScanStats{}, *root)
	if err != nil {
		return err
	}

	s.Result("", total)
	debug.Log("result: %+v", total)

	return nil
}
// scan adds the statistics for target, and recursively for everything below
// it if it is a directory, to stats and returns the updated totals.
//
// Nothing is counted and Result is not called when the context is cancelled
// or when the item is rejected by SelectByName or Select. Errors from lstat
// or from listing a directory are passed to Error, and whatever Error returns
// becomes the error result. After an item has been counted, Result is called
// with the item's path and the running totals; for a directory this happens
// after all of its entries have been processed.
func (s *Scanner) scan(ctx context.Context, stats ScanStats, target string) (ScanStats, error) {
	// bail out on cancellation; otherwise filter by path first so that
	// excluded items never cost an lstat call
	if ctx.Err() != nil || !s.SelectByName(target) {
		return stats, nil
	}

	// get file information
	info, statErr := s.FS.Lstat(target)
	if statErr != nil {
		return stats, s.Error(target, info, statErr)
	}

	// apply the filters that need the file information
	if !s.Select(target, info) {
		return stats, nil
	}

	switch mode := info.Mode(); {
	case mode.IsRegular():
		stats.Files++
		stats.Bytes += uint64(info.Size())

	case mode.IsDir():
		entries, err := readdirnames(s.FS, target, fs.O_NOFOLLOW)
		if err != nil {
			return stats, s.Error(target, info, err)
		}

		// process the entries in sorted order
		sort.Strings(entries)
		for _, entry := range entries {
			stats, err = s.scan(ctx, stats, filepath.Join(target, entry))
			if err != nil {
				return stats, err
			}
		}

		// the directory itself is counted after its contents
		stats.Dirs++

	default:
		stats.Others++
	}

	s.Result(target, stats)
	return stats, nil
}