backup: Add --ignore-ctime option and document change detection

This commit is contained in:
greatroar 2020-07-08 09:59:00 +02:00 committed by Leo R. Lundgren
parent 43cb26010a
commit 6bd8a2faaa
5 changed files with 174 additions and 68 deletions

View File

@ -0,0 +1,26 @@
Enhancement: Add option to let backup trust mtime without checking ctime
The backup command used to require that both ctime and mtime of a file matched
with a previously backed up version to determine that the file was unchanged.
In other words, if either ctime or mtime of the file had changed, it would be
considered changed and restic would read the file's content again to back up
the relevant (changed) parts of it.
The new option --ignore-ctime makes restic look at mtime only, such that ctime
changes for a file do not cause restic to read the file's contents again.
The check for both ctime and mtime was introduced in restic 0.9.6 to make
backups more reliable in the face of programs that reset mtime (some Unix
archivers do that), but it turned out to often be expensive because it made
restic read file contents even if only the metadata (owner, permissions) of
a file had changed. The new --ignore-ctime option lets the user restore the
0.9.5 behavior when needed. The existing --ignore-inode option already turned
off this behavior, but also removed a different check.
Please note that changes in files' metadata are still recorded, regardless of
the command line options provided to the backup command.
https://github.com/restic/restic/issues/2495
https://github.com/restic/restic/issues/2558
https://github.com/restic/restic/issues/2819
https://github.com/restic/restic/pull/2823

View File

@ -90,6 +90,7 @@ type BackupOptions struct {
TimeStamp string
WithAtime bool
IgnoreInode bool
IgnoreCtime bool
UseFsSnapshot bool
}
@ -126,6 +127,7 @@ func init() {
f.StringVar(&backupOptions.TimeStamp, "time", "", "`time` of the backup (ex. '2012-11-01 22:08:41') (default: now)")
f.BoolVar(&backupOptions.WithAtime, "with-atime", false, "store the atime for all files and directories")
f.BoolVar(&backupOptions.IgnoreInode, "ignore-inode", false, "ignore inode number changes when checking for modified files")
f.BoolVar(&backupOptions.IgnoreCtime, "ignore-ctime", false, "ignore ctime changes when checking for modified files")
if runtime.GOOS == "windows" {
f.BoolVar(&backupOptions.UseFsSnapshot, "use-fs-snapshot", false, "use filesystem snapshot where possible (currently only Windows VSS)")
}
@ -665,7 +667,15 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, term *termstatus.Termina
arch.CompleteItem = p.CompleteItem
arch.StartFile = p.StartFile
arch.CompleteBlob = p.CompleteBlob
arch.IgnoreInode = opts.IgnoreInode
if opts.IgnoreInode {
// --ignore-inode implies --ignore-ctime: on FUSE, the ctime is not
// reliable either.
arch.ChangeIgnoreFlags |= archiver.ChangeIgnoreCtime | archiver.ChangeIgnoreInode
}
if opts.IgnoreCtime {
arch.ChangeIgnoreFlags |= archiver.ChangeIgnoreCtime
}
if parentSnapshotID == nil {
parentSnapshotID = &restic.ID{}

View File

@ -131,24 +131,62 @@ restic encounters:
In fact, several hosts may use the same repository to back up directories
and files, leading to greater de-duplication.
Please be aware that when you backup different directories (or the
directories to be saved have a variable name component like a
time/date), restic always needs to read all files and only afterwards
can compute which parts of the files need to be saved. When you backup
the same directory again (maybe with new or changed files) restic will
find the old snapshot in the repo and by default only reads those files
that are new or have been modified since the last snapshot. This is
decided based on the following attributes of the file in the file system:
* Type (file, symlink, or directory?)
* Modification time
* Size
* Inode number (internal number used to reference a file in a file system)
Now is a good time to run ``restic check`` to verify that all data
is properly stored in the repository. You should run this command regularly
to make sure the internal structure of the repository is free of errors.
File change detection
*********************
When restic encounters a file that has already been backed up, whether in the
current backup or a previous one, it makes sure the file's contents are only
stored once in the repository. To do so, it normally has to scan the entire
contents of every file. Because this can be very expensive, restic also uses a
change detection rule based on file metadata to determine whether a file is
likely unchanged since a previous backup. If it is, the file is not scanned
again.
Change detection is only performed for regular files (not special files,
symlinks or directories) that have the exact same path as they did in a
previous backup of the same location. If a file or one of its containing
directories was renamed, it is considered a different file and its entire
contents will be scanned again.
Metadata changes (permissions, ownership, etc.) are always included in the
backup, even if file contents are considered unchanged.
On **Unix** (including Linux and Mac), given that a file lives at the same
location as a file in a previous backup, the following file metadata
attributes have to match for its contents to be presumed unchanged:
* Modification timestamp (mtime).
* Metadata change timestamp (ctime).
* File size.
* Inode number (internal number used to reference a file in a filesystem).
The reason for requiring both mtime and ctime to match is that Unix programs
can freely change mtime (and some do). In such cases, a ctime change may be
the only hint that a file did change.
The following ``restic backup`` command line flags modify the change detection
rules:
* ``--force``: turn off change detection and rescan all files.
* ``--ignore-ctime``: require mtime to match, but allow ctime to differ.
* ``--ignore-inode``: require mtime to match, but allow inode number
and ctime to differ.
The option ``--ignore-inode`` exists to support FUSE-based filesystems and
pCloud, which do not assign stable inodes to files.
Note that the device ID of the containing mount point is never taken into
account. Device numbers are not stable for removable devices and ZFS snapshots.
If you want to force a re-scan in such a case, you can change the mount point.
On **Windows**, a file is considered unchanged when its path, size and
modification time match, and only ``--force`` has any effect. The other options
are recognized but ignored.
Excluding Files
***************
@ -372,10 +410,6 @@ written, and the next backup needs to write new metadata again. If you really
want to save the access time for files and directories, you can pass the
``--with-atime`` option to the ``backup`` command.
In filesystems that do not support inode consistency, like FUSE-based ones and pCloud, it is
possible to ignore inode on changed files comparison by passing ``--ignore-inode`` to
``backup`` command.
Reading data from stdin
***********************

View File

@ -78,10 +78,18 @@ type Archiver struct {
// WithAtime configures if the access time for files and directories should
// be saved. Enabling it may result in much metadata, so it's off by
// default.
WithAtime bool
IgnoreInode bool
WithAtime bool
// Flags controlling change detection. See doc/040_backup.rst for details.
ChangeIgnoreFlags uint
}
// Flags for the ChangeIgnoreFlags bitfield.
//
// Each flag occupies its own bit (1 << iota), so several flags can be
// combined with the | operator before being stored in ChangeIgnoreFlags.
const (
// ChangeIgnoreCtime disables the ctime comparison during change detection.
ChangeIgnoreCtime = 1 << iota
// ChangeIgnoreInode disables the inode number comparison during change
// detection.
ChangeIgnoreInode
)
// Options is used to configure the archiver.
type Options struct {
// FileReadConcurrency sets how many files are read in concurrently. If
@ -134,7 +142,6 @@ func New(repo restic.Repository, fs fs.FS, opts Options) *Archiver {
CompleteItem: func(string, *restic.Node, *restic.Node, ItemStats, time.Duration) {},
StartFile: func(string) {},
CompleteBlob: func(string, uint64) {},
IgnoreInode: false,
}
return arch
@ -379,7 +386,7 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
// check if the file has not changed before performing a fopen operation (more expensive, especially
// in network filesystems)
if previous != nil && !fileChanged(fi, previous, arch.IgnoreInode) {
if previous != nil && !fileChanged(fi, previous, arch.ChangeIgnoreFlags) {
if arch.allBlobsPresent(previous) {
debug.Log("%v hasn't changed, using old list of blobs", target)
arch.CompleteItem(snPath, previous, previous, ItemStats{}, time.Since(start))
@ -481,36 +488,30 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous
return fn, false, nil
}
// fileChanged returns true if the file's content has changed since the node
// was created.
func fileChanged(fi os.FileInfo, node *restic.Node, ignoreInode bool) bool {
if node == nil {
// fileChanged tries to detect whether a file's content has changed compared
// to the contents of node, which describes the same path in the parent backup.
// It should only be run for regular files.
func fileChanged(fi os.FileInfo, node *restic.Node, ignoreFlags uint) bool {
switch {
case node == nil:
return true
case node.Type != "file":
// We're only called for regular files, so this is a type change.
return true
case uint64(fi.Size()) != node.Size:
return true
case !fi.ModTime().Equal(node.ModTime):
return true
}
// check type change
if node.Type != "file" {
return true
}
checkCtime := ignoreFlags&ChangeIgnoreCtime == 0
checkInode := ignoreFlags&ChangeIgnoreInode == 0
// check modification timestamp
if !fi.ModTime().Equal(node.ModTime) {
return true
}
// check status change timestamp
extFI := fs.ExtendedStat(fi)
if !ignoreInode && !extFI.ChangeTime.Equal(node.ChangeTime) {
switch {
case checkCtime && !extFI.ChangeTime.Equal(node.ChangeTime):
return true
}
// check size
if uint64(fi.Size()) != node.Size || uint64(extFI.Size) != node.Size {
return true
}
// check inode
if !ignoreInode && node.Inode != extFI.Inode {
case checkInode && node.Inode != extFI.Inode:
return true
}

View File

@ -505,6 +505,18 @@ func save(t testing.TB, filename string, data []byte) {
}
}
// chmodTwice changes the permissions of name to 0700 and then, after a pause
// long enough for timestamps to advance, to 0600. Any chmod error is reported
// through restictest.OK.
func chmodTwice(t testing.TB, name string) {
	// According to POSIX, ctime is updated "even if the file status does not
	// change". Switching between two distinct modes guarantees a real status
	// change anyway, in case a platform deviates from that.
	restictest.OK(t, os.Chmod(name, 0700))

	sleep()

	restictest.OK(t, os.Chmod(name, 0600))
}
func lstat(t testing.TB, name string) os.FileInfo {
fi, err := os.Lstat(name)
if err != nil {
@ -533,6 +545,13 @@ func remove(t testing.TB, filename string) {
}
}
// rename moves oldname to newname and aborts the running test via t.Fatal if
// the operation fails.
func rename(t testing.TB, oldname, newname string) {
	if err := os.Rename(oldname, newname); err != nil {
		t.Fatal(err)
	}
}
func nodeFromFI(t testing.TB, filename string, fi os.FileInfo) *restic.Node {
node, err := restic.NodeFromFileInfo(filename, fi)
if err != nil {
@ -542,26 +561,26 @@ func nodeFromFI(t testing.TB, filename string, fi os.FileInfo) *restic.Node {
return node
}
// sleep pauses the calling goroutine for long enough that a subsequent file
// system operation receives a different timestamp.
func sleep() {
	if runtime.GOOS == "darwin" {
		// Older Darwin file systems only store timestamps with one second
		// granularity, so wait comfortably longer than that.
		time.Sleep(1500 * time.Millisecond)
		return
	}

	time.Sleep(50 * time.Millisecond)
}
func TestFileChanged(t *testing.T) {
var defaultContent = []byte("foobar")
var d = 50 * time.Millisecond
if runtime.GOOS == "darwin" {
// on older darwin instances the file system only supports one second
// granularity
d = time.Second
}
sleep := func() {
time.Sleep(d)
}
var tests = []struct {
Name string
SkipForWindows bool
Content []byte
Modify func(t testing.TB, filename string)
IgnoreInode bool
ChangeIgnore uint
SameFile bool
}{
{
@ -618,17 +637,33 @@ func TestFileChanged(t *testing.T) {
save(t, filename, defaultContent)
},
},
{
Name: "ctime-change",
Modify: chmodTwice,
SameFile: false,
SkipForWindows: true, // No ctime on Windows, so this test would fail.
},
{
Name: "ignore-ctime-change",
Modify: chmodTwice,
ChangeIgnore: ChangeIgnoreCtime,
SameFile: true,
SkipForWindows: true, // No ctime on Windows, so this test is meaningless.
},
{
Name: "ignore-inode",
Modify: func(t testing.TB, filename string) {
fi := lstat(t, filename)
remove(t, filename)
sleep()
// First create the new file, then remove the old one,
// so that the old file retains its inode number.
tempname := filename + ".old"
rename(t, filename, tempname)
save(t, filename, defaultContent)
remove(t, tempname)
setTimestamp(t, filename, fi.ModTime(), fi.ModTime())
},
IgnoreInode: true,
SameFile: true,
ChangeIgnore: ChangeIgnoreCtime | ChangeIgnoreInode,
SameFile: true,
},
}
@ -651,7 +686,7 @@ func TestFileChanged(t *testing.T) {
fiBefore := lstat(t, filename)
node := nodeFromFI(t, filename, fiBefore)
if fileChanged(fiBefore, node, false) {
if fileChanged(fiBefore, node, 0) {
t.Fatalf("unchanged file detected as changed")
}
@ -661,12 +696,12 @@ func TestFileChanged(t *testing.T) {
if test.SameFile {
// file should be detected as unchanged
if fileChanged(fiAfter, node, test.IgnoreInode) {
if fileChanged(fiAfter, node, test.ChangeIgnore) {
t.Fatalf("unmodified file detected as changed")
}
} else {
// file should be detected as changed
if !fileChanged(fiAfter, node, test.IgnoreInode) && !test.SameFile {
if !fileChanged(fiAfter, node, test.ChangeIgnore) && !test.SameFile {
t.Fatalf("modified file detected as unchanged")
}
}
@ -684,7 +719,7 @@ func TestFilChangedSpecialCases(t *testing.T) {
t.Run("nil-node", func(t *testing.T) {
fi := lstat(t, filename)
if !fileChanged(fi, nil, false) {
if !fileChanged(fi, nil, 0) {
t.Fatal("nil node detected as unchanged")
}
})
@ -693,7 +728,7 @@ func TestFilChangedSpecialCases(t *testing.T) {
fi := lstat(t, filename)
node := nodeFromFI(t, filename, fi)
node.Type = "symlink"
if !fileChanged(fi, node, false) {
if !fileChanged(fi, node, 0) {
t.Fatal("node with changed type detected as unchanged")
}
})