2
2
mirror of https://github.com/octoleo/restic.git synced 2025-01-23 15:18:31 +00:00

restore: only restore changed file parts

For files that should be overwritten, first check whether their content
matches the snapshot and only restore diverging parts.
This commit is contained in:
Michael Eischer 2024-05-31 17:06:08 +02:00
parent 62e0e5e8d8
commit a66658b4c9
3 changed files with 123 additions and 51 deletions

View File

@ -26,6 +26,7 @@ type fileInfo struct {
size int64 size int64
location string // file on local filesystem relative to restorer basedir location string // file on local filesystem relative to restorer basedir
blobs interface{} // blobs of the file blobs interface{} // blobs of the file
state *fileState
} }
type fileBlobInfo struct { type fileBlobInfo struct {
@ -80,25 +81,25 @@ func newFileRestorer(dst string,
} }
} }
func (r *fileRestorer) addFile(location string, content restic.IDs, size int64) { func (r *fileRestorer) addFile(location string, content restic.IDs, size int64, state *fileState) {
r.files = append(r.files, &fileInfo{location: location, blobs: content, size: size}) r.files = append(r.files, &fileInfo{location: location, blobs: content, size: size, state: state})
} }
func (r *fileRestorer) targetPath(location string) string { func (r *fileRestorer) targetPath(location string) string {
return filepath.Join(r.dst, location) return filepath.Join(r.dst, location)
} }
func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID, packBlob restic.Blob)) error { func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID, packBlob restic.Blob, idx int)) error {
if len(blobIDs) == 0 { if len(blobIDs) == 0 {
return nil return nil
} }
for _, blobID := range blobIDs { for i, blobID := range blobIDs {
packs := r.idx(restic.DataBlob, blobID) packs := r.idx(restic.DataBlob, blobID)
if len(packs) == 0 { if len(packs) == 0 {
return errors.Errorf("Unknown blob %s", blobID.String()) return errors.Errorf("Unknown blob %s", blobID.String())
} }
fn(packs[0].PackID, packs[0].Blob) fn(packs[0].PackID, packs[0].Blob, i)
} }
return nil return nil
@ -128,8 +129,8 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
packsMap = make(map[restic.ID][]fileBlobInfo) packsMap = make(map[restic.ID][]fileBlobInfo)
} }
fileOffset := int64(0) fileOffset := int64(0)
err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) { err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int) {
if largeFile { if largeFile && !file.state.HasMatchingBlob(idx) {
packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset}) packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
fileOffset += int64(blob.DataLength()) fileOffset += int64(blob.DataLength())
} }
@ -232,8 +233,8 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
} }
if fileBlobs, ok := file.blobs.(restic.IDs); ok { if fileBlobs, ok := file.blobs.(restic.IDs); ok {
fileOffset := int64(0) fileOffset := int64(0)
err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) { err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob, idx int) {
if packID.Equal(pack.id) { if packID.Equal(pack.id) && !file.state.HasMatchingBlob(idx) {
addBlob(blob, fileOffset) addBlob(blob, fileOffset)
} }
fileOffset += int64(blob.DataLength()) fileOffset += int64(blob.DataLength())

View File

@ -40,9 +40,8 @@ func newFilesWriter(count int) *filesWriter {
} }
func createFile(path string, createSize int64, sparse bool) (*os.File, error) { func createFile(path string, createSize int64, sparse bool) (*os.File, error) {
var f *os.File f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0600)
var err error if err != nil {
if f, err = os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600); err != nil {
if !fs.IsAccessDenied(err) { if !fs.IsAccessDenied(err) {
return nil, err return nil, err
} }
@ -54,19 +53,31 @@ func createFile(path string, createSize int64, sparse bool) (*os.File, error) {
if err = fs.ResetPermissions(path); err != nil { if err = fs.ResetPermissions(path); err != nil {
return nil, err return nil, err
} }
if f, err = os.OpenFile(path, os.O_TRUNC|os.O_WRONLY, 0600); err != nil { if f, err = os.OpenFile(path, os.O_WRONLY, 0600); err != nil {
return nil, err return nil, err
} }
} }
if createSize > 0 { if sparse {
if sparse { err = truncateSparse(f, createSize)
err = truncateSparse(f, createSize) if err != nil {
_ = f.Close()
return nil, err
}
} else {
info, err := f.Stat()
if err != nil {
_ = f.Close()
return nil, err
}
if info.Size() > createSize {
// file is too long, must shorten it
err = f.Truncate(createSize)
if err != nil { if err != nil {
_ = f.Close() _ = f.Close()
return nil, err return nil, err
} }
} else { } else if createSize > 0 {
err := fs.PreallocateFile(f, createSize) err := fs.PreallocateFile(f, createSize)
if err != nil { if err != nil {
// Just log the preallocate error but don't let it cause the restore process to fail. // Just log the preallocate error but don't let it cause the restore process to fail.
@ -78,7 +89,7 @@ func createFile(path string, createSize int64, sparse bool) (*os.File, error) {
} }
} }
} }
return f, err return f, nil
} }
func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64, sparse bool) error { func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64, sparse bool) error {

View File

@ -3,6 +3,7 @@ package restorer
import ( import (
"context" "context"
"fmt" "fmt"
"io"
"os" "os"
"path/filepath" "path/filepath"
"sync/atomic" "sync/atomic"
@ -22,7 +23,7 @@ type Restorer struct {
sn *restic.Snapshot sn *restic.Snapshot
opts Options opts Options
fileList map[string]struct{} fileList map[string]bool
Error func(location string, err error) error Error func(location string, err error) error
Warn func(message string) Warn func(message string)
@ -86,7 +87,7 @@ func NewRestorer(repo restic.Repository, sn *restic.Snapshot, opts Options) *Res
r := &Restorer{ r := &Restorer{
repo: repo, repo: repo,
opts: opts, opts: opts,
fileList: make(map[string]struct{}), fileList: make(map[string]bool),
Error: restorerAbortOnAllErrors, Error: restorerAbortOnAllErrors,
SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true }, SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true },
sn: sn, sn: sn,
@ -266,6 +267,8 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
debug.Log("first pass for %q", dst) debug.Log("first pass for %q", dst)
var buf []byte
// first tree pass: create directories and collect all files to restore // first tree pass: create directories and collect all files to restore
_, err = res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ _, err = res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
enterDir: func(_ *restic.Node, target, location string) error { enterDir: func(_ *restic.Node, target, location string) error {
@ -299,12 +302,17 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
idx.Add(node.Inode, node.DeviceID, location) idx.Add(node.Inode, node.DeviceID, location)
} }
return res.withOverwriteCheck(node, target, false, func() error { buf, err = res.withOverwriteCheck(node, target, false, buf, func(updateMetadataOnly bool, matches *fileState) error {
res.opts.Progress.AddFile(node.Size) if updateMetadataOnly {
filerestorer.addFile(location, node.Content, int64(node.Size)) res.opts.Progress.AddSkippedFile(node.Size)
res.trackFile(location) } else {
res.opts.Progress.AddFile(node.Size)
filerestorer.addFile(location, node.Content, int64(node.Size), matches)
}
res.trackFile(location, updateMetadataOnly)
return nil return nil
}) })
return err
}, },
}) })
if err != nil { if err != nil {
@ -323,18 +331,20 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
visitNode: func(node *restic.Node, target, location string) error { visitNode: func(node *restic.Node, target, location string) error {
debug.Log("second pass, visitNode: restore node %q", location) debug.Log("second pass, visitNode: restore node %q", location)
if node.Type != "file" { if node.Type != "file" {
return res.withOverwriteCheck(node, target, false, func() error { _, err := res.withOverwriteCheck(node, target, false, nil, func(_ bool, _ *fileState) error {
return res.restoreNodeTo(ctx, node, target, location) return res.restoreNodeTo(ctx, node, target, location)
}) })
return err
} }
if idx.Has(node.Inode, node.DeviceID) && idx.Value(node.Inode, node.DeviceID) != location { if idx.Has(node.Inode, node.DeviceID) && idx.Value(node.Inode, node.DeviceID) != location {
return res.withOverwriteCheck(node, target, true, func() error { _, err := res.withOverwriteCheck(node, target, true, nil, func(_ bool, _ *fileState) error {
return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.Value(node.Inode, node.DeviceID)), target, location) return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.Value(node.Inode, node.DeviceID)), target, location)
}) })
return err
} }
if res.hasRestoredFile(location) { if _, ok := res.hasRestoredFile(location); ok {
return res.restoreNodeMetadataTo(node, target, location) return res.restoreNodeMetadataTo(node, target, location)
} }
// don't touch skipped files // don't touch skipped files
@ -351,28 +361,38 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
return err return err
} }
func (res *Restorer) trackFile(location string) { func (res *Restorer) trackFile(location string, metadataOnly bool) {
res.fileList[location] = struct{}{} res.fileList[location] = metadataOnly
} }
func (res *Restorer) hasRestoredFile(location string) bool { func (res *Restorer) hasRestoredFile(location string) (metadataOnly bool, ok bool) {
_, ok := res.fileList[location] metadataOnly, ok = res.fileList[location]
return ok return metadataOnly, ok
} }
func (res *Restorer) withOverwriteCheck(node *restic.Node, target string, isHardlink bool, cb func() error) error { func (res *Restorer) withOverwriteCheck(node *restic.Node, target string, isHardlink bool, buf []byte, cb func(updateMetadataOnly bool, matches *fileState) error) ([]byte, error) {
overwrite, err := shouldOverwrite(res.opts.Overwrite, node, target) overwrite, err := shouldOverwrite(res.opts.Overwrite, node, target)
if err != nil { if err != nil {
return err return buf, err
} else if !overwrite { } else if !overwrite {
size := node.Size size := node.Size
if isHardlink { if isHardlink {
size = 0 size = 0
} }
res.opts.Progress.AddSkippedFile(size) res.opts.Progress.AddSkippedFile(size)
return nil return buf, nil
} }
return cb()
var matches *fileState
updateMetadataOnly := false
if node.Type == "file" && !isHardlink {
// if a file fails to verify, then matches is nil which results in restoring from scratch
matches, buf, _ = res.verifyFile(target, node, false, buf)
// skip files that are already correct completely
updateMetadataOnly = !matches.NeedsRestore()
}
return buf, cb(updateMetadataOnly, matches)
} }
func shouldOverwrite(overwrite OverwriteBehavior, node *restic.Node, destination string) (bool, error) { func shouldOverwrite(overwrite OverwriteBehavior, node *restic.Node, destination string) (bool, error) {
@ -429,7 +449,10 @@ func (res *Restorer) VerifyFiles(ctx context.Context, dst string) (int, error) {
_, err := res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{ _, err := res.traverseTree(ctx, dst, string(filepath.Separator), *res.sn.Tree, treeVisitor{
visitNode: func(node *restic.Node, target, location string) error { visitNode: func(node *restic.Node, target, location string) error {
if node.Type != "file" || !res.hasRestoredFile(location) { if node.Type != "file" {
return nil
}
if metadataOnly, ok := res.hasRestoredFile(location); !ok || metadataOnly {
return nil return nil
} }
select { select {
@ -447,7 +470,7 @@ func (res *Restorer) VerifyFiles(ctx context.Context, dst string) (int, error) {
g.Go(func() (err error) { g.Go(func() (err error) {
var buf []byte var buf []byte
for job := range work { for job := range work {
buf, err = res.verifyFile(job.path, job.node, buf) _, buf, err = res.verifyFile(job.path, job.node, true, buf)
if err != nil { if err != nil {
err = res.Error(job.path, err) err = res.Error(job.path, err)
} }
@ -463,36 +486,68 @@ func (res *Restorer) VerifyFiles(ctx context.Context, dst string) (int, error) {
return int(nchecked), g.Wait() return int(nchecked), g.Wait()
} }
type fileState struct {
blobMatches []bool
sizeMatches bool
}
func (s *fileState) NeedsRestore() bool {
if s == nil {
return true
}
if !s.sizeMatches {
return true
}
for _, match := range s.blobMatches {
if !match {
return true
}
}
return false
}
func (s *fileState) HasMatchingBlob(i int) bool {
if s == nil || s.blobMatches == nil {
return false
}
return i < len(s.blobMatches) && s.blobMatches[i]
}
// Verify that the file target has the contents of node. // Verify that the file target has the contents of node.
// //
// buf and the first return value are scratch space, passed around for reuse. // buf and the first return value are scratch space, passed around for reuse.
// Reusing buffers prevents the verifier goroutines allocating all of RAM and // Reusing buffers prevents the verifier goroutines allocating all of RAM and
// flushing the filesystem cache (at least on Linux). // flushing the filesystem cache (at least on Linux).
func (res *Restorer) verifyFile(target string, node *restic.Node, buf []byte) ([]byte, error) { func (res *Restorer) verifyFile(target string, node *restic.Node, failFast bool, buf []byte) (*fileState, []byte, error) {
f, err := os.OpenFile(target, fs.O_RDONLY|fs.O_NOFOLLOW, 0) f, err := os.OpenFile(target, fs.O_RDONLY|fs.O_NOFOLLOW, 0)
if err != nil { if err != nil {
return buf, err return nil, buf, err
} }
defer func() { defer func() {
_ = f.Close() _ = f.Close()
}() }()
fi, err := f.Stat() fi, err := f.Stat()
sizeMatches := true
switch { switch {
case err != nil: case err != nil:
return buf, err return nil, buf, err
case !fi.Mode().IsRegular(): case !fi.Mode().IsRegular():
return buf, errors.Errorf("Expected %s to be a regular file", target) return nil, buf, errors.Errorf("Expected %s to be a regular file", target)
case int64(node.Size) != fi.Size(): case int64(node.Size) != fi.Size():
return buf, errors.Errorf("Invalid file size for %s: expected %d, got %d", if failFast {
target, node.Size, fi.Size()) return nil, buf, errors.Errorf("Invalid file size for %s: expected %d, got %d",
target, node.Size, fi.Size())
}
sizeMatches = false
} }
matches := make([]bool, len(node.Content))
var offset int64 var offset int64
for _, blobID := range node.Content { for i, blobID := range node.Content {
length, found := res.repo.LookupBlobSize(restic.DataBlob, blobID) length, found := res.repo.LookupBlobSize(restic.DataBlob, blobID)
if !found { if !found {
return buf, errors.Errorf("Unable to fetch blob %s", blobID) return nil, buf, errors.Errorf("Unable to fetch blob %s", blobID)
} }
if length > uint(cap(buf)) { if length > uint(cap(buf)) {
@ -501,16 +556,21 @@ func (res *Restorer) verifyFile(target string, node *restic.Node, buf []byte) ([
buf = buf[:length] buf = buf[:length]
_, err = f.ReadAt(buf, offset) _, err = f.ReadAt(buf, offset)
if err != nil { if err == io.EOF && !failFast {
return buf, err sizeMatches = false
break
} }
if !blobID.Equal(restic.Hash(buf)) { if err != nil {
return buf, errors.Errorf( return nil, buf, err
}
matches[i] = blobID.Equal(restic.Hash(buf))
if failFast && !matches[i] {
return nil, buf, errors.Errorf(
"Unexpected content in %s, starting at offset %d", "Unexpected content in %s, starting at offset %d",
target, offset) target, offset)
} }
offset += int64(length) offset += int64(length)
} }
return buf, nil return &fileState{matches, sizeMatches}, buf, nil
} }