diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go
index 8b7ee4353..aa943e11b 100644
--- a/internal/restorer/fileswriter.go
+++ b/internal/restorer/fileswriter.go
@@ -19,15 +19,19 @@ type filesWriter struct {
 
 type filesWriterBucket struct {
 	lock  sync.Mutex
-	files map[string]*os.File
-	users map[string]int
+	files map[string]*partialFile
+}
+
+type partialFile struct {
+	*os.File
+	size  int64 // File size, tracked for sparse writes (not on Windows).
+	users int   // Reference count.
 }
 
 func newFilesWriter(count int) *filesWriter {
 	buckets := make([]filesWriterBucket, count)
 	for b := 0; b < count; b++ {
-		buckets[b].files = make(map[string]*os.File)
-		buckets[b].users = make(map[string]int)
+		buckets[b].files = make(map[string]*partialFile)
 	}
 	return &filesWriter{
 		buckets: buckets,
@@ -37,12 +41,12 @@ func newFilesWriter(count int) *filesWriter {
 func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error {
 	bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))]
 
-	acquireWriter := func() (*os.File, error) {
+	acquireWriter := func() (*partialFile, error) {
 		bucket.lock.Lock()
 		defer bucket.lock.Unlock()
 
 		if wr, ok := bucket.files[path]; ok {
-			bucket.users[path]++
+			bucket.files[path].users++
 			return wr, nil
 		}
 
@@ -53,16 +57,24 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
 			flags = os.O_WRONLY
 		}
 
-		wr, err := os.OpenFile(path, flags, 0600)
+		f, err := os.OpenFile(path, flags, 0600)
 		if err != nil {
 			return nil, err
 		}
+		wr := &partialFile{File: f, users: 1}
+		if createSize < 0 {
+			info, err := f.Stat()
+			if err != nil {
+				// Don't leak the file handle on the error path.
+				_ = f.Close()
+				return nil, err
+			}
+			wr.size = info.Size()
+		}
 
 		bucket.files[path] = wr
-		bucket.users[path] = 1
 
 		if createSize >= 0 {
-			err := preallocateFile(wr, createSize)
+			err := preallocateFile(wr.File, createSize)
 			if err != nil {
 				// Just log the preallocate error but don't let it cause the restore process to fail.
 				// Preallocate might return an error if the filesystem (implementation) does not
@@ -76,16 +88,15 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
 		return wr, nil
 	}
 
-	releaseWriter := func(wr *os.File) error {
+	releaseWriter := func(wr *partialFile) error {
 		bucket.lock.Lock()
 		defer bucket.lock.Unlock()
 
-		if bucket.users[path] == 1 {
+		if bucket.files[path].users == 1 {
 			delete(bucket.files, path)
-			delete(bucket.users, path)
 			return wr.Close()
 		}
-		bucket.users[path]--
+		bucket.files[path].users--
 		return nil
 	}
diff --git a/internal/restorer/fileswriter_test.go b/internal/restorer/fileswriter_test.go
index a6b7e011b..f725be91c 100644
--- a/internal/restorer/fileswriter_test.go
+++ b/internal/restorer/fileswriter_test.go
@@ -18,19 +18,15 @@ func TestFilesWriterBasic(t *testing.T) {
 
 	rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	buf, err := ioutil.ReadFile(f1)
 	rtest.OK(t, err)
diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go
index 13e318c98..52216088f 100644
--- a/internal/restorer/restorer_unix_test.go
+++ b/internal/restorer/restorer_unix_test.go
@@ -4,12 +4,18 @@ package restorer
 
 import (
+	"bytes"
 	"context"
+	"io/ioutil"
+	"math"
+	"math/rand"
 	"os"
 	"path/filepath"
 	"syscall"
 	"testing"
 
+	"github.com/restic/restic/internal/archiver"
+	"github.com/restic/restic/internal/fs"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
 	rtest "github.com/restic/restic/internal/test"
@@ -60,3 +66,86 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
 		rtest.Equals(t, s1.Ino, s2.Ino)
 	}
 }
+
+func TestRestorerSparseFiles(t *testing.T) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	var zeros [1<<20 + 13]byte
+
+	target := &fs.Reader{
+		Mode:       0600,
+		Name:       "/zeros",
+		ReadCloser: ioutil.NopCloser(bytes.NewReader(zeros[:])),
+	}
+	sc := archiver.NewScanner(target)
+	err := sc.Scan(context.TODO(), []string{"/zeros"})
+	rtest.OK(t, err)
+
+	arch := archiver.New(repo, target, archiver.Options{})
+	_, id, err := arch.Snapshot(context.Background(), []string{"/zeros"},
+		archiver.SnapshotOptions{})
+	rtest.OK(t, err)
+
+	res, err := NewRestorer(repo, id)
+	rtest.OK(t, err)
+
+	tempdir, cleanup := rtest.TempDir(t)
+	defer cleanup()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	err = res.RestoreTo(ctx, tempdir)
+	rtest.OK(t, err)
+
+	filename := filepath.Join(tempdir, "zeros")
+	content, err := ioutil.ReadFile(filename)
+	rtest.OK(t, err)
+
+	rtest.Equals(t, zeros[:], content)
+
+	fi, err := os.Stat(filename)
+	rtest.OK(t, err)
+	st := fi.Sys().(*syscall.Stat_t)
+	if st == nil {
+		return
+	}
+
+	// st.Blocks is the size in 512-byte blocks.
+	denseBlocks := math.Ceil(float64(len(zeros)) / 512)
+	sparsity := 1 - float64(st.Blocks)/denseBlocks
+
+	// This should report 100% sparse. We don't assert that,
+	// as the behavior of sparse writes depends on the underlying
+	// file system as well as the OS.
+	t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse",
+		len(zeros), st.Blocks, 100*sparsity)
+}
+
+func BenchmarkZeroPrefixLen(b *testing.B) {
+	var (
+		buf        [4<<20 + 37]byte
+		r          = rand.New(rand.NewSource(0x618732))
+		sumSkipped int64
+	)
+
+	b.ReportAllocs()
+	b.SetBytes(int64(len(buf)))
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		j := r.Intn(len(buf))
+		buf[j] = 0xff
+
+		skipped := zeroPrefixLen(buf[:])
+		sumSkipped += int64(skipped)
+
+		buf[j] = 0
+	}
+
+	// The closer this is to .5, the better. If it's far off, give the
+	// benchmark more time to run with -benchtime.
+	b.Logf("average number of zeros skipped: %.3f",
+		float64(sumSkipped)/(float64(b.N*len(buf))))
+}
diff --git a/internal/restorer/sparsewrite.go b/internal/restorer/sparsewrite.go
new file mode 100644
index 000000000..a2de93aa4
--- /dev/null
+++ b/internal/restorer/sparsewrite.go
@@ -0,0 +1,66 @@
+//go:build !windows
+// +build !windows
+
+package restorer
+
+import "bytes"
+
+// WriteAt writes p to f.File at offset. It tries to do a sparse write
+// and updates f.size.
+func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) {
+	n = len(p)
+	end := offset + int64(n)
+
+	// Skip the longest all-zero prefix of p.
+	// If it's long enough, we can punch a hole in the file.
+	skipped := zeroPrefixLen(p)
+	p = p[skipped:]
+	offset += int64(skipped)
+
+	switch {
+	case len(p) == 0 && end > f.size:
+		// We need to do a Truncate, as WriteAt with length-0 input
+		// doesn't actually extend the file.
+		err = f.Truncate(end)
+		if err != nil {
+			return 0, err
+		}
+
+	case len(p) == 0:
+		// All zeros, file already big enough. A previous WriteAt or
+		// Truncate will have produced the zeros in f.File.
+
+	default:
+		var written int
+		written, err = f.File.WriteAt(p, offset)
+		// Count the skipped zeros as written so that n == len(p) on
+		// success, as the io.WriterAt contract requires. The write
+		// ends at offset (already advanced past the zeros) plus the
+		// bytes actually written; recompute end from that.
+		n = skipped + written
+		end = offset + int64(written)
+	}
+
+	if end > f.size {
+		f.size = end
+	}
+	return n, err
+}
+
+// zeroPrefixLen returns the length of the longest all-zero prefix of p.
+func zeroPrefixLen(p []byte) (n int) {
+	// First skip 1kB-sized blocks, for speed.
+	var zeros [1024]byte
+
+	for len(p) >= len(zeros) && bytes.Equal(p[:len(zeros)], zeros[:]) {
+		p = p[len(zeros):]
+		n += len(zeros)
+	}
+
+	for len(p) > 0 && p[0] == 0 {
+		p = p[1:]
+		n++
+	}
+
+	return n
+}