From 5d4568d393097ed90d5b1bf8c841b2876c3791ad Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Wed, 26 Feb 2020 21:48:05 +0100 Subject: [PATCH 1/8] Write sparse files in restorer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This writes files by using (*os.File).Truncate, which resolves to the truncate system call on Unix. Compared to the naive loop, for _, b := range p { if b != 0 { return false } } the optimized allZero is about 10× faster: name old time/op new time/op delta AllZero-8 1.09ms ± 1% 0.09ms ± 1% -92.10% (p=0.000 n=10+10) name old speed new speed delta AllZero-8 3.84GB/s ± 1% 48.59GB/s ± 1% +1166.51% (p=0.000 n=10+10) --- internal/restorer/fileswriter.go | 36 ++++++---- internal/restorer/fileswriter_test.go | 4 -- internal/restorer/restorer_unix_test.go | 88 +++++++++++++++++++++++++ internal/restorer/sparsewrite.go | 60 +++++++++++++++++ 4 files changed, 171 insertions(+), 17 deletions(-) create mode 100644 internal/restorer/sparsewrite.go diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index 8b7ee4353..aa943e11b 100644 --- a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -19,15 +19,19 @@ type filesWriter struct { type filesWriterBucket struct { lock sync.Mutex - files map[string]*os.File - users map[string]int + files map[string]*partialFile +} + +type partialFile struct { + *os.File + size int64 // File size, tracked for sparse writes (not on Windows). + users int // Reference count. 
} func newFilesWriter(count int) *filesWriter { buckets := make([]filesWriterBucket, count) for b := 0; b < count; b++ { - buckets[b].files = make(map[string]*os.File) - buckets[b].users = make(map[string]int) + buckets[b].files = make(map[string]*partialFile) } return &filesWriter{ buckets: buckets, @@ -37,12 +41,12 @@ func newFilesWriter(count int) *filesWriter { func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error { bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))] - acquireWriter := func() (*os.File, error) { + acquireWriter := func() (*partialFile, error) { bucket.lock.Lock() defer bucket.lock.Unlock() if wr, ok := bucket.files[path]; ok { - bucket.users[path]++ + bucket.files[path].users++ return wr, nil } @@ -53,16 +57,23 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create flags = os.O_WRONLY } - wr, err := os.OpenFile(path, flags, 0600) + f, err := os.OpenFile(path, flags, 0600) if err != nil { return nil, err } + wr := &partialFile{File: f, users: 1} + if createSize < 0 { + info, err := f.Stat() + if err != nil { + return nil, err + } + wr.size = info.Size() + } bucket.files[path] = wr - bucket.users[path] = 1 if createSize >= 0 { - err := preallocateFile(wr, createSize) + err := preallocateFile(wr.File, createSize) if err != nil { // Just log the preallocate error but don't let it cause the restore process to fail. 
// Preallocate might return an error if the filesystem (implementation) does not @@ -76,16 +87,15 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create return wr, nil } - releaseWriter := func(wr *os.File) error { + releaseWriter := func(wr *partialFile) error { bucket.lock.Lock() defer bucket.lock.Unlock() - if bucket.users[path] == 1 { + if bucket.files[path].users == 1 { delete(bucket.files, path) - delete(bucket.users, path) return wr.Close() } - bucket.users[path]-- + bucket.files[path].users-- return nil } diff --git a/internal/restorer/fileswriter_test.go b/internal/restorer/fileswriter_test.go index a6b7e011b..f725be91c 100644 --- a/internal/restorer/fileswriter_test.go +++ b/internal/restorer/fileswriter_test.go @@ -18,19 +18,15 @@ func TestFilesWriterBasic(t *testing.T) { rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.Equals(t, 0, len(w.buckets[0].users)) rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.Equals(t, 0, len(w.buckets[0].users)) rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.Equals(t, 0, len(w.buckets[0].users)) rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.Equals(t, 0, len(w.buckets[0].users)) buf, err := ioutil.ReadFile(f1) rtest.OK(t, err) diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go index 13e318c98..52216088f 100644 --- a/internal/restorer/restorer_unix_test.go +++ b/internal/restorer/restorer_unix_test.go @@ -4,12 +4,18 @@ package restorer import ( + "bytes" "context" + "io/ioutil" + "math" + "math/rand" "os" "path/filepath" "syscall" "testing" + "github.com/restic/restic/internal/archiver" + "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" rtest 
"github.com/restic/restic/internal/test" @@ -60,3 +66,85 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) { rtest.Equals(t, s1.Ino, s2.Ino) } } + +func TestRestorerSparseFiles(t *testing.T) { + repo, cleanup := repository.TestRepository(t) + defer cleanup() + + var zeros [1<<20 + 13]byte + + target := &fs.Reader{ + Mode: 0600, + Name: "/zeros", + ReadCloser: ioutil.NopCloser(bytes.NewReader(zeros[:])), + } + sc := archiver.NewScanner(target) + err := sc.Scan(context.TODO(), []string{"/zeros"}) + rtest.OK(t, err) + + arch := archiver.New(repo, target, archiver.Options{}) + _, id, err := arch.Snapshot(context.Background(), []string{"/zeros"}, + archiver.SnapshotOptions{}) + + res, err := NewRestorer(repo, id) + rtest.OK(t, err) + + tempdir, cleanup := rtest.TempDir(t) + defer cleanup() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err = res.RestoreTo(ctx, tempdir) + rtest.OK(t, err) + + filename := filepath.Join(tempdir, "zeros") + content, err := ioutil.ReadFile(filename) + rtest.OK(t, err) + + rtest.Equals(t, zeros[:], content) + + fi, err := os.Stat(filename) + rtest.OK(t, err) + st := fi.Sys().(*syscall.Stat_t) + if st == nil { + return + } + + // st.Blocks is the size in 512-byte blocks. + denseBlocks := math.Ceil(float64(len(zeros)) / 512) + sparsity := 1 - float64(st.Blocks)/denseBlocks + + // This should report 100% sparse. We don't assert that, + // as the behavior of sparse writes depends on the underlying + // file system as well as the OS. 
+ t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse", + len(zeros), st.Blocks, 100*sparsity) +} + +func BenchmarkZeroPrefixLen(b *testing.B) { + var ( + buf [4<<20 + 37]byte + r = rand.New(rand.NewSource(0x618732)) + sumSkipped int64 + ) + + b.ReportAllocs() + b.SetBytes(int64(len(buf))) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + j := r.Intn(len(buf)) + buf[j] = 0xff + + skipped := zeroPrefixLen(buf[:]) + sumSkipped += int64(skipped) + + buf[j] = 0 + } + + // The closer this is to .5, the better. If it's far off, give the + // benchmark more time to run with -benchtime. + b.Logf("average number of zeros skipped: %.3f", + float64(sumSkipped)/(float64(b.N*len(buf)))) +} diff --git a/internal/restorer/sparsewrite.go b/internal/restorer/sparsewrite.go new file mode 100644 index 000000000..a2de93aa4 --- /dev/null +++ b/internal/restorer/sparsewrite.go @@ -0,0 +1,60 @@ +//go:build !windows +// +build !windows + +package restorer + +import "bytes" + +// WriteAt writes p to f.File at offset. It tries to do a sparse write +// and updates f.size. +func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { + n = len(p) + end := offset + int64(n) + + // Skip the longest all-zero prefix of p. + // If it's long enough, we can punch a hole in the file. + skipped := zeroPrefixLen(p) + p = p[skipped:] + offset += int64(skipped) + + switch { + case len(p) == 0 && end > f.size: + // We need to do a Truncate, as WriteAt with length-0 input + // doesn't actually extend the file. + err = f.Truncate(end) + if err != nil { + return 0, err + } + + case len(p) == 0: + // All zeros, file already big enough. A previous WriteAt or + // Truncate will have produced the zeros in f.File. + + default: + n, err = f.File.WriteAt(p, offset) + } + + end = offset + int64(n) + if end > f.size { + f.size = end + } + return n, err +} + +// zeroPrefixLen returns the length of the longest all-zero prefix of p. 
+func zeroPrefixLen(p []byte) (n int) { + // First skip 1kB-sized blocks, for speed. + var zeros [1024]byte + + for len(p) >= len(zeros) && bytes.Equal(p[:len(zeros)], zeros[:]) { + p = p[len(zeros):] + n += len(zeros) + } + + for len(p) > 0 && p[0] == 0 { + p = p[1:] + n++ + } + + return n +} From 3047bf611ca34ba29271b3ebd1c7c0dcec45e130 Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Fri, 6 Mar 2020 10:14:27 +0100 Subject: [PATCH 2/8] Changelog entry for sparse file restoring --- changelog/unreleased/pull-2601 | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 changelog/unreleased/pull-2601 diff --git a/changelog/unreleased/pull-2601 b/changelog/unreleased/pull-2601 new file mode 100644 index 000000000..befa23ab2 --- /dev/null +++ b/changelog/unreleased/pull-2601 @@ -0,0 +1,12 @@ +Enhancement: Restore files with many zeros as sparse files + +On all platforms except Windows, the restorer may now write files containing +long runs of zeros as sparse files (also called files with holes): the zeros +are not actually written to disk. + +How much space is saved by writing sparse files depends on the operating +system, file system and the distribution of zeros in the file. + +https://github.com/restic/restic/issues/79 +https://github.com/restic/restic/pull/2601 +https://forum.restic.net/t/sparse-file-support/1264 From 5b6a77058a0ef386fee07dd579c5765a2d356599 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 7 Aug 2022 17:26:46 +0200 Subject: [PATCH 3/8] Enable sparseness only conditionally We can either preallocate storage for a file or sparsify it. This detects a pack file as sparse if it contains an all zero block or consists of only one block. As the file sparsification is just an approximation, hide it behind a `--sparse` parameter. 
--- cmd/restic/cmd_restore.go | 7 +- internal/restorer/filerestorer.go | 20 +++++- internal/restorer/filerestorer_test.go | 94 +++++++++++++------------ internal/restorer/fileswriter.go | 11 +-- internal/restorer/fileswriter_test.go | 8 +-- internal/restorer/restorer.go | 10 +-- internal/restorer/restorer_test.go | 10 +-- internal/restorer/restorer_unix_test.go | 6 +- internal/restorer/sparsewrite.go | 4 ++ 9 files changed, 102 insertions(+), 68 deletions(-) diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index b72799198..faf8c851a 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -1,6 +1,7 @@ package main import ( + "runtime" "strings" "time" @@ -42,6 +43,7 @@ type RestoreOptions struct { InsensitiveInclude []string Target string snapshotFilterOptions + Sparse bool Verify bool } @@ -58,6 +60,9 @@ func init() { flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to") initSingleSnapshotFilterOptions(flags, &restoreOptions.snapshotFilterOptions) + if runtime.GOOS != "windows" { + flags.BoolVar(&restoreOptions.Sparse, "sparse", false, "restore files as sparse (not supported on windows)") + } flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content") } @@ -147,7 +152,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { return err } - res, err := restorer.NewRestorer(ctx, repo, id) + res, err := restorer.NewRestorer(ctx, repo, id, opts.Sparse) if err != nil { Exitf(2, "creating restorer failed: %v\n", err) } diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 362d821d2..659458cd8 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -7,6 +7,7 @@ import ( "golang.org/x/sync/errgroup" + "github.com/restic/chunker" "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" @@ -27,6 +28,7 @@ const 
( type fileInfo struct { lock sync.Mutex inProgress bool + sparse bool size int64 location string // file on local filesystem relative to restorer basedir blobs interface{} // blobs of the file @@ -51,6 +53,8 @@ type fileRestorer struct { workerCount int filesWriter *filesWriter + zeroChunk restic.ID + sparse bool dst string files []*fileInfo @@ -61,7 +65,8 @@ func newFileRestorer(dst string, packLoader repository.BackendLoadFn, key *crypto.Key, idx func(restic.BlobHandle) []restic.PackedBlob, - connections uint) *fileRestorer { + connections uint, + sparse bool) *fileRestorer { // as packs are streamed the concurrency is limited by IO workerCount := int(connections) @@ -71,6 +76,8 @@ func newFileRestorer(dst string, idx: idx, packLoader: packLoader, filesWriter: newFilesWriter(workerCount), + zeroChunk: restic.Hash(make([]byte, chunker.MinSize)), + sparse: sparse, workerCount: workerCount, dst: dst, Error: restorerAbortOnAllErrors, @@ -133,7 +140,16 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error { packOrder = append(packOrder, packID) } pack.files[file] = struct{}{} + if blob.ID.Equal(r.zeroChunk) { + file.sparse = r.sparse + } }) + if len(fileBlobs) == 1 { + // no need to preallocate files with a single block, thus we can always consider them to be sparse + // in addition, a short chunk will never match r.zeroChunk which would prevent sparseness for short files + file.sparse = r.sparse + } + if err != nil { // repository index is messed up, can't do anything return err @@ -253,7 +269,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error { file.inProgress = true createSize = file.size } - return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize) + return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize, file.sparse) } err := sanitizeError(file, writeToFile()) if err != nil { diff --git a/internal/restorer/filerestorer_test.go 
b/internal/restorer/filerestorer_test.go index fa781f8c8..06b4a6b81 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -147,10 +147,10 @@ func newTestRepo(content []TestFile) *TestRepo { return repo } -func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool) { +func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool, sparse bool) { repo := newTestRepo(content) - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, sparse) if files == nil { r.files = repo.files @@ -188,30 +188,32 @@ func TestFileRestorerBasic(t *testing.T) { tempdir, cleanup := rtest.TempDir(t) defer cleanup() - restoreAndVerify(t, tempdir, []TestFile{ - { - name: "file1", - blobs: []TestBlob{ - {"data1-1", "pack1-1"}, - {"data1-2", "pack1-2"}, + for _, sparse := range []bool{false, true} { + restoreAndVerify(t, tempdir, []TestFile{ + { + name: "file1", + blobs: []TestBlob{ + {"data1-1", "pack1-1"}, + {"data1-2", "pack1-2"}, + }, }, - }, - { - name: "file2", - blobs: []TestBlob{ - {"data2-1", "pack2-1"}, - {"data2-2", "pack2-2"}, + { + name: "file2", + blobs: []TestBlob{ + {"data2-1", "pack2-1"}, + {"data2-2", "pack2-2"}, + }, }, - }, - { - name: "file3", - blobs: []TestBlob{ - // same blob multiple times - {"data3-1", "pack3-1"}, - {"data3-1", "pack3-1"}, + { + name: "file3", + blobs: []TestBlob{ + // same blob multiple times + {"data3-1", "pack3-1"}, + {"data3-1", "pack3-1"}, + }, }, - }, - }, nil) + }, nil, sparse) + } } func TestFileRestorerPackSkip(t *testing.T) { @@ -221,28 +223,30 @@ func TestFileRestorerPackSkip(t *testing.T) { files := make(map[string]bool) files["file2"] = true - restoreAndVerify(t, tempdir, []TestFile{ - { - name: "file1", - blobs: []TestBlob{ - {"data1-1", "pack1"}, - {"data1-2", "pack1"}, - {"data1-3", "pack1"}, - {"data1-4", "pack1"}, - {"data1-5", "pack1"}, - 
{"data1-6", "pack1"}, + for _, sparse := range []bool{false, true} { + restoreAndVerify(t, tempdir, []TestFile{ + { + name: "file1", + blobs: []TestBlob{ + {"data1-1", "pack1"}, + {"data1-2", "pack1"}, + {"data1-3", "pack1"}, + {"data1-4", "pack1"}, + {"data1-5", "pack1"}, + {"data1-6", "pack1"}, + }, }, - }, - { - name: "file2", - blobs: []TestBlob{ - // file is contained in pack1 but need pack parts to be skipped - {"data1-2", "pack1"}, - {"data1-4", "pack1"}, - {"data1-6", "pack1"}, + { + name: "file2", + blobs: []TestBlob{ + // file is contained in pack1 but need pack parts to be skipped + {"data1-2", "pack1"}, + {"data1-4", "pack1"}, + {"data1-6", "pack1"}, + }, }, - }, - }, files) + }, files, sparse) + } } func TestErrorRestoreFiles(t *testing.T) { @@ -264,7 +268,7 @@ func TestErrorRestoreFiles(t *testing.T) { return loadError } - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false) r.files = repo.files err := r.restoreFiles(context.TODO()) @@ -304,7 +308,7 @@ func testPartialDownloadError(t *testing.T, part int) { return loader(ctx, h, length, offset, fn) } - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false) r.files = repo.files r.Error = func(s string, e error) error { // ignore errors as in the `restore` command diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index aa943e11b..d7483cd84 100644 --- a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -24,8 +24,9 @@ type filesWriterBucket struct { type partialFile struct { *os.File - size int64 // File size, tracked for sparse writes (not on Windows). - users int // Reference count. + size int64 // File size, tracked for sparse writes (not on Windows). + users int // Reference count. 
+ sparse bool } func newFilesWriter(count int) *filesWriter { @@ -38,7 +39,7 @@ func newFilesWriter(count int) *filesWriter { } } -func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error { +func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64, sparse bool) error { bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))] acquireWriter := func() (*partialFile, error) { @@ -62,7 +63,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create return nil, err } - wr := &partialFile{File: f, users: 1} + wr := &partialFile{File: f, users: 1, sparse: sparse} if createSize < 0 { info, err := f.Stat() if err != nil { @@ -72,7 +73,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create } bucket.files[path] = wr - if createSize >= 0 { + if createSize >= 0 && !sparse { err := preallocateFile(wr.File, createSize) if err != nil { // Just log the preallocate error but don't let it cause the restore process to fail. 
diff --git a/internal/restorer/fileswriter_test.go b/internal/restorer/fileswriter_test.go index f725be91c..825d52bca 100644 --- a/internal/restorer/fileswriter_test.go +++ b/internal/restorer/fileswriter_test.go @@ -16,16 +16,16 @@ func TestFilesWriterBasic(t *testing.T) { f1 := dir + "/f1" f2 := dir + "/f2" - rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2)) + rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2)) + rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1)) + rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1)) + rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) buf, err := ioutil.ReadFile(f1) diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 829e5aedc..1b645a6f0 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -16,8 +16,9 @@ import ( // Restorer is used to restore a snapshot to a directory. type Restorer struct { - repo restic.Repository - sn *restic.Snapshot + repo restic.Repository + sn *restic.Snapshot + sparse bool Error func(location string, err error) error SelectFilter func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) @@ -26,9 +27,10 @@ type Restorer struct { var restorerAbortOnAllErrors = func(location string, err error) error { return err } // NewRestorer creates a restorer preloaded with the content from the snapshot id. 
-func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID) (*Restorer, error) { +func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID, sparse bool) (*Restorer, error) { r := &Restorer{ repo: repo, + sparse: sparse, Error: restorerAbortOnAllErrors, SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true }, } @@ -219,7 +221,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { } idx := NewHardlinkIndex() - filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections()) + filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections(), res.sparse) filerestorer.Error = res.Error debug.Log("first pass for %q", dst) diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index 2eea1a6fd..7113ca100 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -324,7 +324,7 @@ func TestRestorer(t *testing.T) { _, id := saveSnapshot(t, repo, test.Snapshot) t.Logf("snapshot saved as %v", id.Str()) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -447,7 +447,7 @@ func TestRestorerRelative(t *testing.T) { _, id := saveSnapshot(t, repo, test.Snapshot) t.Logf("snapshot saved as %v", id.Str()) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -682,7 +682,7 @@ func TestRestorerTraverseTree(t *testing.T) { defer cleanup() sn, id := saveSnapshot(t, repo, test.Snapshot) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -764,7 +764,7 @@ func TestRestorerConsistentTimestampsAndPermissions(t *testing.T) { }, }) - res, err := 
NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { @@ -824,7 +824,7 @@ func TestVerifyCancel(t *testing.T) { _, id := saveSnapshot(t, repo, snapshot) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) tempdir, cleanup := rtest.TempDir(t) diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go index 52216088f..9e5f61ff4 100644 --- a/internal/restorer/restorer_unix_test.go +++ b/internal/restorer/restorer_unix_test.go @@ -36,7 +36,7 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) { }, }) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { @@ -85,8 +85,9 @@ func TestRestorerSparseFiles(t *testing.T) { arch := archiver.New(repo, target, archiver.Options{}) _, id, err := arch.Snapshot(context.Background(), []string{"/zeros"}, archiver.SnapshotOptions{}) + rtest.OK(t, err) - res, err := NewRestorer(repo, id) + res, err := NewRestorer(context.TODO(), repo, id, true) rtest.OK(t, err) tempdir, cleanup := rtest.TempDir(t) @@ -102,6 +103,7 @@ func TestRestorerSparseFiles(t *testing.T) { content, err := ioutil.ReadFile(filename) rtest.OK(t, err) + rtest.Equals(t, len(zeros[:]), len(content)) rtest.Equals(t, zeros[:], content) fi, err := os.Stat(filename) diff --git a/internal/restorer/sparsewrite.go b/internal/restorer/sparsewrite.go index a2de93aa4..dec95d784 100644 --- a/internal/restorer/sparsewrite.go +++ b/internal/restorer/sparsewrite.go @@ -8,6 +8,10 @@ import "bytes" // WriteAt writes p to f.File at offset. It tries to do a sparse write // and updates f.size. 
func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { + if !f.sparse { + return f.File.WriteAt(p, offset) + } + n = len(p) end := offset + int64(n) From a5ebd5de4b22de8299d8b025cd2c098ef1eb327e Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 7 Aug 2022 17:56:14 +0200 Subject: [PATCH 4/8] restorer: Fix race condition in partialFile.WriteAt The restorer can issue multiple calls to WriteAt in parallel. This can result in unexpected orderings of the Truncate and WriteAt calls and sometimes too short restored files. --- internal/restorer/fileswriter.go | 35 ++++++++++++++++---------------- internal/restorer/sparsewrite.go | 17 +++------------- 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index d7483cd84..47fb5572c 100644 --- a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -24,8 +24,7 @@ type filesWriterBucket struct { type partialFile struct { *os.File - size int64 // File size, tracked for sparse writes (not on Windows). - users int // Reference count. + users int // Reference count. sparse bool } @@ -64,24 +63,24 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create } wr := &partialFile{File: f, users: 1, sparse: sparse} - if createSize < 0 { - info, err := f.Stat() - if err != nil { - return nil, err - } - wr.size = info.Size() - } bucket.files[path] = wr - if createSize >= 0 && !sparse { - err := preallocateFile(wr.File, createSize) - if err != nil { - // Just log the preallocate error but don't let it cause the restore process to fail. - // Preallocate might return an error if the filesystem (implementation) does not - // support preallocation or our parameters combination to the preallocate call - // This should yield a syscall.ENOTSUP error, but some other errors might also - // show up. 
- debug.Log("Failed to preallocate %v with size %v: %v", path, createSize, err) + if createSize >= 0 { + if sparse { + err = f.Truncate(createSize) + if err != nil { + return nil, err + } + } else { + err := preallocateFile(wr.File, createSize) + if err != nil { + // Just log the preallocate error but don't let it cause the restore process to fail. + // Preallocate might return an error if the filesystem (implementation) does not + // support preallocation or our parameters combination to the preallocate call + // This should yield a syscall.ENOTSUP error, but some other errors might also + // show up. + debug.Log("Failed to preallocate %v with size %v: %v", path, createSize, err) + } } } diff --git a/internal/restorer/sparsewrite.go b/internal/restorer/sparsewrite.go index dec95d784..9dec4bfa3 100644 --- a/internal/restorer/sparsewrite.go +++ b/internal/restorer/sparsewrite.go @@ -13,7 +13,6 @@ func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { } n = len(p) - end := offset + int64(n) // Skip the longest all-zero prefix of p. // If it's long enough, we can punch a hole in the file. @@ -22,26 +21,16 @@ func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { offset += int64(skipped) switch { - case len(p) == 0 && end > f.size: - // We need to do a Truncate, as WriteAt with length-0 input - // doesn't actually extend the file. - err = f.Truncate(end) - if err != nil { - return 0, err - } - case len(p) == 0: // All zeros, file already big enough. A previous WriteAt or // Truncate will have produced the zeros in f.File. 
default: - n, err = f.File.WriteAt(p, offset) + var n2 int + n2, err = f.File.WriteAt(p, offset) + n = skipped + n2 } - end = offset + int64(n) - if end > f.size { - f.size = end - } return n, err } From 34fe1362dac25d6740365a42eca947dd8c573c24 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 4 Sep 2022 10:39:16 +0200 Subject: [PATCH 5/8] restorer: move zeroPrefixLen to restic package --- internal/restic/zeroprefix.go | 21 ++++++++++ internal/restic/zeroprefix_test.go | 52 +++++++++++++++++++++++++ internal/restorer/restorer_unix_test.go | 28 ------------- internal/restorer/sparsewrite.go | 24 ++---------- 4 files changed, 77 insertions(+), 48 deletions(-) create mode 100644 internal/restic/zeroprefix.go create mode 100644 internal/restic/zeroprefix_test.go diff --git a/internal/restic/zeroprefix.go b/internal/restic/zeroprefix.go new file mode 100644 index 000000000..b25e7ab27 --- /dev/null +++ b/internal/restic/zeroprefix.go @@ -0,0 +1,21 @@ +package restic + +import "bytes" + +// ZeroPrefixLen returns the length of the longest all-zero prefix of p. +func ZeroPrefixLen(p []byte) (n int) { + // First skip 1kB-sized blocks, for speed. 
+ var zeros [1024]byte + + for len(p) >= len(zeros) && bytes.Equal(p[:len(zeros)], zeros[:]) { + p = p[len(zeros):] + n += len(zeros) + } + + for len(p) > 0 && p[0] == 0 { + p = p[1:] + n++ + } + + return n +} diff --git a/internal/restic/zeroprefix_test.go b/internal/restic/zeroprefix_test.go new file mode 100644 index 000000000..a21806851 --- /dev/null +++ b/internal/restic/zeroprefix_test.go @@ -0,0 +1,52 @@ +package restic_test + +import ( + "math/rand" + "testing" + + "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/test" +) + +func TestZeroPrefixLen(t *testing.T) { + var buf [2048]byte + + // test zero prefixes of various lengths + for i := len(buf) - 1; i >= 0; i-- { + buf[i] = 42 + skipped := restic.ZeroPrefixLen(buf[:]) + test.Equals(t, i, skipped) + } + // test buffers of various sizes + for i := 0; i < len(buf); i++ { + skipped := restic.ZeroPrefixLen(buf[i:]) + test.Equals(t, 0, skipped) + } +} + +func BenchmarkZeroPrefixLen(b *testing.B) { + var ( + buf [4<<20 + 37]byte + r = rand.New(rand.NewSource(0x618732)) + sumSkipped int64 + ) + + b.ReportAllocs() + b.SetBytes(int64(len(buf))) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + j := r.Intn(len(buf)) + buf[j] = 0xff + + skipped := restic.ZeroPrefixLen(buf[:]) + sumSkipped += int64(skipped) + + buf[j] = 0 + } + + // The closer this is to .5, the better. If it's far off, give the + // benchmark more time to run with -benchtime. 
+ b.Logf("average number of zeros skipped: %.3f", + float64(sumSkipped)/(float64(b.N*len(buf)))) +} diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go index 9e5f61ff4..dc8d6adeb 100644 --- a/internal/restorer/restorer_unix_test.go +++ b/internal/restorer/restorer_unix_test.go @@ -8,7 +8,6 @@ import ( "context" "io/ioutil" "math" - "math/rand" "os" "path/filepath" "syscall" @@ -123,30 +122,3 @@ func TestRestorerSparseFiles(t *testing.T) { t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse", len(zeros), st.Blocks, 100*sparsity) } - -func BenchmarkZeroPrefixLen(b *testing.B) { - var ( - buf [4<<20 + 37]byte - r = rand.New(rand.NewSource(0x618732)) - sumSkipped int64 - ) - - b.ReportAllocs() - b.SetBytes(int64(len(buf))) - b.ResetTimer() - - for i := 0; i < b.N; i++ { - j := r.Intn(len(buf)) - buf[j] = 0xff - - skipped := zeroPrefixLen(buf[:]) - sumSkipped += int64(skipped) - - buf[j] = 0 - } - - // The closer this is to .5, the better. If it's far off, give the - // benchmark more time to run with -benchtime. - b.Logf("average number of zeros skipped: %.3f", - float64(sumSkipped)/(float64(b.N*len(buf)))) -} diff --git a/internal/restorer/sparsewrite.go b/internal/restorer/sparsewrite.go index 9dec4bfa3..2c1f234de 100644 --- a/internal/restorer/sparsewrite.go +++ b/internal/restorer/sparsewrite.go @@ -3,7 +3,9 @@ package restorer -import "bytes" +import ( + "github.com/restic/restic/internal/restic" +) // WriteAt writes p to f.File at offset. It tries to do a sparse write // and updates f.size. @@ -16,7 +18,7 @@ func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { // Skip the longest all-zero prefix of p. // If it's long enough, we can punch a hole in the file. 
- skipped := zeroPrefixLen(p) + skipped := restic.ZeroPrefixLen(p) p = p[skipped:] offset += int64(skipped) @@ -33,21 +35,3 @@ func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { return n, err } - -// zeroPrefixLen returns the length of the longest all-zero prefix of p. -func zeroPrefixLen(p []byte) (n int) { - // First skip 1kB-sized blocks, for speed. - var zeros [1024]byte - - for len(p) >= len(zeros) && bytes.Equal(p[:len(zeros)], zeros[:]) { - p = p[len(zeros):] - n += len(zeros) - } - - for len(p) > 0 && p[0] == 0 { - p = p[1:] - n++ - } - - return n -} From c147422ba5a4247b8303990f9273e20291b9b317 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 4 Sep 2022 10:49:16 +0200 Subject: [PATCH 6/8] repository: special case SaveBlob for all zero chunks Sparse files contain large regions containing only zero bytes. Checking that a blob only contains zeros is possible with over 100GB/s for modern x86 CPUs. Calculating sha256 hashes is only possible with 500MB/s (or 2GB/s using hardware acceleration). Thus we can speed up the hash calculation for all zero blobs (which always have length chunker.MinSize) by checking for zero bytes and then using the precomputed hash. The all zeros check is only performed for blobs with the minimal chunk size, and thus should add no overhead most of the time. For chunks which are not all zero but have the minimal chunks size, the overhead will be below 2% based on the above performance numbers. This allows reading sparse sections of files as fast as the kernel can return data to us. On my system using BTRFS this resulted in about 4GB/s. 
--- internal/repository/repository.go | 20 +++++++++++++++++++- internal/restorer/filerestorer.go | 3 +-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 625ad9b16..f41ce38a5 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -813,7 +813,14 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte // compute plaintext hash if not already set if id.IsNull() { - newID = restic.Hash(buf) + // Special case the hash calculation for all zero chunks. This is especially + // useful for sparse files containing large all zero regions. For these we can + // process chunks as fast as we can read them from disk. + if len(buf) == chunker.MinSize && restic.ZeroPrefixLen(buf) == chunker.MinSize { + newID = ZeroChunk() + } else { + newID = restic.Hash(buf) + } } else { newID = id } @@ -967,3 +974,14 @@ func streamPackPart(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, }) return errors.Wrap(err, "StreamPack") } + +var zeroChunkOnce sync.Once +var zeroChunkID restic.ID + +// ZeroChunk computes and returns (cached) the ID of an all-zero chunk with size chunker.MinSize +func ZeroChunk() restic.ID { + zeroChunkOnce.Do(func() { + zeroChunkID = restic.Hash(make([]byte, chunker.MinSize)) + }) + return zeroChunkID +} diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 659458cd8..2deef1cd2 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -7,7 +7,6 @@ import ( "golang.org/x/sync/errgroup" - "github.com/restic/chunker" "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" @@ -76,7 +75,7 @@ func newFileRestorer(dst string, idx: idx, packLoader: packLoader, filesWriter: newFilesWriter(workerCount), - zeroChunk: restic.Hash(make([]byte, chunker.MinSize)), + zeroChunk: 
repository.ZeroChunk(), sparse: sparse, workerCount: workerCount, dst: dst, From 0f89f443c750917075ce99c539095159e0052c95 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 4 Sep 2022 11:03:59 +0200 Subject: [PATCH 7/8] update sparse restore changelog --- changelog/unreleased/{pull-2601 => issue-79} | 5 +++++ 1 file changed, 5 insertions(+) rename changelog/unreleased/{pull-2601 => issue-79} (70%) diff --git a/changelog/unreleased/pull-2601 b/changelog/unreleased/issue-79 similarity index 70% rename from changelog/unreleased/pull-2601 rename to changelog/unreleased/issue-79 index befa23ab2..c5f8ffaed 100644 --- a/changelog/unreleased/pull-2601 +++ b/changelog/unreleased/issue-79 @@ -7,6 +7,11 @@ are not actually written to disk. How much space is saved by writing sparse files depends on the operating system, file system and the distribution of zeros in the file. +During backup restic still reads the whole file including sparse regions. We +have optimized the processing speed of sparse regions. 
+ https://github.com/restic/restic/issues/79 +https://github.com/restic/restic/issues/3903 https://github.com/restic/restic/pull/2601 +https://github.com/restic/restic/pull/3854 https://forum.restic.net/t/sparse-file-support/1264 From 19afad8a0952149447a22d95fe144dc24c82283a Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 4 Sep 2022 11:23:31 +0200 Subject: [PATCH 8/8] restore: support sparse restores also on windows --- changelog/unreleased/issue-79 | 6 +-- cmd/restic/cmd_restore.go | 5 +- internal/restorer/fileswriter.go | 2 +- internal/restorer/restorer_test.go | 57 +++++++++++++++++++++ internal/restorer/restorer_unix_test.go | 58 ++-------------------- internal/restorer/restorer_windows_test.go | 35 +++++++++++++ internal/restorer/truncate_other.go | 10 ++++ internal/restorer/truncate_windows.go | 19 +++++++ 8 files changed, 129 insertions(+), 63 deletions(-) create mode 100644 internal/restorer/restorer_windows_test.go create mode 100644 internal/restorer/truncate_other.go create mode 100644 internal/restorer/truncate_windows.go diff --git a/changelog/unreleased/issue-79 b/changelog/unreleased/issue-79 index c5f8ffaed..b69ea519c 100644 --- a/changelog/unreleased/issue-79 +++ b/changelog/unreleased/issue-79 @@ -1,8 +1,8 @@ Enhancement: Restore files with many zeros as sparse files -On all platforms except Windows, the restorer may now write files containing -long runs of zeros as sparse files (also called files with holes): the zeros -are not actually written to disk. +When using `restore --sparse`, the restorer may now write files containing long +runs of zeros as sparse files (also called files with holes): the zeros are not +actually written to disk. How much space is saved by writing sparse files depends on the operating system, file system and the distribution of zeros in the file. 
diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index faf8c851a..1da8407a4 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -1,7 +1,6 @@ package main import ( - "runtime" "strings" "time" @@ -60,9 +59,7 @@ func init() { flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to") initSingleSnapshotFilterOptions(flags, &restoreOptions.snapshotFilterOptions) - if runtime.GOOS != "windows" { - flags.BoolVar(&restoreOptions.Sparse, "sparse", false, "restore files as sparse (not supported on windows)") - } + flags.BoolVar(&restoreOptions.Sparse, "sparse", false, "restore files as sparse") flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content") } diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index 47fb5572c..0a26101f4 100644 --- a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -67,7 +67,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create if createSize >= 0 { if sparse { - err = f.Truncate(createSize) + err = truncateSparse(f, createSize) if err != nil { return nil, err } diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index 7113ca100..f57868b4f 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "io/ioutil" + "math" "os" "path/filepath" "runtime" @@ -11,6 +12,7 @@ import ( "testing" "time" + "github.com/restic/restic/internal/archiver" "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" @@ -849,3 +851,58 @@ func TestVerifyCancel(t *testing.T) { rtest.Equals(t, 1, len(errs)) rtest.Assert(t, strings.Contains(errs[0].Error(), "Invalid file size for"), "wrong error %q", errs[0].Error()) } + +func TestRestorerSparseFiles(t *testing.T) { + repo, cleanup := repository.TestRepository(t) + 
defer cleanup() + + var zeros [1<<20 + 13]byte + + target := &fs.Reader{ + Mode: 0600, + Name: "/zeros", + ReadCloser: ioutil.NopCloser(bytes.NewReader(zeros[:])), + } + sc := archiver.NewScanner(target) + err := sc.Scan(context.TODO(), []string{"/zeros"}) + rtest.OK(t, err) + + arch := archiver.New(repo, target, archiver.Options{}) + _, id, err := arch.Snapshot(context.Background(), []string{"/zeros"}, + archiver.SnapshotOptions{}) + rtest.OK(t, err) + + res, err := NewRestorer(context.TODO(), repo, id, true) + rtest.OK(t, err) + + tempdir, cleanup := rtest.TempDir(t) + defer cleanup() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err = res.RestoreTo(ctx, tempdir) + rtest.OK(t, err) + + filename := filepath.Join(tempdir, "zeros") + content, err := ioutil.ReadFile(filename) + rtest.OK(t, err) + + rtest.Equals(t, len(zeros[:]), len(content)) + rtest.Equals(t, zeros[:], content) + + blocks := getBlockCount(t, filename) + if blocks < 0 { + return + } + + // st.Blocks is the size in 512-byte blocks. + denseBlocks := math.Ceil(float64(len(zeros)) / 512) + sparsity := 1 - float64(blocks)/denseBlocks + + // This should report 100% sparse. We don't assert that, + // as the behavior of sparse writes depends on the underlying + // file system as well as the OS. 
+ t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse", + len(zeros), blocks, 100*sparsity) +} diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go index dc8d6adeb..76f86c60b 100644 --- a/internal/restorer/restorer_unix_test.go +++ b/internal/restorer/restorer_unix_test.go @@ -4,17 +4,12 @@ package restorer import ( - "bytes" "context" - "io/ioutil" - "math" "os" "path/filepath" "syscall" "testing" - "github.com/restic/restic/internal/archiver" - "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" rtest "github.com/restic/restic/internal/test" @@ -66,59 +61,12 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) { } } -func TestRestorerSparseFiles(t *testing.T) { - repo, cleanup := repository.TestRepository(t) - defer cleanup() - - var zeros [1<<20 + 13]byte - - target := &fs.Reader{ - Mode: 0600, - Name: "/zeros", - ReadCloser: ioutil.NopCloser(bytes.NewReader(zeros[:])), - } - sc := archiver.NewScanner(target) - err := sc.Scan(context.TODO(), []string{"/zeros"}) - rtest.OK(t, err) - - arch := archiver.New(repo, target, archiver.Options{}) - _, id, err := arch.Snapshot(context.Background(), []string{"/zeros"}, - archiver.SnapshotOptions{}) - rtest.OK(t, err) - - res, err := NewRestorer(context.TODO(), repo, id, true) - rtest.OK(t, err) - - tempdir, cleanup := rtest.TempDir(t) - defer cleanup() - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - err = res.RestoreTo(ctx, tempdir) - rtest.OK(t, err) - - filename := filepath.Join(tempdir, "zeros") - content, err := ioutil.ReadFile(filename) - rtest.OK(t, err) - - rtest.Equals(t, len(zeros[:]), len(content)) - rtest.Equals(t, zeros[:], content) - +func getBlockCount(t *testing.T, filename string) int64 { fi, err := os.Stat(filename) rtest.OK(t, err) st := fi.Sys().(*syscall.Stat_t) if st == nil { - return + return -1 } - - // st.Blocks is the size in 512-byte 
blocks. - denseBlocks := math.Ceil(float64(len(zeros)) / 512) - sparsity := 1 - float64(st.Blocks)/denseBlocks - - // This should report 100% sparse. We don't assert that, - // as the behavior of sparse writes depends on the underlying - // file system as well as the OS. - t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse", - len(zeros), st.Blocks, 100*sparsity) + return st.Blocks } diff --git a/internal/restorer/restorer_windows_test.go b/internal/restorer/restorer_windows_test.go new file mode 100644 index 000000000..3ec4b1f11 --- /dev/null +++ b/internal/restorer/restorer_windows_test.go @@ -0,0 +1,35 @@ +//go:build windows +// +build windows + +package restorer + +import ( + "math" + "syscall" + "testing" + "unsafe" + + rtest "github.com/restic/restic/internal/test" + "golang.org/x/sys/windows" +) + +func getBlockCount(t *testing.T, filename string) int64 { + libkernel32 := windows.NewLazySystemDLL("kernel32.dll") + err := libkernel32.Load() + rtest.OK(t, err) + proc := libkernel32.NewProc("GetCompressedFileSizeW") + err = proc.Find() + rtest.OK(t, err) + + namePtr, err := syscall.UTF16PtrFromString(filename) + rtest.OK(t, err) + + result, _, _ := proc.Call(uintptr(unsafe.Pointer(namePtr)), 0) + + const invalidFileSize = uintptr(4294967295) + if result == invalidFileSize { + return -1 + } + + return int64(math.Ceil(float64(result) / 512)) +} diff --git a/internal/restorer/truncate_other.go b/internal/restorer/truncate_other.go new file mode 100644 index 000000000..ed7ab04c5 --- /dev/null +++ b/internal/restorer/truncate_other.go @@ -0,0 +1,10 @@ +//go:build !windows +// +build !windows + +package restorer + +import "os" + +func truncateSparse(f *os.File, size int64) error { + return f.Truncate(size) +} diff --git a/internal/restorer/truncate_windows.go b/internal/restorer/truncate_windows.go new file mode 100644 index 000000000..831a117d1 --- /dev/null +++ b/internal/restorer/truncate_windows.go @@ -0,0 +1,19 @@ +package restorer + +import ( + "os" + + 
"github.com/restic/restic/internal/debug" + "golang.org/x/sys/windows" +) + +func truncateSparse(f *os.File, size int64) error { + // try setting the sparse file attribute, but ignore the error if it fails + var t uint32 + err := windows.DeviceIoControl(windows.Handle(f.Fd()), windows.FSCTL_SET_SPARSE, nil, 0, nil, 0, &t, nil) + if err != nil { + debug.Log("failed to set sparse attribute for %v: %v", f.Name(), err) + } + + return f.Truncate(size) +}