diff --git a/changelog/unreleased/issue-79 b/changelog/unreleased/issue-79 new file mode 100644 index 000000000..b69ea519c --- /dev/null +++ b/changelog/unreleased/issue-79 @@ -0,0 +1,17 @@ +Enhancement: Restore files with many zeros as sparse files + +When using `restore --sparse`, the restorer may now write files containing long +runs of zeros as sparse files (also called files with holes): the zeros are not +actually written to disk. + +How much space is saved by writing sparse files depends on the operating +system, file system and the distribution of zeros in the file. + +During backup restic still reads the whole file including sparse regions. We +have optimized the processing speed of sparse regions. + +https://github.com/restic/restic/issues/79 +https://github.com/restic/restic/issues/3903 +https://github.com/restic/restic/pull/2601 +https://github.com/restic/restic/pull/3854 +https://forum.restic.net/t/sparse-file-support/1264 diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index b72799198..1da8407a4 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -42,6 +42,7 @@ type RestoreOptions struct { InsensitiveInclude []string Target string snapshotFilterOptions + Sparse bool Verify bool } @@ -58,6 +59,7 @@ func init() { flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to") initSingleSnapshotFilterOptions(flags, &restoreOptions.snapshotFilterOptions) + flags.BoolVar(&restoreOptions.Sparse, "sparse", false, "restore files as sparse") flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content") } @@ -147,7 +149,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { return err } - res, err := restorer.NewRestorer(ctx, repo, id) + res, err := restorer.NewRestorer(ctx, repo, id, opts.Sparse) if err != nil { Exitf(2, "creating restorer failed: %v\n", err) } diff --git a/internal/repository/repository.go 
b/internal/repository/repository.go index 5fe43164f..6f3bb7c02 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -818,7 +818,14 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte // compute plaintext hash if not already set if id.IsNull() { - newID = restic.Hash(buf) + // Special case the hash calculation for all zero chunks. This is especially + // useful for sparse files containing large all zero regions. For these we can + // process chunks as fast as we can read the data from disk. + if len(buf) == chunker.MinSize && restic.ZeroPrefixLen(buf) == chunker.MinSize { + newID = ZeroChunk() + } else { + newID = restic.Hash(buf) + } } else { newID = id } @@ -972,3 +979,14 @@ func streamPackPart(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, }) return errors.Wrap(err, "StreamPack") } + +var zeroChunkOnce sync.Once +var zeroChunkID restic.ID + +// ZeroChunk computes and returns (cached) the ID of an all-zero chunk with size chunker.MinSize +func ZeroChunk() restic.ID { + zeroChunkOnce.Do(func() { + zeroChunkID = restic.Hash(make([]byte, chunker.MinSize)) + }) + return zeroChunkID +} diff --git a/internal/restic/zeroprefix.go b/internal/restic/zeroprefix.go new file mode 100644 index 000000000..b25e7ab27 --- /dev/null +++ b/internal/restic/zeroprefix.go @@ -0,0 +1,21 @@ +package restic + +import "bytes" + +// ZeroPrefixLen returns the length of the longest all-zero prefix of p. +func ZeroPrefixLen(p []byte) (n int) { + // First skip 1kB-sized blocks, for speed. 
+ var zeros [1024]byte + + for len(p) >= len(zeros) && bytes.Equal(p[:len(zeros)], zeros[:]) { + p = p[len(zeros):] + n += len(zeros) + } + + for len(p) > 0 && p[0] == 0 { + p = p[1:] + n++ + } + + return n +} diff --git a/internal/restic/zeroprefix_test.go b/internal/restic/zeroprefix_test.go new file mode 100644 index 000000000..a21806851 --- /dev/null +++ b/internal/restic/zeroprefix_test.go @@ -0,0 +1,52 @@ +package restic_test + +import ( + "math/rand" + "testing" + + "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/test" +) + +func TestZeroPrefixLen(t *testing.T) { + var buf [2048]byte + + // test zero prefixes of various lengths + for i := len(buf) - 1; i >= 0; i-- { + buf[i] = 42 + skipped := restic.ZeroPrefixLen(buf[:]) + test.Equals(t, i, skipped) + } + // test buffers of various sizes + for i := 0; i < len(buf); i++ { + skipped := restic.ZeroPrefixLen(buf[i:]) + test.Equals(t, 0, skipped) + } +} + +func BenchmarkZeroPrefixLen(b *testing.B) { + var ( + buf [4<<20 + 37]byte + r = rand.New(rand.NewSource(0x618732)) + sumSkipped int64 + ) + + b.ReportAllocs() + b.SetBytes(int64(len(buf))) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + j := r.Intn(len(buf)) + buf[j] = 0xff + + skipped := restic.ZeroPrefixLen(buf[:]) + sumSkipped += int64(skipped) + + buf[j] = 0 + } + + // The closer this is to .5, the better. If it's far off, give the + // benchmark more time to run with -benchtime. 
+ b.Logf("average number of zeros skipped: %.3f", + float64(sumSkipped)/(float64(b.N*len(buf)))) +} diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 362d821d2..2deef1cd2 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -27,6 +27,7 @@ const ( type fileInfo struct { lock sync.Mutex inProgress bool + sparse bool size int64 location string // file on local filesystem relative to restorer basedir blobs interface{} // blobs of the file @@ -51,6 +52,8 @@ type fileRestorer struct { workerCount int filesWriter *filesWriter + zeroChunk restic.ID + sparse bool dst string files []*fileInfo @@ -61,7 +64,8 @@ func newFileRestorer(dst string, packLoader repository.BackendLoadFn, key *crypto.Key, idx func(restic.BlobHandle) []restic.PackedBlob, - connections uint) *fileRestorer { + connections uint, + sparse bool) *fileRestorer { // as packs are streamed the concurrency is limited by IO workerCount := int(connections) @@ -71,6 +75,8 @@ func newFileRestorer(dst string, idx: idx, packLoader: packLoader, filesWriter: newFilesWriter(workerCount), + zeroChunk: repository.ZeroChunk(), + sparse: sparse, workerCount: workerCount, dst: dst, Error: restorerAbortOnAllErrors, @@ -133,7 +139,16 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error { packOrder = append(packOrder, packID) } pack.files[file] = struct{}{} + if blob.ID.Equal(r.zeroChunk) { + file.sparse = r.sparse + } }) + if len(fileBlobs) == 1 { + // no need to preallocate files with a single block, thus we can always consider them to be sparse + // in addition, a short chunk will never match r.zeroChunk which would prevent sparseness for short files + file.sparse = r.sparse + } + if err != nil { // repository index is messed up, can't do anything return err @@ -253,7 +268,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error { file.inProgress = true createSize = file.size } - return 
r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize) + return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize, file.sparse) } err := sanitizeError(file, writeToFile()) if err != nil { diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go index fa781f8c8..06b4a6b81 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -147,10 +147,10 @@ func newTestRepo(content []TestFile) *TestRepo { return repo } -func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool) { +func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool, sparse bool) { repo := newTestRepo(content) - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, sparse) if files == nil { r.files = repo.files @@ -188,30 +188,32 @@ func TestFileRestorerBasic(t *testing.T) { tempdir, cleanup := rtest.TempDir(t) defer cleanup() - restoreAndVerify(t, tempdir, []TestFile{ - { - name: "file1", - blobs: []TestBlob{ - {"data1-1", "pack1-1"}, - {"data1-2", "pack1-2"}, + for _, sparse := range []bool{false, true} { + restoreAndVerify(t, tempdir, []TestFile{ + { + name: "file1", + blobs: []TestBlob{ + {"data1-1", "pack1-1"}, + {"data1-2", "pack1-2"}, + }, }, - }, - { - name: "file2", - blobs: []TestBlob{ - {"data2-1", "pack2-1"}, - {"data2-2", "pack2-2"}, + { + name: "file2", + blobs: []TestBlob{ + {"data2-1", "pack2-1"}, + {"data2-2", "pack2-2"}, + }, }, - }, - { - name: "file3", - blobs: []TestBlob{ - // same blob multiple times - {"data3-1", "pack3-1"}, - {"data3-1", "pack3-1"}, + { + name: "file3", + blobs: []TestBlob{ + // same blob multiple times + {"data3-1", "pack3-1"}, + {"data3-1", "pack3-1"}, + }, }, - }, - }, nil) + }, nil, sparse) + } } func TestFileRestorerPackSkip(t *testing.T) { @@ -221,28 +223,30 @@ 
func TestFileRestorerPackSkip(t *testing.T) { files := make(map[string]bool) files["file2"] = true - restoreAndVerify(t, tempdir, []TestFile{ - { - name: "file1", - blobs: []TestBlob{ - {"data1-1", "pack1"}, - {"data1-2", "pack1"}, - {"data1-3", "pack1"}, - {"data1-4", "pack1"}, - {"data1-5", "pack1"}, - {"data1-6", "pack1"}, + for _, sparse := range []bool{false, true} { + restoreAndVerify(t, tempdir, []TestFile{ + { + name: "file1", + blobs: []TestBlob{ + {"data1-1", "pack1"}, + {"data1-2", "pack1"}, + {"data1-3", "pack1"}, + {"data1-4", "pack1"}, + {"data1-5", "pack1"}, + {"data1-6", "pack1"}, + }, }, - }, - { - name: "file2", - blobs: []TestBlob{ - // file is contained in pack1 but need pack parts to be skipped - {"data1-2", "pack1"}, - {"data1-4", "pack1"}, - {"data1-6", "pack1"}, + { + name: "file2", + blobs: []TestBlob{ + // file is contained in pack1 but need pack parts to be skipped + {"data1-2", "pack1"}, + {"data1-4", "pack1"}, + {"data1-6", "pack1"}, + }, }, - }, - }, files) + }, files, sparse) + } } func TestErrorRestoreFiles(t *testing.T) { @@ -264,7 +268,7 @@ func TestErrorRestoreFiles(t *testing.T) { return loadError } - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false) r.files = repo.files err := r.restoreFiles(context.TODO()) @@ -304,7 +308,7 @@ func testPartialDownloadError(t *testing.T, part int) { return loader(ctx, h, length, offset, fn) } - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false) r.files = repo.files r.Error = func(s string, e error) error { // ignore errors as in the `restore` command diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index 8b7ee4353..0a26101f4 100644 --- a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -19,30 +19,34 @@ type filesWriter struct { type filesWriterBucket 
struct { lock sync.Mutex - files map[string]*os.File - users map[string]int + files map[string]*partialFile +} + +type partialFile struct { + *os.File + users int // Reference count. + sparse bool } func newFilesWriter(count int) *filesWriter { buckets := make([]filesWriterBucket, count) for b := 0; b < count; b++ { - buckets[b].files = make(map[string]*os.File) - buckets[b].users = make(map[string]int) + buckets[b].files = make(map[string]*partialFile) } return &filesWriter{ buckets: buckets, } } -func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error { +func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64, sparse bool) error { bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))] - acquireWriter := func() (*os.File, error) { + acquireWriter := func() (*partialFile, error) { bucket.lock.Lock() defer bucket.lock.Unlock() if wr, ok := bucket.files[path]; ok { - bucket.users[path]++ + bucket.files[path].users++ return wr, nil } @@ -53,39 +57,45 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create flags = os.O_WRONLY } - wr, err := os.OpenFile(path, flags, 0600) + f, err := os.OpenFile(path, flags, 0600) if err != nil { return nil, err } + wr := &partialFile{File: f, users: 1, sparse: sparse} bucket.files[path] = wr - bucket.users[path] = 1 if createSize >= 0 { - err := preallocateFile(wr, createSize) - if err != nil { - // Just log the preallocate error but don't let it cause the restore process to fail. - // Preallocate might return an error if the filesystem (implementation) does not - // support preallocation or our parameters combination to the preallocate call - // This should yield a syscall.ENOTSUP error, but some other errors might also - // show up. 
- debug.Log("Failed to preallocate %v with size %v: %v", path, createSize, err) + if sparse { + err = truncateSparse(f, createSize) + if err != nil { + return nil, err + } + } else { + err := preallocateFile(wr.File, createSize) + if err != nil { + // Just log the preallocate error but don't let it cause the restore process to fail. + // Preallocate might return an error if the filesystem (implementation) does not + // support preallocation or our parameters combination to the preallocate call + // This should yield a syscall.ENOTSUP error, but some other errors might also + // show up. + debug.Log("Failed to preallocate %v with size %v: %v", path, createSize, err) + } } } return wr, nil } - releaseWriter := func(wr *os.File) error { + releaseWriter := func(wr *partialFile) error { bucket.lock.Lock() defer bucket.lock.Unlock() - if bucket.users[path] == 1 { + if bucket.files[path].users == 1 { delete(bucket.files, path) - delete(bucket.users, path) return wr.Close() } - bucket.users[path]-- + bucket.files[path].users-- return nil } diff --git a/internal/restorer/fileswriter_test.go b/internal/restorer/fileswriter_test.go index a6b7e011b..825d52bca 100644 --- a/internal/restorer/fileswriter_test.go +++ b/internal/restorer/fileswriter_test.go @@ -16,21 +16,17 @@ func TestFilesWriterBasic(t *testing.T) { f1 := dir + "/f1" f2 := dir + "/f2" - rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2)) + rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.Equals(t, 0, len(w.buckets[0].users)) - rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2)) + rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.Equals(t, 0, len(w.buckets[0].users)) - rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1)) + rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.Equals(t, 0, len(w.buckets[0].users)) - rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, 
-1)) + rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.Equals(t, 0, len(w.buckets[0].users)) buf, err := ioutil.ReadFile(f1) rtest.OK(t, err) diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 829e5aedc..1b645a6f0 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -16,8 +16,9 @@ import ( // Restorer is used to restore a snapshot to a directory. type Restorer struct { - repo restic.Repository - sn *restic.Snapshot + repo restic.Repository + sn *restic.Snapshot + sparse bool Error func(location string, err error) error SelectFilter func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) @@ -26,9 +27,10 @@ type Restorer struct { var restorerAbortOnAllErrors = func(location string, err error) error { return err } // NewRestorer creates a restorer preloaded with the content from the snapshot id. -func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID) (*Restorer, error) { +func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID, sparse bool) (*Restorer, error) { r := &Restorer{ repo: repo, + sparse: sparse, Error: restorerAbortOnAllErrors, SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true }, } @@ -219,7 +221,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { } idx := NewHardlinkIndex() - filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections()) + filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections(), res.sparse) filerestorer.Error = res.Error debug.Log("first pass for %q", dst) diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index 2eea1a6fd..f57868b4f 100644 --- a/internal/restorer/restorer_test.go +++ 
b/internal/restorer/restorer_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "io/ioutil" + "math" "os" "path/filepath" "runtime" @@ -11,6 +12,7 @@ import ( "testing" "time" + "github.com/restic/restic/internal/archiver" "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" @@ -324,7 +326,7 @@ func TestRestorer(t *testing.T) { _, id := saveSnapshot(t, repo, test.Snapshot) t.Logf("snapshot saved as %v", id.Str()) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -447,7 +449,7 @@ func TestRestorerRelative(t *testing.T) { _, id := saveSnapshot(t, repo, test.Snapshot) t.Logf("snapshot saved as %v", id.Str()) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -682,7 +684,7 @@ func TestRestorerTraverseTree(t *testing.T) { defer cleanup() sn, id := saveSnapshot(t, repo, test.Snapshot) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -764,7 +766,7 @@ func TestRestorerConsistentTimestampsAndPermissions(t *testing.T) { }, }) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { @@ -824,7 +826,7 @@ func TestVerifyCancel(t *testing.T) { _, id := saveSnapshot(t, repo, snapshot) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) tempdir, cleanup := rtest.TempDir(t) @@ -849,3 +851,58 @@ func TestVerifyCancel(t *testing.T) { rtest.Equals(t, 1, len(errs)) rtest.Assert(t, strings.Contains(errs[0].Error(), "Invalid file size for"), "wrong error %q", 
errs[0].Error()) } + +func TestRestorerSparseFiles(t *testing.T) { + repo, cleanup := repository.TestRepository(t) + defer cleanup() + + var zeros [1<<20 + 13]byte + + target := &fs.Reader{ + Mode: 0600, + Name: "/zeros", + ReadCloser: ioutil.NopCloser(bytes.NewReader(zeros[:])), + } + sc := archiver.NewScanner(target) + err := sc.Scan(context.TODO(), []string{"/zeros"}) + rtest.OK(t, err) + + arch := archiver.New(repo, target, archiver.Options{}) + _, id, err := arch.Snapshot(context.Background(), []string{"/zeros"}, + archiver.SnapshotOptions{}) + rtest.OK(t, err) + + res, err := NewRestorer(context.TODO(), repo, id, true) + rtest.OK(t, err) + + tempdir, cleanup := rtest.TempDir(t) + defer cleanup() + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err = res.RestoreTo(ctx, tempdir) + rtest.OK(t, err) + + filename := filepath.Join(tempdir, "zeros") + content, err := ioutil.ReadFile(filename) + rtest.OK(t, err) + + rtest.Equals(t, len(zeros[:]), len(content)) + rtest.Equals(t, zeros[:], content) + + blocks := getBlockCount(t, filename) + if blocks < 0 { + return + } + + // st.Blocks is the size in 512-byte blocks. + denseBlocks := math.Ceil(float64(len(zeros)) / 512) + sparsity := 1 - float64(blocks)/denseBlocks + + // This should report 100% sparse. We don't assert that, + // as the behavior of sparse writes depends on the underlying + // file system as well as the OS. 
+ t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse", + len(zeros), blocks, 100*sparsity) +} diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go index 13e318c98..76f86c60b 100644 --- a/internal/restorer/restorer_unix_test.go +++ b/internal/restorer/restorer_unix_test.go @@ -30,7 +30,7 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) { }, }) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { @@ -60,3 +60,13 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) { rtest.Equals(t, s1.Ino, s2.Ino) } } + +func getBlockCount(t *testing.T, filename string) int64 { + fi, err := os.Stat(filename) + rtest.OK(t, err) + st := fi.Sys().(*syscall.Stat_t) + if st == nil { + return -1 + } + return st.Blocks +} diff --git a/internal/restorer/restorer_windows_test.go b/internal/restorer/restorer_windows_test.go new file mode 100644 index 000000000..3ec4b1f11 --- /dev/null +++ b/internal/restorer/restorer_windows_test.go @@ -0,0 +1,35 @@ +//go:build windows +// +build windows + +package restorer + +import ( + "math" + "syscall" + "testing" + "unsafe" + + rtest "github.com/restic/restic/internal/test" + "golang.org/x/sys/windows" +) + +func getBlockCount(t *testing.T, filename string) int64 { + libkernel32 := windows.NewLazySystemDLL("kernel32.dll") + err := libkernel32.Load() + rtest.OK(t, err) + proc := libkernel32.NewProc("GetCompressedFileSizeW") + err = proc.Find() + rtest.OK(t, err) + + namePtr, err := syscall.UTF16PtrFromString(filename) + rtest.OK(t, err) + + result, _, _ := proc.Call(uintptr(unsafe.Pointer(namePtr)), 0) + + const invalidFileSize = uintptr(4294967295) + if result == invalidFileSize { + return -1 + } + + return int64(math.Ceil(float64(result) / 512)) +} diff --git a/internal/restorer/sparsewrite.go 
b/internal/restorer/sparsewrite.go new file mode 100644 index 000000000..2c1f234de --- /dev/null +++ b/internal/restorer/sparsewrite.go @@ -0,0 +1,37 @@ +//go:build !windows +// +build !windows + +package restorer + +import ( + "github.com/restic/restic/internal/restic" +) + +// WriteAt writes p to f.File at offset. It tries to do a sparse write +// and updates f.size. +func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { + if !f.sparse { + return f.File.WriteAt(p, offset) + } + + n = len(p) + + // Skip the longest all-zero prefix of p. + // If it's long enough, we can punch a hole in the file. + skipped := restic.ZeroPrefixLen(p) + p = p[skipped:] + offset += int64(skipped) + + switch { + case len(p) == 0: + // All zeros, file already big enough. A previous WriteAt or + // Truncate will have produced the zeros in f.File. + + default: + var n2 int + n2, err = f.File.WriteAt(p, offset) + n = skipped + n2 + } + + return n, err +} diff --git a/internal/restorer/truncate_other.go b/internal/restorer/truncate_other.go new file mode 100644 index 000000000..ed7ab04c5 --- /dev/null +++ b/internal/restorer/truncate_other.go @@ -0,0 +1,10 @@ +//go:build !windows +// +build !windows + +package restorer + +import "os" + +func truncateSparse(f *os.File, size int64) error { + return f.Truncate(size) +} diff --git a/internal/restorer/truncate_windows.go b/internal/restorer/truncate_windows.go new file mode 100644 index 000000000..831a117d1 --- /dev/null +++ b/internal/restorer/truncate_windows.go @@ -0,0 +1,19 @@ +package restorer + +import ( + "os" + + "github.com/restic/restic/internal/debug" + "golang.org/x/sys/windows" +) + +func truncateSparse(f *os.File, size int64) error { + // try setting the sparse file attribute, but ignore the error if it fails + var t uint32 + err := windows.DeviceIoControl(windows.Handle(f.Fd()), windows.FSCTL_SET_SPARSE, nil, 0, nil, 0, &t, nil) + if err != nil { + debug.Log("failed to set sparse attribute for %v: %v", 
f.Name(), err) + } + + return f.Truncate(size) +}