From 5b6a77058a0ef386fee07dd579c5765a2d356599 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 7 Aug 2022 17:26:46 +0200 Subject: [PATCH] Enable sparseness only conditionally We can either preallocate storage for a file or sparsify it. This detects a file as sparse if it contains an all-zero block or consists of only one block. As the file sparsification is just an approximation, hide it behind a `--sparse` parameter. --- cmd/restic/cmd_restore.go | 7 +- internal/restorer/filerestorer.go | 20 +++++- internal/restorer/filerestorer_test.go | 94 +++++++++++++------------ internal/restorer/fileswriter.go | 11 +-- internal/restorer/fileswriter_test.go | 8 +-- internal/restorer/restorer.go | 10 +-- internal/restorer/restorer_test.go | 10 +-- internal/restorer/restorer_unix_test.go | 6 +- internal/restorer/sparsewrite.go | 4 ++ 9 files changed, 102 insertions(+), 68 deletions(-) diff --git a/cmd/restic/cmd_restore.go b/cmd/restic/cmd_restore.go index b72799198..faf8c851a 100644 --- a/cmd/restic/cmd_restore.go +++ b/cmd/restic/cmd_restore.go @@ -1,6 +1,7 @@ package main import ( + "runtime" "strings" "time" @@ -42,6 +43,7 @@ type RestoreOptions struct { InsensitiveInclude []string Target string snapshotFilterOptions + Sparse bool Verify bool } @@ -58,6 +60,9 @@ func init() { flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to") initSingleSnapshotFilterOptions(flags, &restoreOptions.snapshotFilterOptions) + if runtime.GOOS != "windows" { + flags.BoolVar(&restoreOptions.Sparse, "sparse", false, "restore files as sparse (not supported on windows)") + } flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content") } @@ -147,7 +152,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error { return err } - res, err := restorer.NewRestorer(ctx, repo, id) + res, err := restorer.NewRestorer(ctx, repo, id, opts.Sparse) if err != nil { Exitf(2, "creating restorer failed: %v\n", 
err) } diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 362d821d2..659458cd8 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -7,6 +7,7 @@ import ( "golang.org/x/sync/errgroup" + "github.com/restic/chunker" "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" @@ -27,6 +28,7 @@ const ( type fileInfo struct { lock sync.Mutex inProgress bool + sparse bool size int64 location string // file on local filesystem relative to restorer basedir blobs interface{} // blobs of the file @@ -51,6 +53,8 @@ type fileRestorer struct { workerCount int filesWriter *filesWriter + zeroChunk restic.ID + sparse bool dst string files []*fileInfo @@ -61,7 +65,8 @@ func newFileRestorer(dst string, packLoader repository.BackendLoadFn, key *crypto.Key, idx func(restic.BlobHandle) []restic.PackedBlob, - connections uint) *fileRestorer { + connections uint, + sparse bool) *fileRestorer { // as packs are streamed the concurrency is limited by IO workerCount := int(connections) @@ -71,6 +76,8 @@ func newFileRestorer(dst string, idx: idx, packLoader: packLoader, filesWriter: newFilesWriter(workerCount), + zeroChunk: restic.Hash(make([]byte, chunker.MinSize)), + sparse: sparse, workerCount: workerCount, dst: dst, Error: restorerAbortOnAllErrors, @@ -133,7 +140,16 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error { packOrder = append(packOrder, packID) } pack.files[file] = struct{}{} + if blob.ID.Equal(r.zeroChunk) { + file.sparse = r.sparse + } }) + if len(fileBlobs) == 1 { + // no need to preallocate files with a single block, thus we can always consider them to be sparse + // in addition, a short chunk will never match r.zeroChunk which would prevent sparseness for short files + file.sparse = r.sparse + } + if err != nil { // repository index is messed up, can't do anything return err @@ -253,7 +269,7 @@ func (r *fileRestorer) 
downloadPack(ctx context.Context, pack *packInfo) error { file.inProgress = true createSize = file.size } - return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize) + return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize, file.sparse) } err := sanitizeError(file, writeToFile()) if err != nil { diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go index fa781f8c8..06b4a6b81 100644 --- a/internal/restorer/filerestorer_test.go +++ b/internal/restorer/filerestorer_test.go @@ -147,10 +147,10 @@ func newTestRepo(content []TestFile) *TestRepo { return repo } -func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool) { +func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool, sparse bool) { repo := newTestRepo(content) - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, sparse) if files == nil { r.files = repo.files @@ -188,30 +188,32 @@ func TestFileRestorerBasic(t *testing.T) { tempdir, cleanup := rtest.TempDir(t) defer cleanup() - restoreAndVerify(t, tempdir, []TestFile{ - { - name: "file1", - blobs: []TestBlob{ - {"data1-1", "pack1-1"}, - {"data1-2", "pack1-2"}, + for _, sparse := range []bool{false, true} { + restoreAndVerify(t, tempdir, []TestFile{ + { + name: "file1", + blobs: []TestBlob{ + {"data1-1", "pack1-1"}, + {"data1-2", "pack1-2"}, + }, }, - }, - { - name: "file2", - blobs: []TestBlob{ - {"data2-1", "pack2-1"}, - {"data2-2", "pack2-2"}, + { + name: "file2", + blobs: []TestBlob{ + {"data2-1", "pack2-1"}, + {"data2-2", "pack2-2"}, + }, }, - }, - { - name: "file3", - blobs: []TestBlob{ - // same blob multiple times - {"data3-1", "pack3-1"}, - {"data3-1", "pack3-1"}, + { + name: "file3", + blobs: []TestBlob{ + // same blob multiple times + {"data3-1", "pack3-1"}, + {"data3-1", "pack3-1"}, + 
}, }, - }, - }, nil) + }, nil, sparse) + } } func TestFileRestorerPackSkip(t *testing.T) { @@ -221,28 +223,30 @@ func TestFileRestorerPackSkip(t *testing.T) { files := make(map[string]bool) files["file2"] = true - restoreAndVerify(t, tempdir, []TestFile{ - { - name: "file1", - blobs: []TestBlob{ - {"data1-1", "pack1"}, - {"data1-2", "pack1"}, - {"data1-3", "pack1"}, - {"data1-4", "pack1"}, - {"data1-5", "pack1"}, - {"data1-6", "pack1"}, + for _, sparse := range []bool{false, true} { + restoreAndVerify(t, tempdir, []TestFile{ + { + name: "file1", + blobs: []TestBlob{ + {"data1-1", "pack1"}, + {"data1-2", "pack1"}, + {"data1-3", "pack1"}, + {"data1-4", "pack1"}, + {"data1-5", "pack1"}, + {"data1-6", "pack1"}, + }, }, - }, - { - name: "file2", - blobs: []TestBlob{ - // file is contained in pack1 but need pack parts to be skipped - {"data1-2", "pack1"}, - {"data1-4", "pack1"}, - {"data1-6", "pack1"}, + { + name: "file2", + blobs: []TestBlob{ + // file is contained in pack1 but need pack parts to be skipped + {"data1-2", "pack1"}, + {"data1-4", "pack1"}, + {"data1-6", "pack1"}, + }, }, - }, - }, files) + }, files, sparse) + } } func TestErrorRestoreFiles(t *testing.T) { @@ -264,7 +268,7 @@ func TestErrorRestoreFiles(t *testing.T) { return loadError } - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false) r.files = repo.files err := r.restoreFiles(context.TODO()) @@ -304,7 +308,7 @@ func testPartialDownloadError(t *testing.T, part int) { return loader(ctx, h, length, offset, fn) } - r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) + r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false) r.files = repo.files r.Error = func(s string, e error) error { // ignore errors as in the `restore` command diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index aa943e11b..d7483cd84 100644 --- 
a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -24,8 +24,9 @@ type filesWriterBucket struct { type partialFile struct { *os.File - size int64 // File size, tracked for sparse writes (not on Windows). - users int // Reference count. + size int64 // File size, tracked for sparse writes (not on Windows). + users int // Reference count. + sparse bool } func newFilesWriter(count int) *filesWriter { @@ -38,7 +39,7 @@ func newFilesWriter(count int) *filesWriter { } } -func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error { +func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64, sparse bool) error { bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))] acquireWriter := func() (*partialFile, error) { @@ -62,7 +63,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create return nil, err } - wr := &partialFile{File: f, users: 1} + wr := &partialFile{File: f, users: 1, sparse: sparse} if createSize < 0 { info, err := f.Stat() if err != nil { @@ -72,7 +73,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create } bucket.files[path] = wr - if createSize >= 0 { + if createSize >= 0 && !sparse { err := preallocateFile(wr.File, createSize) if err != nil { // Just log the preallocate error but don't let it cause the restore process to fail. 
diff --git a/internal/restorer/fileswriter_test.go b/internal/restorer/fileswriter_test.go index f725be91c..825d52bca 100644 --- a/internal/restorer/fileswriter_test.go +++ b/internal/restorer/fileswriter_test.go @@ -16,16 +16,16 @@ func TestFilesWriterBasic(t *testing.T) { f1 := dir + "/f1" f2 := dir + "/f2" - rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2)) + rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2)) + rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1)) + rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) - rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1)) + rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1, false)) rtest.Equals(t, 0, len(w.buckets[0].files)) buf, err := ioutil.ReadFile(f1) diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 829e5aedc..1b645a6f0 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -16,8 +16,9 @@ import ( // Restorer is used to restore a snapshot to a directory. type Restorer struct { - repo restic.Repository - sn *restic.Snapshot + repo restic.Repository + sn *restic.Snapshot + sparse bool Error func(location string, err error) error SelectFilter func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) @@ -26,9 +27,10 @@ type Restorer struct { var restorerAbortOnAllErrors = func(location string, err error) error { return err } // NewRestorer creates a restorer preloaded with the content from the snapshot id. 
-func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID) (*Restorer, error) { +func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID, sparse bool) (*Restorer, error) { r := &Restorer{ repo: repo, + sparse: sparse, Error: restorerAbortOnAllErrors, SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true }, } @@ -219,7 +221,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { } idx := NewHardlinkIndex() - filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections()) + filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections(), res.sparse) filerestorer.Error = res.Error debug.Log("first pass for %q", dst) diff --git a/internal/restorer/restorer_test.go b/internal/restorer/restorer_test.go index 2eea1a6fd..7113ca100 100644 --- a/internal/restorer/restorer_test.go +++ b/internal/restorer/restorer_test.go @@ -324,7 +324,7 @@ func TestRestorer(t *testing.T) { _, id := saveSnapshot(t, repo, test.Snapshot) t.Logf("snapshot saved as %v", id.Str()) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -447,7 +447,7 @@ func TestRestorerRelative(t *testing.T) { _, id := saveSnapshot(t, repo, test.Snapshot) t.Logf("snapshot saved as %v", id.Str()) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -682,7 +682,7 @@ func TestRestorerTraverseTree(t *testing.T) { defer cleanup() sn, id := saveSnapshot(t, repo, test.Snapshot) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) if err != nil { t.Fatal(err) } @@ -764,7 +764,7 @@ func TestRestorerConsistentTimestampsAndPermissions(t *testing.T) { }, }) - res, err := 
NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { @@ -824,7 +824,7 @@ func TestVerifyCancel(t *testing.T) { _, id := saveSnapshot(t, repo, snapshot) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) tempdir, cleanup := rtest.TempDir(t) diff --git a/internal/restorer/restorer_unix_test.go b/internal/restorer/restorer_unix_test.go index 52216088f..9e5f61ff4 100644 --- a/internal/restorer/restorer_unix_test.go +++ b/internal/restorer/restorer_unix_test.go @@ -36,7 +36,7 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) { }, }) - res, err := NewRestorer(context.TODO(), repo, id) + res, err := NewRestorer(context.TODO(), repo, id, false) rtest.OK(t, err) res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { @@ -85,8 +85,9 @@ func TestRestorerSparseFiles(t *testing.T) { arch := archiver.New(repo, target, archiver.Options{}) _, id, err := arch.Snapshot(context.Background(), []string{"/zeros"}, archiver.SnapshotOptions{}) + rtest.OK(t, err) - res, err := NewRestorer(repo, id) + res, err := NewRestorer(context.TODO(), repo, id, true) rtest.OK(t, err) tempdir, cleanup := rtest.TempDir(t) @@ -102,6 +103,7 @@ func TestRestorerSparseFiles(t *testing.T) { content, err := ioutil.ReadFile(filename) rtest.OK(t, err) + rtest.Equals(t, len(zeros[:]), len(content)) rtest.Equals(t, zeros[:], content) fi, err := os.Stat(filename) diff --git a/internal/restorer/sparsewrite.go b/internal/restorer/sparsewrite.go index a2de93aa4..dec95d784 100644 --- a/internal/restorer/sparsewrite.go +++ b/internal/restorer/sparsewrite.go @@ -8,6 +8,10 @@ import "bytes" // WriteAt writes p to f.File at offset. It tries to do a sparse write // and updates f.size. 
func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { + if !f.sparse { + return f.File.WriteAt(p, offset) + } + n = len(p) end := offset + int64(n)