Enable sparseness only conditionally

We can either preallocate storage for a file or sparsify it. This
change detects a restored file as sparse if one of its blobs is an
all-zero block, or if the file consists of only a single block. As
this file sparsification is just an approximation, hide it behind a
`--sparse` parameter.
This commit is contained in:
Michael Eischer 2022-08-07 17:26:46 +02:00
parent 3047bf611c
commit 5b6a77058a
9 changed files with 102 additions and 68 deletions

View File

@ -1,6 +1,7 @@
package main package main
import ( import (
"runtime"
"strings" "strings"
"time" "time"
@ -42,6 +43,7 @@ type RestoreOptions struct {
InsensitiveInclude []string InsensitiveInclude []string
Target string Target string
snapshotFilterOptions snapshotFilterOptions
Sparse bool
Verify bool Verify bool
} }
@ -58,6 +60,9 @@ func init() {
flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to") flags.StringVarP(&restoreOptions.Target, "target", "t", "", "directory to extract data to")
initSingleSnapshotFilterOptions(flags, &restoreOptions.snapshotFilterOptions) initSingleSnapshotFilterOptions(flags, &restoreOptions.snapshotFilterOptions)
if runtime.GOOS != "windows" {
flags.BoolVar(&restoreOptions.Sparse, "sparse", false, "restore files as sparse (not supported on windows)")
}
flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content") flags.BoolVar(&restoreOptions.Verify, "verify", false, "verify restored files content")
} }
@ -147,7 +152,7 @@ func runRestore(opts RestoreOptions, gopts GlobalOptions, args []string) error {
return err return err
} }
res, err := restorer.NewRestorer(ctx, repo, id) res, err := restorer.NewRestorer(ctx, repo, id, opts.Sparse)
if err != nil { if err != nil {
Exitf(2, "creating restorer failed: %v\n", err) Exitf(2, "creating restorer failed: %v\n", err)
} }

View File

@ -7,6 +7,7 @@ import (
"golang.org/x/sync/errgroup" "golang.org/x/sync/errgroup"
"github.com/restic/chunker"
"github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
@ -27,6 +28,7 @@ const (
type fileInfo struct { type fileInfo struct {
lock sync.Mutex lock sync.Mutex
inProgress bool inProgress bool
sparse bool
size int64 size int64
location string // file on local filesystem relative to restorer basedir location string // file on local filesystem relative to restorer basedir
blobs interface{} // blobs of the file blobs interface{} // blobs of the file
@ -51,6 +53,8 @@ type fileRestorer struct {
workerCount int workerCount int
filesWriter *filesWriter filesWriter *filesWriter
zeroChunk restic.ID
sparse bool
dst string dst string
files []*fileInfo files []*fileInfo
@ -61,7 +65,8 @@ func newFileRestorer(dst string,
packLoader repository.BackendLoadFn, packLoader repository.BackendLoadFn,
key *crypto.Key, key *crypto.Key,
idx func(restic.BlobHandle) []restic.PackedBlob, idx func(restic.BlobHandle) []restic.PackedBlob,
connections uint) *fileRestorer { connections uint,
sparse bool) *fileRestorer {
// as packs are streamed the concurrency is limited by IO // as packs are streamed the concurrency is limited by IO
workerCount := int(connections) workerCount := int(connections)
@ -71,6 +76,8 @@ func newFileRestorer(dst string,
idx: idx, idx: idx,
packLoader: packLoader, packLoader: packLoader,
filesWriter: newFilesWriter(workerCount), filesWriter: newFilesWriter(workerCount),
zeroChunk: restic.Hash(make([]byte, chunker.MinSize)),
sparse: sparse,
workerCount: workerCount, workerCount: workerCount,
dst: dst, dst: dst,
Error: restorerAbortOnAllErrors, Error: restorerAbortOnAllErrors,
@ -133,7 +140,16 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
packOrder = append(packOrder, packID) packOrder = append(packOrder, packID)
} }
pack.files[file] = struct{}{} pack.files[file] = struct{}{}
if blob.ID.Equal(r.zeroChunk) {
file.sparse = r.sparse
}
}) })
if len(fileBlobs) == 1 {
// no need to preallocate files with a single block, thus we can always consider them to be sparse
// in addition, a short chunk will never match r.zeroChunk which would prevent sparseness for short files
file.sparse = r.sparse
}
if err != nil { if err != nil {
// repository index is messed up, can't do anything // repository index is messed up, can't do anything
return err return err
@ -253,7 +269,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
file.inProgress = true file.inProgress = true
createSize = file.size createSize = file.size
} }
return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize) return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize, file.sparse)
} }
err := sanitizeError(file, writeToFile()) err := sanitizeError(file, writeToFile())
if err != nil { if err != nil {

View File

@ -147,10 +147,10 @@ func newTestRepo(content []TestFile) *TestRepo {
return repo return repo
} }
func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool) { func restoreAndVerify(t *testing.T, tempdir string, content []TestFile, files map[string]bool, sparse bool) {
repo := newTestRepo(content) repo := newTestRepo(content)
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, sparse)
if files == nil { if files == nil {
r.files = repo.files r.files = repo.files
@ -188,30 +188,32 @@ func TestFileRestorerBasic(t *testing.T) {
tempdir, cleanup := rtest.TempDir(t) tempdir, cleanup := rtest.TempDir(t)
defer cleanup() defer cleanup()
restoreAndVerify(t, tempdir, []TestFile{ for _, sparse := range []bool{false, true} {
{ restoreAndVerify(t, tempdir, []TestFile{
name: "file1", {
blobs: []TestBlob{ name: "file1",
{"data1-1", "pack1-1"}, blobs: []TestBlob{
{"data1-2", "pack1-2"}, {"data1-1", "pack1-1"},
{"data1-2", "pack1-2"},
},
}, },
}, {
{ name: "file2",
name: "file2", blobs: []TestBlob{
blobs: []TestBlob{ {"data2-1", "pack2-1"},
{"data2-1", "pack2-1"}, {"data2-2", "pack2-2"},
{"data2-2", "pack2-2"}, },
}, },
}, {
{ name: "file3",
name: "file3", blobs: []TestBlob{
blobs: []TestBlob{ // same blob multiple times
// same blob multiple times {"data3-1", "pack3-1"},
{"data3-1", "pack3-1"}, {"data3-1", "pack3-1"},
{"data3-1", "pack3-1"}, },
}, },
}, }, nil, sparse)
}, nil) }
} }
func TestFileRestorerPackSkip(t *testing.T) { func TestFileRestorerPackSkip(t *testing.T) {
@ -221,28 +223,30 @@ func TestFileRestorerPackSkip(t *testing.T) {
files := make(map[string]bool) files := make(map[string]bool)
files["file2"] = true files["file2"] = true
restoreAndVerify(t, tempdir, []TestFile{ for _, sparse := range []bool{false, true} {
{ restoreAndVerify(t, tempdir, []TestFile{
name: "file1", {
blobs: []TestBlob{ name: "file1",
{"data1-1", "pack1"}, blobs: []TestBlob{
{"data1-2", "pack1"}, {"data1-1", "pack1"},
{"data1-3", "pack1"}, {"data1-2", "pack1"},
{"data1-4", "pack1"}, {"data1-3", "pack1"},
{"data1-5", "pack1"}, {"data1-4", "pack1"},
{"data1-6", "pack1"}, {"data1-5", "pack1"},
{"data1-6", "pack1"},
},
}, },
}, {
{ name: "file2",
name: "file2", blobs: []TestBlob{
blobs: []TestBlob{ // file is contained in pack1 but need pack parts to be skipped
// file is contained in pack1 but need pack parts to be skipped {"data1-2", "pack1"},
{"data1-2", "pack1"}, {"data1-4", "pack1"},
{"data1-4", "pack1"}, {"data1-6", "pack1"},
{"data1-6", "pack1"}, },
}, },
}, }, files, sparse)
}, files) }
} }
func TestErrorRestoreFiles(t *testing.T) { func TestErrorRestoreFiles(t *testing.T) {
@ -264,7 +268,7 @@ func TestErrorRestoreFiles(t *testing.T) {
return loadError return loadError
} }
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false)
r.files = repo.files r.files = repo.files
err := r.restoreFiles(context.TODO()) err := r.restoreFiles(context.TODO())
@ -304,7 +308,7 @@ func testPartialDownloadError(t *testing.T, part int) {
return loader(ctx, h, length, offset, fn) return loader(ctx, h, length, offset, fn)
} }
r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2) r := newFileRestorer(tempdir, repo.loader, repo.key, repo.Lookup, 2, false)
r.files = repo.files r.files = repo.files
r.Error = func(s string, e error) error { r.Error = func(s string, e error) error {
// ignore errors as in the `restore` command // ignore errors as in the `restore` command

View File

@ -24,8 +24,9 @@ type filesWriterBucket struct {
type partialFile struct { type partialFile struct {
*os.File *os.File
size int64 // File size, tracked for sparse writes (not on Windows). size int64 // File size, tracked for sparse writes (not on Windows).
users int // Reference count. users int // Reference count.
sparse bool
} }
func newFilesWriter(count int) *filesWriter { func newFilesWriter(count int) *filesWriter {
@ -38,7 +39,7 @@ func newFilesWriter(count int) *filesWriter {
} }
} }
func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error { func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64, sparse bool) error {
bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))] bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))]
acquireWriter := func() (*partialFile, error) { acquireWriter := func() (*partialFile, error) {
@ -62,7 +63,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
return nil, err return nil, err
} }
wr := &partialFile{File: f, users: 1} wr := &partialFile{File: f, users: 1, sparse: sparse}
if createSize < 0 { if createSize < 0 {
info, err := f.Stat() info, err := f.Stat()
if err != nil { if err != nil {
@ -72,7 +73,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
} }
bucket.files[path] = wr bucket.files[path] = wr
if createSize >= 0 { if createSize >= 0 && !sparse {
err := preallocateFile(wr.File, createSize) err := preallocateFile(wr.File, createSize)
if err != nil { if err != nil {
// Just log the preallocate error but don't let it cause the restore process to fail. // Just log the preallocate error but don't let it cause the restore process to fail.

View File

@ -16,16 +16,16 @@ func TestFilesWriterBasic(t *testing.T) {
f1 := dir + "/f1" f1 := dir + "/f1"
f2 := dir + "/f2" f2 := dir + "/f2"
rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2)) rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2, false))
rtest.Equals(t, 0, len(w.buckets[0].files)) rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2)) rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2, false))
rtest.Equals(t, 0, len(w.buckets[0].files)) rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1)) rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1, false))
rtest.Equals(t, 0, len(w.buckets[0].files)) rtest.Equals(t, 0, len(w.buckets[0].files))
rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1)) rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1, false))
rtest.Equals(t, 0, len(w.buckets[0].files)) rtest.Equals(t, 0, len(w.buckets[0].files))
buf, err := ioutil.ReadFile(f1) buf, err := ioutil.ReadFile(f1)

View File

@ -16,8 +16,9 @@ import (
// Restorer is used to restore a snapshot to a directory. // Restorer is used to restore a snapshot to a directory.
type Restorer struct { type Restorer struct {
repo restic.Repository repo restic.Repository
sn *restic.Snapshot sn *restic.Snapshot
sparse bool
Error func(location string, err error) error Error func(location string, err error) error
SelectFilter func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) SelectFilter func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool)
@ -26,9 +27,10 @@ type Restorer struct {
var restorerAbortOnAllErrors = func(location string, err error) error { return err } var restorerAbortOnAllErrors = func(location string, err error) error { return err }
// NewRestorer creates a restorer preloaded with the content from the snapshot id. // NewRestorer creates a restorer preloaded with the content from the snapshot id.
func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID) (*Restorer, error) { func NewRestorer(ctx context.Context, repo restic.Repository, id restic.ID, sparse bool) (*Restorer, error) {
r := &Restorer{ r := &Restorer{
repo: repo, repo: repo,
sparse: sparse,
Error: restorerAbortOnAllErrors, Error: restorerAbortOnAllErrors,
SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true }, SelectFilter: func(string, string, *restic.Node) (bool, bool) { return true, true },
} }
@ -219,7 +221,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error {
} }
idx := NewHardlinkIndex() idx := NewHardlinkIndex()
filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections()) filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections(), res.sparse)
filerestorer.Error = res.Error filerestorer.Error = res.Error
debug.Log("first pass for %q", dst) debug.Log("first pass for %q", dst)

View File

@ -324,7 +324,7 @@ func TestRestorer(t *testing.T) {
_, id := saveSnapshot(t, repo, test.Snapshot) _, id := saveSnapshot(t, repo, test.Snapshot)
t.Logf("snapshot saved as %v", id.Str()) t.Logf("snapshot saved as %v", id.Str())
res, err := NewRestorer(context.TODO(), repo, id) res, err := NewRestorer(context.TODO(), repo, id, false)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -447,7 +447,7 @@ func TestRestorerRelative(t *testing.T) {
_, id := saveSnapshot(t, repo, test.Snapshot) _, id := saveSnapshot(t, repo, test.Snapshot)
t.Logf("snapshot saved as %v", id.Str()) t.Logf("snapshot saved as %v", id.Str())
res, err := NewRestorer(context.TODO(), repo, id) res, err := NewRestorer(context.TODO(), repo, id, false)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -682,7 +682,7 @@ func TestRestorerTraverseTree(t *testing.T) {
defer cleanup() defer cleanup()
sn, id := saveSnapshot(t, repo, test.Snapshot) sn, id := saveSnapshot(t, repo, test.Snapshot)
res, err := NewRestorer(context.TODO(), repo, id) res, err := NewRestorer(context.TODO(), repo, id, false)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -764,7 +764,7 @@ func TestRestorerConsistentTimestampsAndPermissions(t *testing.T) {
}, },
}) })
res, err := NewRestorer(context.TODO(), repo, id) res, err := NewRestorer(context.TODO(), repo, id, false)
rtest.OK(t, err) rtest.OK(t, err)
res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
@ -824,7 +824,7 @@ func TestVerifyCancel(t *testing.T) {
_, id := saveSnapshot(t, repo, snapshot) _, id := saveSnapshot(t, repo, snapshot)
res, err := NewRestorer(context.TODO(), repo, id) res, err := NewRestorer(context.TODO(), repo, id, false)
rtest.OK(t, err) rtest.OK(t, err)
tempdir, cleanup := rtest.TempDir(t) tempdir, cleanup := rtest.TempDir(t)

View File

@ -36,7 +36,7 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
}, },
}) })
res, err := NewRestorer(context.TODO(), repo, id) res, err := NewRestorer(context.TODO(), repo, id, false)
rtest.OK(t, err) rtest.OK(t, err)
res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) { res.SelectFilter = func(item string, dstpath string, node *restic.Node) (selectedForRestore bool, childMayBeSelected bool) {
@ -85,8 +85,9 @@ func TestRestorerSparseFiles(t *testing.T) {
arch := archiver.New(repo, target, archiver.Options{}) arch := archiver.New(repo, target, archiver.Options{})
_, id, err := arch.Snapshot(context.Background(), []string{"/zeros"}, _, id, err := arch.Snapshot(context.Background(), []string{"/zeros"},
archiver.SnapshotOptions{}) archiver.SnapshotOptions{})
rtest.OK(t, err)
res, err := NewRestorer(repo, id) res, err := NewRestorer(context.TODO(), repo, id, true)
rtest.OK(t, err) rtest.OK(t, err)
tempdir, cleanup := rtest.TempDir(t) tempdir, cleanup := rtest.TempDir(t)
@ -102,6 +103,7 @@ func TestRestorerSparseFiles(t *testing.T) {
content, err := ioutil.ReadFile(filename) content, err := ioutil.ReadFile(filename)
rtest.OK(t, err) rtest.OK(t, err)
rtest.Equals(t, len(zeros[:]), len(content))
rtest.Equals(t, zeros[:], content) rtest.Equals(t, zeros[:], content)
fi, err := os.Stat(filename) fi, err := os.Stat(filename)

View File

@ -8,6 +8,10 @@ import "bytes"
// WriteAt writes p to f.File at offset. It tries to do a sparse write // WriteAt writes p to f.File at offset. It tries to do a sparse write
// and updates f.size. // and updates f.size.
func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) { func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) {
if !f.sparse {
return f.File.WriteAt(p, offset)
}
n = len(p) n = len(p)
end := offset + int64(n) end := offset + int64(n)