Write sparse files in restorer
This writes files by using (*os.File).Truncate, which resolves to the
truncate system call on Unix.

Compared to the naive loop,

    for _, b := range p {
        if b != 0 {
            return false
        }
    }

the optimized allZero is about 10× faster:

    name        old time/op    new time/op     delta
    AllZero-8   1.09ms ± 1%    0.09ms ± 1%     -92.10%    (p=0.000 n=10+10)

    name        old speed      new speed       delta
    AllZero-8   3.84GB/s ± 1%  48.59GB/s ± 1%  +1166.51%  (p=0.000 n=10+10)
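For context, a rough sketch of the two variants those numbers compare. The diff below does not include allZero itself (the final code uses zeroPrefixLen instead), so the optimized variant here is an assumption: the same 1 kB block-compare trick zeroPrefixLen uses, which routes most of the work through bytes.Equal's optimized memory comparison.

package main

import (
	"bytes"
	"fmt"
)

// allZeroNaive is the byte-at-a-time loop quoted above.
func allZeroNaive(p []byte) bool {
	for _, b := range p {
		if b != 0 {
			return false
		}
	}
	return true
}

// allZeroFast compares 1 kB blocks against a zero buffer; the tail
// shorter than one block falls back to the byte loop.
func allZeroFast(p []byte) bool {
	var zeros [1024]byte
	for len(p) >= len(zeros) {
		if !bytes.Equal(p[:len(zeros)], zeros[:]) {
			return false
		}
		p = p[len(zeros):]
	}
	for _, b := range p {
		if b != 0 {
			return false
		}
	}
	return true
}

func main() {
	buf := make([]byte, 1<<20)
	fmt.Println(allZeroNaive(buf), allZeroFast(buf)) // true true
	buf[1<<19] = 1
	fmt.Println(allZeroNaive(buf), allZeroFast(buf)) // false false
}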
This commit is contained in:
parent: b48766d7b8
commit: 5d4568d393
@@ -19,15 +19,19 @@ type filesWriter struct {
 
 type filesWriterBucket struct {
 	lock  sync.Mutex
-	files map[string]*os.File
-	users map[string]int
+	files map[string]*partialFile
+}
+
+type partialFile struct {
+	*os.File
+	size  int64 // File size, tracked for sparse writes (not on Windows).
+	users int   // Reference count.
 }
 
 func newFilesWriter(count int) *filesWriter {
 	buckets := make([]filesWriterBucket, count)
 	for b := 0; b < count; b++ {
-		buckets[b].files = make(map[string]*os.File)
-		buckets[b].users = make(map[string]int)
+		buckets[b].files = make(map[string]*partialFile)
 	}
 	return &filesWriter{
 		buckets: buckets,
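The key move here: partialFile embeds *os.File, so it keeps the full file API, while its own WriteAt definition (in sparsewrite.go, below) shadows the embedded method on non-Windows platforms. A minimal, self-contained illustration of that shadowing, with hypothetical names (base, wrapper):

package main

import "fmt"

type base struct{}

func (base) WriteAt(p []byte, off int64) (int, error) {
	fmt.Println("base.WriteAt called")
	return len(p), nil
}

// wrapper embeds base and declares its own WriteAt, which shadows the
// embedded method; partialFile uses the same mechanism to intercept
// (*os.File).WriteAt.
type wrapper struct {
	base
	size int64
}

func (w *wrapper) WriteAt(p []byte, off int64) (int, error) {
	if end := off + int64(len(p)); end > w.size {
		w.size = end // track the write frontier, like partialFile.size
	}
	return w.base.WriteAt(p, off) // delegate to the embedded method
}

func main() {
	w := &wrapper{}
	w.WriteAt([]byte("hi"), 40)
	fmt.Println("tracked size:", w.size) // 42
}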
@@ -37,12 +41,12 @@ func newFilesWriter(count int) *filesWriter {
 func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error {
 	bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))]
 
-	acquireWriter := func() (*os.File, error) {
+	acquireWriter := func() (*partialFile, error) {
 		bucket.lock.Lock()
 		defer bucket.lock.Unlock()
 
 		if wr, ok := bucket.files[path]; ok {
-			bucket.users[path]++
+			bucket.files[path].users++
 			return wr, nil
 		}
 
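The bucket lookup above shards both the file map and its mutex by a hash of the path (restic uses github.com/cespare/xxhash), so writers to different files rarely contend on one lock, while all writers to the same file serialize on the same bucket. A standalone sketch of the same pattern, with the standard library's hash/fnv swapped in for xxhash:

package main

import (
	"fmt"
	"hash/fnv"
	"sync"
)

type bucket struct {
	lock  sync.Mutex
	files map[string]int // placeholder payload; restic stores *partialFile here
}

// pick returns the bucket responsible for path. All writers to one path
// hash to the same bucket and serialize on its mutex; writers to
// different paths usually land in different buckets and run in parallel.
func pick(buckets []bucket, path string) *bucket {
	h := fnv.New64a()
	h.Write([]byte(path))
	return &buckets[uint(h.Sum64())%uint(len(buckets))]
}

func main() {
	buckets := make([]bucket, 8)
	for i := range buckets {
		buckets[i].files = make(map[string]int)
	}

	b := pick(buckets, "/some/path")
	b.lock.Lock()
	b.files["/some/path"]++
	b.lock.Unlock()

	fmt.Println(b.files["/some/path"]) // 1
}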
@@ -53,16 +57,23 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
 			flags = os.O_WRONLY
 		}
 
-		wr, err := os.OpenFile(path, flags, 0600)
+		f, err := os.OpenFile(path, flags, 0600)
 		if err != nil {
 			return nil, err
 		}
 
+		wr := &partialFile{File: f, users: 1}
+		if createSize < 0 {
+			info, err := f.Stat()
+			if err != nil {
+				return nil, err
+			}
+			wr.size = info.Size()
+		}
 		bucket.files[path] = wr
-		bucket.users[path] = 1
 
 		if createSize >= 0 {
-			err := preallocateFile(wr, createSize)
+			err := preallocateFile(wr.File, createSize)
 			if err != nil {
 				// Just log the preallocate error but don't let it cause the restore process to fail.
 				// Preallocate might return an error if the filesystem (implementation) does not
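preallocateFile is restic's platform-specific helper; the diff only shows its call site, now passing the embedded wr.File. Purely as a hedged sketch of the weakest portable fallback one could imagine (this is not restic's implementation, which uses real preallocation such as fallocate(2) where available):

package main

import (
	"fmt"
	"io/ioutil"
	"os"
)

// preallocateSketch extends f to size bytes. Real preallocation reserves
// disk blocks up front; Truncate only sets the file length, so this
// fallback typically creates a hole rather than reserving space.
func preallocateSketch(f *os.File, size int64) error {
	fi, err := f.Stat()
	if err != nil {
		return err
	}
	if fi.Size() >= size {
		return nil // already long enough
	}
	return f.Truncate(size)
}

func main() {
	f, err := ioutil.TempFile("", "prealloc")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()

	if err := preallocateSketch(f, 1<<20); err != nil {
		panic(err)
	}
	fi, _ := f.Stat()
	fmt.Println("length:", fi.Size()) // 1048576
}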
@@ -76,16 +87,15 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create
 		return wr, nil
 	}
 
-	releaseWriter := func(wr *os.File) error {
+	releaseWriter := func(wr *partialFile) error {
 		bucket.lock.Lock()
 		defer bucket.lock.Unlock()
 
-		if bucket.users[path] == 1 {
+		if bucket.files[path].users == 1 {
 			delete(bucket.files, path)
-			delete(bucket.users, path)
 			return wr.Close()
 		}
-		bucket.users[path]--
+		bucket.files[path].users--
 		return nil
 	}
 
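With the count folded into partialFile.users, the separate users map disappears. The acquire/release pair implements a per-path reference count, so a file shared by several concurrent blob writers is closed exactly once, by its last user. A condensed, self-contained sketch of the pattern (handle and registry are hypothetical names, not restic code):

package main

import (
	"fmt"
	"sync"
)

type handle struct{ users int }

type registry struct {
	mu sync.Mutex
	m  map[string]*handle
}

func (r *registry) acquire(key string) *handle {
	r.mu.Lock()
	defer r.mu.Unlock()
	if h, ok := r.m[key]; ok {
		h.users++
		return h
	}
	h := &handle{users: 1}
	r.m[key] = h
	return h
}

// release drops one reference and reports whether the caller was the
// last user and should clean up (Close the file, in filesWriter's case).
// Callers must pair every release with an earlier acquire.
func (r *registry) release(key string) bool {
	r.mu.Lock()
	defer r.mu.Unlock()
	if h := r.m[key]; h.users > 1 {
		h.users--
		return false
	}
	delete(r.m, key)
	return true
}

func main() {
	r := &registry{m: make(map[string]*handle)}
	r.acquire("/f")
	r.acquire("/f")
	fmt.Println(r.release("/f")) // false: another user remains
	fmt.Println(r.release("/f")) // true: last user closes
}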
@@ -18,19 +18,15 @@ func TestFilesWriterBasic(t *testing.T) {
 
 	rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1))
 	rtest.Equals(t, 0, len(w.buckets[0].files))
-	rtest.Equals(t, 0, len(w.buckets[0].users))
 
 	buf, err := ioutil.ReadFile(f1)
 	rtest.OK(t, err)
@@ -4,12 +4,18 @@
 package restorer
 
 import (
+	"bytes"
 	"context"
+	"io/ioutil"
+	"math"
+	"math/rand"
 	"os"
 	"path/filepath"
 	"syscall"
 	"testing"
 
+	"github.com/restic/restic/internal/archiver"
+	"github.com/restic/restic/internal/fs"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
 	rtest "github.com/restic/restic/internal/test"
@@ -60,3 +66,85 @@ func TestRestorerRestoreEmptyHardlinkedFileds(t *testing.T) {
 		rtest.Equals(t, s1.Ino, s2.Ino)
 	}
 }
+
+func TestRestorerSparseFiles(t *testing.T) {
+	repo, cleanup := repository.TestRepository(t)
+	defer cleanup()
+
+	var zeros [1<<20 + 13]byte
+
+	target := &fs.Reader{
+		Mode:       0600,
+		Name:       "/zeros",
+		ReadCloser: ioutil.NopCloser(bytes.NewReader(zeros[:])),
+	}
+	sc := archiver.NewScanner(target)
+	err := sc.Scan(context.TODO(), []string{"/zeros"})
+	rtest.OK(t, err)
+
+	arch := archiver.New(repo, target, archiver.Options{})
+	_, id, err := arch.Snapshot(context.Background(), []string{"/zeros"},
+		archiver.SnapshotOptions{})
+
+	res, err := NewRestorer(repo, id)
+	rtest.OK(t, err)
+
+	tempdir, cleanup := rtest.TempDir(t)
+	defer cleanup()
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	err = res.RestoreTo(ctx, tempdir)
+	rtest.OK(t, err)
+
+	filename := filepath.Join(tempdir, "zeros")
+	content, err := ioutil.ReadFile(filename)
+	rtest.OK(t, err)
+
+	rtest.Equals(t, zeros[:], content)
+
+	fi, err := os.Stat(filename)
+	rtest.OK(t, err)
+	st := fi.Sys().(*syscall.Stat_t)
+	if st == nil {
+		return
+	}
+
+	// st.Blocks is the size in 512-byte blocks.
+	denseBlocks := math.Ceil(float64(len(zeros)) / 512)
+	sparsity := 1 - float64(st.Blocks)/denseBlocks
+
+	// This should report 100% sparse. We don't assert that,
+	// as the behavior of sparse writes depends on the underlying
+	// file system as well as the OS.
+	t.Logf("wrote %d zeros as %d blocks, %.1f%% sparse",
+		len(zeros), st.Blocks, 100*sparsity)
+}
+
+func BenchmarkZeroPrefixLen(b *testing.B) {
+	var (
+		buf        [4<<20 + 37]byte
+		r          = rand.New(rand.NewSource(0x618732))
+		sumSkipped int64
+	)
+
+	b.ReportAllocs()
+	b.SetBytes(int64(len(buf)))
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		j := r.Intn(len(buf))
+		buf[j] = 0xff
+
+		skipped := zeroPrefixLen(buf[:])
+		sumSkipped += int64(skipped)
+
+		buf[j] = 0
+	}
+
+	// The closer this is to .5, the better. If it's far off, give the
+	// benchmark more time to run with -benchtime.
+	b.Logf("average number of zeros skipped: %.3f",
+		float64(sumSkipped)/(float64(b.N*len(buf))))
+}
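The mechanism this test measures, in isolation: truncating a file past EOF leaves a hole that reads back as zeros but allocates few or no disk blocks. A minimal Unix-only demo, assuming a file system with sparse-file support (tmpfs, ext4, XFS, APFS, ...):

//go:build !windows

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"syscall"
)

func main() {
	f, err := ioutil.TempFile("", "sparse")
	if err != nil {
		panic(err)
	}
	defer os.Remove(f.Name())
	defer f.Close()

	// Extend to 1 MiB without writing any data: everything is a hole.
	if err := f.Truncate(1 << 20); err != nil {
		panic(err)
	}

	fi, err := f.Stat()
	if err != nil {
		panic(err)
	}
	st := fi.Sys().(*syscall.Stat_t)
	// st.Blocks counts 512-byte blocks actually allocated; on a file
	// system with sparse-file support this stays near 0 even though
	// the apparent size is 1 MiB.
	fmt.Printf("size=%d allocated=%d bytes\n", fi.Size(), st.Blocks*512)
}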
internal/restorer/sparsewrite.go (new file, 60 lines)

@@ -0,0 +1,60 @@
+//go:build !windows
+// +build !windows
+
+package restorer
+
+import "bytes"
+
+// WriteAt writes p to f.File at offset. It tries to do a sparse write
+// and updates f.size.
+func (f *partialFile) WriteAt(p []byte, offset int64) (n int, err error) {
+	n = len(p)
+	end := offset + int64(n)
+
+	// Skip the longest all-zero prefix of p.
+	// If it's long enough, we can punch a hole in the file.
+	skipped := zeroPrefixLen(p)
+	p = p[skipped:]
+	offset += int64(skipped)
+
+	switch {
+	case len(p) == 0 && end > f.size:
+		// We need to do a Truncate, as WriteAt with length-0 input
+		// doesn't actually extend the file.
+		err = f.Truncate(end)
+		if err != nil {
+			return 0, err
+		}
+
+	case len(p) == 0:
+		// All zeros, file already big enough. A previous WriteAt or
+		// Truncate will have produced the zeros in f.File.
+
+	default:
+		n, err = f.File.WriteAt(p, offset)
+	}
+
+	end = offset + int64(n)
+	if end > f.size {
+		f.size = end
+	}
+	return n, err
+}
+
+// zeroPrefixLen returns the length of the longest all-zero prefix of p.
+func zeroPrefixLen(p []byte) (n int) {
+	// First skip 1kB-sized blocks, for speed.
+	var zeros [1024]byte
+
+	for len(p) >= len(zeros) && bytes.Equal(p[:len(zeros)], zeros[:]) {
+		p = p[len(zeros):]
+		n += len(zeros)
+	}
+
+	for len(p) > 0 && p[0] == 0 {
+		p = p[1:]
+		n++
+	}
+
+	return n
+}
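Not part of this commit: a quick table-driven test one might drop next to zeroPrefixLen in the same package (it needs only the standard "testing" import), pinning down the boundary between the 1 kB fast path and the trailing byte loop:

func TestZeroPrefixLen(t *testing.T) {
	cases := []struct {
		p    []byte
		want int
	}{
		{nil, 0},                              // empty input
		{[]byte{1}, 0},                        // no zero prefix
		{[]byte{0, 0, 1}, 2},                  // byte loop only
		{make([]byte, 2048), 2048},            // two full 1 kB blocks
		{append(make([]byte, 1500), 7), 1500}, // fast path, then tail
	}
	for _, c := range cases {
		if got := zeroPrefixLen(c.p); got != c.want {
			t.Errorf("zeroPrefixLen(len %d) = %d, want %d", len(c.p), got, c.want)
		}
	}
}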