
Merge pull request #3522 from greatroar/dump-lru

Use LRU cache in restic dump
MichaelEischer 2021-09-24 20:33:58 +02:00 committed by GitHub
commit a5e103a212
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 141 additions and 92 deletions


@@ -0,0 +1,10 @@
+Enhancement: Cache blobs read by the dump command
+
+When dumping a file using the `restic dump` command, restic did not cache blobs
+in any way, so even consecutive occurrences of the same blob were loaded from
+the repository again and again, slowing down the dump.
+
+Now, the caching mechanism already used by the `fuse` command is also used by
+the `dump` command. This makes dumping much faster, especially for sparse files.
+
+https://github.com/restic/restic/pull/3508
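
The get-or-load pattern this entry describes is what the new WriteNodeData in internal/dump implements (see the diff further down). Below is a minimal sketch of that pattern, assuming only the bloblru API introduced in this commit (New, Get, Add) and restic's existing LoadBlob; the writeBlobs helper itself is hypothetical and only for illustration, not part of the commit.

package dump

import (
	"context"
	"io"

	"github.com/restic/restic/internal/bloblru"
	"github.com/restic/restic/internal/restic"
)

// writeBlobs is a hypothetical helper showing the caching pattern: each blob
// is looked up in the LRU cache first and only loaded from the repository on
// a miss, so blobs that repeat within a file hit the backend only once.
func writeBlobs(ctx context.Context, w io.Writer, repo restic.Repository,
	ids restic.IDs, cache *bloblru.Cache) error {
	var buf []byte // scratch buffer, reused between LoadBlob calls
	for _, id := range ids {
		blob, ok := cache.Get(id)
		if !ok {
			var err error
			blob, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
			if err != nil {
				return err
			}
			// Add may hand back a buffer it evicted; keep it as the next scratch buffer.
			buf = cache.Add(id, blob)
		}
		if _, err := w.Write(blob); err != nil {
			return err
		}
	}
	return nil
}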


@@ -95,7 +95,8 @@ func printFromTree(ctx context.Context, tree *restic.Tree, repo restic.Repository
 if node.Name == pathComponents[0] {
 switch {
 case l == 1 && dump.IsFile(node):
-return dump.GetNodeData(ctx, os.Stdout, repo, node)
+cache := dump.NewCache()
+return dump.WriteNodeData(ctx, os.Stdout, repo, node, cache)
 case l > 1 && dump.IsDir(node):
 subtree, err := repo.LoadTree(ctx, *node.Subtree)
 if err != nil {


@@ -1,4 +1,4 @@
-package fuse
+package bloblru

 import (
 "sync"
@@ -10,12 +10,12 @@ import (
 )

 // Crude estimate of the overhead per blob: a SHA-256, a linked list node
-// and some pointers. See comment in blobCache.add.
-const cacheOverhead = len(restic.ID{}) + 64
+// and some pointers. See comment in Cache.add.
+const overhead = len(restic.ID{}) + 64

-// A blobCache is a fixed-size cache of blob contents.
+// A Cache is a fixed-size LRU cache of blob contents.
 // It is safe for concurrent access.
-type blobCache struct {
+type Cache struct {
 mu sync.Mutex
 c *simplelru.LRU
@@ -23,16 +23,16 @@ type blobCache struct {
 }

 // Construct a blob cache that stores at most size bytes worth of blobs.
-func newBlobCache(size int) *blobCache {
-c := &blobCache{
+func New(size int) *Cache {
+c := &Cache{
 free: size,
 size: size,
 }

 // NewLRU wants us to specify some max. number of entries, else it errors.
-// The actual maximum will be smaller than size/cacheOverhead, because we
+// The actual maximum will be smaller than size/overhead, because we
 // evict entries (RemoveOldest in add) to maintain our size bound.
-maxEntries := size / cacheOverhead
+maxEntries := size / overhead
 lru, err := simplelru.NewLRU(maxEntries, c.evict)
 if err != nil {
 panic(err) // Can only be maxEntries <= 0.
@@ -42,10 +42,12 @@ func newBlobCache(size int) *blobCache {
 return c
 }

-func (c *blobCache) add(id restic.ID, blob []byte) {
-debug.Log("blobCache: add %v", id)
+// Add adds key id with value blob to c.
+// It may return an evicted buffer for reuse.
+func (c *Cache) Add(id restic.ID, blob []byte) (old []byte) {
+debug.Log("bloblru.Cache: add %v", id)

-size := len(blob) + cacheOverhead
+size := len(blob) + overhead
 if size > c.size {
 return
 }
@@ -59,29 +61,36 @@ func (c *blobCache) add(id restic.ID, blob []byte) {
 return
 }

-// This loop takes at most min(maxEntries, maxchunksize/cacheOverhead)
+// This loop takes at most min(maxEntries, maxchunksize/overhead)
 // iterations.
 for size > c.free {
-c.c.RemoveOldest()
+_, val, _ := c.c.RemoveOldest()
+b := val.([]byte)
+if len(b) > len(old) {
+// We can only return one buffer, so pick the largest.
+old = b
+}
 }

 c.c.Add(key, blob)
 c.free -= size
+
+return old
 }

-func (c *blobCache) get(id restic.ID) ([]byte, bool) {
+func (c *Cache) Get(id restic.ID) ([]byte, bool) {
 c.mu.Lock()
 value, ok := c.c.Get(id)
 c.mu.Unlock()

-debug.Log("blobCache: get %v, hit %v", id, ok)
+debug.Log("bloblru.Cache: get %v, hit %v", id, ok)

 blob, ok := value.([]byte)
 return blob, ok
 }

-func (c *blobCache) evict(key, value interface{}) {
+func (c *Cache) evict(key, value interface{}) {
 blob := value.([]byte)
-debug.Log("blobCache: evict %v, %d bytes", key, len(blob))
-c.free += len(blob) + cacheOverhead
+debug.Log("bloblru.Cache: evict %v, %d bytes", key, len(blob))
+c.free += len(blob) + overhead
 }
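
Taken together, the extracted package boils down to a three-call API: New, Add (which may return an evicted buffer), and Get. A small usage sketch follows, written as an illustrative Go example test; the package name, the 1 MiB budget and the 512 KiB blob are assumptions made only for the demonstration, and internal/bloblru is importable only from within the restic module itself.

package bloblru_test

import (
	"fmt"

	"github.com/restic/restic/internal/bloblru"
	"github.com/restic/restic/internal/restic"
)

func ExampleCache() {
	// The size passed to New is a byte budget for blob contents
	// (plus a small per-entry overhead), not an entry count.
	c := bloblru.New(1 << 20) // 1 MiB, arbitrary for this sketch

	var id restic.ID
	id[0] = 1

	// Add may return a buffer evicted to make room; callers can reuse it
	// for their next read instead of allocating a fresh one.
	old := c.Add(id, make([]byte, 512<<10))
	_ = old

	if blob, ok := c.Get(id); ok {
		fmt.Println(len(blob))
	}
	// Output: 524288
}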


@@ -0,0 +1,50 @@
+package bloblru
+
+import (
+"testing"
+
+"github.com/restic/restic/internal/restic"
+rtest "github.com/restic/restic/internal/test"
+)
+
+func TestCache(t *testing.T) {
+var id1, id2, id3 restic.ID
+id1[0] = 1
+id2[0] = 2
+id3[0] = 3
+
+const (
+kiB = 1 << 10
+cacheSize = 64*kiB + 3*overhead
+)
+
+c := New(cacheSize)
+
+addAndCheck := func(id restic.ID, exp []byte) {
+c.Add(id, exp)
+blob, ok := c.Get(id)
+rtest.Assert(t, ok, "blob %v added but not found in cache", id)
+rtest.Equals(t, &exp[0], &blob[0])
+rtest.Equals(t, exp, blob)
+}
+
+addAndCheck(id1, make([]byte, 32*kiB))
+addAndCheck(id2, make([]byte, 30*kiB))
+addAndCheck(id3, make([]byte, 10*kiB))
+
+_, ok := c.Get(id2)
+rtest.Assert(t, ok, "blob %v not present", id2)
+_, ok = c.Get(id1)
+rtest.Assert(t, !ok, "blob %v present, but should have been evicted", id1)
+
+c.Add(id1, make([]byte, 1+c.size))
+_, ok = c.Get(id1)
+rtest.Assert(t, !ok, "blob %v too large but still added to cache")
+
+c.c.Remove(id1)
+c.c.Remove(id3)
+c.c.Remove(id2)
+
+rtest.Equals(t, cacheSize, c.size)
+rtest.Equals(t, cacheSize, c.free)
+}


@@ -5,6 +5,7 @@ import (
 "io"
 "path"

+"github.com/restic/restic/internal/bloblru"
 "github.com/restic/restic/internal/errors"
 "github.com/restic/restic/internal/restic"
 "github.com/restic/restic/internal/walker"
@@ -20,7 +21,11 @@ type dumper interface {
 // It will loop over all nodes in the tree and dump them recursively.
 type WriteDump func(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error

-func writeDump(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dmp dumper, dst io.Writer) error {
+func NewCache() *bloblru.Cache {
+return bloblru.New(64 << 20)
+}
+
+func writeDump(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dmp dumper) error {
 for _, rootNode := range tree.Nodes {
 rootNode.Path = rootPath
 err := dumpTree(ctx, repo, rootNode, rootPath, dmp)
@@ -71,20 +76,24 @@ func dumpTree(ctx context.Context, repo restic.Repository, rootNode *restic.Node
 return err
 }

-// GetNodeData will write the contents of the node to the given output.
-func GetNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error {
+// WriteNodeData writes the contents of the node to the given Writer.
+func WriteNodeData(ctx context.Context, w io.Writer, repo restic.Repository, node *restic.Node, cache *bloblru.Cache) error {
 var (
 buf []byte
 err error
 )
 for _, id := range node.Content {
-buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
+blob, ok := cache.Get(id)
+if !ok {
+blob, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf)
 if err != nil {
 return err
 }
-_, err = output.Write(buf)
-if err != nil {
+buf = cache.Add(id, blob) // Reuse evicted buffer.
+}
+if _, err := w.Write(blob); err != nil {
 return errors.Wrap(err, "Write")
 }
 }


@@ -8,25 +8,29 @@ import (
 "path/filepath"
 "strings"

+"github.com/restic/restic/internal/bloblru"
 "github.com/restic/restic/internal/errors"
 "github.com/restic/restic/internal/restic"
 )

 type tarDumper struct {
+cache *bloblru.Cache
 w *tar.Writer
 }

 // Statically ensure that tarDumper implements dumper.
-var _ dumper = tarDumper{}
+var _ dumper = &tarDumper{}

 // WriteTar will write the contents of the given tree, encoded as a tar to the given destination.
 func WriteTar(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error {
-dmp := tarDumper{w: tar.NewWriter(dst)}
-return writeDump(ctx, repo, tree, rootPath, dmp, dst)
+dmp := &tarDumper{
+cache: NewCache(),
+w: tar.NewWriter(dst),
+}
+return writeDump(ctx, repo, tree, rootPath, dmp)
 }

-func (dmp tarDumper) Close() error {
+func (dmp *tarDumper) Close() error {
 return dmp.w.Close()
 }
@@ -39,7 +43,7 @@ const (
 cISVTX = 0o1000 // Save text (sticky bit)
 )

-func (dmp tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
+func (dmp *tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
 relPath, err := filepath.Rel("/", node.Path)
 if err != nil {
 return err
@@ -90,7 +94,7 @@ func (dmp tarDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
 return errors.Wrap(err, "TarHeader")
 }

-return GetNodeData(ctx, dmp.w, repo, node)
+return WriteNodeData(ctx, dmp.w, repo, node, dmp.cache)
 }

 func parseXattrs(xattrs []restic.ExtendedAttribute) map[string]string {


@@ -6,29 +6,33 @@ import (
 "io"
 "path/filepath"

+"github.com/restic/restic/internal/bloblru"
 "github.com/restic/restic/internal/errors"
 "github.com/restic/restic/internal/restic"
 )

 type zipDumper struct {
+cache *bloblru.Cache
 w *zip.Writer
 }

 // Statically ensure that zipDumper implements dumper.
-var _ dumper = zipDumper{}
+var _ dumper = &zipDumper{}

 // WriteZip will write the contents of the given tree, encoded as a zip to the given destination.
 func WriteZip(ctx context.Context, repo restic.Repository, tree *restic.Tree, rootPath string, dst io.Writer) error {
-dmp := zipDumper{w: zip.NewWriter(dst)}
-return writeDump(ctx, repo, tree, rootPath, dmp, dst)
+dmp := &zipDumper{
+cache: NewCache(),
+w: zip.NewWriter(dst),
+}
+return writeDump(ctx, repo, tree, rootPath, dmp)
 }

-func (dmp zipDumper) Close() error {
+func (dmp *zipDumper) Close() error {
 return dmp.w.Close()
 }

-func (dmp zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
+func (dmp *zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
 relPath, err := filepath.Rel("/", node.Path)
 if err != nil {
 return err
@@ -58,5 +62,5 @@ func (dmp zipDumper) dumpNode(ctx context.Context, node *restic.Node, repo restic.Repository) error {
 return nil
 }

-return GetNodeData(ctx, w, repo, node)
+return WriteNodeData(ctx, w, repo, node, dmp.cache)
 }


@@ -96,7 +96,7 @@ func (f *file) Open(ctx context.Context, req *fuse.OpenRequest, resp *fuse.OpenR
 func (f *openFile) getBlobAt(ctx context.Context, i int) (blob []byte, err error) {
-blob, ok := f.root.blobCache.get(f.node.Content[i])
+blob, ok := f.root.blobCache.Get(f.node.Content[i])
 if ok {
 return blob, nil
 }
@@ -107,7 +107,7 @@ func (f *openFile) getBlobAt(ctx context.Context, i int) (blob []byte, err error) {
 return nil, err
 }

-f.root.blobCache.add(f.node.Content[i], blob)
+f.root.blobCache.Add(f.node.Content[i], blob)
 return blob, nil
 }


@@ -1,3 +1,4 @@
+//go:build darwin || freebsd || linux
 // +build darwin freebsd linux

 package fuse
@@ -10,6 +11,7 @@ import (
 "testing"
 "time"

+"github.com/restic/restic/internal/bloblru"
 "github.com/restic/restic/internal/repository"
 "github.com/restic/restic/internal/restic"
@@ -19,48 +21,6 @@ import (
 rtest "github.com/restic/restic/internal/test"
 )

-func TestCache(t *testing.T) {
-var id1, id2, id3 restic.ID
-id1[0] = 1
-id2[0] = 2
-id3[0] = 3
-
-const (
-kiB = 1 << 10
-cacheSize = 64*kiB + 3*cacheOverhead
-)
-
-c := newBlobCache(cacheSize)
-
-addAndCheck := func(id restic.ID, exp []byte) {
-c.add(id, exp)
-blob, ok := c.get(id)
-rtest.Assert(t, ok, "blob %v added but not found in cache", id)
-rtest.Equals(t, &exp[0], &blob[0])
-rtest.Equals(t, exp, blob)
-}
-
-addAndCheck(id1, make([]byte, 32*kiB))
-addAndCheck(id2, make([]byte, 30*kiB))
-addAndCheck(id3, make([]byte, 10*kiB))
-
-_, ok := c.get(id2)
-rtest.Assert(t, ok, "blob %v not present", id2)
-_, ok = c.get(id1)
-rtest.Assert(t, !ok, "blob %v present, but should have been evicted", id1)
-
-c.add(id1, make([]byte, 1+c.size))
-_, ok = c.get(id1)
-rtest.Assert(t, !ok, "blob %v too large but still added to cache")
-
-c.c.Remove(id1)
-c.c.Remove(id3)
-c.c.Remove(id2)
-
-rtest.Equals(t, cacheSize, c.size)
-rtest.Equals(t, cacheSize, c.free)
-}
-
 func testRead(t testing.TB, f fs.Handle, offset, length int, data []byte) {
 ctx, cancel := context.WithCancel(context.Background())
 defer cancel()
@@ -156,7 +116,7 @@ func TestFuseFile(t *testing.T) {
 Size: filesize,
 Content: content,
 }

-root := &Root{repo: repo, blobCache: newBlobCache(blobCacheSize)}
+root := &Root{repo: repo, blobCache: bloblru.New(blobCacheSize)}
 inode := fs.GenerateDynamicInode(1, "foo")
 f, err := newFile(context.TODO(), root, inode, node)
@@ -191,7 +151,7 @@ func TestFuseDir(t *testing.T) {
 repo, cleanup := repository.TestRepository(t)
 defer cleanup()

-root := &Root{repo: repo, blobCache: newBlobCache(blobCacheSize)}
+root := &Root{repo: repo, blobCache: bloblru.New(blobCacheSize)}

 node := &restic.Node{
 Mode: 0755,


@@ -1,3 +1,4 @@
+//go:build darwin || freebsd || linux
 // +build darwin freebsd linux

 package fuse
@@ -6,6 +7,7 @@ import (
 "os"
 "time"

+"github.com/restic/restic/internal/bloblru"
 "github.com/restic/restic/internal/debug"
 "github.com/restic/restic/internal/restic"
@@ -27,7 +29,7 @@ type Root struct {
 cfg Config
 inode uint64
 snapshots restic.Snapshots
-blobCache *blobCache
+blobCache *bloblru.Cache

 snCount int
 lastCheck time.Time
@@ -54,7 +56,7 @@ func NewRoot(repo restic.Repository, cfg Config) *Root {
 repo: repo,
 inode: rootInode,
 cfg: cfg,
-blobCache: newBlobCache(blobCacheSize),
+blobCache: bloblru.New(blobCacheSize),
 }

 if !cfg.OwnerIsRoot {