2
2
mirror of https://github.com/octoleo/restic.git synced 2025-01-22 14:48:24 +00:00

Merge pull request #1549 from MJDSys/more_index_lookup_avoids

More optimizations to avoid calling Index.Lookup()
This commit is contained in:
Alexander Neumann 2018-01-24 20:53:30 +01:00
commit 9c55e8d69c
17 changed files with 246 additions and 81 deletions

View File

@ -0,0 +1,9 @@
Enhancement: Speed up querying across indices and scanning existing files
This change increases the speed whenever a blob (part of a file) is searched for
in a restic repository. This reduces CPU usage somewhat when backing up files
that have already been backed up by restic. CPU usage is further decreased when
scanning files.
https://github.com/restic/restic/issues/1234
https://github.com/restic/restic/pull/1549
https://forum.restic.net/foo/bar/baz

View File

@ -165,8 +165,8 @@ func runCat(gopts GlobalOptions, args []string) error {
case "blob":
for _, t := range []restic.BlobType{restic.DataBlob, restic.TreeBlob} {
list, err := repo.Index().Lookup(id, t)
if err != nil {
list, found := repo.Index().Lookup(id, t)
if !found {
continue
}
blob := list[0]

View File

@ -135,9 +135,9 @@ func updateBlobs(repo restic.Repository, blobs restic.BlobSet, stats *DiffStat)
stats.TreeBlobs++
}
size, err := repo.LookupBlobSize(h.ID, h.Type)
if err != nil {
Warnf("unable to find blob size for %v: %v\n", h, err)
size, found := repo.LookupBlobSize(h.ID, h.Type)
if !found {
Warnf("unable to find blob size for %v\n", h)
continue
}

View File

@ -59,9 +59,9 @@ func splitPath(path string) []string {
func dumpNode(ctx context.Context, repo restic.Repository, node *restic.Node) error {
var buf []byte
for _, id := range node.Content {
size, err := repo.LookupBlobSize(id, restic.DataBlob)
if err != nil {
return err
size, found := repo.LookupBlobSize(id, restic.DataBlob)
if !found {
return errors.Errorf("id %v not found in repository", id)
}
buf = buf[:cap(buf)]

View File

@ -43,9 +43,9 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name
// check blobs
for i, id := range node.Content {
size, err := repo.LookupBlobSize(id, restic.DataBlob)
if err != nil {
t.Fatal(err)
size, found := repo.LookupBlobSize(id, restic.DataBlob)
if !found {
t.Fatal("Failed to find blob", id.Str())
}
buf := restic.NewBlobBuffer(int(size))
@ -55,7 +55,7 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name
}
buf2 := make([]byte, int(size))
_, err = io.ReadFull(rd, buf2)
_, err := io.ReadFull(rd, buf2)
if err != nil {
t.Fatal(err)
}

View File

@ -86,8 +86,7 @@ func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool {
arch.knownBlobs.Insert(id)
_, err := arch.repo.Index().Lookup(id, t)
if err == nil {
if arch.repo.Index().Has(id, t) {
return true
}

View File

@ -0,0 +1,34 @@
package debug_test
import (
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic"
"testing"
)
// BenchmarkLogStatic measures a debug.Log call that is given a constant
// message and no format arguments.
func BenchmarkLogStatic(b *testing.B) {
	for remaining := b.N; remaining > 0; remaining-- {
		debug.Log("Static string")
	}
}
// BenchmarkLogIDStr measures debug.Log when its argument is produced by
// calling Str() on a random ID inside the timed loop.
func BenchmarkLogIDStr(b *testing.B) {
	randomID := restic.NewRandomID()

	// Keep the ID generation out of the measurement.
	b.ResetTimer()

	for remaining := b.N; remaining > 0; remaining-- {
		debug.Log("id: %v", randomID.Str())
	}
}
// BenchmarkLogIDString measures debug.Log when its argument is produced by
// calling String() on a random ID inside the timed loop.
func BenchmarkLogIDString(b *testing.B) {
	randomID := restic.NewRandomID()

	// Keep the ID generation out of the measurement.
	b.ResetTimer()

	for remaining := b.N; remaining > 0; remaining-- {
		debug.Log("id: %v", randomID.String())
	}
}

View File

@ -4,6 +4,7 @@
package fuse
import (
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/debug"
@ -36,9 +37,10 @@ func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) (
for i, id := range node.Content {
size, ok := root.blobSizeCache.Lookup(id)
if !ok {
size, err = root.repo.LookupBlobSize(id, restic.DataBlob)
if err != nil {
return nil, err
var found bool
size, found = root.repo.LookupBlobSize(id, restic.DataBlob)
if !found {
return nil, errors.Errorf("id %v not found in repository", id)
}
}

View File

@ -87,8 +87,8 @@ func TestFuseFile(t *testing.T) {
memfile []byte
)
for _, id := range content {
size, err := repo.LookupBlobSize(id, restic.DataBlob)
rtest.OK(t, err)
size, found := repo.LookupBlobSize(id, restic.DataBlob)
rtest.Assert(t, found, "Expected to find blob id %v", id)
filesize += uint64(size)
buf := restic.NewBlobBuffer(int(size))

View File

@ -110,7 +110,7 @@ func (idx *Index) Store(blob restic.PackedBlob) {
}
// Lookup queries the index for the blob ID and returns a restic.PackedBlob.
func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) {
func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) {
idx.m.Lock()
defer idx.m.Unlock()
@ -120,9 +120,6 @@ func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.Pack
blobs = make([]restic.PackedBlob, 0, len(packs))
for _, p := range packs {
debug.Log("id %v found in pack %v at %d, length %d",
id.Str(), p.packID.Str(), p.offset, p.length)
blob := restic.PackedBlob{
Blob: restic.Blob{
Type: tpe,
@ -136,11 +133,10 @@ func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.Pack
blobs = append(blobs, blob)
}
return blobs, nil
return blobs, true
}
debug.Log("id %v not found", id.Str())
return nil, errors.Errorf("id %v not found in index", id)
return nil, false
}
// ListPack returns a list of blobs contained in a pack.
@ -180,13 +176,13 @@ func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool {
// LookupSize returns the length of the plaintext content of the blob with the
// given id.
func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, err error) {
blobs, err := idx.Lookup(id, tpe)
if err != nil {
return 0, err
func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, found bool) {
blobs, found := idx.Lookup(id, tpe)
if !found {
return 0, found
}
return uint(restic.PlaintextLength(int(blobs[0].Length))), nil
return uint(restic.PlaintextLength(int(blobs[0].Length))), true
}
// Supersedes returns the list of indexes this index supersedes, if any.

View File

@ -65,8 +65,8 @@ func TestIndexSerialize(t *testing.T) {
rtest.OK(t, err)
for _, testBlob := range tests {
list, err := idx.Lookup(testBlob.id, testBlob.tpe)
rtest.OK(t, err)
list, found := idx.Lookup(testBlob.id, testBlob.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str())
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list)
@ -78,8 +78,8 @@ func TestIndexSerialize(t *testing.T) {
rtest.Equals(t, testBlob.offset, result.Offset)
rtest.Equals(t, testBlob.length, result.Length)
list2, err := idx2.Lookup(testBlob.id, testBlob.tpe)
rtest.OK(t, err)
list2, found := idx2.Lookup(testBlob.id, testBlob.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id)
if len(list2) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list2), list2)
@ -146,8 +146,8 @@ func TestIndexSerialize(t *testing.T) {
// all new blobs must be in the index
for _, testBlob := range newtests {
list, err := idx3.Lookup(testBlob.id, testBlob.tpe)
rtest.OK(t, err)
list, found := idx3.Lookup(testBlob.id, testBlob.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str())
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list)
@ -293,8 +293,8 @@ func TestIndexUnserialize(t *testing.T) {
rtest.OK(t, err)
for _, test := range exampleTests {
list, err := idx.Lookup(test.id, test.tpe)
rtest.OK(t, err)
list, found := idx.Lookup(test.id, test.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str())
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
@ -341,8 +341,8 @@ func TestIndexUnserializeOld(t *testing.T) {
rtest.OK(t, err)
for _, test := range exampleTests {
list, err := idx.Lookup(test.id, test.tpe)
rtest.OK(t, err)
list, found := idx.Lookup(test.id, test.tpe)
rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str())
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
@ -383,16 +383,23 @@ func TestIndexPacks(t *testing.T) {
const maxPackSize = 16 * 1024 * 1024
func createRandomIndex() (idx *repository.Index, lookupID restic.ID) {
// NewRandomTestID returns an ID filled with bytes read from rng. It is
// similar to NewRandomID, but the result is an insecure random ID.
func NewRandomTestID(rng *rand.Rand) restic.ID {
	var id restic.ID
	rng.Read(id[:])
	return id
}
func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.ID) {
idx = repository.NewIndex()
// create index with 200k pack files
for i := 0; i < 200000; i++ {
packID := restic.NewRandomID()
packID := NewRandomTestID(rng)
offset := 0
for offset < maxPackSize {
size := 2000 + rand.Intn(4*1024*1024)
id := restic.NewRandomID()
id := NewRandomTestID(rng)
idx.Store(restic.PackedBlob{
PackID: packID,
Blob: restic.Blob{
@ -415,7 +422,7 @@ func createRandomIndex() (idx *repository.Index, lookupID restic.ID) {
}
func BenchmarkIndexHasUnknown(b *testing.B) {
idx, _ := createRandomIndex()
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)))
lookupID := restic.NewRandomID()
b.ResetTimer()
@ -426,7 +433,7 @@ func BenchmarkIndexHasUnknown(b *testing.B) {
}
func BenchmarkIndexHasKnown(b *testing.B) {
idx, lookupID := createRandomIndex()
idx, lookupID := createRandomIndex(rand.New(rand.NewSource(0)))
b.ResetTimer()

View File

@ -4,7 +4,6 @@ import (
"context"
"sync"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/debug"
@ -22,36 +21,32 @@ func NewMasterIndex() *MasterIndex {
}
// Lookup queries all known Indexes for the ID and returns the first match.
func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) {
func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) {
mi.idxMutex.RLock()
defer mi.idxMutex.RUnlock()
debug.Log("looking up id %v, tpe %v", id.Str(), tpe)
for _, idx := range mi.idx {
blobs, err = idx.Lookup(id, tpe)
if err == nil {
debug.Log("found id %v: %v", id.Str(), blobs)
blobs, found = idx.Lookup(id, tpe)
if found {
return
}
}
debug.Log("id %v not found in any index", id.Str())
return nil, errors.Errorf("id %v not found in any index", id)
return nil, false
}
// LookupSize queries all known Indexes for the ID and returns the first match.
func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, error) {
func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, bool) {
mi.idxMutex.RLock()
defer mi.idxMutex.RUnlock()
for _, idx := range mi.idx {
if idx.Has(id, tpe) {
return idx.LookupSize(id, tpe)
if size, found := idx.LookupSize(id, tpe); found {
return size, found
}
}
return 0, errors.Errorf("id %v not found in any index", id)
return 0, false
}
// ListPack returns the list of blobs in a pack. The first matching index is

View File

@ -0,0 +1,123 @@
package repository_test
import (
"math/rand"
"testing"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
// TestMasterIndexLookup stores one blob in each of two indexes, inserts both
// indexes into a MasterIndex and checks that each blob is found through the
// master index, while a lookup with a fresh random ID reports a miss and
// returns nil blobs.
func TestMasterIndexLookup(t *testing.T) {
	firstID := restic.NewRandomID()
	secondID := restic.NewRandomID()

	firstBlob := restic.PackedBlob{
		PackID: restic.NewRandomID(),
		Blob: restic.Blob{
			Type:   restic.DataBlob,
			ID:     firstID,
			Length: 10,
			Offset: 0,
		},
	}
	secondBlob := restic.PackedBlob{
		PackID: restic.NewRandomID(),
		Blob: restic.Blob{
			Type:   restic.DataBlob,
			ID:     secondID,
			Length: 100,
			Offset: 10,
		},
	}

	// Each blob lives in its own index.
	firstIndex := repository.NewIndex()
	secondIndex := repository.NewIndex()
	firstIndex.Store(firstBlob)
	secondIndex.Store(secondBlob)

	master := repository.NewMasterIndex()
	master.Insert(firstIndex)
	master.Insert(secondIndex)

	// Blobs from both member indexes must be reachable via the master index.
	got, ok := master.Lookup(firstID, restic.DataBlob)
	rtest.Assert(t, ok, "Expected to find blob id %v from index 1", firstID)
	rtest.Equals(t, []restic.PackedBlob{firstBlob}, got)

	got, ok = master.Lookup(secondID, restic.DataBlob)
	rtest.Assert(t, ok, "Expected to find blob id %v from index 2", secondID)
	rtest.Equals(t, []restic.PackedBlob{secondBlob}, got)

	// An ID that was never stored must yield (nil, false).
	got, ok = master.Lookup(restic.NewRandomID(), restic.DataBlob)
	rtest.Assert(t, !ok, "Expected to not find a blob when fetching with a random id")
	rtest.Assert(t, got == nil, "Expected no blobs when fetching with a random id")
}
// BenchmarkMasterIndexLookupSingleIndex measures MasterIndex.Lookup for an ID
// returned by createRandomIndex, with that single index inserted into the
// master index. Index construction is excluded from the timing.
func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) {
	index, knownID := createRandomIndex(rand.New(rand.NewSource(0)))

	master := repository.NewMasterIndex()
	master.Insert(index)

	b.ResetTimer()

	for remaining := b.N; remaining > 0; remaining-- {
		master.Lookup(knownID, restic.DataBlob)
	}
}
// BenchmarkMasterIndexLookupMultipleIndex measures MasterIndex.Lookup when the
// master index holds six random indexes. The ID that is looked up is the one
// returned by createRandomIndex for the index inserted last. Index
// construction is excluded from the timing.
func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) {
	// A single seeded generator feeds every index so the setup is
	// reproducible. It is passed on directly: createRandomIndex already takes
	// a *rand.Rand, so wrapping it in another rand.New is unnecessary.
	rng := rand.New(rand.NewSource(0))
	mIdx := repository.NewMasterIndex()

	for i := 0; i < 5; i++ {
		idx, _ := createRandomIndex(rng)
		mIdx.Insert(idx)
	}

	idx1, lookupID := createRandomIndex(rng)
	mIdx.Insert(idx1)

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		mIdx.Lookup(lookupID, restic.DataBlob)
	}
}
// BenchmarkMasterIndexLookupSingleIndexUnknown measures MasterIndex.Lookup
// for a freshly generated random ID against a master index holding a single
// random index. Index construction is excluded from the timing.
func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) {
	unknownID := restic.NewRandomID()
	index, _ := createRandomIndex(rand.New(rand.NewSource(0)))

	master := repository.NewMasterIndex()
	master.Insert(index)

	b.ResetTimer()

	for remaining := b.N; remaining > 0; remaining-- {
		master.Lookup(unknownID, restic.DataBlob)
	}
}
// BenchmarkMasterIndexLookupMultipleIndexUnknown measures MasterIndex.Lookup
// for a freshly generated random ID against a master index holding six random
// indexes. Index construction is excluded from the timing.
func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) {
	// A single seeded generator feeds every index so the setup is
	// reproducible. It is passed on directly: createRandomIndex already takes
	// a *rand.Rand, so wrapping it in another rand.New is unnecessary.
	rng := rand.New(rand.NewSource(0))
	lookupID := restic.NewRandomID()
	mIdx := repository.NewMasterIndex()

	for i := 0; i < 5; i++ {
		idx, _ := createRandomIndex(rng)
		mIdx.Insert(idx)
	}

	idx1, _ := createRandomIndex(rng)
	mIdx.Insert(idx1)

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		mIdx.Lookup(lookupID, restic.DataBlob)
	}
}

View File

@ -122,9 +122,9 @@ func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSe
idx := repo.Index()
for h := range blobs {
list, err := idx.Lookup(h.ID, h.Type)
if err != nil {
t.Fatal(err)
list, found := idx.Lookup(h.ID, h.Type)
if !found {
t.Fatal("Failed to find blob", h.ID.Str(), "with type", h.Type)
}
for _, pb := range list {
@ -227,8 +227,8 @@ func TestRepack(t *testing.T) {
idx := repo.Index()
for h := range keepBlobs {
list, err := idx.Lookup(h.ID, h.Type)
if err != nil {
list, found := idx.Lookup(h.ID, h.Type)
if !found {
t.Errorf("unable to find blob %v in repo", h.ID.Str())
continue
}
@ -246,7 +246,7 @@ func TestRepack(t *testing.T) {
}
for h := range removeBlobs {
if _, err := idx.Lookup(h.ID, h.Type); err == nil {
if _, found := idx.Lookup(h.ID, h.Type); found {
t.Errorf("blob %v still contained in the repo", h)
}
}

View File

@ -115,10 +115,10 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy
debug.Log("load %v with id %v (buf len %v, cap %d)", t, id.Str(), len(plaintextBuf), cap(plaintextBuf))
// lookup packs
blobs, err := r.idx.Lookup(id, t)
if err != nil {
debug.Log("id %v not found in index: %v", id.Str(), err)
return 0, err
blobs, found := r.idx.Lookup(id, t)
if !found {
debug.Log("id %v not found in index", id.Str())
return 0, errors.Errorf("id %v not found in repository", id)
}
// try cached pack files first
@ -193,7 +193,7 @@ func (r *Repository) LoadJSONUnpacked(ctx context.Context, t restic.FileType, id
}
// LookupBlobSize returns the size of blob id.
func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, error) {
func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bool) {
return r.idx.LookupSize(id, tpe)
}
@ -576,9 +576,9 @@ func (r *Repository) Close() error {
// space.
func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) (int, error) {
debug.Log("load blob %v into buf (len %v, cap %v)", id.Str(), len(buf), cap(buf))
size, err := r.idx.LookupSize(id, t)
if err != nil {
return 0, err
size, found := r.idx.LookupSize(id, t)
if !found {
return 0, errors.Errorf("id %v not found in repository", id)
}
if cap(buf) < restic.CiphertextLength(int(size)) {
@ -610,9 +610,9 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte
func (r *Repository) LoadTree(ctx context.Context, id restic.ID) (*restic.Tree, error) {
debug.Log("load tree %v", id.Str())
size, err := r.idx.LookupSize(id, restic.TreeBlob)
if err != nil {
return nil, err
size, found := r.idx.LookupSize(id, restic.TreeBlob)
if !found {
return nil, errors.Errorf("tree %v not found in repository", id)
}
debug.Log("size is %d, create buffer", size)

View File

@ -285,9 +285,9 @@ func (node Node) createFileAt(ctx context.Context, path string, repo Repository,
func (node Node) writeNodeContent(ctx context.Context, repo Repository, f *os.File) error {
var buf []byte
for _, id := range node.Content {
size, err := repo.LookupBlobSize(id, DataBlob)
if err != nil {
return err
size, found := repo.LookupBlobSize(id, DataBlob)
if !found {
return errors.Errorf("id %v not found in repository", id)
}
buf = buf[:cap(buf)]

View File

@ -24,7 +24,7 @@ type Repository interface {
Config() Config
LookupBlobSize(ID, BlobType) (uint, error)
LookupBlobSize(ID, BlobType) (uint, bool)
// List calls the function fn for each file of type t in the repository.
// When an error is returned by fn, processing stops and List() returns the
@ -57,7 +57,7 @@ type Lister interface {
// Index keeps track of the blobs are stored within files.
type Index interface {
Has(ID, BlobType) bool
Lookup(ID, BlobType) ([]PackedBlob, error)
Lookup(ID, BlobType) ([]PackedBlob, bool)
Count(BlobType) uint
// Each returns a channel that yields all blobs known to the index. When