From df2c03a6a42af137c72113c6c4c56cbc0d230adc Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 12 Jan 2018 01:20:12 -0500 Subject: [PATCH] repository/master_index: Optimize Index.Lookup() When looking up a blob in the master index, with several indexes present in the master index, a significant amount of time is spent generating errors for each failed lookup. However, these errors are often used to check if a blob is present, but the contents are not inspected making the overhead of the error not useful. Instead, change Index.Lookup (and Index.LookupSize) to instead return a boolean denoting if the blob was found instead of an error. Also change all the calls to these functions to handle the new function signature. benchmark old ns/op new ns/op delta BenchmarkMasterIndexLookupSingleIndex-6 820 897 +9.39% BenchmarkMasterIndexLookupMultipleIndex-6 12821 2001 -84.39% BenchmarkMasterIndexLookupSingleIndexUnknown-6 5378 492 -90.85% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 17026 1649 -90.31% benchmark old allocs new allocs delta BenchmarkMasterIndexLookupSingleIndex-6 9 9 +0.00% BenchmarkMasterIndexLookupMultipleIndex-6 59 19 -67.80% BenchmarkMasterIndexLookupSingleIndexUnknown-6 22 6 -72.73% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 72 16 -77.78% benchmark old bytes new bytes delta BenchmarkMasterIndexLookupSingleIndex-6 160 160 +0.00% BenchmarkMasterIndexLookupMultipleIndex-6 3200 240 -92.50% BenchmarkMasterIndexLookupSingleIndexUnknown-6 1232 48 -96.10% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 4272 128 -97.00% --- cmd/restic/cmd_cat.go | 4 +- cmd/restic/cmd_diff.go | 6 +- cmd/restic/cmd_dump.go | 6 +- internal/archiver/archive_reader_test.go | 8 +- internal/archiver/archiver.go | 4 +- internal/fuse/file.go | 8 +- internal/fuse/file_test.go | 4 +- internal/repository/index.go | 16 +-- internal/repository/index_test.go | 20 ++-- internal/repository/master_index.go | 17 ++-- internal/repository/master_index_test.go | 123 +++++++++++++++++++++++ internal/repository/repack_test.go | 12 +-- internal/repository/repository.go | 22 ++-- internal/restic/node.go | 6 +- internal/restic/repository.go | 4 +- 15 files changed, 192 insertions(+), 68 deletions(-) create mode 100644 internal/repository/master_index_test.go diff --git a/cmd/restic/cmd_cat.go b/cmd/restic/cmd_cat.go index 7e66af734..98c97a5a5 100644 --- a/cmd/restic/cmd_cat.go +++ b/cmd/restic/cmd_cat.go @@ -165,8 +165,8 @@ func runCat(gopts GlobalOptions, args []string) error { case "blob": for _, t := range []restic.BlobType{restic.DataBlob, restic.TreeBlob} { - list, err := repo.Index().Lookup(id, t) - if err != nil { + list, found := repo.Index().Lookup(id, t) + if !found { continue } blob := list[0] diff --git a/cmd/restic/cmd_diff.go b/cmd/restic/cmd_diff.go index dfaf8c85d..9d9057d33 100644 --- a/cmd/restic/cmd_diff.go +++ b/cmd/restic/cmd_diff.go @@ -135,9 +135,9 @@ func updateBlobs(repo restic.Repository, blobs restic.BlobSet, stats *DiffStat) stats.TreeBlobs++ } - size, err := repo.LookupBlobSize(h.ID, h.Type) - if err != nil { - Warnf("unable to find blob size for %v: %v\n", h, err) + size, found := repo.LookupBlobSize(h.ID, h.Type) + if !found { + Warnf("unable to find blob size for %v\n", h) continue } diff --git a/cmd/restic/cmd_dump.go b/cmd/restic/cmd_dump.go index eb8250979..39c861bff 100644 --- a/cmd/restic/cmd_dump.go +++ b/cmd/restic/cmd_dump.go @@ -59,9 +59,9 @@ func splitPath(path string) []string { func dumpNode(ctx context.Context, repo restic.Repository, node *restic.Node) error { var buf []byte for _, id := range node.Content { - size, err := repo.LookupBlobSize(id, restic.DataBlob) - if err != nil { - return err + size, found := repo.LookupBlobSize(id, restic.DataBlob) + if !found { + return errors.Errorf("id %v not found in repository", id) } buf = buf[:cap(buf)] diff --git a/internal/archiver/archive_reader_test.go b/internal/archiver/archive_reader_test.go index 56e5fec5f..1c8efc969 100644 --- a/internal/archiver/archive_reader_test.go +++ b/internal/archiver/archive_reader_test.go @@ -43,9 +43,9 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name // check blobs for i, id := range node.Content { - size, err := repo.LookupBlobSize(id, restic.DataBlob) - if err != nil { - t.Fatal(err) + size, found := repo.LookupBlobSize(id, restic.DataBlob) + if !found { + t.Fatal("Failed to find blob", id.Str()) } buf := restic.NewBlobBuffer(int(size)) @@ -55,7 +55,7 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name } buf2 := make([]byte, int(size)) - _, err = io.ReadFull(rd, buf2) + _, err := io.ReadFull(rd, buf2) if err != nil { t.Fatal(err) } diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 20ba863c3..d64702bb7 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -86,8 +86,8 @@ func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool { arch.knownBlobs.Insert(id) - _, err := arch.repo.Index().Lookup(id, t) - if err == nil { + _, found := arch.repo.Index().Lookup(id, t) + if found { return true } diff --git a/internal/fuse/file.go b/internal/fuse/file.go index 233875993..80cdd59ad 100644 --- a/internal/fuse/file.go +++ b/internal/fuse/file.go @@ -4,6 +4,7 @@ package fuse import ( + "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/debug" @@ -36,9 +37,10 @@ func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) ( for i, id := range node.Content { size, ok := root.blobSizeCache.Lookup(id) if !ok { - size, err = root.repo.LookupBlobSize(id, restic.DataBlob) - if err != nil { - return nil, err + var found bool + size, found = root.repo.LookupBlobSize(id, restic.DataBlob) + if !found { + return nil, errors.Errorf("id %v not found in repository", id) } } diff --git a/internal/fuse/file_test.go b/internal/fuse/file_test.go index 121c218f5..bacb13d74 100644 --- a/internal/fuse/file_test.go +++ b/internal/fuse/file_test.go @@ -81,8 +81,8 @@ func TestFuseFile(t *testing.T) { memfile []byte ) for _, id := range content { - size, err := repo.LookupBlobSize(id, restic.DataBlob) - rtest.OK(t, err) + size, found := repo.LookupBlobSize(id, restic.DataBlob) + rtest.Assert(t, found, "Expected to find blob id %v", id) filesize += uint64(size) buf := restic.NewBlobBuffer(int(size)) diff --git a/internal/repository/index.go b/internal/repository/index.go index b42ea6baf..7da96955a 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -110,7 +110,7 @@ func (idx *Index) Store(blob restic.PackedBlob) { } // Lookup queries the index for the blob ID and returns a restic.PackedBlob. -func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) { +func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) { idx.m.Lock() defer idx.m.Unlock() @@ -136,11 +136,11 @@ func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.Pack blobs = append(blobs, blob) } - return blobs, nil + return blobs, true } debug.Log("id %v not found", id.Str()) - return nil, errors.Errorf("id %v not found in index", id) + return nil, false } // ListPack returns a list of blobs contained in a pack. @@ -180,13 +180,13 @@ func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool { // LookupSize returns the length of the plaintext content of the blob with the // given id. -func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, err error) { - blobs, err := idx.Lookup(id, tpe) - if err != nil { - return 0, err +func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, found bool) { + blobs, found := idx.Lookup(id, tpe) + if !found { + return 0, found } - return uint(restic.PlaintextLength(int(blobs[0].Length))), nil + return uint(restic.PlaintextLength(int(blobs[0].Length))), true } // Supersedes returns the list of indexes this index supersedes, if any. diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index e5ed6489d..599f243f3 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -65,8 +65,8 @@ func TestIndexSerialize(t *testing.T) { rtest.OK(t, err) for _, testBlob := range tests { - list, err := idx.Lookup(testBlob.id, testBlob.tpe) - rtest.OK(t, err) + list, found := idx.Lookup(testBlob.id, testBlob.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str()) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list) @@ -78,8 +78,8 @@ func TestIndexSerialize(t *testing.T) { rtest.Equals(t, testBlob.offset, result.Offset) rtest.Equals(t, testBlob.length, result.Length) - list2, err := idx2.Lookup(testBlob.id, testBlob.tpe) - rtest.OK(t, err) + list2, found := idx2.Lookup(testBlob.id, testBlob.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id) if len(list2) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list2), list2) @@ -146,8 +146,8 @@ func TestIndexSerialize(t *testing.T) { // all new blobs must be in the index for _, testBlob := range newtests { - list, err := idx3.Lookup(testBlob.id, testBlob.tpe) - rtest.OK(t, err) + list, found := idx3.Lookup(testBlob.id, testBlob.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str()) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list) @@ -293,8 +293,8 @@ func TestIndexUnserialize(t *testing.T) { rtest.OK(t, err) for _, test := range exampleTests { - list, err := idx.Lookup(test.id, test.tpe) - rtest.OK(t, err) + list, found := idx.Lookup(test.id, test.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str()) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) @@ -341,8 +341,8 @@ func TestIndexUnserializeOld(t *testing.T) { rtest.OK(t, err) for _, test := range exampleTests { - list, err := idx.Lookup(test.id, test.tpe) - rtest.OK(t, err) + list, found := idx.Lookup(test.id, test.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str()) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go index ad458774d..4c41e8ba2 100644 --- a/internal/repository/master_index.go +++ b/internal/repository/master_index.go @@ -4,7 +4,6 @@ import ( "context" "sync" - "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/debug" @@ -22,36 +21,36 @@ func NewMasterIndex() *MasterIndex { } // Lookup queries all known Indexes for the ID and returns the first match. -func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) { +func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) { mi.idxMutex.RLock() defer mi.idxMutex.RUnlock() debug.Log("looking up id %v, tpe %v", id.Str(), tpe) for _, idx := range mi.idx { - blobs, err = idx.Lookup(id, tpe) - if err == nil { + blobs, found = idx.Lookup(id, tpe) + if found { debug.Log("found id %v: %v", id.Str(), blobs) return } } debug.Log("id %v not found in any index", id.Str()) - return nil, errors.Errorf("id %v not found in any index", id) + return nil, false } // LookupSize queries all known Indexes for the ID and returns the first match. -func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, error) { +func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, bool) { mi.idxMutex.RLock() defer mi.idxMutex.RUnlock() for _, idx := range mi.idx { - if idx.Has(id, tpe) { - return idx.LookupSize(id, tpe) + if size, found := idx.LookupSize(id, tpe); found { + return size, found } } - return 0, errors.Errorf("id %v not found in any index", id) + return 0, false } // ListPack returns the list of blobs in a pack. The first matching index is diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go new file mode 100644 index 000000000..7dfcdda5f --- /dev/null +++ b/internal/repository/master_index_test.go @@ -0,0 +1,123 @@ +package repository_test + +import ( + "math/rand" + "testing" + + "github.com/restic/restic/internal/repository" + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func TestMasterIndexLookup(t *testing.T) { + idInIdx1 := restic.NewRandomID() + idInIdx2 := restic.NewRandomID() + + blob1 := restic.PackedBlob{ + PackID: restic.NewRandomID(), + Blob: restic.Blob{ + Type: restic.DataBlob, + ID: idInIdx1, + Length: 10, + Offset: 0, + }, + } + + blob2 := restic.PackedBlob{ + PackID: restic.NewRandomID(), + Blob: restic.Blob{ + Type: restic.DataBlob, + ID: idInIdx2, + Length: 100, + Offset: 10, + }, + } + + idx1 := repository.NewIndex() + idx1.Store(blob1) + + idx2 := repository.NewIndex() + idx2.Store(blob2) + + mIdx := repository.NewMasterIndex() + mIdx.Insert(idx1) + mIdx.Insert(idx2) + + blobs, found := mIdx.Lookup(idInIdx1, restic.DataBlob) + rtest.Assert(t, found, "Expected to find blob id %v from index 1", idInIdx1) + rtest.Equals(t, []restic.PackedBlob{blob1}, blobs) + + blobs, found = mIdx.Lookup(idInIdx2, restic.DataBlob) + rtest.Assert(t, found, "Expected to find blob id %v from index 2", idInIdx2) + rtest.Equals(t, []restic.PackedBlob{blob2}, blobs) + + blobs, found = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob) + rtest.Assert(t, !found, "Expected to not find a blob when fetching with a random id") + rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id") +} + +func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) { + idx1, lookupID := createRandomIndex(rand.New(rand.NewSource(0))) + + mIdx := repository.NewMasterIndex() + mIdx.Insert(idx1) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.Lookup(lookupID, restic.DataBlob) + } +} + +func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) { + rng := rand.New(rand.NewSource(0)) + mIdx := repository.NewMasterIndex() + + for i := 0; i < 5; i++ { + idx, _ := createRandomIndex(rand.New(rng)) + mIdx.Insert(idx) + } + + idx1, lookupID := createRandomIndex(rand.New(rng)) + mIdx.Insert(idx1) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.Lookup(lookupID, restic.DataBlob) + } +} + +func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) { + lookupID := restic.NewRandomID() + idx1, _ := createRandomIndex(rand.New(rand.NewSource(0))) + + mIdx := repository.NewMasterIndex() + mIdx.Insert(idx1) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.Lookup(lookupID, restic.DataBlob) + } +} + +func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) { + rng := rand.New(rand.NewSource(0)) + lookupID := restic.NewRandomID() + mIdx := repository.NewMasterIndex() + + for i := 0; i < 5; i++ { + idx, _ := createRandomIndex(rand.New(rng)) + mIdx.Insert(idx) + } + + idx1, _ := createRandomIndex(rand.New(rng)) + mIdx.Insert(idx1) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.Lookup(lookupID, restic.DataBlob) + } +} diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go index 2d29a589a..6aed91bb2 100644 --- a/internal/repository/repack_test.go +++ b/internal/repository/repack_test.go @@ -114,9 +114,9 @@ func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSe idx := repo.Index() for h := range blobs { - list, err := idx.Lookup(h.ID, h.Type) - if err != nil { - t.Fatal(err) + list, found := idx.Lookup(h.ID, h.Type) + if !found { + t.Fatal("Failed to find blob", h.ID.Str(), "with type", h.Type) } for _, pb := range list { @@ -215,8 +215,8 @@ func TestRepack(t *testing.T) { idx := repo.Index() for h := range keepBlobs { - list, err := idx.Lookup(h.ID, h.Type) - if err != nil { + list, found := idx.Lookup(h.ID, h.Type) + if !found { t.Errorf("unable to find blob %v in repo", h.ID.Str()) continue } @@ -234,7 +234,7 @@ func TestRepack(t *testing.T) { } for h := range removeBlobs { - if _, err := idx.Lookup(h.ID, h.Type); err == nil { + if _, found := idx.Lookup(h.ID, h.Type); found { t.Errorf("blob %v still contained in the repo", h) } } diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 193ec1ca7..5207fe611 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -115,10 +115,10 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy debug.Log("load %v with id %v (buf len %v, cap %d)", t, id.Str(), len(plaintextBuf), cap(plaintextBuf)) // lookup packs - blobs, err := r.idx.Lookup(id, t) - if err != nil { - debug.Log("id %v not found in index: %v", id.Str(), err) - return 0, err + blobs, found := r.idx.Lookup(id, t) + if !found { + debug.Log("id %v not found in index", id.Str()) + return 0, errors.Errorf("id %v not found in repository", id) } // try cached pack files first @@ -193,7 +193,7 @@ func (r *Repository) LoadJSONUnpacked(ctx context.Context, t restic.FileType, id } // LookupBlobSize returns the size of blob id. -func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, error) { +func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bool) { return r.idx.LookupSize(id, tpe) } @@ -588,9 +588,9 @@ func (r *Repository) Close() error { // space. func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) (int, error) { debug.Log("load blob %v into buf (len %v, cap %v)", id.Str(), len(buf), cap(buf)) - size, err := r.idx.LookupSize(id, t) - if err != nil { - return 0, err + size, found := r.idx.LookupSize(id, t) + if !found { + return 0, errors.Errorf("id %v not found in repository", id) } if cap(buf) < restic.CiphertextLength(int(size)) { @@ -622,9 +622,9 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte func (r *Repository) LoadTree(ctx context.Context, id restic.ID) (*restic.Tree, error) { debug.Log("load tree %v", id.Str()) - size, err := r.idx.LookupSize(id, restic.TreeBlob) - if err != nil { - return nil, err + size, found := r.idx.LookupSize(id, restic.TreeBlob) + if !found { + return nil, errors.Errorf("tree %v not found in repository", id) } debug.Log("size is %d, create buffer", size) diff --git a/internal/restic/node.go b/internal/restic/node.go index 81576057a..33e5285a8 100644 --- a/internal/restic/node.go +++ b/internal/restic/node.go @@ -285,9 +285,9 @@ func (node Node) createFileAt(ctx context.Context, path string, repo Repository, func (node Node) writeNodeContent(ctx context.Context, repo Repository, f *os.File) error { var buf []byte for _, id := range node.Content { - size, err := repo.LookupBlobSize(id, DataBlob) - if err != nil { - return err + size, found := repo.LookupBlobSize(id, DataBlob) + if !found { + return errors.Errorf("id %v not found in repository", id) } buf = buf[:cap(buf)] diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 2daae41b2..c2f55d326 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -24,7 +24,7 @@ type Repository interface { Config() Config - LookupBlobSize(ID, BlobType) (uint, error) + LookupBlobSize(ID, BlobType) (uint, bool) List(context.Context, FileType) <-chan ID ListPack(context.Context, ID) ([]Blob, int64, error) @@ -52,7 +52,7 @@ type Lister interface { // Index keeps track of the blobs are stored within files. type Index interface { Has(ID, BlobType) bool - Lookup(ID, BlobType) ([]PackedBlob, error) + Lookup(ID, BlobType) ([]PackedBlob, bool) Count(BlobType) uint // Each returns a channel that yields all blobs known to the index. When