From ebce4b2581d2899fb6adbf9a6971045723108cb8 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Sat, 13 Jan 2018 19:43:37 -0500 Subject: [PATCH 1/6] repository/index: Speed up benchmarks and tests When setting up the index used for benchmarking, use math/rand instead of crypto/rand since the generated ids don't need to be evenly distributed, and not be secure against guessing. As such, use a different random id function (only available during tests) that uses math/rand instead. --- internal/repository/index_test.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index f2e0746c0..e5ed6489d 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -383,16 +383,23 @@ func TestIndexPacks(t *testing.T) { const maxPackSize = 16 * 1024 * 1024 -func createRandomIndex() (idx *repository.Index, lookupID restic.ID) { +// This function generates a (insecure) random ID, similar to NewRandomID +func NewRandomTestID(rng *rand.Rand) restic.ID { + id := restic.ID{} + rng.Read(id[:]) + return id +} + +func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.ID) { idx = repository.NewIndex() // create index with 200k pack files for i := 0; i < 200000; i++ { - packID := restic.NewRandomID() + packID := NewRandomTestID(rng) offset := 0 for offset < maxPackSize { size := 2000 + rand.Intn(4*1024*1024) - id := restic.NewRandomID() + id := NewRandomTestID(rng) idx.Store(restic.PackedBlob{ PackID: packID, Blob: restic.Blob{ @@ -415,7 +422,7 @@ func createRandomIndex() (idx *repository.Index, lookupID restic.ID) { } func BenchmarkIndexHasUnknown(b *testing.B) { - idx, _ := createRandomIndex() + idx, _ := createRandomIndex(rand.New(rand.NewSource(0))) lookupID := restic.NewRandomID() b.ResetTimer() @@ -426,7 +433,7 @@ func BenchmarkIndexHasUnknown(b *testing.B) { } func BenchmarkIndexHasKnown(b *testing.B) { - idx, lookupID := createRandomIndex() + idx, lookupID := createRandomIndex(rand.New(rand.NewSource(0))) b.ResetTimer() From df2c03a6a42af137c72113c6c4c56cbc0d230adc Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 12 Jan 2018 01:20:12 -0500 Subject: [PATCH 2/6] repository/master_index: Optimize Index.Lookup() When looking up a blob in the master index, with several indexes present in the master index, a significant amount of time is spent generating errors for each failed lookup. However, these errors are often used to check if a blob is present, but the contents are not inspected making the overhead of the error not useful. Instead, change Index.Lookup (and Index.LookupSize) to instead return a boolean denoting if the blob was found instead of an error. Also change all the calls to these functions to handle the new function signature. benchmark old ns/op new ns/op delta BenchmarkMasterIndexLookupSingleIndex-6 820 897 +9.39% BenchmarkMasterIndexLookupMultipleIndex-6 12821 2001 -84.39% BenchmarkMasterIndexLookupSingleIndexUnknown-6 5378 492 -90.85% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 17026 1649 -90.31% benchmark old allocs new allocs delta BenchmarkMasterIndexLookupSingleIndex-6 9 9 +0.00% BenchmarkMasterIndexLookupMultipleIndex-6 59 19 -67.80% BenchmarkMasterIndexLookupSingleIndexUnknown-6 22 6 -72.73% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 72 16 -77.78% benchmark old bytes new bytes delta BenchmarkMasterIndexLookupSingleIndex-6 160 160 +0.00% BenchmarkMasterIndexLookupMultipleIndex-6 3200 240 -92.50% BenchmarkMasterIndexLookupSingleIndexUnknown-6 1232 48 -96.10% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 4272 128 -97.00% --- cmd/restic/cmd_cat.go | 4 +- cmd/restic/cmd_diff.go | 6 +- cmd/restic/cmd_dump.go | 6 +- internal/archiver/archive_reader_test.go | 8 +- internal/archiver/archiver.go | 4 +- internal/fuse/file.go | 8 +- internal/fuse/file_test.go | 4 +- internal/repository/index.go | 16 +-- internal/repository/index_test.go | 20 ++-- internal/repository/master_index.go | 17 ++-- internal/repository/master_index_test.go | 123 +++++++++++++++++++++++ internal/repository/repack_test.go | 12 +-- internal/repository/repository.go | 22 ++-- internal/restic/node.go | 6 +- internal/restic/repository.go | 4 +- 15 files changed, 192 insertions(+), 68 deletions(-) create mode 100644 internal/repository/master_index_test.go diff --git a/cmd/restic/cmd_cat.go b/cmd/restic/cmd_cat.go index 7e66af734..98c97a5a5 100644 --- a/cmd/restic/cmd_cat.go +++ b/cmd/restic/cmd_cat.go @@ -165,8 +165,8 @@ func runCat(gopts GlobalOptions, args []string) error { case "blob": for _, t := range []restic.BlobType{restic.DataBlob, restic.TreeBlob} { - list, err := repo.Index().Lookup(id, t) - if err != nil { + list, found := repo.Index().Lookup(id, t) + if !found { continue } blob := list[0] diff --git a/cmd/restic/cmd_diff.go b/cmd/restic/cmd_diff.go index dfaf8c85d..9d9057d33 100644 --- a/cmd/restic/cmd_diff.go +++ b/cmd/restic/cmd_diff.go @@ -135,9 +135,9 @@ func updateBlobs(repo restic.Repository, blobs restic.BlobSet, stats *DiffStat) stats.TreeBlobs++ } - size, err := repo.LookupBlobSize(h.ID, h.Type) - if err != nil { - Warnf("unable to find blob size for %v: %v\n", h, err) + size, found := repo.LookupBlobSize(h.ID, h.Type) + if !found { + Warnf("unable to find blob size for %v\n", h) continue } diff --git a/cmd/restic/cmd_dump.go b/cmd/restic/cmd_dump.go index eb8250979..39c861bff 100644 --- a/cmd/restic/cmd_dump.go +++ b/cmd/restic/cmd_dump.go @@ -59,9 +59,9 @@ func splitPath(path string) []string { func dumpNode(ctx context.Context, repo restic.Repository, node *restic.Node) error { var buf []byte for _, id := range node.Content { - size, err := repo.LookupBlobSize(id, restic.DataBlob) - if err != nil { - return err + size, found := repo.LookupBlobSize(id, restic.DataBlob) + if !found { + return errors.Errorf("id %v not found in repository", id) } buf = buf[:cap(buf)] diff --git a/internal/archiver/archive_reader_test.go b/internal/archiver/archive_reader_test.go index 56e5fec5f..1c8efc969 100644 --- a/internal/archiver/archive_reader_test.go +++ b/internal/archiver/archive_reader_test.go @@ -43,9 +43,9 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name // check blobs for i, id := range node.Content { - size, err := repo.LookupBlobSize(id, restic.DataBlob) - if err != nil { - t.Fatal(err) + size, found := repo.LookupBlobSize(id, restic.DataBlob) + if !found { + t.Fatal("Failed to find blob", id.Str()) } buf := restic.NewBlobBuffer(int(size)) @@ -55,7 +55,7 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name } buf2 := make([]byte, int(size)) - _, err = io.ReadFull(rd, buf2) + _, err := io.ReadFull(rd, buf2) if err != nil { t.Fatal(err) } diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 20ba863c3..d64702bb7 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -86,8 +86,8 @@ func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool { arch.knownBlobs.Insert(id) - _, err := arch.repo.Index().Lookup(id, t) - if err == nil { + _, found := arch.repo.Index().Lookup(id, t) + if found { return true } diff --git a/internal/fuse/file.go b/internal/fuse/file.go index 233875993..80cdd59ad 100644 --- a/internal/fuse/file.go +++ b/internal/fuse/file.go @@ -4,6 +4,7 @@ package fuse import ( + "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/debug" @@ -36,9 +37,10 @@ func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) ( for i, id := range node.Content { size, ok := root.blobSizeCache.Lookup(id) if !ok { - size, err = root.repo.LookupBlobSize(id, restic.DataBlob) - if err != nil { - return nil, err + var found bool + size, found = root.repo.LookupBlobSize(id, restic.DataBlob) + if !found { + return nil, errors.Errorf("id %v not found in repository", id) } } diff --git a/internal/fuse/file_test.go b/internal/fuse/file_test.go index 121c218f5..bacb13d74 100644 --- a/internal/fuse/file_test.go +++ b/internal/fuse/file_test.go @@ -81,8 +81,8 @@ func TestFuseFile(t *testing.T) { memfile []byte ) for _, id := range content { - size, err := repo.LookupBlobSize(id, restic.DataBlob) - rtest.OK(t, err) + size, found := repo.LookupBlobSize(id, restic.DataBlob) + rtest.Assert(t, found, "Expected to find blob id %v", id) filesize += uint64(size) buf := restic.NewBlobBuffer(int(size)) diff --git a/internal/repository/index.go b/internal/repository/index.go index b42ea6baf..7da96955a 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -110,7 +110,7 @@ func (idx *Index) Store(blob restic.PackedBlob) { } // Lookup queries the index for the blob ID and returns a restic.PackedBlob. -func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) { +func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) { idx.m.Lock() defer idx.m.Unlock() @@ -136,11 +136,11 @@ func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.Pack blobs = append(blobs, blob) } - return blobs, nil + return blobs, true } debug.Log("id %v not found", id.Str()) - return nil, errors.Errorf("id %v not found in index", id) + return nil, false } // ListPack returns a list of blobs contained in a pack. @@ -180,13 +180,13 @@ func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool { // LookupSize returns the length of the plaintext content of the blob with the // given id. -func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, err error) { - blobs, err := idx.Lookup(id, tpe) - if err != nil { - return 0, err +func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, found bool) { + blobs, found := idx.Lookup(id, tpe) + if !found { + return 0, found } - return uint(restic.PlaintextLength(int(blobs[0].Length))), nil + return uint(restic.PlaintextLength(int(blobs[0].Length))), true } // Supersedes returns the list of indexes this index supersedes, if any. diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index e5ed6489d..599f243f3 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -65,8 +65,8 @@ func TestIndexSerialize(t *testing.T) { rtest.OK(t, err) for _, testBlob := range tests { - list, err := idx.Lookup(testBlob.id, testBlob.tpe) - rtest.OK(t, err) + list, found := idx.Lookup(testBlob.id, testBlob.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str()) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list) @@ -78,8 +78,8 @@ func TestIndexSerialize(t *testing.T) { rtest.Equals(t, testBlob.offset, result.Offset) rtest.Equals(t, testBlob.length, result.Length) - list2, err := idx2.Lookup(testBlob.id, testBlob.tpe) - rtest.OK(t, err) + list2, found := idx2.Lookup(testBlob.id, testBlob.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id) if len(list2) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list2), list2) @@ -146,8 +146,8 @@ func TestIndexSerialize(t *testing.T) { // all new blobs must be in the index for _, testBlob := range newtests { - list, err := idx3.Lookup(testBlob.id, testBlob.tpe) - rtest.OK(t, err) + list, found := idx3.Lookup(testBlob.id, testBlob.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", testBlob.id.Str()) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", testBlob.id.Str(), len(list), list) @@ -293,8 +293,8 @@ func TestIndexUnserialize(t *testing.T) { rtest.OK(t, err) for _, test := range exampleTests { - list, err := idx.Lookup(test.id, test.tpe) - rtest.OK(t, err) + list, found := idx.Lookup(test.id, test.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str()) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) @@ -341,8 +341,8 @@ func TestIndexUnserializeOld(t *testing.T) { rtest.OK(t, err) for _, test := range exampleTests { - list, err := idx.Lookup(test.id, test.tpe) - rtest.OK(t, err) + list, found := idx.Lookup(test.id, test.tpe) + rtest.Assert(t, found, "Expected to find blob id %v", test.id.Str()) if len(list) != 1 { t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list) diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go index ad458774d..4c41e8ba2 100644 --- a/internal/repository/master_index.go +++ b/internal/repository/master_index.go @@ -4,7 +4,6 @@ import ( "context" "sync" - "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/debug" @@ -22,36 +21,36 @@ func NewMasterIndex() *MasterIndex { } // Lookup queries all known Indexes for the ID and returns the first match. -func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, err error) { +func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) { mi.idxMutex.RLock() defer mi.idxMutex.RUnlock() debug.Log("looking up id %v, tpe %v", id.Str(), tpe) for _, idx := range mi.idx { - blobs, err = idx.Lookup(id, tpe) - if err == nil { + blobs, found = idx.Lookup(id, tpe) + if found { debug.Log("found id %v: %v", id.Str(), blobs) return } } debug.Log("id %v not found in any index", id.Str()) - return nil, errors.Errorf("id %v not found in any index", id) + return nil, false } // LookupSize queries all known Indexes for the ID and returns the first match. -func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, error) { +func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, bool) { mi.idxMutex.RLock() defer mi.idxMutex.RUnlock() for _, idx := range mi.idx { - if idx.Has(id, tpe) { - return idx.LookupSize(id, tpe) + if size, found := idx.LookupSize(id, tpe); found { + return size, found } } - return 0, errors.Errorf("id %v not found in any index", id) + return 0, false } // ListPack returns the list of blobs in a pack. The first matching index is diff --git a/internal/repository/master_index_test.go b/internal/repository/master_index_test.go new file mode 100644 index 000000000..7dfcdda5f --- /dev/null +++ b/internal/repository/master_index_test.go @@ -0,0 +1,123 @@ +package repository_test + +import ( + "math/rand" + "testing" + + "github.com/restic/restic/internal/repository" + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +func TestMasterIndexLookup(t *testing.T) { + idInIdx1 := restic.NewRandomID() + idInIdx2 := restic.NewRandomID() + + blob1 := restic.PackedBlob{ + PackID: restic.NewRandomID(), + Blob: restic.Blob{ + Type: restic.DataBlob, + ID: idInIdx1, + Length: 10, + Offset: 0, + }, + } + + blob2 := restic.PackedBlob{ + PackID: restic.NewRandomID(), + Blob: restic.Blob{ + Type: restic.DataBlob, + ID: idInIdx2, + Length: 100, + Offset: 10, + }, + } + + idx1 := repository.NewIndex() + idx1.Store(blob1) + + idx2 := repository.NewIndex() + idx2.Store(blob2) + + mIdx := repository.NewMasterIndex() + mIdx.Insert(idx1) + mIdx.Insert(idx2) + + blobs, found := mIdx.Lookup(idInIdx1, restic.DataBlob) + rtest.Assert(t, found, "Expected to find blob id %v from index 1", idInIdx1) + rtest.Equals(t, []restic.PackedBlob{blob1}, blobs) + + blobs, found = mIdx.Lookup(idInIdx2, restic.DataBlob) + rtest.Assert(t, found, "Expected to find blob id %v from index 2", idInIdx2) + rtest.Equals(t, []restic.PackedBlob{blob2}, blobs) + + blobs, found = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob) + rtest.Assert(t, !found, "Expected to not find a blob when fetching with a random id") + rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id") +} + +func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) { + idx1, lookupID := createRandomIndex(rand.New(rand.NewSource(0))) + + mIdx := repository.NewMasterIndex() + mIdx.Insert(idx1) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.Lookup(lookupID, restic.DataBlob) + } +} + +func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) { + rng := rand.New(rand.NewSource(0)) + mIdx := repository.NewMasterIndex() + + for i := 0; i < 5; i++ { + idx, _ := createRandomIndex(rand.New(rng)) + mIdx.Insert(idx) + } + + idx1, lookupID := createRandomIndex(rand.New(rng)) + mIdx.Insert(idx1) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.Lookup(lookupID, restic.DataBlob) + } +} + +func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) { + lookupID := restic.NewRandomID() + idx1, _ := createRandomIndex(rand.New(rand.NewSource(0))) + + mIdx := repository.NewMasterIndex() + mIdx.Insert(idx1) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.Lookup(lookupID, restic.DataBlob) + } +} + +func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) { + rng := rand.New(rand.NewSource(0)) + lookupID := restic.NewRandomID() + mIdx := repository.NewMasterIndex() + + for i := 0; i < 5; i++ { + idx, _ := createRandomIndex(rand.New(rng)) + mIdx.Insert(idx) + } + + idx1, _ := createRandomIndex(rand.New(rng)) + mIdx.Insert(idx1) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mIdx.Lookup(lookupID, restic.DataBlob) + } +} diff --git a/internal/repository/repack_test.go b/internal/repository/repack_test.go index 2d29a589a..6aed91bb2 100644 --- a/internal/repository/repack_test.go +++ b/internal/repository/repack_test.go @@ -114,9 +114,9 @@ func findPacksForBlobs(t *testing.T, repo restic.Repository, blobs restic.BlobSe idx := repo.Index() for h := range blobs { - list, err := idx.Lookup(h.ID, h.Type) - if err != nil { - t.Fatal(err) + list, found := idx.Lookup(h.ID, h.Type) + if !found { + t.Fatal("Failed to find blob", h.ID.Str(), "with type", h.Type) } for _, pb := range list { @@ -215,8 +215,8 @@ func TestRepack(t *testing.T) { idx := repo.Index() for h := range keepBlobs { - list, err := idx.Lookup(h.ID, h.Type) - if err != nil { + list, found := idx.Lookup(h.ID, h.Type) + if !found { t.Errorf("unable to find blob %v in repo", h.ID.Str()) continue } @@ -234,7 +234,7 @@ func TestRepack(t *testing.T) { } for h := range removeBlobs { - if _, err := idx.Lookup(h.ID, h.Type); err == nil { + if _, found := idx.Lookup(h.ID, h.Type); found { t.Errorf("blob %v still contained in the repo", h) } } diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 193ec1ca7..5207fe611 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -115,10 +115,10 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy debug.Log("load %v with id %v (buf len %v, cap %d)", t, id.Str(), len(plaintextBuf), cap(plaintextBuf)) // lookup packs - blobs, err := r.idx.Lookup(id, t) - if err != nil { - debug.Log("id %v not found in index: %v", id.Str(), err) - return 0, err + blobs, found := r.idx.Lookup(id, t) + if !found { + debug.Log("id %v not found in index", id.Str()) + return 0, errors.Errorf("id %v not found in repository", id) } // try cached pack files first @@ -193,7 +193,7 @@ func (r *Repository) LoadJSONUnpacked(ctx context.Context, t restic.FileType, id } // LookupBlobSize returns the size of blob id. -func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, error) { +func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bool) { return r.idx.LookupSize(id, tpe) } @@ -588,9 +588,9 @@ func (r *Repository) Close() error { // space. func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) (int, error) { debug.Log("load blob %v into buf (len %v, cap %v)", id.Str(), len(buf), cap(buf)) - size, err := r.idx.LookupSize(id, t) - if err != nil { - return 0, err + size, found := r.idx.LookupSize(id, t) + if !found { + return 0, errors.Errorf("id %v not found in repository", id) } if cap(buf) < restic.CiphertextLength(int(size)) { @@ -622,9 +622,9 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte func (r *Repository) LoadTree(ctx context.Context, id restic.ID) (*restic.Tree, error) { debug.Log("load tree %v", id.Str()) - size, err := r.idx.LookupSize(id, restic.TreeBlob) - if err != nil { - return nil, err + size, found := r.idx.LookupSize(id, restic.TreeBlob) + if !found { + return nil, errors.Errorf("tree %v not found in repository", id) } debug.Log("size is %d, create buffer", size) diff --git a/internal/restic/node.go b/internal/restic/node.go index 81576057a..33e5285a8 100644 --- a/internal/restic/node.go +++ b/internal/restic/node.go @@ -285,9 +285,9 @@ func (node Node) createFileAt(ctx context.Context, path string, repo Repository, func (node Node) writeNodeContent(ctx context.Context, repo Repository, f *os.File) error { var buf []byte for _, id := range node.Content { - size, err := repo.LookupBlobSize(id, DataBlob) - if err != nil { - return err + size, found := repo.LookupBlobSize(id, DataBlob) + if !found { + return errors.Errorf("id %v not found in repository", id) } buf = buf[:cap(buf)] diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 2daae41b2..c2f55d326 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -24,7 +24,7 @@ type Repository interface { Config() Config - LookupBlobSize(ID, BlobType) (uint, error) + LookupBlobSize(ID, BlobType) (uint, bool) List(context.Context, FileType) <-chan ID ListPack(context.Context, ID) ([]Blob, int64, error) @@ -52,7 +52,7 @@ type Lister interface { // Index keeps track of the blobs are stored within files. type Index interface { Has(ID, BlobType) bool - Lookup(ID, BlobType) ([]PackedBlob, error) + Lookup(ID, BlobType) ([]PackedBlob, bool) Count(BlobType) uint // Each returns a channel that yields all blobs known to the index. When From 3a1614844785bccfe30221a69ec7f268f3912ad5 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 12 Jan 2018 01:21:39 -0500 Subject: [PATCH 3/6] archiver/archiver: Use Index.Has() instead of Index.Lookup() in isKnownBlob Index.Has() is a faster then Index.Lookup() for checking if a blob exists in the index. As the returned data is never used, this avoids a ton of allocations. --- internal/archiver/archiver.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index d64702bb7..b1e612409 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -86,8 +86,7 @@ func (arch *Archiver) isKnownBlob(id restic.ID, t restic.BlobType) bool { arch.knownBlobs.Insert(id) - _, found := arch.repo.Index().Lookup(id, t) - if found { + if arch.repo.Index().Has(id, t) { return true } From 4cec7e236a504c4665ed0ed19d3d4fd02bb95829 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Fri, 12 Jan 2018 01:37:33 -0500 Subject: [PATCH 4/6] Add Changelog --- changelog/0.8.2/pull-1549 | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 changelog/0.8.2/pull-1549 diff --git a/changelog/0.8.2/pull-1549 b/changelog/0.8.2/pull-1549 new file mode 100644 index 000000000..b337facd3 --- /dev/null +++ b/changelog/0.8.2/pull-1549 @@ -0,0 +1,9 @@ +Enhancement: Speed up querying across indices and scanning existing files + +This change increases the whenever a blob (part of a file) is searched for in a +restic repository. This will reduce cpu usage some when backing up files already +backed up by restic. Cpu usage is further decreased when scanning files. + +https://github.com/restic/restic/issues/1234 +https://github.com/restic/restic/pull/55555 +https://forum.restic/.net/foo/bar/baz From 3789e55e20b3234b85de2172ea5e6ef75c3e6946 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Sun, 21 Jan 2018 23:26:24 -0500 Subject: [PATCH 5/6] repostiory/index: Remove logging from Lookup function. The logging in these functions double the time they take to execute. However, it is only really useful on failures, which are better reported by the calling functions. benchmark old ns/op new ns/op delta BenchmarkMasterIndexLookupSingleIndex-6 897 395 -55.96% BenchmarkMasterIndexLookupMultipleIndex-6 2001 1090 -45.53% BenchmarkMasterIndexLookupSingleIndexUnknown-6 492 215 -56.30% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 1649 912 -44.69% benchmark old allocs new allocs delta BenchmarkMasterIndexLookupSingleIndex-6 9 1 -88.89% BenchmarkMasterIndexLookupMultipleIndex-6 19 1 -94.74% BenchmarkMasterIndexLookupSingleIndexUnknown-6 6 0 -100.00% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 16 0 -100.00% benchmark old bytes new bytes delta BenchmarkMasterIndexLookupSingleIndex-6 160 96 -40.00% BenchmarkMasterIndexLookupMultipleIndex-6 240 96 -60.00% BenchmarkMasterIndexLookupSingleIndexUnknown-6 48 0 -100.00% BenchmarkMasterIndexLookupMultipleIndexUnknown-6 128 0 -100.00% --- internal/repository/index.go | 4 ---- internal/repository/master_index.go | 4 ---- 2 files changed, 8 deletions(-) diff --git a/internal/repository/index.go b/internal/repository/index.go index 7da96955a..b9ed9cf98 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -120,9 +120,6 @@ func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.Pack blobs = make([]restic.PackedBlob, 0, len(packs)) for _, p := range packs { - debug.Log("id %v found in pack %v at %d, length %d", - id.Str(), p.packID.Str(), p.offset, p.length) - blob := restic.PackedBlob{ Blob: restic.Blob{ Type: tpe, @@ -139,7 +136,6 @@ func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.Pack return blobs, true } - debug.Log("id %v not found", id.Str()) return nil, false } diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go index 4c41e8ba2..02a27d5d1 100644 --- a/internal/repository/master_index.go +++ b/internal/repository/master_index.go @@ -25,17 +25,13 @@ func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic mi.idxMutex.RLock() defer mi.idxMutex.RUnlock() - debug.Log("looking up id %v, tpe %v", id.Str(), tpe) - for _, idx := range mi.idx { blobs, found = idx.Lookup(id, tpe) if found { - debug.Log("found id %v: %v", id.Str(), blobs) return } } - debug.Log("id %v not found in any index", id.Str()) return nil, false } From fe33c05a20d49fb934bfd28b8f223554bc8fc859 Mon Sep 17 00:00:00 2001 From: Matthew Dawson Date: Sun, 21 Jan 2018 23:26:47 -0500 Subject: [PATCH 6/6] debug/log: Add benchmarks for calling the logging function Add some benchmarks for calling Log, both with a static string along with calling the ID.Str and ID.String functions. --- internal/debug/log_test.go | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 internal/debug/log_test.go diff --git a/internal/debug/log_test.go b/internal/debug/log_test.go new file mode 100644 index 000000000..647e16e85 --- /dev/null +++ b/internal/debug/log_test.go @@ -0,0 +1,34 @@ +package debug_test + +import ( + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/restic" + + "testing" +) + +func BenchmarkLogStatic(b *testing.B) { + for i := 0; i < b.N; i++ { + debug.Log("Static string") + } +} + +func BenchmarkLogIDStr(b *testing.B) { + id := restic.NewRandomID() + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + debug.Log("id: %v", id.Str()) + } +} + +func BenchmarkLogIDString(b *testing.B) { + id := restic.NewRandomID() + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + debug.Log("id: %v", id.String()) + } +}