repository/index: Optimize index.Has()

When backing up several million files (>14M tested here) with few changes,
a large amount of time is spent failing to find an id in an index and creating
an error to signify this.  Since the check is made through the Has method,
which doesn't use this error, the time spent creating it is wasted.

Instead, directly check if the given id and type are present in the index.
This also avoids reporting all the packs containing this blob, further
reducing CPU usage.
This commit is contained in:
Matthew Dawson 2018-01-08 13:38:21 -05:00 committed by Alexander Neumann
parent d77a326bb0
commit 539599d1f1
2 changed files with 55 additions and 5 deletions

View File

@ -169,12 +169,13 @@ func (idx *Index) ListPack(id restic.ID) (list []restic.PackedBlob) {
// Has returns true iff a blob with the given id and type is listed in the
// index.
//
// The membership check is done directly on idx.pack while holding idx.m. It
// deliberately does not go through Lookup: Has only needs a yes/no answer, so
// building an error for the "not found" case (or gathering the packs that
// contain the blob) would be wasted work on a path that is hit very often.
func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool {
	idx.m.Lock()
	defer idx.m.Unlock()

	h := restic.BlobHandle{ID: id, Type: tpe}
	_, ok := idx.pack[h]
	return ok
}
// LookupSize returns the length of the plaintext content of the blob with the

View File

@ -434,3 +434,52 @@ func BenchmarkIndexHasKnown(b *testing.B) {
idx.Has(lookupID, restic.DataBlob)
}
}
// TestIndexHas fills a fresh index with data blobs and checks that Has
// reports every stored (id, type) pair, and rejects both an id that was never
// stored and a stored id queried with a different blob type.
func TestIndexHas(t *testing.T) {
	type testEntry struct {
		id             restic.ID
		pack           restic.ID
		tpe            restic.BlobType
		offset, length uint
	}

	const (
		packCount    = 50
		blobsPerPack = 20
	)

	idx := repository.NewIndex()
	var tests []testEntry

	// Store packCount packs of blobsPerPack data blobs each, remembering
	// every blob so it can be queried afterwards.
	for packNo := 0; packNo < packCount; packNo++ {
		packID := restic.NewRandomID()
		var offset uint

		for blobNo := 0; blobNo < blobsPerPack; blobNo++ {
			entry := testEntry{
				id:     restic.NewRandomID(),
				pack:   packID,
				tpe:    restic.DataBlob,
				offset: offset,
				length: uint(packNo*100 + blobNo),
			}

			blob := restic.Blob{
				Type:   entry.tpe,
				ID:     entry.id,
				Offset: entry.offset,
				Length: entry.length,
			}
			idx.Store(restic.PackedBlob{Blob: blob, PackID: entry.pack})

			tests = append(tests, entry)
			// Blobs are laid out back to back inside a pack.
			offset += entry.length
		}
	}

	// Every blob that was stored must be found.
	for _, entry := range tests {
		found := idx.Has(entry.id, entry.tpe)
		rtest.Assert(t, found, "Index reports not having data blob added to it")
	}

	// An id that was never stored must not be found.
	unknownFound := idx.Has(restic.NewRandomID(), restic.DataBlob)
	rtest.Assert(t, !unknownFound, "Index reports having a data blob not added to it")

	// A stored id must not match when asked for under another blob type.
	wrongTypeFound := idx.Has(tests[0].id, restic.TreeBlob)
	rtest.Assert(t, !wrongTypeFound, "Index reports having a tree blob added to it with the same id as a data blob")
}