From b10acd2af7462e18f63ddd7c93132eb90c9b16ab Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Fri, 6 Mar 2020 09:17:33 +0100 Subject: [PATCH] Test and benchmark blob sorting in internal/repository --- internal/repository/repository.go | 14 ++-- .../repository/repository_internal_test.go | 81 +++++++++++++++++++ 2 files changed, 90 insertions(+), 5 deletions(-) create mode 100644 internal/repository/repository_internal_test.go diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 0d5242022..cee36ace8 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -111,9 +111,13 @@ func (r *Repository) LoadAndDecrypt(ctx context.Context, buf []byte, t restic.Fi return plaintext, nil } -// sortCachedPacks moves all cached pack files to the front of blobs. -func (r *Repository) sortCachedPacks(blobs []restic.PackedBlob) []restic.PackedBlob { - if r.Cache == nil { +type haver interface { + Has(restic.Handle) bool +} + +// sortCachedPacksFirst moves all cached pack files to the front of blobs. +func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) []restic.PackedBlob { + if cache == nil { return blobs } @@ -126,7 +130,7 @@ func (r *Repository) sortCachedPacks(blobs []restic.PackedBlob) []restic.PackedB noncached := make([]restic.PackedBlob, 0, len(blobs)/2) for _, blob := range blobs { - if r.Cache.Has(restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()}) { + if cache.Has(restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()}) { cached = append(cached, blob) continue } @@ -149,7 +153,7 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic. } // try cached pack files first - blobs = r.sortCachedPacks(blobs) + blobs = sortCachedPacksFirst(r.Cache, blobs) var lastError error for _, blob := range blobs { diff --git a/internal/repository/repository_internal_test.go b/internal/repository/repository_internal_test.go new file mode 100644 index 000000000..0aeea992a --- /dev/null +++ b/internal/repository/repository_internal_test.go @@ -0,0 +1,81 @@ +package repository + +import ( + "math/rand" + "testing" + + "github.com/restic/restic/internal/restic" + rtest "github.com/restic/restic/internal/test" +) + +type mapcache map[restic.Handle]struct{} + +func (c mapcache) Has(h restic.Handle) bool { + _, ok := c[h] + return ok +} + +func TestSortCachedPacksFirst(t *testing.T) { + var ( + blobs [100]restic.PackedBlob + blobset = make(map[restic.PackedBlob]struct{}) + cache = make(mapcache) + r = rand.New(rand.NewSource(1261)) + ) + + for i := 0; i < len(blobs); i++ { + var id restic.ID + r.Read(id[:]) + blobs[i] = restic.PackedBlob{PackID: id} + blobset[blobs[i]] = struct{}{} + + if i%3 == 0 { + h := restic.Handle{Name: id.String(), Type: restic.DataFile} + cache[h] = struct{}{} + } + } + + sorted := sortCachedPacksFirst(cache, blobs[:]) + + rtest.Equals(t, len(blobs), len(sorted)) + for i := 0; i < len(blobs); i++ { + h := restic.Handle{Type: restic.DataFile, Name: sorted[i].PackID.String()} + if i < len(cache) { + rtest.Assert(t, cache.Has(h), "non-cached blob at front of sorted output") + } else { + rtest.Assert(t, !cache.Has(h), "cached blob at end of sorted output") + } + _, ok := blobset[sorted[i]] + rtest.Assert(t, ok, "sortCachedPacksFirst changed blob id") + } +} + +func BenchmarkSortCachedPacksFirst(b *testing.B) { + const nblobs = 512 // Corresponds to a file of ca. 2GB. + + var ( + blobs [nblobs]restic.PackedBlob + cache = make(mapcache) + r = rand.New(rand.NewSource(1261)) + ) + + for i := 0; i < nblobs; i++ { + var id restic.ID + r.Read(id[:]) + blobs[i] = restic.PackedBlob{PackID: id} + + if i%3 == 0 { + h := restic.Handle{Name: id.String(), Type: restic.DataFile} + cache[h] = struct{}{} + } + } + + var cpy [nblobs]restic.PackedBlob + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + copy(cpy[:], blobs[:]) + sortCachedPacksFirst(cache, cpy[:]) + } +}