From cb9cbe55d945e3f7b5df14c688e0bba8807144fc Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Thu, 28 Sep 2023 20:58:45 +0200 Subject: [PATCH] repository: store oversized blobs in separate pack files Store oversized blobs in separate pack files, as each such blob is large enough to warrant its own pack file. This simplifies the garbage collection of such blobs and keeps the cache smaller, as oversized (tree) blobs only have to be downloaded if they are actually used. --- internal/repository/packer_manager.go | 17 ++++++++++++----- internal/repository/packer_manager_test.go | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/internal/repository/packer_manager.go b/internal/repository/packer_manager.go index 4422e3418..2e2368aad 100644 --- a/internal/repository/packer_manager.go +++ b/internal/repository/packer_manager.go @@ -70,14 +70,19 @@ func (r *packerManager) SaveBlob(ctx context.Context, t restic.BlobType, id rest var err error packer := r.packer - if r.packer == nil { + // use separate packer if compressed length is larger than the packsize + // this speeds up the garbage collection of oversized blobs and reduces the cache size + // as the oversize blobs are only downloaded if necessary + if len(ciphertext) >= int(r.packSize) || r.packer == nil { packer, err = r.newPacker() if err != nil { return 0, err } + // don't store packer for oversized blob + if r.packer == nil { + r.packer = packer + } } - // remember packer - r.packer = packer // save ciphertext // Add only appends bytes in memory to avoid being a scaling bottleneck @@ -91,8 +96,10 @@ func (r *packerManager) SaveBlob(ctx context.Context, t restic.BlobType, id rest debug.Log("pack is not full enough (%d bytes)", packer.Size()) return size, nil } - // forget full packer - r.packer = nil + if packer == r.packer { + // forget full packer + r.packer = nil + } // call while holding lock to prevent findPacker from creating new packers if the uploaders are busy // else write 
the pack to the backend diff --git a/internal/repository/packer_manager_test.go b/internal/repository/packer_manager_test.go index 90f716e0d..8984073da 100644 --- a/internal/repository/packer_manager_test.go +++ b/internal/repository/packer_manager_test.go @@ -89,6 +89,24 @@ func testPackerManager(t testing.TB) int64 { return int64(bytes) } +func TestPackerManagerWithOversizeBlob(t *testing.T) { + packFiles := int(0) + sizeLimit := uint(512 * 1024) + pm := newPackerManager(crypto.NewRandomKey(), restic.DataBlob, sizeLimit, func(ctx context.Context, tp restic.BlobType, p *Packer) error { + packFiles++ + return nil + }) + + for _, i := range []uint{sizeLimit / 2, sizeLimit, sizeLimit / 3} { + _, err := pm.SaveBlob(context.TODO(), restic.DataBlob, restic.ID{}, make([]byte, i), 0) + test.OK(t, err) + } + test.OK(t, pm.Flush(context.TODO())) + + // oversized blob must be stored in a separate packfile + test.Equals(t, packFiles, 2) +} + func BenchmarkPackerManager(t *testing.B) { // Run testPackerManager if it hasn't run already, to set totalSize. once.Do(func() {