2021-09-11 13:26:10 +02:00
|
|
|
package bloblru
|
2020-06-17 12:17:55 +02:00
|
|
|
|
|
|
|
import (
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/restic/restic/internal/debug"
|
|
|
|
"github.com/restic/restic/internal/restic"
|
|
|
|
|
2022-11-27 09:58:19 +01:00
|
|
|
"github.com/hashicorp/golang-lru/v2/simplelru"
|
2020-06-17 12:17:55 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
// Crude estimate of the overhead per blob: a SHA-256, a linked list node
|
2021-09-11 13:26:10 +02:00
|
|
|
// and some pointers. See comment in Cache.add.
|
|
|
|
const overhead = len(restic.ID{}) + 64
|
2020-06-17 12:17:55 +02:00
|
|
|
|
2021-09-11 13:26:10 +02:00
|
|
|
// A Cache is a fixed-size LRU cache of blob contents.
|
2020-06-17 12:17:55 +02:00
|
|
|
// It is safe for concurrent access.
|
2021-09-11 13:26:10 +02:00
|
|
|
type Cache struct {
|
2020-06-17 12:17:55 +02:00
|
|
|
mu sync.Mutex
|
2022-11-27 09:58:19 +01:00
|
|
|
c *simplelru.LRU[restic.ID, []byte]
|
2020-06-17 12:17:55 +02:00
|
|
|
|
|
|
|
free, size int // Current and max capacity, in bytes.
|
2024-05-05 11:37:35 +02:00
|
|
|
inProgress map[restic.ID]chan struct{}
|
2020-06-17 12:17:55 +02:00
|
|
|
}
|
|
|
|
|
2022-03-28 22:25:25 +02:00
|
|
|
// New constructs a blob cache that stores at most size bytes worth of blobs.
|
2021-09-11 13:26:10 +02:00
|
|
|
func New(size int) *Cache {
|
|
|
|
c := &Cache{
|
2024-05-05 11:37:35 +02:00
|
|
|
free: size,
|
|
|
|
size: size,
|
|
|
|
inProgress: make(map[restic.ID]chan struct{}),
|
2020-06-17 12:17:55 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewLRU wants us to specify some max. number of entries, else it errors.
|
2021-09-11 13:26:10 +02:00
|
|
|
// The actual maximum will be smaller than size/overhead, because we
|
2020-06-17 12:17:55 +02:00
|
|
|
// evict entries (RemoveOldest in add) to maintain our size bound.
|
2021-09-11 13:26:10 +02:00
|
|
|
maxEntries := size / overhead
|
2022-11-27 09:58:19 +01:00
|
|
|
lru, err := simplelru.NewLRU[restic.ID, []byte](maxEntries, c.evict)
|
2020-06-17 12:17:55 +02:00
|
|
|
if err != nil {
|
|
|
|
panic(err) // Can only be maxEntries <= 0.
|
|
|
|
}
|
|
|
|
c.c = lru
|
|
|
|
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
2021-09-24 15:38:23 +02:00
|
|
|
// Add adds key id with value blob to c.
|
|
|
|
// It may return an evicted buffer for reuse.
|
|
|
|
func (c *Cache) Add(id restic.ID, blob []byte) (old []byte) {
|
2021-09-11 13:26:10 +02:00
|
|
|
debug.Log("bloblru.Cache: add %v", id)
|
2020-06-17 12:17:55 +02:00
|
|
|
|
2021-10-03 09:33:58 +02:00
|
|
|
size := cap(blob) + overhead
|
2020-06-17 12:17:55 +02:00
|
|
|
if size > c.size {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
c.mu.Lock()
|
|
|
|
defer c.mu.Unlock()
|
|
|
|
|
2022-11-27 09:58:19 +01:00
|
|
|
if c.c.Contains(id) { // Doesn't update the recency list.
|
2020-06-17 12:17:55 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2021-09-11 13:26:10 +02:00
|
|
|
// This loop takes at most min(maxEntries, maxchunksize/overhead)
|
2020-06-17 12:17:55 +02:00
|
|
|
// iterations.
|
|
|
|
for size > c.free {
|
2022-11-27 09:58:19 +01:00
|
|
|
_, b, _ := c.c.RemoveOldest()
|
2021-10-03 09:33:58 +02:00
|
|
|
if cap(b) > cap(old) {
|
2021-09-24 15:38:23 +02:00
|
|
|
// We can only return one buffer, so pick the largest.
|
|
|
|
old = b
|
|
|
|
}
|
2020-06-17 12:17:55 +02:00
|
|
|
}
|
|
|
|
|
2022-11-27 09:58:19 +01:00
|
|
|
c.c.Add(id, blob)
|
2020-06-17 12:17:55 +02:00
|
|
|
c.free -= size
|
2021-09-24 15:38:23 +02:00
|
|
|
|
|
|
|
return old
|
2020-06-17 12:17:55 +02:00
|
|
|
}
|
|
|
|
|
2021-09-11 13:26:10 +02:00
|
|
|
func (c *Cache) Get(id restic.ID) ([]byte, bool) {
|
2020-06-17 12:17:55 +02:00
|
|
|
c.mu.Lock()
|
2022-11-27 09:58:19 +01:00
|
|
|
blob, ok := c.c.Get(id)
|
2020-06-17 12:17:55 +02:00
|
|
|
c.mu.Unlock()
|
|
|
|
|
2021-09-11 13:26:10 +02:00
|
|
|
debug.Log("bloblru.Cache: get %v, hit %v", id, ok)
|
2020-06-17 12:17:55 +02:00
|
|
|
|
|
|
|
return blob, ok
|
|
|
|
}
|
|
|
|
|
2024-05-05 11:37:35 +02:00
|
|
|
func (c *Cache) GetOrCompute(id restic.ID, compute func() ([]byte, error)) ([]byte, error) {
|
|
|
|
// check if already cached
|
|
|
|
blob, ok := c.Get(id)
|
|
|
|
if ok {
|
|
|
|
return blob, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// check for parallel download or start our own
|
|
|
|
finish := make(chan struct{})
|
|
|
|
c.mu.Lock()
|
2024-05-26 12:38:41 +02:00
|
|
|
waitForResult, isComputing := c.inProgress[id]
|
|
|
|
if !isComputing {
|
2024-05-05 11:37:35 +02:00
|
|
|
c.inProgress[id] = finish
|
2024-05-26 12:38:20 +02:00
|
|
|
}
|
|
|
|
c.mu.Unlock()
|
2024-05-05 11:37:35 +02:00
|
|
|
|
2024-05-26 12:38:41 +02:00
|
|
|
if isComputing {
|
2024-05-26 12:38:20 +02:00
|
|
|
// wait for result of parallel download
|
|
|
|
<-waitForResult
|
|
|
|
} else {
|
2024-05-05 11:37:35 +02:00
|
|
|
// remove progress channel once finished here
|
|
|
|
defer func() {
|
|
|
|
c.mu.Lock()
|
|
|
|
delete(c.inProgress, id)
|
|
|
|
c.mu.Unlock()
|
|
|
|
close(finish)
|
|
|
|
}()
|
|
|
|
}
|
2024-05-26 12:37:24 +02:00
|
|
|
|
2024-05-26 12:38:41 +02:00
|
|
|
// try again. This is necessary independent of whether isComputing is true or not.
|
2024-05-26 12:37:24 +02:00
|
|
|
// The calls to `c.Get()` and checking/adding the entry in `c.inProgress` are not atomic,
|
|
|
|
// thus the item might have been computed in the meantime.
|
|
|
|
// The following scenario would compute() the value multiple times otherwise:
|
|
|
|
// Goroutine A does not find a value in the initial call to `c.Get`, then goroutine B
|
|
|
|
// takes over, caches the computed value and cleans up its channel in c.inProgress.
|
|
|
|
// Then goroutine A continues, does not detect a parallel computation and would try
|
|
|
|
// to call compute() again.
|
|
|
|
blob, ok = c.Get(id)
|
|
|
|
if ok {
|
|
|
|
return blob, nil
|
2024-05-05 11:37:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// download it
|
|
|
|
blob, err := compute()
|
|
|
|
if err == nil {
|
|
|
|
c.Add(id, blob)
|
|
|
|
}
|
|
|
|
|
|
|
|
return blob, err
|
|
|
|
}
|
|
|
|
|
2022-11-27 09:58:19 +01:00
|
|
|
func (c *Cache) evict(key restic.ID, blob []byte) {
|
2021-10-03 09:33:58 +02:00
|
|
|
debug.Log("bloblru.Cache: evict %v, %d bytes", key, cap(blob))
|
|
|
|
c.free += cap(blob) + overhead
|
2020-06-17 12:17:55 +02:00
|
|
|
}
|