2
2
mirror of https://github.com/octoleo/restic.git synced 2024-11-22 21:05:10 +00:00
restic/internal/repository/indexmap.go
greatroar 5141228e0c repository: Re-tune indexmap allocation strategy
fd05037e1a changed the allocation batch
size from 256 to 128 under the assumption that an indexEntry is 60 bytes
on amd64, but it's 64: structs are padded out to a multiple of 8 for
alignment reasons. That means we'd waste no space in malloc even without
the batch allocation, at least on 64-bit machines. While that strategy
cuts the overallocation down dramatically for many small indexes, it also
seems to slow allocation down (Go 1.18, Linux, amd64, -benchtime=2s):

    name                   old time/op    new time/op    delta
    DecodeIndex-8             4.67s ± 5%     4.60s ± 1%      ~     (p=0.953 n=10+5)
    DecodeIndexParallel-8     4.67s ± 3%     4.60s ± 1%      ~     (p=0.953 n=10+5)
    IndexHasUnknown-8        37.8ns ± 8%    36.5ns ±14%      ~     (p=0.841 n=5+5)
    IndexHasKnown-8          38.5ns ±12%    37.7ns ±10%      ~     (p=0.968 n=5+5)
    IndexAlloc-8              615ms ±18%     607ms ± 1%      ~     (p=1.000 n=10+5)
    IndexAllocParallel-8      245ms ±11%     285ms ± 6%   +16.40%  (p=0.001 n=10+5)
    MasterIndexAlloc-8        286ms ± 9%     275ms ± 2%      ~     (p=1.000 n=10+5)
    LoadIndex/v1-8           27.0ms ± 4%    26.8ms ± 1%      ~     (p=0.690 n=5+5)
    LoadIndex/v2-8           22.4ms ± 1%    22.8ms ± 2%    +1.48%  (p=0.016 n=5+5)

    name                   old alloc/op   new alloc/op   delta
    IndexAlloc-8              446MB ± 0%     446MB ± 0%    -0.00%  (p=0.000 n=8+4)
    IndexAllocParallel-8      446MB ± 0%     446MB ± 0%    -0.00%  (p=0.008 n=8+5)
    MasterIndexAlloc-8        213MB ± 0%     159MB ± 0%   -25.47%  (p=0.000 n=10+5)

    name                   old allocs/op  new allocs/op  delta
    IndexAlloc-8               913k ± 0%     2632k ± 0%  +188.19%  (p=0.008 n=5+5)
    IndexAllocParallel-8       913k ± 0%     2632k ± 0%  +188.21%  (p=0.008 n=5+5)
    MasterIndexAlloc-8         318k ± 0%     1172k ± 0%  +267.86%  (p=0.008 n=5+5)

Instead, this patch sets a batch size of 4, which means no space is
wasted by malloc on 64-bit and very little on 32-bit. It still gets very
close to the savings from not allocating in batches, without requiring
special code for bits.UintSize==64. Benchmark results, again for
Linux/amd64:

    name                   old time/op    new time/op    delta
    DecodeIndex-8             4.67s ± 5%     4.83s ± 9%     ~     (p=0.315 n=10+10)
    DecodeIndexParallel-8     4.67s ± 3%     4.68s ± 4%     ~     (p=0.315 n=10+10)
    IndexHasUnknown-8        37.8ns ± 8%    44.5ns ±19%     ~     (p=0.095 n=5+5)
    IndexHasKnown-8          38.5ns ±12%    36.9ns ± 8%     ~     (p=0.690 n=5+5)
    IndexAlloc-8              615ms ±18%     628ms ±18%     ~     (p=0.218 n=10+10)
    IndexAllocParallel-8      245ms ±11%     262ms ± 9%   +7.02%  (p=0.043 n=10+10)
    MasterIndexAlloc-8        286ms ± 9%     287ms ±13%     ~     (p=1.000 n=10+10)
    LoadIndex/v1-8           27.0ms ± 4%    26.8ms ± 0%     ~     (p=1.000 n=5+5)
    LoadIndex/v2-8           22.4ms ± 1%    22.5ms ± 0%     ~     (p=0.056 n=5+5)

    name                   old alloc/op   new alloc/op   delta
    IndexAlloc-8              446MB ± 0%     446MB ± 0%     ~     (p=1.000 n=8+10)
    IndexAllocParallel-8      446MB ± 0%     446MB ± 0%   -0.00%  (p=0.000 n=8+8)
    MasterIndexAlloc-8        213MB ± 0%     160MB ± 0%  -25.02%  (p=0.000 n=10+9)

    name                   old allocs/op  new allocs/op  delta
    IndexAlloc-8               913k ± 0%     1333k ± 0%  +45.94%  (p=0.000 n=8+10)
    IndexAllocParallel-8       913k ± 0%     1333k ± 0%  +45.94%  (p=0.000 n=8+8)
    MasterIndexAlloc-8         318k ± 0%      525k ± 0%  +64.99%  (p=0.000 n=10+10)

The allocation method indexmap.newEntry has also been rewritten in a
form that is a few instructions shorter.
2022-05-11 21:22:14 +02:00

171 lines
4.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package repository
import (
"hash/maphash"
"github.com/restic/restic/internal/restic"
)
// An indexMap is a chained hash table that maps blob IDs to indexEntries.
// It allows storing multiple entries with the same key.
//
// IndexMap uses some optimizations that are not compatible with supporting
// deletions.
//
// The buckets in this hash table contain only pointers, rather than inlined
// key-value pairs like the standard Go map. This way, only a pointer array
// needs to be resized when the table grows, preventing memory usage spikes.
type indexMap struct {
// The number of buckets is always a power of two and never zero.
buckets []*indexEntry
numentries uint
mh maphash.Hash
free *indexEntry // Free list.
}
const (
growthFactor = 2 // Must be a power of 2.
maxLoad = 4 // Max. number of entries per bucket.
)
// add inserts an indexEntry for the given arguments into the map,
// using id as the key.
func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
switch {
case m.numentries == 0: // Lazy initialization.
m.init()
case m.numentries >= maxLoad*uint(len(m.buckets)):
m.grow()
}
h := m.hash(id)
e := m.newEntry()
e.id = id
e.next = m.buckets[h] // Prepend to existing chain.
e.packIndex = packIdx
e.offset = offset
e.length = length
e.uncompressedLength = uncompressedLength
m.buckets[h] = e
m.numentries++
}
// foreach calls fn for all entries in the map, until fn returns false.
func (m *indexMap) foreach(fn func(*indexEntry) bool) {
for _, e := range m.buckets {
for e != nil {
if !fn(e) {
return
}
e = e.next
}
}
}
// foreachWithID calls fn for all entries with the given id.
func (m *indexMap) foreachWithID(id restic.ID, fn func(*indexEntry)) {
if len(m.buckets) == 0 {
return
}
h := m.hash(id)
for e := m.buckets[h]; e != nil; e = e.next {
if e.id != id {
continue
}
fn(e)
}
}
// get returns the first entry for the given id.
func (m *indexMap) get(id restic.ID) *indexEntry {
if len(m.buckets) == 0 {
return nil
}
h := m.hash(id)
for e := m.buckets[h]; e != nil; e = e.next {
if e.id == id {
return e
}
}
return nil
}
func (m *indexMap) grow() {
old := m.buckets
m.buckets = make([]*indexEntry, growthFactor*len(m.buckets))
for _, e := range old {
for e != nil {
h := m.hash(e.id)
next := e.next
e.next = m.buckets[h]
m.buckets[h] = e
e = next
}
}
}
func (m *indexMap) hash(id restic.ID) uint {
// We use maphash to prevent backups of specially crafted inputs
// from degrading performance.
// While SHA-256 should be collision-resistant, for hash table indices
// we use only a few bits of it and finding collisions for those is
// much easier than breaking the whole algorithm.
m.mh.Reset()
_, _ = m.mh.Write(id[:])
h := uint(m.mh.Sum64())
return h & uint(len(m.buckets)-1)
}
func (m *indexMap) init() {
const initialBuckets = 64
m.buckets = make([]*indexEntry, initialBuckets)
}
func (m *indexMap) len() uint { return m.numentries }
func (m *indexMap) newEntry() *indexEntry {
// We keep a free list of objects to speed up allocation and GC.
// There's an obvious trade-off here: allocating in larger batches
// means we allocate faster and the GC has to keep fewer bits to track
// what we have in use, but it means we waste some space.
//
// Then again, allocating each indexEntry separately also wastes space
// on 32-bit platforms, because the Go malloc has no size class for
// exactly 52 bytes, so it puts the indexEntry in a 64-byte slot instead.
// See src/runtime/sizeclasses.go in the Go source repo.
//
// The batch size of 4 means we hit the size classes for 4×64=256 bytes
// (64-bit) and 4×52=208 bytes (32-bit), wasting nothing in malloc on
// 64-bit and relatively little on 32-bit.
const entryAllocBatch = 4
e := m.free
if e != nil {
m.free = e.next
} else {
free := new([entryAllocBatch]indexEntry)
e = &free[0]
for i := 1; i < len(free)-1; i++ {
free[i].next = &free[i+1]
}
m.free = &free[1]
}
return e
}
type indexEntry struct {
id restic.ID
next *indexEntry
packIndex int // Position in containing Index's packs field.
offset uint32
length uint32
uncompressedLength uint32
}