Mirror of https://github.com/octoleo/restic.git (synced 2024-11-22 12:55:18 +00:00)

Merge pull request #4352 from MichaelEischer/pointerless-index

index: optimize go GC performance

commit dd1ef13c1c
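Note: the hunks that follow replace the pointer-chained, free-list-allocated indexEntry storage with uint indices into a batch-allocated "hashed array tree", so a loaded index exposes far fewer pointers for the Go garbage collector to trace. The standalone sketch below is illustrative only and is not restic code (type names, field sizes, and the entry count are made up); it shows why a pointer-free slab linked by indices is cheaper for the GC than millions of individually allocated, pointer-linked entries.

// Illustrative only, not restic code: compare the GC cost of a
// pointer-chained structure with a pointer-free slab linked by indices.
package main

import (
    "fmt"
    "runtime"
    "time"
)

type ptrEntry struct {
    next  *ptrEntry
    value [48]byte
}

type idxEntry struct {
    next  uint // index into the slab, 0 = end of chain
    value [48]byte
}

func gcTime() time.Duration {
    start := time.Now()
    runtime.GC()
    return time.Since(start)
}

func main() {
    const n = 1 << 21

    // Millions of separately allocated entries, all linked by pointers:
    // the GC has to visit every one of them on each cycle.
    entries := make([]*ptrEntry, n)
    for i := range entries {
        entries[i] = &ptrEntry{}
    }
    fmt.Println("GC with pointer-linked entries:", gcTime())
    runtime.KeepAlive(entries)
    entries = nil
    runtime.GC()

    // The same number of entries in a single pointer-free slab, chained by
    // indices: the GC can skip the slab's contents entirely.
    slab := make([]idxEntry, n)
    for i := 1; i < n; i++ {
        slab[i].next = uint(i - 1)
    }
    fmt.Println("GC with index-linked slab:     ", gcTime())
    runtime.KeepAlive(slab)
}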
@@ -17,12 +17,12 @@ import (
 // needs to be resized when the table grows, preventing memory usage spikes.
 type indexMap struct {
     // The number of buckets is always a power of two and never zero.
-    buckets    []*indexEntry
+    buckets    []uint
     numentries uint
 
     mh maphash.Hash
 
-    free *indexEntry // Free list.
+    blockList hashedArrayTree
 }
 
 const (
@@ -41,7 +41,7 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompr
     }
 
     h := m.hash(id)
-    e := m.newEntry()
+    e, idx := m.newEntry()
     e.id = id
     e.next = m.buckets[h] // Prepend to existing chain.
     e.packIndex = packIdx
@@ -49,18 +49,16 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompr
     e.length = length
     e.uncompressedLength = uncompressedLength
 
-    m.buckets[h] = e
+    m.buckets[h] = idx
     m.numentries++
 }
 
 // foreach calls fn for all entries in the map, until fn returns false.
 func (m *indexMap) foreach(fn func(*indexEntry) bool) {
-    for _, e := range m.buckets {
-        for e != nil {
-            if !fn(e) {
-                return
-            }
-            e = e.next
+    blockCount := m.blockList.Size()
+    for i := uint(1); i < blockCount; i++ {
+        if !fn(m.resolve(i)) {
+            return
         }
     }
 }
@@ -72,7 +70,10 @@ func (m *indexMap) foreachWithID(id restic.ID, fn func(*indexEntry)) {
     }
 
     h := m.hash(id)
-    for e := m.buckets[h]; e != nil; e = e.next {
+    ei := m.buckets[h]
+    for ei != 0 {
+        e := m.resolve(ei)
+        ei = e.next
         if e.id != id {
             continue
         }
@@ -87,26 +88,27 @@ func (m *indexMap) get(id restic.ID) *indexEntry {
     }
 
     h := m.hash(id)
-    for e := m.buckets[h]; e != nil; e = e.next {
+    ei := m.buckets[h]
+    for ei != 0 {
+        e := m.resolve(ei)
         if e.id == id {
             return e
         }
+        ei = e.next
     }
     return nil
 }
 
 func (m *indexMap) grow() {
-    old := m.buckets
-    m.buckets = make([]*indexEntry, growthFactor*len(m.buckets))
+    m.buckets = make([]uint, growthFactor*len(m.buckets))
 
-    for _, e := range old {
-        for e != nil {
-            h := m.hash(e.id)
-            next := e.next
-            e.next = m.buckets[h]
-            m.buckets[h] = e
-            e = next
-        }
+    blockCount := m.blockList.Size()
+    for i := uint(1); i < blockCount; i++ {
+        e := m.resolve(i)
+
+        h := m.hash(e.id)
+        e.next = m.buckets[h]
+        m.buckets[h] = i
     }
 }
 
@@ -124,47 +126,106 @@ func (m *indexMap) hash(id restic.ID) uint {
 
 func (m *indexMap) init() {
     const initialBuckets = 64
-    m.buckets = make([]*indexEntry, initialBuckets)
+    m.buckets = make([]uint, initialBuckets)
+    // first entry in blockList serves as null byte
+    m.blockList = *newHAT()
+    m.newEntry()
 }
 
 func (m *indexMap) len() uint { return m.numentries }
 
-func (m *indexMap) newEntry() *indexEntry {
-    // We keep a free list of objects to speed up allocation and GC.
-    // There's an obvious trade-off here: allocating in larger batches
-    // means we allocate faster and the GC has to keep fewer bits to track
-    // what we have in use, but it means we waste some space.
-    //
-    // Then again, allocating each indexEntry separately also wastes space
-    // on 32-bit platforms, because the Go malloc has no size class for
-    // exactly 52 bytes, so it puts the indexEntry in a 64-byte slot instead.
-    // See src/runtime/sizeclasses.go in the Go source repo.
-    //
-    // The batch size of 4 means we hit the size classes for 4×64=256 bytes
-    // (64-bit) and 4×52=208 bytes (32-bit), wasting nothing in malloc on
-    // 64-bit and relatively little on 32-bit.
-    const entryAllocBatch = 4
-
-    e := m.free
-    if e != nil {
-        m.free = e.next
-    } else {
-        free := new([entryAllocBatch]indexEntry)
-        e = &free[0]
-        for i := 1; i < len(free)-1; i++ {
-            free[i].next = &free[i+1]
-        }
-        m.free = &free[1]
-    }
-
-    return e
+func (m *indexMap) newEntry() (*indexEntry, uint) {
+    return m.blockList.Alloc()
+}
+
+func (m *indexMap) resolve(idx uint) *indexEntry {
+    return m.blockList.Ref(idx)
 }
 
 type indexEntry struct {
     id                 restic.ID
-    next               *indexEntry
+    next               uint
     packIndex          int // Position in containing Index's packs field.
     offset             uint32
     length             uint32
     uncompressedLength uint32
 }
+
+type hashedArrayTree struct {
+    mask      uint
+    maskShift uint
+    blockSize uint
+
+    size      uint
+    blockList [][]indexEntry
+}
+
+func newHAT() *hashedArrayTree {
+    // start with a small block size
+    blockSizePower := uint(2)
+    blockSize := uint(1 << blockSizePower)
+
+    return &hashedArrayTree{
+        mask:      blockSize - 1,
+        maskShift: blockSizePower,
+        blockSize: blockSize,
+        size:      0,
+        blockList: make([][]indexEntry, blockSize),
+    }
+}
+
+func (h *hashedArrayTree) Alloc() (*indexEntry, uint) {
+    h.grow()
+    size := h.size
+    idx, subIdx := h.index(size)
+    h.size++
+    return &h.blockList[idx][subIdx], size
+}
+
+func (h *hashedArrayTree) index(pos uint) (idx uint, subIdx uint) {
+    subIdx = pos & h.mask
+    idx = pos >> h.maskShift
+    return
+}
+
+func (h *hashedArrayTree) Ref(pos uint) *indexEntry {
+    if pos >= h.size {
+        panic("array index out of bounds")
+    }
+
+    idx, subIdx := h.index(pos)
+    return &h.blockList[idx][subIdx]
+}
+
+func (h *hashedArrayTree) Size() uint {
+    return h.size
+}
+
+func (h *hashedArrayTree) grow() {
+    idx, subIdx := h.index(h.size)
+    if int(idx) == len(h.blockList) {
+        // blockList is too small -> double list and block size
+        h.blockSize *= 2
+        h.mask = h.mask*2 + 1
+        h.maskShift++
+        idx = idx / 2
+
+        oldBlocks := h.blockList
+        h.blockList = make([][]indexEntry, h.blockSize)
+
+        // pairwise merging of blocks
+        for i := 0; i < len(oldBlocks); i += 2 {
+            block := make([]indexEntry, 0, h.blockSize)
+            block = append(block, oldBlocks[i]...)
+            block = append(block, oldBlocks[i+1]...)
+            h.blockList[i/2] = block
+            // allow GC
+            oldBlocks[i] = nil
+            oldBlocks[i+1] = nil
+        }
+    }
+    if subIdx == 0 {
+        // new index entry batch
+        h.blockList[idx] = make([]indexEntry, h.blockSize)
+    }
+}
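The hashedArrayTree added above addresses entries by a single position that it splits into a block index and a slot inside that block; because blockSize is always a power of two, the split is just a shift and a mask, and growing doubles both the block size and the block list. A small standalone sketch of that arithmetic follows (the constants are assumed to mirror newHAT's initial blockSizePower of 2; this snippet is not part of the diff):

package main

import "fmt"

func main() {
    // Assumed to mirror newHAT above: blockSizePower = 2, so blockSize = 4,
    // maskShift = 2 and mask = 0b11.
    const maskShift = 2
    const mask = 1<<maskShift - 1

    for _, pos := range []uint{0, 3, 4, 10} {
        idx, subIdx := pos>>maskShift, pos&mask
        fmt.Printf("pos %2d -> block %d, slot %d\n", pos, idx, subIdx)
    }
    // Prints:
    // pos  0 -> block 0, slot 0
    // pos  3 -> block 0, slot 3
    // pos  4 -> block 1, slot 0
    // pos 10 -> block 2, slot 2
}

Position 0 is never handed out for a real entry (init allocates a dummy first entry as the "null byte"), which is why the chains above can terminate on ei != 0 and why iteration starts at i = 1.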
@@ -108,6 +108,21 @@ func TestIndexMapForeachWithID(t *testing.T) {
     }
 }
 
+func TestHashedArrayTree(t *testing.T) {
+    hat := newHAT()
+    const testSize = 1024
+    for i := uint(0); i < testSize; i++ {
+        rtest.Assert(t, hat.Size() == i, "expected hat size %v got %v", i, hat.Size())
+        e, idx := hat.Alloc()
+        rtest.Assert(t, idx == i, "expected entry at idx %v got %v", i, idx)
+        e.length = uint32(i)
+    }
+    for i := uint(0); i < testSize; i++ {
+        e := hat.Ref(i)
+        rtest.Assert(t, e.length == uint32(i), "expected entry to contain %v got %v", uint32(i), e.length)
+    }
+}
+
 func BenchmarkIndexMapHash(b *testing.B) {
     var m indexMap
     m.add(restic.ID{}, 0, 0, 0, 0) // Trigger lazy initialization.
@@ -4,6 +4,7 @@ import (
     "context"
     "fmt"
     "math/rand"
+    "runtime"
     "testing"
     "time"
 
@@ -323,6 +324,17 @@ func BenchmarkMasterIndexEach(b *testing.B) {
     }
 }
 
+func BenchmarkMasterIndexGC(b *testing.B) {
+    mIdx, _ := createRandomMasterIndex(b, rand.New(rand.NewSource(0)), 100, 10000)
+
+    b.ResetTimer()
+
+    for i := 0; i < b.N; i++ {
+        runtime.GC()
+    }
+    runtime.KeepAlive(mIdx)
+}
+
 var (
     snapshotTime = time.Unix(1470492820, 207401672)
     depth        = 3
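BenchmarkMasterIndexGC above times full collections while a large random master index stays live (runtime.KeepAlive keeps it reachable across the timed GC cycles), so it directly reflects how much work the collector spends tracing the index. If one also wanted the cumulative pause time as a metric, a hypothetical helper along the following lines could sit next to the benchmark; the function name, package clause, and placement are assumptions, not part of this PR:

package index_test // assumed placement; adjust to the benchmark file's package

import (
    "runtime"
    "testing"
)

// reportGCPause is a hypothetical helper (not part of this PR): it forces a
// collection and reports the cumulative stop-the-world pause time it added
// as a custom benchmark metric.
func reportGCPause(b *testing.B) {
    var before, after runtime.MemStats
    runtime.ReadMemStats(&before)
    runtime.GC()
    runtime.ReadMemStats(&after)
    b.ReportMetric(float64(after.PauseTotalNs-before.PauseTotalNs), "pause-ns")
}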
@@ -346,6 +346,7 @@ func benchmarkLoadIndex(b *testing.B, version uint) {
             },
         })
     }
+    idx.Finalize()
 
     id, err := index.SaveIndex(context.TODO(), repo, idx)
     rtest.OK(b, err)