2022-06-12 12:43:43 +00:00
|
|
|
package index
|
2020-06-23 20:13:25 +00:00
|
|
|
|
|
|
|
import (
|
Replace siphash by hash/maphash
In Go 1.17.1, maphash has become quite a bit faster than siphash, so we
can drop one third-party dependency. maphash is just an interface to the
standard Go map's hash function, which we already trust for other use
cases.
Benchmark results on linux/amd64, -benchtime=3s:
name old time/op new time/op delta
IndexHasUnknown-8 50.6ns ±10% 41.0ns ±19% -18.92% (p=0.000 n=9+10)
IndexHasKnown-8 52.6ns ±12% 41.5ns ±12% -21.13% (p=0.000 n=9+10)
IndexMapHash-8 3.64µs ± 1% 2.00µs ± 0% -45.09% (p=0.000 n=10+9)
IndexAlloc-8 700ms ± 1% 601ms ± 6% -14.18% (p=0.000 n=8+10)
IndexAllocParallel-8 205ms ± 5% 192ms ± 8% -6.18% (p=0.043 n=10+10)
MasterIndexAlloc-8 319ms ± 1% 279ms ± 5% -12.58% (p=0.000 n=10+10)
MasterIndexLookupSingleIndex-8 156ns ± 8% 147ns ± 6% -5.46% (p=0.023 n=10+10)
MasterIndexLookupMultipleIndex-8 150ns ± 7% 142ns ± 8% -5.69% (p=0.007 n=10+10)
MasterIndexLookupSingleIndexUnknown-8 74.4ns ± 6% 72.0ns ± 9% ~ (p=0.175 n=10+9)
MasterIndexLookupMultipleIndexUnknown-8 67.4ns ± 9% 65.5ns ± 7% ~ (p=0.340 n=9+9)
MasterIndexLookupParallel/known,indices=25-8 461ns ± 2% 445ns ± 2% -3.49% (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=25-8 408ns ±11% 378ns ± 5% -7.22% (p=0.035 n=10+9)
MasterIndexLookupParallel/known,indices=50-8 479ns ± 1% 437ns ± 4% -8.82% (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=50-8 406ns ± 8% 343ns ±15% -15.44% (p=0.001 n=10+10)
MasterIndexLookupParallel/known,indices=100-8 480ns ± 1% 455ns ± 5% -5.15% (p=0.000 n=8+10)
MasterIndexLookupParallel/unknown,indices=100-8 391ns ±18% 382ns ± 8% ~ (p=0.315 n=10+10)
MasterIndexLookupBlobSize-8 71.0ns ± 8% 57.2ns ±11% -19.36% (p=0.000 n=9+10)
PackerManager-8 254ms ± 1% 254ms ± 1% ~ (p=0.285 n=15+15)
name old speed new speed delta
IndexMapHash-8 1.12GB/s ± 1% 2.05GB/s ± 0% +82.13% (p=0.000 n=10+9)
PackerManager-8 208MB/s ± 1% 207MB/s ± 1% ~ (p=0.281 n=15+15)
name old alloc/op new alloc/op delta
IndexMapHash-8 0.00B 0.00B ~ (all equal)
IndexAlloc-8 400MB ± 0% 400MB ± 0% ~ (p=1.000 n=9+10)
IndexAllocParallel-8 401MB ± 0% 401MB ± 0% +0.00% (p=0.000 n=10+10)
MasterIndexAlloc-8 258MB ± 0% 262MB ± 0% +1.42% (p=0.000 n=9+10)
PackerManager-8 73.1kB ± 0% 73.1kB ± 0% ~ (p=0.382 n=13+13)
name old allocs/op new allocs/op delta
IndexMapHash-8 0.00 0.00 ~ (all equal)
IndexAlloc-8 907k ± 0% 907k ± 0% -0.00% (p=0.000 n=10+10)
IndexAllocParallel-8 907k ± 0% 907k ± 0% +0.00% (p=0.009 n=10+10)
MasterIndexAlloc-8 327k ± 0% 317k ± 0% -3.06% (p=0.000 n=10+10)
PackerManager-8 744 ± 0% 744 ± 0% ~ (all equal)
2021-09-17 10:38:17 +00:00
|
|
|
"hash/maphash"
|
2020-06-23 20:13:25 +00:00
|
|
|
|
|
|
|
"github.com/restic/restic/internal/restic"
|
|
|
|
)
|
|
|
|
|
|
|
|
// An indexMap is a chained hash table that maps blob IDs to indexEntries.
|
|
|
|
// It allows storing multiple entries with the same key.
|
|
|
|
//
|
|
|
|
// IndexMap uses some optimizations that are not compatible with supporting
|
|
|
|
// deletions.
|
|
|
|
//
|
|
|
|
// The buckets in this hash table contain only pointers, rather than inlined
|
|
|
|
// key-value pairs like the standard Go map. This way, only a pointer array
|
|
|
|
// needs to be resized when the table grows, preventing memory usage spikes.
|
|
|
|
type indexMap struct {
|
|
|
|
// The number of buckets is always a power of two and never zero.
|
2023-06-08 16:07:06 +00:00
|
|
|
buckets []uint
|
|
|
|
numentries uint
|
2020-06-23 20:13:25 +00:00
|
|
|
|
Replace siphash by hash/maphash
In Go 1.17.1, maphash has become quite a bit faster than siphash, so we
can drop one third-party dependency. maphash is just an interface to the
standard Go map's hash function, which we already trust for other use
cases.
Benchmark results on linux/amd64, -benchtime=3s:
name old time/op new time/op delta
IndexHasUnknown-8 50.6ns ±10% 41.0ns ±19% -18.92% (p=0.000 n=9+10)
IndexHasKnown-8 52.6ns ±12% 41.5ns ±12% -21.13% (p=0.000 n=9+10)
IndexMapHash-8 3.64µs ± 1% 2.00µs ± 0% -45.09% (p=0.000 n=10+9)
IndexAlloc-8 700ms ± 1% 601ms ± 6% -14.18% (p=0.000 n=8+10)
IndexAllocParallel-8 205ms ± 5% 192ms ± 8% -6.18% (p=0.043 n=10+10)
MasterIndexAlloc-8 319ms ± 1% 279ms ± 5% -12.58% (p=0.000 n=10+10)
MasterIndexLookupSingleIndex-8 156ns ± 8% 147ns ± 6% -5.46% (p=0.023 n=10+10)
MasterIndexLookupMultipleIndex-8 150ns ± 7% 142ns ± 8% -5.69% (p=0.007 n=10+10)
MasterIndexLookupSingleIndexUnknown-8 74.4ns ± 6% 72.0ns ± 9% ~ (p=0.175 n=10+9)
MasterIndexLookupMultipleIndexUnknown-8 67.4ns ± 9% 65.5ns ± 7% ~ (p=0.340 n=9+9)
MasterIndexLookupParallel/known,indices=25-8 461ns ± 2% 445ns ± 2% -3.49% (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=25-8 408ns ±11% 378ns ± 5% -7.22% (p=0.035 n=10+9)
MasterIndexLookupParallel/known,indices=50-8 479ns ± 1% 437ns ± 4% -8.82% (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=50-8 406ns ± 8% 343ns ±15% -15.44% (p=0.001 n=10+10)
MasterIndexLookupParallel/known,indices=100-8 480ns ± 1% 455ns ± 5% -5.15% (p=0.000 n=8+10)
MasterIndexLookupParallel/unknown,indices=100-8 391ns ±18% 382ns ± 8% ~ (p=0.315 n=10+10)
MasterIndexLookupBlobSize-8 71.0ns ± 8% 57.2ns ±11% -19.36% (p=0.000 n=9+10)
PackerManager-8 254ms ± 1% 254ms ± 1% ~ (p=0.285 n=15+15)
name old speed new speed delta
IndexMapHash-8 1.12GB/s ± 1% 2.05GB/s ± 0% +82.13% (p=0.000 n=10+9)
PackerManager-8 208MB/s ± 1% 207MB/s ± 1% ~ (p=0.281 n=15+15)
name old alloc/op new alloc/op delta
IndexMapHash-8 0.00B 0.00B ~ (all equal)
IndexAlloc-8 400MB ± 0% 400MB ± 0% ~ (p=1.000 n=9+10)
IndexAllocParallel-8 401MB ± 0% 401MB ± 0% +0.00% (p=0.000 n=10+10)
MasterIndexAlloc-8 258MB ± 0% 262MB ± 0% +1.42% (p=0.000 n=9+10)
PackerManager-8 73.1kB ± 0% 73.1kB ± 0% ~ (p=0.382 n=13+13)
name old allocs/op new allocs/op delta
IndexMapHash-8 0.00 0.00 ~ (all equal)
IndexAlloc-8 907k ± 0% 907k ± 0% -0.00% (p=0.000 n=10+10)
IndexAllocParallel-8 907k ± 0% 907k ± 0% +0.00% (p=0.009 n=10+10)
MasterIndexAlloc-8 327k ± 0% 317k ± 0% -3.06% (p=0.000 n=10+10)
PackerManager-8 744 ± 0% 744 ± 0% ~ (all equal)
2021-09-17 10:38:17 +00:00
|
|
|
mh maphash.Hash
|
2020-06-23 20:13:25 +00:00
|
|
|
|
2023-05-28 21:42:47 +00:00
|
|
|
blockList hashedArrayTree
|
2020-06-23 20:13:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
const (
	// growthFactor is the multiplier applied to the number of buckets each
	// time the table grows. It must be a power of 2 so that the bucket
	// count remains one as well.
	growthFactor = 2

	// maxLoad is the average number of entries per bucket at which add
	// grows the table.
	maxLoad = 4
)
|
|
|
|
|
|
|
|
// add inserts an indexEntry for the given arguments into the map,
|
|
|
|
// using id as the key.
|
2022-02-13 16:24:09 +00:00
|
|
|
func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
|
2020-06-23 20:13:25 +00:00
|
|
|
switch {
|
2023-06-08 16:07:06 +00:00
|
|
|
case m.numentries == 0: // Lazy initialization.
|
2020-06-23 20:13:25 +00:00
|
|
|
m.init()
|
2023-06-08 16:07:06 +00:00
|
|
|
case m.numentries >= maxLoad*uint(len(m.buckets)):
|
2020-06-23 20:13:25 +00:00
|
|
|
m.grow()
|
|
|
|
}
|
|
|
|
|
|
|
|
h := m.hash(id)
|
2022-02-05 20:25:23 +00:00
|
|
|
e, idx := m.newEntry()
|
2020-06-23 20:13:25 +00:00
|
|
|
e.id = id
|
|
|
|
e.next = m.buckets[h] // Prepend to existing chain.
|
|
|
|
e.packIndex = packIdx
|
|
|
|
e.offset = offset
|
|
|
|
e.length = length
|
2022-02-13 16:24:09 +00:00
|
|
|
e.uncompressedLength = uncompressedLength
|
2020-06-23 20:13:25 +00:00
|
|
|
|
2022-02-05 20:25:23 +00:00
|
|
|
m.buckets[h] = idx
|
2023-06-08 16:07:06 +00:00
|
|
|
m.numentries++
|
2020-06-23 20:13:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// foreach calls fn for all entries in the map, until fn returns false.
|
|
|
|
func (m *indexMap) foreach(fn func(*indexEntry) bool) {
|
2023-05-30 18:12:36 +00:00
|
|
|
blockCount := m.blockList.Size()
|
|
|
|
for i := uint(1); i < blockCount; i++ {
|
|
|
|
if !fn(m.resolve(i)) {
|
|
|
|
return
|
2020-06-23 20:13:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// foreachWithID calls fn for all entries with the given id.
|
|
|
|
func (m *indexMap) foreachWithID(id restic.ID, fn func(*indexEntry)) {
|
|
|
|
if len(m.buckets) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
h := m.hash(id)
|
2022-02-05 20:25:23 +00:00
|
|
|
ei := m.buckets[h]
|
|
|
|
for ei != 0 {
|
|
|
|
e := m.resolve(ei)
|
|
|
|
ei = e.next
|
2020-06-23 20:13:25 +00:00
|
|
|
if e.id != id {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
fn(e)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// get returns the first entry for the given id.
|
|
|
|
func (m *indexMap) get(id restic.ID) *indexEntry {
|
|
|
|
if len(m.buckets) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
h := m.hash(id)
|
2022-02-05 20:25:23 +00:00
|
|
|
ei := m.buckets[h]
|
|
|
|
for ei != 0 {
|
|
|
|
e := m.resolve(ei)
|
2020-06-23 20:13:25 +00:00
|
|
|
if e.id == id {
|
|
|
|
return e
|
|
|
|
}
|
2022-02-05 20:25:23 +00:00
|
|
|
ei = e.next
|
2020-06-23 20:13:25 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *indexMap) grow() {
|
2022-02-05 20:25:23 +00:00
|
|
|
m.buckets = make([]uint, growthFactor*len(m.buckets))
|
2020-06-23 20:13:25 +00:00
|
|
|
|
2023-05-30 18:13:33 +00:00
|
|
|
blockCount := m.blockList.Size()
|
|
|
|
for i := uint(1); i < blockCount; i++ {
|
|
|
|
e := m.resolve(i)
|
|
|
|
|
|
|
|
h := m.hash(e.id)
|
|
|
|
e.next = m.buckets[h]
|
|
|
|
m.buckets[h] = i
|
2020-06-23 20:13:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *indexMap) hash(id restic.ID) uint {
|
Replace siphash by hash/maphash
In Go 1.17.1, maphash has become quite a bit faster than siphash, so we
can drop one third-party dependency. maphash is just an interface to the
standard Go map's hash function, which we already trust for other use
cases.
Benchmark results on linux/amd64, -benchtime=3s:
name old time/op new time/op delta
IndexHasUnknown-8 50.6ns ±10% 41.0ns ±19% -18.92% (p=0.000 n=9+10)
IndexHasKnown-8 52.6ns ±12% 41.5ns ±12% -21.13% (p=0.000 n=9+10)
IndexMapHash-8 3.64µs ± 1% 2.00µs ± 0% -45.09% (p=0.000 n=10+9)
IndexAlloc-8 700ms ± 1% 601ms ± 6% -14.18% (p=0.000 n=8+10)
IndexAllocParallel-8 205ms ± 5% 192ms ± 8% -6.18% (p=0.043 n=10+10)
MasterIndexAlloc-8 319ms ± 1% 279ms ± 5% -12.58% (p=0.000 n=10+10)
MasterIndexLookupSingleIndex-8 156ns ± 8% 147ns ± 6% -5.46% (p=0.023 n=10+10)
MasterIndexLookupMultipleIndex-8 150ns ± 7% 142ns ± 8% -5.69% (p=0.007 n=10+10)
MasterIndexLookupSingleIndexUnknown-8 74.4ns ± 6% 72.0ns ± 9% ~ (p=0.175 n=10+9)
MasterIndexLookupMultipleIndexUnknown-8 67.4ns ± 9% 65.5ns ± 7% ~ (p=0.340 n=9+9)
MasterIndexLookupParallel/known,indices=25-8 461ns ± 2% 445ns ± 2% -3.49% (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=25-8 408ns ±11% 378ns ± 5% -7.22% (p=0.035 n=10+9)
MasterIndexLookupParallel/known,indices=50-8 479ns ± 1% 437ns ± 4% -8.82% (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=50-8 406ns ± 8% 343ns ±15% -15.44% (p=0.001 n=10+10)
MasterIndexLookupParallel/known,indices=100-8 480ns ± 1% 455ns ± 5% -5.15% (p=0.000 n=8+10)
MasterIndexLookupParallel/unknown,indices=100-8 391ns ±18% 382ns ± 8% ~ (p=0.315 n=10+10)
MasterIndexLookupBlobSize-8 71.0ns ± 8% 57.2ns ±11% -19.36% (p=0.000 n=9+10)
PackerManager-8 254ms ± 1% 254ms ± 1% ~ (p=0.285 n=15+15)
name old speed new speed delta
IndexMapHash-8 1.12GB/s ± 1% 2.05GB/s ± 0% +82.13% (p=0.000 n=10+9)
PackerManager-8 208MB/s ± 1% 207MB/s ± 1% ~ (p=0.281 n=15+15)
name old alloc/op new alloc/op delta
IndexMapHash-8 0.00B 0.00B ~ (all equal)
IndexAlloc-8 400MB ± 0% 400MB ± 0% ~ (p=1.000 n=9+10)
IndexAllocParallel-8 401MB ± 0% 401MB ± 0% +0.00% (p=0.000 n=10+10)
MasterIndexAlloc-8 258MB ± 0% 262MB ± 0% +1.42% (p=0.000 n=9+10)
PackerManager-8 73.1kB ± 0% 73.1kB ± 0% ~ (p=0.382 n=13+13)
name old allocs/op new allocs/op delta
IndexMapHash-8 0.00 0.00 ~ (all equal)
IndexAlloc-8 907k ± 0% 907k ± 0% -0.00% (p=0.000 n=10+10)
IndexAllocParallel-8 907k ± 0% 907k ± 0% +0.00% (p=0.009 n=10+10)
MasterIndexAlloc-8 327k ± 0% 317k ± 0% -3.06% (p=0.000 n=10+10)
PackerManager-8 744 ± 0% 744 ± 0% ~ (all equal)
2021-09-17 10:38:17 +00:00
|
|
|
// We use maphash to prevent backups of specially crafted inputs
|
|
|
|
// from degrading performance.
|
2020-06-23 20:13:25 +00:00
|
|
|
// While SHA-256 should be collision-resistant, for hash table indices
|
|
|
|
// we use only a few bits of it and finding collisions for those is
|
|
|
|
// much easier than breaking the whole algorithm.
|
Replace siphash by hash/maphash
In Go 1.17.1, maphash has become quite a bit faster than siphash, so we
can drop one third-party dependency. maphash is just an interface to the
standard Go map's hash function, which we already trust for other use
cases.
Benchmark results on linux/amd64, -benchtime=3s:
name old time/op new time/op delta
IndexHasUnknown-8 50.6ns ±10% 41.0ns ±19% -18.92% (p=0.000 n=9+10)
IndexHasKnown-8 52.6ns ±12% 41.5ns ±12% -21.13% (p=0.000 n=9+10)
IndexMapHash-8 3.64µs ± 1% 2.00µs ± 0% -45.09% (p=0.000 n=10+9)
IndexAlloc-8 700ms ± 1% 601ms ± 6% -14.18% (p=0.000 n=8+10)
IndexAllocParallel-8 205ms ± 5% 192ms ± 8% -6.18% (p=0.043 n=10+10)
MasterIndexAlloc-8 319ms ± 1% 279ms ± 5% -12.58% (p=0.000 n=10+10)
MasterIndexLookupSingleIndex-8 156ns ± 8% 147ns ± 6% -5.46% (p=0.023 n=10+10)
MasterIndexLookupMultipleIndex-8 150ns ± 7% 142ns ± 8% -5.69% (p=0.007 n=10+10)
MasterIndexLookupSingleIndexUnknown-8 74.4ns ± 6% 72.0ns ± 9% ~ (p=0.175 n=10+9)
MasterIndexLookupMultipleIndexUnknown-8 67.4ns ± 9% 65.5ns ± 7% ~ (p=0.340 n=9+9)
MasterIndexLookupParallel/known,indices=25-8 461ns ± 2% 445ns ± 2% -3.49% (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=25-8 408ns ±11% 378ns ± 5% -7.22% (p=0.035 n=10+9)
MasterIndexLookupParallel/known,indices=50-8 479ns ± 1% 437ns ± 4% -8.82% (p=0.000 n=10+10)
MasterIndexLookupParallel/unknown,indices=50-8 406ns ± 8% 343ns ±15% -15.44% (p=0.001 n=10+10)
MasterIndexLookupParallel/known,indices=100-8 480ns ± 1% 455ns ± 5% -5.15% (p=0.000 n=8+10)
MasterIndexLookupParallel/unknown,indices=100-8 391ns ±18% 382ns ± 8% ~ (p=0.315 n=10+10)
MasterIndexLookupBlobSize-8 71.0ns ± 8% 57.2ns ±11% -19.36% (p=0.000 n=9+10)
PackerManager-8 254ms ± 1% 254ms ± 1% ~ (p=0.285 n=15+15)
name old speed new speed delta
IndexMapHash-8 1.12GB/s ± 1% 2.05GB/s ± 0% +82.13% (p=0.000 n=10+9)
PackerManager-8 208MB/s ± 1% 207MB/s ± 1% ~ (p=0.281 n=15+15)
name old alloc/op new alloc/op delta
IndexMapHash-8 0.00B 0.00B ~ (all equal)
IndexAlloc-8 400MB ± 0% 400MB ± 0% ~ (p=1.000 n=9+10)
IndexAllocParallel-8 401MB ± 0% 401MB ± 0% +0.00% (p=0.000 n=10+10)
MasterIndexAlloc-8 258MB ± 0% 262MB ± 0% +1.42% (p=0.000 n=9+10)
PackerManager-8 73.1kB ± 0% 73.1kB ± 0% ~ (p=0.382 n=13+13)
name old allocs/op new allocs/op delta
IndexMapHash-8 0.00 0.00 ~ (all equal)
IndexAlloc-8 907k ± 0% 907k ± 0% -0.00% (p=0.000 n=10+10)
IndexAllocParallel-8 907k ± 0% 907k ± 0% +0.00% (p=0.009 n=10+10)
MasterIndexAlloc-8 327k ± 0% 317k ± 0% -3.06% (p=0.000 n=10+10)
PackerManager-8 744 ± 0% 744 ± 0% ~ (all equal)
2021-09-17 10:38:17 +00:00
|
|
|
m.mh.Reset()
|
|
|
|
_, _ = m.mh.Write(id[:])
|
|
|
|
h := uint(m.mh.Sum64())
|
2020-06-23 20:13:25 +00:00
|
|
|
return h & uint(len(m.buckets)-1)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (m *indexMap) init() {
|
|
|
|
const initialBuckets = 64
|
2022-02-05 20:25:23 +00:00
|
|
|
m.buckets = make([]uint, initialBuckets)
|
|
|
|
// first entry in blockList serves as null byte
|
2023-05-28 21:42:47 +00:00
|
|
|
m.blockList = *newHAT()
|
|
|
|
m.newEntry()
|
2020-06-23 20:13:25 +00:00
|
|
|
}
|
|
|
|
|
2023-06-08 16:07:06 +00:00
|
|
|
// len reports how many entries have been added to the map.
func (m *indexMap) len() uint {
	return m.numentries
}
|
2020-06-23 20:13:25 +00:00
|
|
|
|
2022-02-05 20:25:23 +00:00
|
|
|
func (m *indexMap) newEntry() (*indexEntry, uint) {
|
2023-05-28 21:42:47 +00:00
|
|
|
return m.blockList.Alloc()
|
2022-02-05 20:25:23 +00:00
|
|
|
}
|
2020-06-23 20:13:25 +00:00
|
|
|
|
2022-02-05 20:25:23 +00:00
|
|
|
func (m *indexMap) resolve(idx uint) *indexEntry {
|
2023-05-28 21:42:47 +00:00
|
|
|
return m.blockList.Ref(idx)
|
2020-06-23 20:13:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type indexEntry struct {
|
2022-02-13 16:24:09 +00:00
|
|
|
id restic.ID
|
2022-02-05 20:25:23 +00:00
|
|
|
next uint
|
2022-02-13 16:24:09 +00:00
|
|
|
packIndex int // Position in containing Index's packs field.
|
|
|
|
offset uint32
|
|
|
|
length uint32
|
|
|
|
uncompressedLength uint32
|
2020-06-23 20:13:25 +00:00
|
|
|
}
|
2023-05-28 21:42:47 +00:00
|
|
|
|
|
|
|
type hashedArrayTree struct {
|
|
|
|
mask uint
|
|
|
|
maskShift uint
|
|
|
|
blockSize uint
|
|
|
|
|
|
|
|
size uint
|
|
|
|
blockList [][]indexEntry
|
|
|
|
}
|
|
|
|
|
|
|
|
func newHAT() *hashedArrayTree {
|
|
|
|
// start with a small block size
|
|
|
|
blockSizePower := uint(2)
|
|
|
|
blockSize := uint(1 << blockSizePower)
|
|
|
|
|
|
|
|
return &hashedArrayTree{
|
|
|
|
mask: blockSize - 1,
|
|
|
|
maskShift: blockSizePower,
|
|
|
|
blockSize: blockSize,
|
|
|
|
size: 0,
|
|
|
|
blockList: make([][]indexEntry, blockSize),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *hashedArrayTree) Alloc() (*indexEntry, uint) {
|
|
|
|
h.grow()
|
|
|
|
size := h.size
|
|
|
|
idx, subIdx := h.index(size)
|
|
|
|
h.size++
|
|
|
|
return &h.blockList[idx][subIdx], size
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *hashedArrayTree) index(pos uint) (idx uint, subIdx uint) {
|
|
|
|
subIdx = pos & h.mask
|
|
|
|
idx = pos >> h.maskShift
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *hashedArrayTree) Ref(pos uint) *indexEntry {
|
|
|
|
if pos >= h.size {
|
2023-05-28 23:09:33 +00:00
|
|
|
panic("array index out of bounds")
|
2023-05-28 21:42:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
idx, subIdx := h.index(pos)
|
|
|
|
return &h.blockList[idx][subIdx]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *hashedArrayTree) Size() uint {
|
|
|
|
return h.size
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *hashedArrayTree) grow() {
|
|
|
|
idx, subIdx := h.index(h.size)
|
|
|
|
if int(idx) == len(h.blockList) {
|
|
|
|
// blockList is too small -> double list and block size
|
|
|
|
h.blockSize *= 2
|
|
|
|
h.mask = h.mask*2 + 1
|
|
|
|
h.maskShift++
|
|
|
|
idx = idx / 2
|
|
|
|
|
2023-06-02 17:39:12 +00:00
|
|
|
oldBlocks := h.blockList
|
|
|
|
h.blockList = make([][]indexEntry, h.blockSize)
|
|
|
|
|
2023-05-28 21:42:47 +00:00
|
|
|
// pairwise merging of blocks
|
|
|
|
for i := 0; i < len(oldBlocks); i += 2 {
|
|
|
|
block := make([]indexEntry, 0, h.blockSize)
|
|
|
|
block = append(block, oldBlocks[i]...)
|
|
|
|
block = append(block, oldBlocks[i+1]...)
|
|
|
|
h.blockList[i/2] = block
|
|
|
|
// allow GC
|
|
|
|
oldBlocks[i] = nil
|
|
|
|
oldBlocks[i+1] = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if subIdx == 0 {
|
|
|
|
// new index entry batch
|
|
|
|
h.blockList[idx] = make([]indexEntry, h.blockSize)
|
|
|
|
}
|
|
|
|
}
|