From 8d2996eaaa2e3a79ac56e363f39bce40a5cb799f Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Fri, 17 Sep 2021 12:38:17 +0200 Subject: [PATCH] Replace siphash by hash/maphash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Go 1.17.1, maphash has become quite a bit faster than siphash, so we can drop one third-party dependency. maphash is just an interface to the standard Go map's hash function, which we already trust for other use cases. Benchmark results on linux/amd64, -benchtime=3s: name old time/op new time/op delta IndexHasUnknown-8 50.6ns ±10% 41.0ns ±19% -18.92% (p=0.000 n=9+10) IndexHasKnown-8 52.6ns ±12% 41.5ns ±12% -21.13% (p=0.000 n=9+10) IndexMapHash-8 3.64µs ± 1% 2.00µs ± 0% -45.09% (p=0.000 n=10+9) IndexAlloc-8 700ms ± 1% 601ms ± 6% -14.18% (p=0.000 n=8+10) IndexAllocParallel-8 205ms ± 5% 192ms ± 8% -6.18% (p=0.043 n=10+10) MasterIndexAlloc-8 319ms ± 1% 279ms ± 5% -12.58% (p=0.000 n=10+10) MasterIndexLookupSingleIndex-8 156ns ± 8% 147ns ± 6% -5.46% (p=0.023 n=10+10) MasterIndexLookupMultipleIndex-8 150ns ± 7% 142ns ± 8% -5.69% (p=0.007 n=10+10) MasterIndexLookupSingleIndexUnknown-8 74.4ns ± 6% 72.0ns ± 9% ~ (p=0.175 n=10+9) MasterIndexLookupMultipleIndexUnknown-8 67.4ns ± 9% 65.5ns ± 7% ~ (p=0.340 n=9+9) MasterIndexLookupParallel/known,indices=25-8 461ns ± 2% 445ns ± 2% -3.49% (p=0.000 n=10+10) MasterIndexLookupParallel/unknown,indices=25-8 408ns ±11% 378ns ± 5% -7.22% (p=0.035 n=10+9) MasterIndexLookupParallel/known,indices=50-8 479ns ± 1% 437ns ± 4% -8.82% (p=0.000 n=10+10) MasterIndexLookupParallel/unknown,indices=50-8 406ns ± 8% 343ns ±15% -15.44% (p=0.001 n=10+10) MasterIndexLookupParallel/known,indices=100-8 480ns ± 1% 455ns ± 5% -5.15% (p=0.000 n=8+10) MasterIndexLookupParallel/unknown,indices=100-8 391ns ±18% 382ns ± 8% ~ (p=0.315 n=10+10) MasterIndexLookupBlobSize-8 71.0ns ± 8% 57.2ns ±11% -19.36% (p=0.000 n=9+10) PackerManager-8 254ms ± 1% 254ms ± 1% ~ (p=0.285 n=15+15) name old speed new speed delta IndexMapHash-8 1.12GB/s ± 1% 2.05GB/s ± 0% +82.13% (p=0.000 n=10+9) PackerManager-8 208MB/s ± 1% 207MB/s ± 1% ~ (p=0.281 n=15+15) name old alloc/op new alloc/op delta IndexMapHash-8 0.00B 0.00B ~ (all equal) IndexAlloc-8 400MB ± 0% 400MB ± 0% ~ (p=1.000 n=9+10) IndexAllocParallel-8 401MB ± 0% 401MB ± 0% +0.00% (p=0.000 n=10+10) MasterIndexAlloc-8 258MB ± 0% 262MB ± 0% +1.42% (p=0.000 n=9+10) PackerManager-8 73.1kB ± 0% 73.1kB ± 0% ~ (p=0.382 n=13+13) name old allocs/op new allocs/op delta IndexMapHash-8 0.00 0.00 ~ (all equal) IndexAlloc-8 907k ± 0% 907k ± 0% -0.00% (p=0.000 n=10+10) IndexAllocParallel-8 907k ± 0% 907k ± 0% +0.00% (p=0.009 n=10+10) MasterIndexAlloc-8 327k ± 0% 317k ± 0% -3.06% (p=0.000 n=10+10) PackerManager-8 744 ± 0% 744 ± 0% ~ (all equal) --- go.mod | 1 - go.sum | 2 -- internal/repository/indexmap.go | 22 +++++++--------------- internal/repository/indexmap_test.go | 26 -------------------------- 4 files changed, 7 insertions(+), 44 deletions(-) diff --git a/go.mod b/go.mod index 27cac9f48..1ba834aa3 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect github.com/cenkalti/backoff/v4 v4.1.1 github.com/cespare/xxhash/v2 v2.1.1 - github.com/dchest/siphash v1.2.2 github.com/dnaeon/go-vcr v1.2.0 // indirect github.com/elithrar/simple-scrypt v1.3.0 github.com/go-ole/go-ole v1.2.5 diff --git a/go.sum b/go.sum index d9dc28725..0e5ba267a 100644 --- a/go.sum +++ b/go.sum @@ -89,8 +89,6 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsr github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dchest/siphash v1.2.2 h1:9DFz8tQwl9pTVt5iok/9zKyzA1Q6bRGiF3HPiEEVr9I= -github.com/dchest/siphash v1.2.2/go.mod h1:q+IRvb2gOSrUnYoPqHiyHXS0FOBBOdl6tONBlVnOnt4= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo= diff --git a/internal/repository/indexmap.go b/internal/repository/indexmap.go index 8182d055e..f713a3304 100644 --- a/internal/repository/indexmap.go +++ b/internal/repository/indexmap.go @@ -1,12 +1,9 @@ package repository import ( - "crypto/rand" - "encoding/binary" + "hash/maphash" "github.com/restic/restic/internal/restic" - - "github.com/dchest/siphash" ) // An indexMap is a chained hash table that maps blob IDs to indexEntries. @@ -23,7 +20,7 @@ type indexMap struct { buckets []*indexEntry numentries uint - key0, key1 uint64 // Key for hash randomization. + mh maphash.Hash free *indexEntry // Free list. } @@ -113,25 +110,20 @@ func (m *indexMap) grow() { } func (m *indexMap) hash(id restic.ID) uint { - // We use siphash with a randomly generated 128-bit key, to prevent - // backups of specially crafted inputs from degrading performance. + // We use maphash to prevent backups of specially crafted inputs + // from degrading performance. // While SHA-256 should be collision-resistant, for hash table indices // we use only a few bits of it and finding collisions for those is // much easier than breaking the whole algorithm. - h := uint(siphash.Hash(m.key0, m.key1, id[:])) + m.mh.Reset() + _, _ = m.mh.Write(id[:]) + h := uint(m.mh.Sum64()) return h & uint(len(m.buckets)-1) } func (m *indexMap) init() { const initialBuckets = 64 m.buckets = make([]*indexEntry, initialBuckets) - - var buf [16]byte - if _, err := rand.Read(buf[:]); err != nil { - panic(err) // Very little we can do here. - } - m.key0 = binary.LittleEndian.Uint64(buf[:8]) - m.key1 = binary.LittleEndian.Uint64(buf[8:]) } func (m *indexMap) len() uint { return m.numentries } diff --git a/internal/repository/indexmap_test.go b/internal/repository/indexmap_test.go index 0d435387d..d803bf3c5 100644 --- a/internal/repository/indexmap_test.go +++ b/internal/repository/indexmap_test.go @@ -107,32 +107,6 @@ func TestIndexMapForeachWithID(t *testing.T) { } } -func TestIndexMapHash(t *testing.T) { - t.Parallel() - - var m1, m2 indexMap - - id := restic.NewRandomID() - // Add to both maps to initialize them. - m1.add(id, 0, 0, 0) - m2.add(id, 0, 0, 0) - - h1 := m1.hash(id) - h2 := m2.hash(id) - - rtest.Equals(t, len(m1.buckets), len(m2.buckets)) // just to be sure - - if h1 == h2 { - // The probability of the zero key should be 2^(-128). - if m1.key0 == 0 && m1.key1 == 0 { - t.Error("siphash key not set for m1") - } - if m2.key0 == 0 && m2.key1 == 0 { - t.Error("siphash key not set for m2") - } - } -} - func BenchmarkIndexMapHash(b *testing.B) { var m indexMap m.add(restic.ID{}, 0, 0, 0) // Trigger lazy initialization.