From fafd30f804fe98d3e2ebc727f5fecd21f7b4d3f6 Mon Sep 17 00:00:00 2001 From: Audrius Butkevicius Date: Mon, 25 Feb 2019 09:29:31 +0000 Subject: [PATCH] lib/scanner: Use standard adler32 when we don't need rolling (#5556) * lib/scanner: Use standard adler32 when we don't need rolling It seems the rolling adler32 implementation is super slow when executed on large blocks, even though I can't explain why. BenchmarkFind1MFile-16 100 18991667 ns/op 55.21 MB/s 398844 B/op 20 allocs/op BenchmarkBlock/adler32-131072/#00-16 200 9726519 ns/op 1078.06 MB/s 2654936 B/op 163 allocs/op BenchmarkBlock/bozo32-131072/#00-16 20 73435540 ns/op 142.79 MB/s 2654928 B/op 163 allocs/op BenchmarkBlock/buzhash32-131072/#00-16 20 61482005 ns/op 170.55 MB/s 2654928 B/op 163 allocs/op BenchmarkBlock/buzhash64-131072/#00-16 20 61673660 ns/op 170.02 MB/s 2654928 B/op 163 allocs/op BenchmarkBlock/vanilla-adler32-131072/#00-16 300 4377307 ns/op 2395.48 MB/s 2654935 B/op 163 allocs/op BenchmarkBlock/adler32-16777216/#00-16 2 544010100 ns/op 19.27 MB/s 65624 B/op 5 allocs/op BenchmarkBlock/bozo32-16777216/#00-16 1 4678108500 ns/op 2.24 MB/s 51970144 B/op 24 allocs/op BenchmarkBlock/buzhash32-16777216/#00-16 1 3880370700 ns/op 2.70 MB/s 51970144 B/op 24 allocs/op BenchmarkBlock/buzhash64-16777216/#00-16 1 3875911700 ns/op 2.71 MB/s 51970144 B/op 24 allocs/op BenchmarkBlock/vanilla-adler32-16777216/#00-16 300 4010279 ns/op 2614.72 MB/s 65624 B/op 5 allocs/op BenchmarkRoll/adler32-131072/#00-16 2000 974279 ns/op 134.53 MB/s 270 B/op 0 allocs/op BenchmarkRoll/bozo32-131072/#00-16 2000 791770 ns/op 165.54 MB/s 270 B/op 0 allocs/op BenchmarkRoll/buzhash32-131072/#00-16 2000 917409 ns/op 142.87 MB/s 270 B/op 0 allocs/op BenchmarkRoll/buzhash64-131072/#00-16 2000 881125 ns/op 148.76 MB/s 270 B/op 0 allocs/op BenchmarkRoll/adler32-16777216/#00-16 10 124000400 ns/op 135.30 MB/s 7548937 B/op 0 allocs/op BenchmarkRoll/bozo32-16777216/#00-16 10 118008080 ns/op 142.17 MB/s 7548928 B/op 0 allocs/op 
BenchmarkRoll/buzhash32-16777216/#00-16 10 126794440 ns/op 132.32 MB/s 7548928 B/op 0 allocs/op BenchmarkRoll/buzhash64-16777216/#00-16 10 126631960 ns/op 132.49 MB/s 7548928 B/op 0 allocs/op * Update benchmark_test.go * gofmt * fixup benchmark --- lib/scanner/blocks.go | 2 +- lib/weakhash/benchmark_test.go | 223 +++++++++++++++------------------ 2 files changed, 99 insertions(+), 126 deletions(-) diff --git a/lib/scanner/blocks.go b/lib/scanner/blocks.go index 3d3e641ec..a67ac9208 100644 --- a/lib/scanner/blocks.go +++ b/lib/scanner/blocks.go @@ -10,9 +10,9 @@ import ( "bytes" "context" "hash" + "hash/adler32" "io" - "github.com/chmduquesne/rollinghash/adler32" "github.com/syncthing/syncthing/lib/protocol" "github.com/syncthing/syncthing/lib/sha256" ) diff --git a/lib/weakhash/benchmark_test.go b/lib/weakhash/benchmark_test.go index 0eb6aae39..133be894e 100644 --- a/lib/weakhash/benchmark_test.go +++ b/lib/weakhash/benchmark_test.go @@ -7,7 +7,13 @@ package weakhash import ( + "bytes" "context" + "fmt" + "hash" + vadler32 "hash/adler32" + "io" + "math/rand" "os" "testing" @@ -15,10 +21,9 @@ import ( "github.com/chmduquesne/rollinghash/bozo32" "github.com/chmduquesne/rollinghash/buzhash32" "github.com/chmduquesne/rollinghash/buzhash64" - "github.com/chmduquesne/rollinghash/rabinkarp64" ) -const testFile = "../model/testdata/~syncthing~file.tmp" +const testFile = "../model/testdata/tmpfile" const size = 128 << 10 func BenchmarkFind1MFile(b *testing.B) { @@ -37,142 +42,110 @@ func BenchmarkFind1MFile(b *testing.B) { } } -func BenchmarkWeakHashAdler32(b *testing.B) { - data := make([]byte, size) - hf := adler32.New() - - for i := 0; i < b.N; i++ { - hf.Write(data) - } - - hf.Sum32() - b.SetBytes(size) +type RollingHash interface { + hash.Hash + Roll(byte) } -func BenchmarkWeakHashAdler32Roll(b *testing.B) { - data := make([]byte, size) - hf := adler32.New() - hf.Write(data) +func BenchmarkBlock(b *testing.B) { + tests := []struct { + name string + hash hash.Hash + }{ 
+ { + "adler32", adler32.New(), + }, + { + "bozo32", bozo32.New(), + }, + { + "buzhash32", buzhash32.New(), + }, + { + "buzhash64", buzhash64.New(), + }, + { + "vanilla-adler32", vadler32.New(), + }, + } - b.ResetTimer() + sizes := []int64{128 << 10, 16 << 20} - for i := 0; i < b.N; i++ { - for i := 0; i <= size; i++ { - hf.Roll('a') + buf := make([]byte, 16<<20) + rand.Read(buf) + + for _, testSize := range sizes { + for _, test := range tests { + b.Run(test.name+"-"+fmt.Sprint(testSize), func(bb *testing.B) { + bb.Run("", func(bbb *testing.B) { + bbb.ResetTimer() + for i := 0; i < bbb.N; i++ { + lr := io.LimitReader(bytes.NewReader(buf), testSize) + n, err := io.Copy(test.hash, lr) + if err != nil { + bbb.Error(err) + } + if n != testSize { + bbb.Errorf("%d != %d", n, testSize) + } + + test.hash.Sum(nil) + test.hash.Reset() + } + + bbb.SetBytes(int64(len(buf))) + bbb.ReportAllocs() + }) + + }) } } - - b.SetBytes(size) } -func BenchmarkWeakHashRabinKarp64(b *testing.B) { - data := make([]byte, size) - hf := rabinkarp64.New() - - for i := 0; i < b.N; i++ { - hf.Write(data) +func BenchmarkRoll(b *testing.B) { + tests := []struct { + name string + hash RollingHash + }{ + { + "adler32", adler32.New(), + }, + { + "bozo32", bozo32.New(), + }, + { + "buzhash32", buzhash32.New(), + }, + { + "buzhash64", buzhash64.New(), + }, } - hf.Sum64() - b.SetBytes(size) -} + sizes := []int64{128 << 10, 16 << 20} -func BenchmarkWeakHashRabinKarp64Roll(b *testing.B) { - data := make([]byte, size) - hf := rabinkarp64.New() - hf.Write(data) + for _, testSize := range sizes { + for _, test := range tests { + b.Run(test.name+"-"+fmt.Sprint(testSize), func(bb *testing.B) { + bb.Run("", func(bbb *testing.B) { + data := make([]byte, testSize) - b.ResetTimer() + if _, err := test.hash.Write(data); err != nil { + bbb.Error(err) + } - for i := 0; i < b.N; i++ { - for i := 0; i <= size; i++ { - hf.Roll('a') + bbb.ResetTimer() + + for i := 0; i < bbb.N; i++ { + for j := int64(0); j <= testSize; 
j++ { + test.hash.Roll('a') + } + } + + bbb.SetBytes(testSize) + bbb.ReportAllocs() + }) + + }) } } - - b.SetBytes(size) -} - -func BenchmarkWeakHashBozo32(b *testing.B) { - data := make([]byte, size) - hf := bozo32.New() - - for i := 0; i < b.N; i++ { - hf.Write(data) - } - - hf.Sum32() - b.SetBytes(size) -} - -func BenchmarkWeakHashBozo32Roll(b *testing.B) { - data := make([]byte, size) - hf := bozo32.New() - hf.Write(data) - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - for i := 0; i <= size; i++ { - hf.Roll('a') - } - } - - b.SetBytes(size) -} - -func BenchmarkWeakHashBuzhash32(b *testing.B) { - data := make([]byte, size) - hf := buzhash32.New() - - for i := 0; i < b.N; i++ { - hf.Write(data) - } - - hf.Sum32() - b.SetBytes(size) -} - -func BenchmarkWeakHashBuzhash32Roll(b *testing.B) { - data := make([]byte, size) - hf := buzhash32.New() - hf.Write(data) - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - for i := 0; i <= size; i++ { - hf.Roll('a') - } - } - - b.SetBytes(size) -} - -func BenchmarkWeakHashBuzhash64(b *testing.B) { - data := make([]byte, size) - hf := buzhash64.New() - - for i := 0; i < b.N; i++ { - hf.Write(data) - } - - hf.Sum64() - b.SetBytes(size) -} - -func BenchmarkWeakHashBuzhash64Roll(b *testing.B) { - data := make([]byte, size) - hf := buzhash64.New() - hf.Write(data) - - b.ResetTimer() - - for i := 0; i < b.N; i++ { - for i := 0; i <= size; i++ { - hf.Roll('a') - } - } - - b.SetBytes(size) }