syncthing/lib/scanner/blocks_test.go

222 lines
5.9 KiB
Go
Raw Normal View History

2014-11-16 20:13:20 +00:00
// Copyright (C) 2014 The Syncthing Authors.
2014-09-29 19:43:32 +00:00
//
2015-03-07 20:36:35 +00:00
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
2014-06-01 20:50:14 +00:00
package scanner
2014-03-02 22:58:14 +00:00
import (
"bytes"
"context"
"crypto/rand"
lib/sha256: Remove it (#9643) ### Purpose Remove the `lib/sha256` package, because it's no longer necessary. Go's standard library now has the same performance and is on par with `sha256-simd` since [Since Go 1.21](https://github.com/golang/go/commit/1a64574f42b95594cf9c8a12e9ca13d75585429c). Therefore using `sha256-simd` has no benefits anymore. ARM already has optimized sha256 assembly code since https://github.com/golang/go/commit/7b8a7f8272fd1941a199af1adb334bd9996e8909, `sha256-simd` published their results before that optimized assembly was implemented, https://github.com/minio/sha256-simd/commit/f941fedda826b68a196de2e0a9183e273ec0cb91. The assembly looks very similar and the benchmarks in the Go commit match that of `sha256-simd`. This patch removes all of the related code of `lib/sha256` and makes `crypto/sha256` the 'default'. Benchmark of `sha256-simd` and `crypto/sha256`: <details> ``` cpu: AMD Ryzen 5 3600X 6-Core Processor │ simd.txt │ go.txt │ │ sec/op │ sec/op vs base │ Hash/8Bytes-12 63.25n ± 1% 73.38n ± 1% +16.02% (p=0.002 n=6) Hash/64Bytes-12 98.73n ± 1% 105.30n ± 1% +6.65% (p=0.002 n=6) Hash/1K-12 567.2n ± 1% 572.8n ± 1% +0.99% (p=0.002 n=6) Hash/8K-12 4.062µ ± 1% 4.062µ ± 1% ~ (p=0.396 n=6) Hash/1M-12 512.1µ ± 0% 510.6µ ± 1% ~ (p=0.485 n=6) Hash/5M-12 2.556m ± 1% 2.564m ± 0% ~ (p=0.093 n=6) Hash/10M-12 5.112m ± 0% 5.127m ± 0% ~ (p=0.093 n=6) geomean 13.82µ 14.27µ +3.28% │ simd.txt │ go.txt │ │ B/s │ B/s vs base │ Hash/8Bytes-12 120.6Mi ± 1% 104.0Mi ± 1% -13.81% (p=0.002 n=6) Hash/64Bytes-12 618.2Mi ± 1% 579.8Mi ± 1% -6.22% (p=0.002 n=6) Hash/1K-12 1.682Gi ± 1% 1.665Gi ± 1% -0.98% (p=0.002 n=6) Hash/8K-12 1.878Gi ± 1% 1.878Gi ± 1% ~ (p=0.310 n=6) Hash/1M-12 1.907Gi ± 0% 1.913Gi ± 1% ~ (p=0.485 n=6) Hash/5M-12 1.911Gi ± 1% 1.904Gi ± 0% ~ (p=0.093 n=6) Hash/10M-12 1.910Gi ± 0% 1.905Gi ± 0% ~ (p=0.093 n=6) geomean 1.066Gi 1.032Gi -3.18% ``` </details> ### Testing Compiled and tested on Linux. 
### Documentation https://github.com/syncthing/docs/pull/874
2024-08-10 11:58:20 +00:00
"crypto/sha256"
2014-03-02 22:58:14 +00:00
"fmt"
origAdler32 "hash/adler32"
mrand "math/rand"
2014-03-02 22:58:14 +00:00
"testing"
"testing/quick"
2014-07-12 21:06:48 +00:00
rollingAdler32 "github.com/chmduquesne/rollinghash/adler32"
2015-09-22 17:38:46 +00:00
"github.com/syncthing/syncthing/lib/protocol"
2014-03-02 22:58:14 +00:00
)
// blocksTestData is the table driving TestBlocks. Each entry describes an
// input stream, the block size it is split with, and the per-block results
// that are expected, in block order.
//
// The weak hashes are the Adler-32 sums of the individual blocks. The one
// exception is the empty input, for which a single block with weak hash 0 is
// expected.
var blocksTestData = []struct {
	data      []byte   // input that is split into blocks
	blocksize int      // block size used for splitting
	hash      []string // expected hex-encoded hash of every block
	weakhash  []uint32 // expected weak hash of every block
}{
	// Empty input: one entry, holding the hash of zero bytes.
	{
		data:      []byte(""),
		blocksize: 1024,
		hash:      []string{"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
		weakhash:  []uint32{0},
	},
	// Input shorter than, one byte shorter than, and exactly as long as the
	// block size: always a single block.
	{
		data:      []byte("contents"),
		blocksize: 1024,
		hash:      []string{"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"},
		weakhash:  []uint32{0x0f3a036f},
	},
	{
		data:      []byte("contents"),
		blocksize: 9,
		hash:      []string{"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"},
		weakhash:  []uint32{0x0f3a036f},
	},
	{
		data:      []byte("contents"),
		blocksize: 8,
		hash:      []string{"d1b2a59fbea7e20077af9f91b27e95e865061b270be03ff539ab3b73587882e8"},
		weakhash:  []uint32{0x0f3a036f},
	},
	// Input one byte longer than the block size: a full block followed by a
	// one byte block.
	{
		data:      []byte("contents"),
		blocksize: 7,
		hash: []string{
			"ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73",
			"043a718774c572bd8a25adbeb1bfcd5c0256ae11cecf9f9c3f925d0e52beaf89",
		},
		weakhash: []uint32{0x0bcb02fc, 0x00740074},
	},
	// Several blocks with a short trailing block.
	{
		data:      []byte("contents"),
		blocksize: 3,
		hash: []string{
			"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
			"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3",
			"44ad63f60af0f6db6fdde6d5186ef78176367df261fa06be3079b6c80c8adba4",
		},
		weakhash: []uint32{0x02780141, 0x02970148, 0x015d00e8},
	},
	// Repeated content at the start: the first two blocks are identical.
	{
		data:      []byte("conconts"),
		blocksize: 3,
		hash: []string{
			"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
			"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
			"44ad63f60af0f6db6fdde6d5186ef78176367df261fa06be3079b6c80c8adba4",
		},
		weakhash: []uint32{0x02780141, 0x02780141, 0x015d00e8},
	},
	// Repeated content at the end: the last two blocks are identical.
	{
		data:      []byte("contenten"),
		blocksize: 3,
		hash: []string{
			"1143da2bc54c495c4be31d3868785d39ffdfd56df5668f0645d8f14d47647952",
			"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3",
			"e4432baa90819aaef51d2a7f8e148bf7e679610f3173752fabb4dcb2d0f418d3",
		},
		weakhash: []uint32{0x02780141, 0x02970148, 0x02970148},
	},
}
func TestBlocks(t *testing.T) {
for testNo, test := range blocksTestData {
2014-03-02 22:58:14 +00:00
buf := bytes.NewBuffer(test.data)
blocks, err := Blocks(context.TODO(), buf, test.blocksize, -1, nil, true)
2014-03-02 22:58:14 +00:00
if err != nil {
t.Fatal(err)
}
if l := len(blocks); l != len(test.hash) {
t.Fatalf("%d: Incorrect number of blocks %d != %d", testNo, l, len(test.hash))
2014-03-02 22:58:14 +00:00
} else {
i := 0
for off := int64(0); off < int64(len(test.data)); off += int64(test.blocksize) {
if blocks[i].Offset != off {
t.Errorf("%d/%d: Incorrect offset %d != %d", testNo, i, blocks[i].Offset, off)
2014-03-02 22:58:14 +00:00
}
bs := test.blocksize
if rem := len(test.data) - int(off); bs > rem {
bs = rem
}
if int(blocks[i].Size) != bs {
t.Errorf("%d/%d: Incorrect length %d != %d", testNo, i, blocks[i].Size, bs)
2014-03-02 22:58:14 +00:00
}
if h := fmt.Sprintf("%x", blocks[i].Hash); h != test.hash[i] {
t.Errorf("%d/%d: Incorrect block hash %q != %q", testNo, i, h, test.hash[i])
}
if h := blocks[i].WeakHash; h != test.weakhash[i] {
t.Errorf("%d/%d: Incorrect block weakhash 0x%08x != 0x%08x", testNo, i, h, test.weakhash[i])
2014-03-02 22:58:14 +00:00
}
i++
}
}
}
}
func TestAdler32Variants(t *testing.T) {
// Verify that the two adler32 functions give matching results for a few
// different blocks of data.
hf1 := origAdler32.New()
hf2 := rollingAdler32.New()
checkFn := func(data []byte) bool {
2019-02-02 11:16:27 +00:00
hf1.Write(data)
sum1 := hf1.Sum32()
2019-02-02 11:16:27 +00:00
hf2.Write(data)
sum2 := hf2.Sum32()
hf1.Reset()
hf2.Reset()
// Make sure whatever we use in Validate matches too resp. this
// tests gets adjusted if we ever switch the weak hash algo.
return sum1 == sum2 && Validate(data, nil, sum1)
}
// protocol block sized data
data := make([]byte, protocol.MinBlockSize)
for i := 0; i < 5; i++ {
2019-02-02 11:16:27 +00:00
rand.Read(data)
if !checkFn(data) {
t.Errorf("Hash mismatch on block sized data")
}
}
// random small blocks
if err := quick.Check(checkFn, nil); err != nil {
t.Error(err)
}
// rolling should have the same result as the individual blocks
// themselves.
windowSize := 128
hf3 := rollingAdler32.New()
2019-02-02 11:16:27 +00:00
hf3.Write(data[:windowSize])
for i := windowSize; i < len(data); i++ {
if i%windowSize == 0 {
// let the reference function catch up
window := data[i-windowSize : i]
hf1.Reset()
hf1.Write(window)
hf2.Reset()
hf2.Write(window)
// verify that they are in sync with the rolling function
sum1 := hf1.Sum32()
sum2 := hf2.Sum32()
sum3 := hf3.Sum32()
t.Logf("At i=%d, sum2=%08x, sum3=%08x", i, sum2, sum3)
if sum2 != sum3 {
t.Errorf("Mismatch after roll; i=%d, sum2=%08x, sum3=%08x", i, sum2, sum3)
break
}
if sum1 != sum3 {
t.Errorf("Mismatch after roll; i=%d, sum1=%08x, sum3=%08x", i, sum1, sum3)
break
}
if !Validate(window, nil, sum1) {
t.Errorf("Validation failure after roll; i=%d", i)
}
}
hf3.Roll(data[i])
}
}
// BenchmarkValidate measures Validate over a fixed, reproducible set of
// 128 KiB blocks: blocksPerType blocks for which both the hash and the weak
// hash were computed from the data, followed by blocksPerType blocks for
// which the hash matches the data but the weak hash does not.
func BenchmarkValidate(b *testing.B) {
	type block struct {
		data     []byte
		hash     [sha256.Size]byte
		weakhash uint32
	}

	const (
		blocksPerType = 100
		blockSize     = 128 << 10
	)

	// A fixed seed keeps the benchmarked data identical between runs.
	r := mrand.New(mrand.NewSource(0x136bea689e851))

	// newBlock draws the next blockSize bytes from r and hashes them. The
	// weak hash is left for the caller to fill in.
	newBlock := func() block {
		var blk block
		blk.data = make([]byte, blockSize)
		// (*mrand.Rand).Read always fills the whole slice and returns a nil
		// error, so its results are not checked.
		r.Read(blk.data)
		blk.hash = sha256.Sum256(blk.data)
		return blk
	}

	blocks := make([]block, 0, 2*blocksPerType)

	// Valid blocks.
	for i := 0; i < blocksPerType; i++ {
		blk := newBlock()
		blk.weakhash = origAdler32.Checksum(blk.data)
		blocks = append(blocks, blk)
	}

	// Blocks where the hash matches, but the weakhash doesn't.
	for i := 0; i < blocksPerType; i++ {
		blk := newBlock()
		blk.weakhash = 1 // Not zero: a zero weak hash causes Validate to skip the weak hash.
		blocks = append(blocks, blk)
	}

	b.ReportAllocs()
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		// Index into the slice so that no block struct is copied inside the
		// measured loop.
		for j := range blocks {
			blk := &blocks[j]
			Validate(blk.data, blk.hash[:], blk.weakhash)
		}
	}
}