syncthing/lib/scanner/blocks.go

// Copyright (C) 2014 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

package scanner

import (
	"bytes"
	"context"
	"crypto/sha256"
	"hash"
	"hash/adler32"
	"io"

	"github.com/syncthing/syncthing/lib/protocol"
)

// SHA256OfNothing is the SHA-256 hash of zero bytes of input.
var SHA256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}

// Counter is notified of the number of bytes that have been processed.
type Counter interface {
	Update(bytes int64)
}

// Blocks returns the blockwise hash of the reader.
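//
// A minimal usage sketch; the file name, the 128 KiB block size, the nil
// counter and the other argument values below are illustrative only:
//
//	fd, err := os.Open("some-file")
//	if err != nil {
//		return err
//	}
//	defer fd.Close()
//	// Size unknown (-1), no progress counter, weak hashes enabled.
//	blocks, err := Blocks(context.Background(), fd, 128<<10, -1, nil, true)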
func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
	if counter == nil {
		counter = &noopCounter{}
	}

	hf := sha256.New()
	const hashLength = sha256.Size

	var weakHf hash.Hash32 = noopHash{}
	var multiHf io.Writer = hf
	if useWeakHashes {
		// Use an actual weak hash function and make multiHf write to
		// both hash functions.
		weakHf = adler32.New()
		multiHf = io.MultiWriter(hf, weakHf)
	}

	var blocks []protocol.BlockInfo
	var hashes, thisHash []byte
	if sizehint >= 0 {
		// Allocate contiguous blocks for the BlockInfo structures and their
		// hashes once and for all, and stick to the specified size.
		r = io.LimitReader(r, sizehint)
		numBlocks := sizehint / int64(blocksize)
		remainder := sizehint % int64(blocksize)
		if remainder != 0 {
			numBlocks++
		}
		blocks = make([]protocol.BlockInfo, 0, numBlocks)
		hashes = make([]byte, 0, hashLength*numBlocks)
	}

	// A 32k buffer is used for copying into the hash function.
	buf := make([]byte, 32<<10)

	var offset int64
	lr := io.LimitReader(r, int64(blocksize)).(*io.LimitedReader)
	for {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		lr.N = int64(blocksize)
		n, err := io.CopyBuffer(multiHf, lr, buf)
		if err != nil {
			return nil, err
		}

		if n == 0 {
			break
		}

		counter.Update(n)

		// Carve out a hash-sized chunk of "hashes" to store the hash for this
		// block.
		hashes = hf.Sum(hashes)
		thisHash, hashes = hashes[:hashLength], hashes[hashLength:]

		b := protocol.BlockInfo{
			Size:     int(n),
			Offset:   offset,
			Hash:     thisHash,
			WeakHash: weakHf.Sum32(),
		}

		blocks = append(blocks, b)
		offset += n

		hf.Reset()
		weakHf.Reset()
	}

	if len(blocks) == 0 {
		// Empty file
		blocks = append(blocks, protocol.BlockInfo{
			Offset: 0,
			Size:   0,
			Hash:   SHA256OfNothing,
		})
	}

	return blocks, nil
}

// Validate quickly validates buf against the 32-bit weakHash, if not zero,
// else against the cryptohash hash, if len(hash)>0. It is satisfied if
// either hash matches or neither hash is given.
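//
// A typical call compares a buffer against a known block; "block" below
// stands for whatever protocol.BlockInfo is at hand:
//
//	if !Validate(buf, block.Hash, block.WeakHash) {
//		// buf does not match the expected block
//	}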
func Validate(buf, hash []byte, weakHash uint32) bool {
	if weakHash != 0 && adler32.Checksum(buf) == weakHash {
		return true
	}

	if len(hash) > 0 {
		hbuf := sha256.Sum256(buf)
		return bytes.Equal(hbuf[:], hash)
	}

	return true
}

// noopHash is the stand-in hash.Hash32 used when weak hashing is disabled.
type noopHash struct{}

func (noopHash) Sum32() uint32             { return 0 }
func (noopHash) BlockSize() int            { return 0 }
func (noopHash) Size() int                 { return 0 }
func (noopHash) Reset()                    {}
func (noopHash) Sum([]byte) []byte         { return nil }
func (noopHash) Write([]byte) (int, error) { return 0, nil }

// noopCounter is used when no Counter is passed to Blocks.
type noopCounter struct{}

func (*noopCounter) Update(_ int64) {}