mirror of
https://github.com/octoleo/syncthing.git
synced 2024-11-09 14:50:56 +00:00
Reduce allocations in HashFile
By using copyBuffer we avoid a buffer allocation for each block we hash, and by allocating space for the hashes up front we get one large backing array instead of a small one for each block. For a 17 MiB file this makes quite a difference in the amount of memory allocated:

benchmark               old ns/op     new ns/op     delta
BenchmarkHashFile-8     102045110     100459158     -1.55%

benchmark               old allocs    new allocs    delta
BenchmarkHashFile-8     415           144           -65.30%

benchmark               old bytes     new bytes     delta
BenchmarkHashFile-8     4504296       48104         -98.93%
This commit is contained in:
parent
1efd8d6c75
commit
dc32f7f0a3
@ -20,15 +20,27 @@ var SHA256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x
|
||||
|
||||
// Blocks returns the blockwise hash of the reader.
|
||||
func Blocks(r io.Reader, blocksize int, sizehint int64, counter *int64) ([]protocol.BlockInfo, error) {
|
||||
var blocks []protocol.BlockInfo
|
||||
if sizehint > 0 {
|
||||
blocks = make([]protocol.BlockInfo, 0, int(sizehint/int64(blocksize)))
|
||||
}
|
||||
var offset int64
|
||||
hf := sha256.New()
|
||||
hashLength := hf.Size()
|
||||
|
||||
var blocks []protocol.BlockInfo
|
||||
var hashes, thisHash []byte
|
||||
|
||||
if sizehint > 0 {
|
||||
// Allocate contiguous blocks for the BlockInfo structures and their
|
||||
// hashes once and for all.
|
||||
numBlocks := int(sizehint / int64(blocksize))
|
||||
blocks = make([]protocol.BlockInfo, 0, numBlocks)
|
||||
hashes = make([]byte, 0, hashLength*numBlocks)
|
||||
}
|
||||
|
||||
// A 32k buffer is used for copying into the hash function.
|
||||
buf := make([]byte, 32<<10)
|
||||
|
||||
var offset int64
|
||||
for {
|
||||
lr := &io.LimitedReader{R: r, N: int64(blocksize)}
|
||||
n, err := io.Copy(hf, lr)
|
||||
lr := io.LimitReader(r, int64(blocksize))
|
||||
n, err := copyBuffer(hf, lr, buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -41,11 +53,17 @@ func Blocks(r io.Reader, blocksize int, sizehint int64, counter *int64) ([]proto
|
||||
atomic.AddInt64(counter, int64(n))
|
||||
}
|
||||
|
||||
// Carve out a hash-sized chunk of "hashes" to store the hash for this
|
||||
// block.
|
||||
hashes = hf.Sum(hashes)
|
||||
thisHash, hashes = hashes[:hashLength], hashes[hashLength:]
|
||||
|
||||
b := protocol.BlockInfo{
|
||||
Size: int32(n),
|
||||
Offset: offset,
|
||||
Hash: hf.Sum(nil),
|
||||
Hash: thisHash,
|
||||
}
|
||||
|
||||
blocks = append(blocks, b)
|
||||
offset += int64(n)
|
||||
|
||||
@ -158,3 +176,48 @@ func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// This is a copy & paste of io.copyBuffer from the Go 1.5 standard library,
|
||||
// as we want this but also want to build with Go 1.3+.
|
||||
|
||||
// copyBuffer copies from src to dst until src reports io.EOF or an error
// occurs, staging the data through buf. It returns the number of bytes
// written to dst and the first error encountered, if any; reaching io.EOF on
// src is not reported as an error.
//
// If src implements io.WriterTo or dst implements io.ReaderFrom, the copy is
// delegated to that method and buf is not used. Otherwise, if buf is nil or
// has zero length, a 32 KiB buffer is allocated. Unlike the Go 1.5 standard
// library version this is derived from, a non-nil zero-length buf is also
// replaced: reading into an empty slice yields no data, so the loop below
// could otherwise spin without ever making progress.
func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
	// If the reader has a WriteTo method, use it to do the copy.
	// Avoids an allocation and a copy.
	if wt, ok := src.(io.WriterTo); ok {
		return wt.WriteTo(dst)
	}
	// Similarly, if the writer has a ReadFrom method, use it to do the copy.
	if rt, ok := dst.(io.ReaderFrom); ok {
		return rt.ReadFrom(src)
	}
	// len() is zero for both a nil and an empty slice, so this covers the
	// "no usable buffer supplied" case in one check.
	if len(buf) == 0 {
		buf = make([]byte, 32*1024)
	}
	for {
		nr, er := src.Read(buf)
		if nr > 0 {
			// Process the bytes that were read before looking at er: a
			// Read may return data together with an error.
			nw, ew := dst.Write(buf[0:nr])
			if nw > 0 {
				written += int64(nw)
			}
			if ew != nil {
				err = ew
				break
			}
			if nr != nw {
				// The writer accepted fewer bytes than offered without
				// reporting an error.
				err = io.ErrShortWrite
				break
			}
		}
		if er == io.EOF {
			// End of input is the normal way out; err stays nil.
			break
		}
		if er != nil {
			err = er
			break
		}
	}
	return written, err
}
|
||||
|
Loading…
Reference in New Issue
Block a user