Merge pull request #2410 from calmh/hashalloc

Reduce allocations in HashFile
Audrius Butkevicius 2015-10-27 08:45:38 +00:00
commit 09a555fdd2
3 changed files with 111 additions and 8 deletions

lib/scanner/.gitignore (new file)

@@ -0,0 +1 @@
+_random.data
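(The ignored _random.data is the 17 MiB scratch file that the new BenchmarkHashFile further down generates via initTestFile, so it does not get committed.)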

lib/scanner/blocks.go

@@ -20,15 +20,27 @@ var SHA256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x
 // Blocks returns the blockwise hash of the reader.
 func Blocks(r io.Reader, blocksize int, sizehint int64, counter *int64) ([]protocol.BlockInfo, error) {
-    var blocks []protocol.BlockInfo
-    if sizehint > 0 {
-        blocks = make([]protocol.BlockInfo, 0, int(sizehint/int64(blocksize)))
-    }
-    var offset int64
     hf := sha256.New()
+    hashLength := hf.Size()
+
+    var blocks []protocol.BlockInfo
+    var hashes, thisHash []byte
+
+    if sizehint > 0 {
+        // Allocate contiguous blocks for the BlockInfo structures and their
+        // hashes once and for all.
+        numBlocks := int(sizehint / int64(blocksize))
+        blocks = make([]protocol.BlockInfo, 0, numBlocks)
+        hashes = make([]byte, 0, hashLength*numBlocks)
+    }
+
+    // A 32k buffer is used for copying into the hash function.
+    buf := make([]byte, 32<<10)
+
+    var offset int64
     for {
-        lr := &io.LimitedReader{R: r, N: int64(blocksize)}
-        n, err := io.Copy(hf, lr)
+        lr := io.LimitReader(r, int64(blocksize))
+        n, err := copyBuffer(hf, lr, buf)
         if err != nil {
             return nil, err
         }
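The hunk above trades per-block allocations for a single up-front allocation whenever the caller provides a size hint: one slice for all the BlockInfo structures and one flat byte slice for all the hashes. Below is a minimal standalone sketch of that preallocation pattern; the blockInfo type, block size and size hint are hypothetical stand-ins, not part of this commit.

    package main

    import (
        "crypto/sha256"
        "fmt"
    )

    // blockInfo stands in for protocol.BlockInfo in this sketch.
    type blockInfo struct {
        Size   int32
        Offset int64
        Hash   []byte
    }

    func main() {
        const blocksize = 128 << 10 // example block size: 128 KiB
        sizehint := int64(1 << 20)  // caller expects roughly 1 MiB of input

        hf := sha256.New()
        hashLength := hf.Size() // 32 bytes for SHA-256

        // One allocation for every BlockInfo and one for every hash,
        // instead of a fresh 32-byte slice per block from hf.Sum(nil).
        numBlocks := int(sizehint / int64(blocksize))
        blocks := make([]blockInfo, 0, numBlocks)
        hashes := make([]byte, 0, hashLength*numBlocks)

        fmt.Println(cap(blocks), cap(hashes)) // prints 8 256 with these example numbers
    }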
@@ -41,11 +53,17 @@ func Blocks(r io.Reader, blocksize int, sizehint int64, counter *int64) ([]proto
             atomic.AddInt64(counter, int64(n))
         }
 
+        // Carve out a hash-sized chunk of "hashes" to store the hash for this
+        // block.
+        hashes = hf.Sum(hashes)
+        thisHash, hashes = hashes[:hashLength], hashes[hashLength:]
+
         b := protocol.BlockInfo{
             Size:   int32(n),
             Offset: offset,
-            Hash:   hf.Sum(nil),
+            Hash:   thisHash,
         }
 
         blocks = append(blocks, b)
         offset += int64(n)
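The carving trick in this hunk relies on hash.Hash.Sum appending the digest to the slice it is given rather than always allocating a new one, so every block's hash ends up as a re-slice of the one preallocated backing array. A small self-contained illustration, using made-up input chunks rather than anything from the commit:

    package main

    import (
        "crypto/sha256"
        "fmt"
    )

    func main() {
        chunks := [][]byte{[]byte("alpha"), []byte("beta"), []byte("gamma")}

        hf := sha256.New()
        hashLength := hf.Size()

        // One backing array holds every digest; Sum appends into it, and the
        // slice is then split into this chunk's hash plus the remainder.
        hashes := make([]byte, 0, hashLength*len(chunks))
        var perChunk [][]byte

        for _, c := range chunks {
            hf.Reset()
            hf.Write(c)

            hashes = hf.Sum(hashes) // appends 32 bytes; capacity already reserved
            var thisHash []byte
            thisHash, hashes = hashes[:hashLength], hashes[hashLength:]
            perChunk = append(perChunk, thisHash)
        }

        for _, h := range perChunk {
            fmt.Printf("%x\n", h[:4]) // first bytes of each digest
        }
    }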
@@ -158,3 +176,48 @@ func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
     }
     return true
 }
+
+// This is a copy & paste of io.copyBuffer from the Go 1.5 standard library,
+// as we want this but also want to build with Go 1.3+.
+
+// copyBuffer is the actual implementation of Copy and CopyBuffer.
+// if buf is nil, one is allocated.
+func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
+    // If the reader has a WriteTo method, use it to do the copy.
+    // Avoids an allocation and a copy.
+    if wt, ok := src.(io.WriterTo); ok {
+        return wt.WriteTo(dst)
+    }
+    // Similarly, if the writer has a ReadFrom method, use it to do the copy.
+    if rt, ok := dst.(io.ReaderFrom); ok {
+        return rt.ReadFrom(src)
+    }
+    if buf == nil {
+        buf = make([]byte, 32*1024)
+    }
+    for {
+        nr, er := src.Read(buf)
+        if nr > 0 {
+            nw, ew := dst.Write(buf[0:nr])
+            if nw > 0 {
+                written += int64(nw)
+            }
+            if ew != nil {
+                err = ew
+                break
+            }
+            if nr != nw {
+                err = io.ErrShortWrite
+                break
+            }
+        }
+        if er == io.EOF {
+            break
+        }
+        if er != nil {
+            err = er
+            break
+        }
+    }
+    return written, err
+}
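The vendored copyBuffer lets the same 32 KiB buffer be reused for every block, where a plain io.Copy would allocate a fresh buffer on each call when neither side offers a WriteTo or ReadFrom fast path. On Go 1.5 and later the standard io.CopyBuffer covers the same ground; the comment in the diff explains that the copy exists only so the package still builds with Go 1.3. A rough usage sketch under that Go 1.5+ assumption, with made-up inputs:

    package main

    import (
        "crypto/sha256"
        "fmt"
        "io"
        "strings"
    )

    func main() {
        inputs := []string{"first chunk", "second chunk"}

        buf := make([]byte, 32<<10) // allocated once, reused for every copy
        hf := sha256.New()

        for _, s := range inputs {
            // Wrapping in a LimitReader hides strings.Reader's WriteTo method,
            // so the copy genuinely goes through buf rather than a fast path.
            lr := io.LimitReader(strings.NewReader(s), int64(len(s)))
            if _, err := io.CopyBuffer(hf, lr, buf); err != nil {
                panic(err)
            }
            fmt.Printf("%x\n", hf.Sum(nil))
            hf.Reset()
        }
    }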

lib/scanner/walk_test.go

@@ -8,13 +8,16 @@ package scanner
 import (
     "bytes"
+    "crypto/rand"
     "fmt"
+    "io"
     "os"
     "path/filepath"
     "reflect"
     "runtime"
     rdebug "runtime/debug"
     "sort"
+    "sync"
     "testing"
 
     "github.com/syncthing/syncthing/lib/ignore"
@@ -372,3 +375,39 @@ func TestSymlinkTypeEqual(t *testing.T) {
         }
     }
 }
+
+var initOnce sync.Once
+
+const (
+    testdataSize = 17 << 20
+    testdataName = "_random.data"
+)
+
+func BenchmarkHashFile(b *testing.B) {
+    initOnce.Do(initTestFile)
+    b.ResetTimer()
+
+    for i := 0; i < b.N; i++ {
+        if _, err := HashFile(testdataName, protocol.BlockSize, testdataSize, nil); err != nil {
+            b.Fatal(err)
+        }
+    }
+
+    b.ReportAllocs()
+}
+
+func initTestFile() {
+    fd, err := os.Create(testdataName)
+    if err != nil {
+        panic(err)
+    }
+
+    lr := io.LimitReader(rand.Reader, testdataSize)
+    if _, err := io.Copy(fd, lr); err != nil {
+        panic(err)
+    }
+
+    if err := fd.Close(); err != nil {
+        panic(err)
+    }
+}
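The benchmark generates the 17 MiB _random.data file once per test binary run, guarded by sync.Once, and then hashes it repeatedly through HashFile. Because it calls b.ReportAllocs, its output includes allocations per operation, which is the figure this change is meant to reduce; it would typically be run with something along the lines of go test -bench HashFile -run XXX in lib/scanner.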