Reduce allocations in HashFile
By using copyBuffer we avoid a buffer allocation for each block we hash, and by
allocating space for the hashes up front we get one large backing array instead
of a small one for each block. For a 17 MiB file this makes quite a difference
in the amount of memory allocated:

benchmark              old ns/op     new ns/op     delta
BenchmarkHashFile-8    102045110     100459158     -1.55%

benchmark              old allocs    new allocs    delta
BenchmarkHashFile-8    415           144           -65.30%

benchmark              old bytes     new bytes     delta
BenchmarkHashFile-8    4504296       48104         -98.93%
parent 1efd8d6c75
commit dc32f7f0a3
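As a standalone illustration (not part of the commit itself, and using only
crypto/sha256), the sketch below shows the allocation pattern the commit
message describes: rather than letting Sum(nil) allocate a fresh slice for
every block, each hash is appended to one preallocated backing slice and a
hash-sized view is carved out of it per block.

package main

import (
    "crypto/sha256"
    "fmt"
)

func main() {
    // Example input: a handful of "blocks" to hash.
    blocks := [][]byte{
        []byte("first block"),
        []byte("second block"),
        []byte("third block"),
    }

    hf := sha256.New()
    hashLength := hf.Size()

    // One backing array for all hashes, sized up front.
    hashes := make([]byte, 0, hashLength*len(blocks))
    var perBlock [][]byte

    for _, blk := range blocks {
        hf.Write(blk)

        // Append this block's hash to the shared slab, then carve out a
        // hash-sized sub-slice for it and keep the remainder for the next
        // block.
        hashes = hf.Sum(hashes)
        var thisHash []byte
        thisHash, hashes = hashes[:hashLength], hashes[hashLength:]

        perBlock = append(perBlock, thisHash)
        hf.Reset()
    }

    for i, h := range perBlock {
        fmt.Printf("block %d: %x\n", i, h)
    }
}

As long as the slab's capacity is sufficient, no per-block hash slice is
allocated; each thisHash is a view into the shared backing array. This is one
of the two sources of the allocation drop in the benchmark above, the other
being the reused copy buffer shown in the diff below.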
@@ -20,15 +20,27 @@ var SHA256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x
 
 // Blocks returns the blockwise hash of the reader.
 func Blocks(r io.Reader, blocksize int, sizehint int64, counter *int64) ([]protocol.BlockInfo, error) {
-    var blocks []protocol.BlockInfo
-    if sizehint > 0 {
-        blocks = make([]protocol.BlockInfo, 0, int(sizehint/int64(blocksize)))
-    }
-    var offset int64
     hf := sha256.New()
+    hashLength := hf.Size()
+
+    var blocks []protocol.BlockInfo
+    var hashes, thisHash []byte
+
+    if sizehint > 0 {
+        // Allocate contiguous blocks for the BlockInfo structures and their
+        // hashes once and for all.
+        numBlocks := int(sizehint / int64(blocksize))
+        blocks = make([]protocol.BlockInfo, 0, numBlocks)
+        hashes = make([]byte, 0, hashLength*numBlocks)
+    }
+
+    // A 32k buffer is used for copying into the hash function.
+    buf := make([]byte, 32<<10)
+
+    var offset int64
     for {
-        lr := &io.LimitedReader{R: r, N: int64(blocksize)}
-        n, err := io.Copy(hf, lr)
+        lr := io.LimitReader(r, int64(blocksize))
+        n, err := copyBuffer(hf, lr, buf)
         if err != nil {
             return nil, err
         }
@@ -41,11 +53,17 @@ func Blocks(r io.Reader, blocksize int, sizehint int64, counter *int64) ([]proto
             atomic.AddInt64(counter, int64(n))
         }
 
+        // Carve out a hash-sized chunk of "hashes" to store the hash for this
+        // block.
+        hashes = hf.Sum(hashes)
+        thisHash, hashes = hashes[:hashLength], hashes[hashLength:]
+
         b := protocol.BlockInfo{
             Size:   int32(n),
             Offset: offset,
-            Hash:   hf.Sum(nil),
+            Hash:   thisHash,
         }
+
         blocks = append(blocks, b)
         offset += int64(n)
 
@@ -158,3 +176,48 @@ func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
     }
     return true
 }
+
+// This is a copy & paste of io.copyBuffer from the Go 1.5 standard library,
+// as we want this but also want to build with Go 1.3+.
+
+// copyBuffer is the actual implementation of Copy and CopyBuffer.
+// if buf is nil, one is allocated.
+func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
+    // If the reader has a WriteTo method, use it to do the copy.
+    // Avoids an allocation and a copy.
+    if wt, ok := src.(io.WriterTo); ok {
+        return wt.WriteTo(dst)
+    }
+    // Similarly, if the writer has a ReadFrom method, use it to do the copy.
+    if rt, ok := dst.(io.ReaderFrom); ok {
+        return rt.ReadFrom(src)
+    }
+    if buf == nil {
+        buf = make([]byte, 32*1024)
+    }
+    for {
+        nr, er := src.Read(buf)
+        if nr > 0 {
+            nw, ew := dst.Write(buf[0:nr])
+            if nw > 0 {
+                written += int64(nw)
+            }
+            if ew != nil {
+                err = ew
+                break
+            }
+            if nr != nw {
+                err = io.ErrShortWrite
+                break
+            }
+        }
+        if er == io.EOF {
+            break
+        }
+        if er != nil {
+            err = er
+            break
+        }
+    }
+    return written, err
+}
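On Go 1.5 and later the same behaviour is available directly as io.CopyBuffer,
so the vendored helper above is only needed while the project still builds with
Go 1.3 and 1.4. A minimal usage sketch (illustration only, assuming Go 1.5+;
the sample data and block size are made up), mirroring the per-block copy loop
in the diff with a single reused buffer:

package main

import (
    "bytes"
    "crypto/sha256"
    "fmt"
    "io"
)

func main() {
    // Sample data standing in for a file.
    data := bytes.Repeat([]byte("x"), 128<<10)
    blocksize := int64(32 << 10)

    hf := sha256.New()
    buf := make([]byte, 32<<10) // reused for every block

    r := bytes.NewReader(data)
    for {
        // io.CopyBuffer (Go 1.5+) behaves like the vendored copyBuffer above:
        // it reuses the supplied buffer instead of allocating a new one per
        // call, which plain io.Copy would do here.
        n, err := io.CopyBuffer(hf, io.LimitReader(r, blocksize), buf)
        if err != nil {
            panic(err)
        }
        if n == 0 {
            break // reader exhausted
        }
        fmt.Printf("hashed block of %d bytes: %x\n", n, hf.Sum(nil))
        hf.Reset()
    }
}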