Reduce allocations while hash scanning

This commit is contained in:
Jakob Borg 2014-08-12 13:52:36 +02:00
parent 939dd5cb31
commit f51b775698
4 changed files with 20 additions and 7 deletions

View File

@ -728,7 +728,7 @@ func (p *puller) closeFile(f protocol.FileInfo) {
l.Infof("open: error: %q / %q: %v", p.repoCfg.ID, f.Name, err)
return
}
hb, _ := scanner.Blocks(fd, scanner.StandardBlockSize)
hb, _ := scanner.Blocks(fd, scanner.StandardBlockSize, f.Size())
fd.Close()
if l0, l1 := len(hb), len(f.Blocks); l0 != l1 {

View File

@ -49,7 +49,15 @@ func hashFile(dir string, blockSize int, outbox, inbox chan protocol.FileInfo) {
continue
}
blocks, err := Blocks(fd, blockSize)
fi, err := fd.Stat()
if err != nil {
fd.Close()
if debug {
l.Debugln("stat:", err)
}
continue
}
blocks, err := Blocks(fd, blockSize, fi.Size())
fd.Close()
if err != nil {

View File

@ -17,12 +17,15 @@ const StandardBlockSize = 128 * 1024
var sha256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}
// Blocks returns the blockwise hash of the reader. A positive sizehint (the
// expected input size in bytes) is used to preallocate the result slice.
func Blocks(r io.Reader, blocksize int) ([]protocol.BlockInfo, error) {
func Blocks(r io.Reader, blocksize int, sizehint int64) ([]protocol.BlockInfo, error) {
var blocks []protocol.BlockInfo
if sizehint > 0 {
blocks = make([]protocol.BlockInfo, 0, int(sizehint/int64(blocksize)))
}
var offset int64
hf := sha256.New()
for {
lr := &io.LimitedReader{R: r, N: int64(blocksize)}
hf := sha256.New()
n, err := io.Copy(hf, lr)
if err != nil {
return nil, err
@ -39,6 +42,8 @@ func Blocks(r io.Reader, blocksize int) ([]protocol.BlockInfo, error) {
}
blocks = append(blocks, b)
offset += int64(n)
hf.Reset()
}
if len(blocks) == 0 {

View File

@ -49,7 +49,7 @@ var blocksTestData = []struct {
func TestBlocks(t *testing.T) {
for _, test := range blocksTestData {
buf := bytes.NewBuffer(test.data)
blocks, err := Blocks(buf, test.blocksize)
blocks, err := Blocks(buf, test.blocksize, 0)
if err != nil {
t.Fatal(err)
@ -103,8 +103,8 @@ var diffTestData = []struct {
func TestDiff(t *testing.T) {
for i, test := range diffTestData {
a, _ := Blocks(bytes.NewBufferString(test.a), test.s)
b, _ := Blocks(bytes.NewBufferString(test.b), test.s)
a, _ := Blocks(bytes.NewBufferString(test.a), test.s, 0)
b, _ := Blocks(bytes.NewBufferString(test.b), test.s, 0)
_, d := BlockDiff(a, b)
if len(d) != len(test.d) {
t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))