Reduce allocations while hash scanning

This commit is contained in:
Jakob Borg 2014-08-12 13:52:36 +02:00
parent 939dd5cb31
commit f51b775698
4 changed files with 20 additions and 7 deletions

View File

@@ -728,7 +728,7 @@ func (p *puller) closeFile(f protocol.FileInfo) {
 		l.Infof("open: error: %q / %q: %v", p.repoCfg.ID, f.Name, err)
 		return
 	}
-	hb, _ := scanner.Blocks(fd, scanner.StandardBlockSize)
+	hb, _ := scanner.Blocks(fd, scanner.StandardBlockSize, f.Size())
 	fd.Close()
 	if l0, l1 := len(hb), len(f.Blocks); l0 != l1 {

View File

@@ -49,7 +49,15 @@ func hashFile(dir string, blockSize int, outbox, inbox chan protocol.FileInfo) {
 			continue
 		}
-		blocks, err := Blocks(fd, blockSize)
+		fi, err := fd.Stat()
+		if err != nil {
+			fd.Close()
+			if debug {
+				l.Debugln("stat:", err)
+			}
+			continue
+		}
+		blocks, err := Blocks(fd, blockSize, fi.Size())
 		fd.Close()
 		if err != nil {

View File

@@ -17,12 +17,15 @@ const StandardBlockSize = 128 * 1024
 var sha256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}
 // Blocks returns the blockwise hash of the reader.
-func Blocks(r io.Reader, blocksize int) ([]protocol.BlockInfo, error) {
+func Blocks(r io.Reader, blocksize int, sizehint int64) ([]protocol.BlockInfo, error) {
 	var blocks []protocol.BlockInfo
+	if sizehint > 0 {
+		blocks = make([]protocol.BlockInfo, 0, int(sizehint/int64(blocksize)))
+	}
 	var offset int64
+	hf := sha256.New()
 	for {
 		lr := &io.LimitedReader{R: r, N: int64(blocksize)}
-		hf := sha256.New()
 		n, err := io.Copy(hf, lr)
 		if err != nil {
 			return nil, err
@@ -39,6 +42,8 @@ func Blocks(r io.Reader, blocksize int) ([]protocol.BlockInfo, error) {
 		}
 		blocks = append(blocks, b)
 		offset += int64(n)
+		hf.Reset()
 	}
 	if len(blocks) == 0 {

View File

@@ -49,7 +49,7 @@ var blocksTestData = []struct {
 func TestBlocks(t *testing.T) {
 	for _, test := range blocksTestData {
 		buf := bytes.NewBuffer(test.data)
-		blocks, err := Blocks(buf, test.blocksize)
+		blocks, err := Blocks(buf, test.blocksize, 0)
 		if err != nil {
 			t.Fatal(err)
@@ -103,8 +103,8 @@ var diffTestData = []struct {
 func TestDiff(t *testing.T) {
 	for i, test := range diffTestData {
-		a, _ := Blocks(bytes.NewBufferString(test.a), test.s)
-		b, _ := Blocks(bytes.NewBufferString(test.b), test.s)
+		a, _ := Blocks(bytes.NewBufferString(test.a), test.s, 0)
+		b, _ := Blocks(bytes.NewBufferString(test.b), test.s, 0)
 		_, d := BlockDiff(a, b)
 		if len(d) != len(test.d) {
 			t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))