diff --git a/chunker/chunker.go b/chunker/chunker.go index f8980bd90..b06dec69a 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -67,16 +67,16 @@ type Chunker struct { digest uint64 h hash.Hash - hfn func() hash.Hash } -// New returns a new Chunker that reads from data from rd. -func New(rd io.Reader, bufsize int, hashfn func() hash.Hash) *Chunker { +// New returns a new Chunker that reads data from rd using a buffer of bufsize +// bytes and passes all data to hash (which may be nil) along the way. +func New(rd io.Reader, bufsize int, hash hash.Hash) *Chunker { once.Do(fill_tables) c := &Chunker{ buf: make([]byte, bufsize), - hfn: hashfn, + h: hash, } c.Reset(rd) @@ -99,7 +99,9 @@ func (c *Chunker) Reset(rd io.Reader) { c.count = 0 c.slide(1) - c.resetHash() + if c.h != nil { + c.h.Reset() + } // do not start a new chunk unless at least MinSize bytes have been read c.pre = MinSize - WindowSize @@ -232,7 +234,9 @@ func (c *Chunker) Next() (*Chunk, error) { Digest: c.hashDigest(), } - c.resetHash() + if c.h != nil { + c.h.Reset() + } // reset chunker, but keep position pos := c.pos @@ -255,12 +259,6 @@ func (c *Chunker) Next() (*Chunk, error) { } } -func (c *Chunker) resetHash() { - if c.hfn != nil { - c.h = c.hfn() - } -} - func (c *Chunker) updateHash(data []byte) { if c.h != nil { // the hashes from crypto/sha* do not return an error diff --git a/chunker/chunker_test.go b/chunker/chunker_test.go index 5c740e0ad..710d49588 100644 --- a/chunker/chunker_test.go +++ b/chunker/chunker_test.go @@ -8,6 +8,7 @@ import ( "flag" "hash" "io" + "io/ioutil" "math/rand" "os" "testing" @@ -104,7 +105,7 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) [ i, len(chunks)-1, chunk.CutFP, c.Cut) } - if !bytes.Equal(c.Digest, chunk.Digest) { + if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) { t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x", i, len(chunks)-1, chunk.Digest, c.Digest) } @@ -145,7 +146,7 @@ func get_random(seed, 
count int) []byte { func TestChunker(t *testing.T) { // setup data source buf := get_random(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), *testBufSize, sha256.New) + ch := chunker.New(bytes.NewReader(buf), *testBufSize, sha256.New()) chunks := test_with_data(t, ch, chunks1) // test reader @@ -172,14 +173,52 @@ func TestChunker(t *testing.T) { // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), *testBufSize, sha256.New) + ch = chunker.New(bytes.NewReader(buf), *testBufSize, sha256.New()) + + test_with_data(t, ch, chunks2) +} + +func TestChunkerWithoutHash(t *testing.T) { + // setup data source + buf := get_random(23, 32*1024*1024) + ch := chunker.New(bytes.NewReader(buf), *testBufSize, nil) + chunks := test_with_data(t, ch, chunks1) + + // read every chunk back through its reader and compare with the source data + for i, c := range chunks { + rd := c.Reader(bytes.NewReader(buf)) + + buf2, err := ioutil.ReadAll(rd) + if err != nil { + t.Fatalf("ioutil.ReadAll(): %v", err) + } + + if uint(len(buf2)) != chunks1[i].Length { + t.Fatalf("reader returned wrong number of bytes: expected %d, got %d", + chunks1[i].Length, uint(len(buf2))) + } + + if len(buf2) != int(chunks[i].Length) { + t.Fatalf("wrong number of bytes returned: expected %02x, got %02x", + chunks[i].Length, len(buf2)) + } + + if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) { + t.Fatalf("invalid data for chunk returned: expected %02x, got %02x", + buf[c.Start:c.Start+c.Length], buf2) + } + } + + // repeat the chunk test on all-zero input, still without a hash + buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) + ch = chunker.New(bytes.NewReader(buf), *testBufSize, nil) test_with_data(t, ch, chunks2) } func TestChunkerReuse(t *testing.T) { // test multiple uses of the same chunker - ch := chunker.New(nil, *testBufSize, sha256.New) + ch := chunker.New(nil, *testBufSize, sha256.New()) buf := get_random(23, 32*1024*1024) for i := 0; i < 4; i++ { @@ -188,7 +227,7 @@ func TestChunkerReuse(t 
*testing.T) { } } -func benchmarkChunker(b *testing.B, hash func() hash.Hash) { +func benchmarkChunker(b *testing.B, hash hash.Hash) { var ( rd io.ReadSeeker size int @@ -244,11 +283,11 @@ func benchmarkChunker(b *testing.B, hash func() hash.Hash) { } func BenchmarkChunkerWithSHA256(b *testing.B) { - benchmarkChunker(b, sha256.New) + benchmarkChunker(b, sha256.New()) } func BenchmarkChunkerWithMD5(b *testing.B) { - benchmarkChunker(b, md5.New) + benchmarkChunker(b, md5.New()) } func BenchmarkChunker(b *testing.B) { diff --git a/pools.go b/pools.go index 782c6bacf..1bf7a0cc8 100644 --- a/pools.go +++ b/pools.go @@ -99,7 +99,7 @@ func newChunker() interface{} { chunkStats.new++ // create a new chunker with a nil reader - return chunker.New(nil, chunkerBufSize, sha256.New) + return chunker.New(nil, chunkerBufSize, sha256.New()) } func GetChunkBuf(s string) []byte {