2
2
mirror of https://github.com/octoleo/restic.git synced 2025-01-22 22:58:26 +00:00

Chunker: remove pool, buf and make bufsize an option

This commit is contained in:
Alexander Neumann 2015-02-08 19:32:12 +01:00
parent 8dc5c2296a
commit a5c33d80d8
2 changed files with 54 additions and 56 deletions

View File

@ -9,10 +9,11 @@ const (
KiB = 1024
MiB = 1024 * KiB
// randomly generated irreducible polynomial of degree 53 in Z_2[X]
// Polynomial is a randomly generated irreducible polynomial of degree 53
// in Z_2[X]. All rabin fingerprints are calculated with this polynomial.
Polynomial = 0x3DA3358B4DC173
// use a sliding window of 64 byte.
// WindowSize is the size of the sliding window.
WindowSize = 64
// aim to create chunks of 2^20 bytes (20 bits), or about 1MiB, on average.
@ -30,15 +31,6 @@ var (
once sync.Once
mod_table [256]uint64
out_table [256]uint64
chunkerPool = sync.Pool{
New: func() interface{} {
return &Chunker{
window: make([]byte, WindowSize),
buf: make([]byte, MaxSize),
}
},
}
)
// A chunk is one content-dependent chunk of bytes whose end was cut when the
@ -72,22 +64,19 @@ type Chunker struct {
}
// New returns a new Chunker that reads data from rd, using an internal read
// buffer of bufsize bytes.
func New(rd io.Reader) *Chunker {
c := chunkerPool.Get().(*Chunker)
c.rd = rd
func New(rd io.Reader, bufsize int) *Chunker {
once.Do(fill_tables)
once.Do(c.fill_tables)
c := &Chunker{
window: make([]byte, WindowSize),
buf: make([]byte, bufsize),
rd: rd,
}
c.reset()
return c
}
// Free returns this chunker to the allocation pool. The reference to the
// reader is dropped first, so the pooled chunker does not keep it reachable.
func (c *Chunker) Free() {
c.rd = nil
chunkerPool.Put(c)
}
func (c *Chunker) reset() {
for i := 0; i < WindowSize; i++ {
c.window[i] = 0
@ -103,7 +92,7 @@ func (c *Chunker) reset() {
}
// Calculate out_table and mod_table for optimization. Must be called only once.
func (c *Chunker) fill_tables() {
func fill_tables() {
// calculate table for sliding out bytes. The byte to slide out is used as
// the index for the table, the value contains the following:
// out_table[b] = Hash(b || 0 || ... || 0)
@ -139,13 +128,11 @@ func (c *Chunker) fill_tables() {
}
}
// Next returns the next chunk of data. If an error occurs while reading,
// the error is returned with a nil chunk. The state of the current chunk
// is undefined. When the last chunk has been returned, all subsequent
// calls yield a nil chunk and an io.EOF error.
func (c *Chunker) Next(dst []byte) (*Chunk, error) {
dst = dst[:0]
// Next returns the position and length of the next chunk of data. If an error
// occurs while reading, the error is returned with a nil chunk and the state
// of the current chunk is undefined. When the last chunk has been returned,
// all subsequent calls yield a nil chunk and an io.EOF error.
func (c *Chunker) Next() (*Chunk, error) {
for {
if c.bpos >= c.bmax {
n, err := io.ReadFull(c.rd, c.buf)
@ -168,7 +155,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
Start: c.start,
Length: c.count,
Cut: c.digest,
Data: dst,
}, nil
}
}
@ -186,7 +172,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
n := c.bmax - c.bpos
if c.pre > n {
c.pre -= n
dst = append(dst, c.buf[c.bpos:c.bmax]...)
c.count += n
c.pos += n
@ -194,7 +179,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
continue
}
dst = append(dst, c.buf[c.bpos:c.bpos+c.pre]...)
c.bpos += c.pre
c.count += c.pre
c.pos += c.pre
@ -216,7 +200,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
c.digest ^= mod_table[index]
if (c.count+i+1 >= MinSize && (c.digest&splitmask) == 0) || c.count+i+1 >= MaxSize {
dst = append(dst, c.buf[c.bpos:c.bpos+i+1]...)
c.count += i + 1
c.pos += i + 1
c.bpos += i + 1
@ -225,7 +208,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
Start: c.start,
Length: c.count,
Cut: c.digest,
Data: dst,
}
// keep position
@ -240,9 +222,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
}
steps := c.bmax - c.bpos
if steps > 0 {
dst = append(dst, c.buf[c.bpos:c.bpos+steps]...)
}
c.count += steps
c.pos += steps
c.bpos = c.bmax

View File

@ -2,13 +2,18 @@ package chunker_test
import (
"bytes"
"flag"
"io"
"math/rand"
"os"
"testing"
"github.com/restic/restic/chunker"
)
var benchmarkFile = flag.String("bench.file", "", "read from this file for benchmark")
var testBufSize = flag.Int("test.bufsize", 256*1024, "use this buffer size for benchmark")
type chunk struct {
Length int
CutFP uint64
@ -55,9 +60,8 @@ var chunks2 = []chunk{
}
func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
buf := make([]byte, chunker.MaxSize)
for i, chunk := range chunks {
c, err := chnker.Next(buf)
c, err := chnker.Next()
if err != nil {
t.Fatalf("Error returned with chunk %d: %v", i, err)
@ -73,11 +77,6 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
i, chunk.Length, c.Length)
}
if len(c.Data) != chunk.Length {
t.Fatalf("Data length for chunk %d does not match: expected %d, got %d",
i, chunk.Length, len(c.Data))
}
if c.Cut != chunk.CutFP {
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
i, len(chunks)-1, chunk.CutFP, c.Cut)
@ -85,7 +84,7 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
}
}
c, err := chnker.Next(buf)
c, err := chnker.Next()
if c != nil {
t.Fatal("additional non-nil chunk returned")
@ -114,32 +113,51 @@ func get_random(seed, count int) []byte {
func TestChunker(t *testing.T) {
// setup data source
buf := get_random(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf))
ch := chunker.New(bytes.NewReader(buf), *testBufSize)
test_with_data(t, ch, chunks1)
ch.Free()
// setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
ch = chunker.New(bytes.NewReader(buf))
ch = chunker.New(bytes.NewReader(buf), *testBufSize)
test_with_data(t, ch, chunks2)
ch.Free()
}
func TestChunkerReuse(t *testing.T) {
// test multiple uses of the same chunker
for i := 0; i < 4; i++ {
buf := get_random(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf))
ch := chunker.New(bytes.NewReader(buf), *testBufSize)
test_with_data(t, ch, chunks1)
ch.Free()
}
}
func BenchmarkChunker(b *testing.B) {
size := 10 * 1024 * 1024
buf := get_random(23, size)
dst := make([]byte, chunker.MaxSize)
var (
rd io.ReadSeeker
size int
)
b.Logf("using bufsize %v", *testBufSize)
if *benchmarkFile != "" {
b.Logf("using file %q for benchmark", *benchmarkFile)
f, err := os.Open(*benchmarkFile)
if err != nil {
b.Fatalf("open(%q): %v", *benchmarkFile, err)
}
fi, err := f.Stat()
if err != nil {
b.Fatalf("lstat(%q): %v", *benchmarkFile, err)
}
size = int(fi.Size())
rd = f
} else {
size = 10 * 1024 * 1024
rd = bytes.NewReader(get_random(23, size))
}
b.ResetTimer()
b.SetBytes(int64(size))
@ -148,10 +166,11 @@ func BenchmarkChunker(b *testing.B) {
for i := 0; i < b.N; i++ {
chunks = 0
ch := chunker.New(bytes.NewReader(buf))
rd.Seek(0, 0)
ch := chunker.New(rd, *testBufSize)
for {
_, err := ch.Next(dst)
_, err := ch.Next()
if err == io.EOF {
break