2
2
mirror of https://github.com/octoleo/restic.git synced 2024-11-30 00:33:57 +00:00

chunker: Cache computations

This commit is contained in:
Alexander Neumann 2015-04-05 22:46:11 +02:00
parent 367cc75970
commit 25e3ac40ee
2 changed files with 60 additions and 18 deletions

View File

@ -4,6 +4,7 @@ import (
"errors" "errors"
"hash" "hash"
"io" "io"
"sync"
) )
const ( const (
@ -23,6 +24,21 @@ const (
splitmask = (1 << AverageBits) - 1 splitmask = (1 << AverageBits) - 1
) )
// tables bundles the two precomputed lookup tables that belong to one
// polynomial. Each table has 256 entries.
type tables struct {
	// out is the table for sliding bytes out of the window, mod is the table
	// for reduction modulo the polynomial.
	out, mod [256]uint64
}
// cache holds the precomputed lookup tables, keyed by polynomial, so that
// they only need to be calculated once per polynomial. The embedded mutex
// guards entries; the tables themselves are read-only once they have been
// filled.
//
// The map is allocated as part of the declaration rather than in an init
// function: package-level variables are initialized before any init function
// runs, so the map is never observed as nil by code that executes during
// package initialization.
var cache = struct {
	entries map[Pol]*tables
	sync.Mutex
}{
	entries: make(map[Pol]*tables),
}
// A chunk is one content-dependent chunk of bytes whose end was cut when the // A chunk is one content-dependent chunk of bytes whose end was cut when the
// Rabin Fingerprint had the value stored in Cut. // Rabin Fingerprint had the value stored in Cut.
type Chunk struct { type Chunk struct {
@ -40,8 +56,7 @@ func (c Chunk) Reader(r io.ReaderAt) io.Reader {
type Chunker struct { type Chunker struct {
pol Pol pol Pol
pol_shift uint pol_shift uint
mod_table [256]uint64 tables *tables
out_table [256]uint64
rd io.Reader rd io.Reader
closed bool closed bool
@ -66,18 +81,15 @@ type Chunker struct {
// New returns a new Chunker based on polynomial p that reads from data from rd // New returns a new Chunker based on polynomial p that reads from data from rd
// with bufsize and pass all data to hash along the way. // with bufsize and pass all data to hash along the way.
func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) (*Chunker, error) { func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) (*Chunker, error) {
// test irreducibility of p again
if !p.Irreducible() {
return nil, errors.New("invalid polynomial")
}
c := &Chunker{ c := &Chunker{
pol: p, pol: p,
pol_shift: uint(p.Deg() - 8), pol_shift: uint(p.Deg() - 8),
buf: make([]byte, bufsize), buf: make([]byte, bufsize),
h: hash, h: hash,
} }
c.fill_tables() if err := c.fill_tables(); err != nil {
return nil, err
}
c.Reset(rd) c.Reset(rd)
return c, nil return c, nil
@ -107,8 +119,26 @@ func (c *Chunker) Reset(rd io.Reader) {
c.pre = MinSize - WindowSize c.pre = MinSize - WindowSize
} }
// Calculate out_table and mod_table for optimization. Must be called only once. // Calculate out_table and mod_table for optimization. Must be called only
func (c *Chunker) fill_tables() { // once. This implementation uses a cache in the global variable cache.
func (c *Chunker) fill_tables() error {
// test if the tables are cached for this polynomial
cache.Lock()
defer cache.Unlock()
if t, ok := cache.entries[c.pol]; ok {
c.tables = t
return nil
}
// else create a new entry
c.tables = &tables{}
cache.entries[c.pol] = c.tables
// test irreducibility of p
if !c.pol.Irreducible() {
return errors.New("invalid polynomial")
}
// calculate table for sliding out bytes. The byte to slide out is used as // calculate table for sliding out bytes. The byte to slide out is used as
// the index for the table, the value contains the following: // the index for the table, the value contains the following:
// out_table[b] = Hash(b || 0 || ... || 0) // out_table[b] = Hash(b || 0 || ... || 0)
@ -127,7 +157,7 @@ func (c *Chunker) fill_tables() {
for i := 0; i < WindowSize-1; i++ { for i := 0; i < WindowSize-1; i++ {
hash = append_byte(hash, 0, uint64(c.pol)) hash = append_byte(hash, 0, uint64(c.pol))
} }
c.out_table[b] = hash c.tables.out[b] = hash
} }
// calculate table for reduction mod Polynomial // calculate table for reduction mod Polynomial
@ -140,8 +170,10 @@ func (c *Chunker) fill_tables() {
// two parts: Part A contains the result of the modulus operation, part // two parts: Part A contains the result of the modulus operation, part
// B is used to cancel out the 8 top bits so that one XOR operation is // B is used to cancel out the 8 top bits so that one XOR operation is
// enough to reduce modulo Polynomial // enough to reduce modulo Polynomial
c.mod_table[b] = mod(uint64(b)<<uint(k), uint64(c.pol)) | (uint64(b) << uint(k)) c.tables.mod[b] = mod(uint64(b)<<uint(k), uint64(c.pol)) | (uint64(b) << uint(k))
} }
return nil
} }
// Next returns the position and length of the next chunk of data. If an error // Next returns the position and length of the next chunk of data. If an error
@ -211,7 +243,7 @@ func (c *Chunker) Next() (*Chunk, error) {
// inline c.slide(b) and append(b) to increase performance // inline c.slide(b) and append(b) to increase performance
out := c.window[c.wpos] out := c.window[c.wpos]
c.window[c.wpos] = b c.window[c.wpos] = b
c.digest ^= c.out_table[out] c.digest ^= c.tables.out[out]
c.wpos = (c.wpos + 1) % WindowSize c.wpos = (c.wpos + 1) % WindowSize
// c.append(b) // c.append(b)
@ -219,7 +251,7 @@ func (c *Chunker) Next() (*Chunk, error) {
c.digest <<= 8 c.digest <<= 8
c.digest |= uint64(b) c.digest |= uint64(b)
c.digest ^= c.mod_table[index] c.digest ^= c.tables.mod[index]
// end inline // end inline
add++ add++
@ -289,13 +321,13 @@ func (c *Chunker) append(b byte) {
c.digest <<= 8 c.digest <<= 8
c.digest |= uint64(b) c.digest |= uint64(b)
c.digest ^= c.mod_table[index] c.digest ^= c.tables.mod[index]
} }
func (c *Chunker) slide(b byte) { func (c *Chunker) slide(b byte) {
out := c.window[c.wpos] out := c.window[c.wpos]
c.window[c.wpos] = b c.window[c.wpos] = b
c.digest ^= c.out_table[out] c.digest ^= c.tables.out[out]
c.wpos = (c.wpos + 1) % WindowSize c.wpos = (c.wpos + 1) % WindowSize
c.append(b) c.append(b)

View File

@ -289,10 +289,8 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
rd = bytes.NewReader(get_random(23, size)) rd = bytes.NewReader(get_random(23, size))
} }
t1 := time.Now()
ch, err := chunker.New(rd, testPol, *testBufSize, hash) ch, err := chunker.New(rd, testPol, *testBufSize, hash)
ok(b, err) ok(b, err)
b.Logf("generating tables took %v", time.Since(t1))
b.ResetTimer() b.ResetTimer()
b.SetBytes(int64(size)) b.SetBytes(int64(size))
@ -333,3 +331,15 @@ func BenchmarkChunkerWithMD5(b *testing.B) {
func BenchmarkChunker(b *testing.B) { func BenchmarkChunker(b *testing.B) {
benchmarkChunker(b, nil) benchmarkChunker(b, nil)
} }
// BenchmarkNewChunker measures how long chunker.New takes when it is called
// repeatedly with one fixed random polynomial and an empty input buffer. The
// polynomial is generated before the timer is reset, so only the New calls
// are timed.
func BenchmarkNewChunker(b *testing.B) {
	pol, polErr := chunker.RandomPolynomial()
	ok(b, polErr)

	b.ResetTimer()

	for remaining := b.N; remaining > 0; remaining-- {
		src := bytes.NewBuffer(nil)
		_, newErr := chunker.New(src, pol, *testBufSize, nil)
		ok(b, newErr)
	}
}