mirror of
https://github.com/octoleo/restic.git
synced 2024-11-30 00:33:57 +00:00
chunker: Cache computations
This commit is contained in:
parent
367cc75970
commit
25e3ac40ee
@ -4,6 +4,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"hash"
|
"hash"
|
||||||
"io"
|
"io"
|
||||||
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -23,6 +24,21 @@ const (
|
|||||||
splitmask = (1 << AverageBits) - 1
|
splitmask = (1 << AverageBits) - 1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type tables struct {
|
||||||
|
out [256]uint64
|
||||||
|
mod [256]uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
// cache precomputed tables, these are read-only anyway
|
||||||
|
var cache struct {
|
||||||
|
entries map[Pol]*tables
|
||||||
|
sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
cache.entries = make(map[Pol]*tables)
|
||||||
|
}
|
||||||
|
|
||||||
// A chunk is one content-dependent chunk of bytes whose end was cut when the
|
// A chunk is one content-dependent chunk of bytes whose end was cut when the
|
||||||
// Rabin Fingerprint had the value stored in Cut.
|
// Rabin Fingerprint had the value stored in Cut.
|
||||||
type Chunk struct {
|
type Chunk struct {
|
||||||
@ -40,8 +56,7 @@ func (c Chunk) Reader(r io.ReaderAt) io.Reader {
|
|||||||
type Chunker struct {
|
type Chunker struct {
|
||||||
pol Pol
|
pol Pol
|
||||||
pol_shift uint
|
pol_shift uint
|
||||||
mod_table [256]uint64
|
tables *tables
|
||||||
out_table [256]uint64
|
|
||||||
|
|
||||||
rd io.Reader
|
rd io.Reader
|
||||||
closed bool
|
closed bool
|
||||||
@ -66,18 +81,15 @@ type Chunker struct {
|
|||||||
// New returns a new Chunker based on polynomial p that reads from data from rd
|
// New returns a new Chunker based on polynomial p that reads from data from rd
|
||||||
// with bufsize and pass all data to hash along the way.
|
// with bufsize and pass all data to hash along the way.
|
||||||
func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) (*Chunker, error) {
|
func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) (*Chunker, error) {
|
||||||
// test irreducibility of p again
|
|
||||||
if !p.Irreducible() {
|
|
||||||
return nil, errors.New("invalid polynomial")
|
|
||||||
}
|
|
||||||
|
|
||||||
c := &Chunker{
|
c := &Chunker{
|
||||||
pol: p,
|
pol: p,
|
||||||
pol_shift: uint(p.Deg() - 8),
|
pol_shift: uint(p.Deg() - 8),
|
||||||
buf: make([]byte, bufsize),
|
buf: make([]byte, bufsize),
|
||||||
h: hash,
|
h: hash,
|
||||||
}
|
}
|
||||||
c.fill_tables()
|
if err := c.fill_tables(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
c.Reset(rd)
|
c.Reset(rd)
|
||||||
|
|
||||||
return c, nil
|
return c, nil
|
||||||
@ -107,8 +119,26 @@ func (c *Chunker) Reset(rd io.Reader) {
|
|||||||
c.pre = MinSize - WindowSize
|
c.pre = MinSize - WindowSize
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate out_table and mod_table for optimization. Must be called only once.
|
// Calculate out_table and mod_table for optimization. Must be called only
|
||||||
func (c *Chunker) fill_tables() {
|
// once. This implementation uses a cache in the global variable cache.
|
||||||
|
func (c *Chunker) fill_tables() error {
|
||||||
|
// test if the tables are cached for this polynomial
|
||||||
|
cache.Lock()
|
||||||
|
defer cache.Unlock()
|
||||||
|
if t, ok := cache.entries[c.pol]; ok {
|
||||||
|
c.tables = t
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// else create a new entry
|
||||||
|
c.tables = &tables{}
|
||||||
|
cache.entries[c.pol] = c.tables
|
||||||
|
|
||||||
|
// test irreducibility of p
|
||||||
|
if !c.pol.Irreducible() {
|
||||||
|
return errors.New("invalid polynomial")
|
||||||
|
}
|
||||||
|
|
||||||
// calculate table for sliding out bytes. The byte to slide out is used as
|
// calculate table for sliding out bytes. The byte to slide out is used as
|
||||||
// the index for the table, the value contains the following:
|
// the index for the table, the value contains the following:
|
||||||
// out_table[b] = Hash(b || 0 || ... || 0)
|
// out_table[b] = Hash(b || 0 || ... || 0)
|
||||||
@ -127,7 +157,7 @@ func (c *Chunker) fill_tables() {
|
|||||||
for i := 0; i < WindowSize-1; i++ {
|
for i := 0; i < WindowSize-1; i++ {
|
||||||
hash = append_byte(hash, 0, uint64(c.pol))
|
hash = append_byte(hash, 0, uint64(c.pol))
|
||||||
}
|
}
|
||||||
c.out_table[b] = hash
|
c.tables.out[b] = hash
|
||||||
}
|
}
|
||||||
|
|
||||||
// calculate table for reduction mod Polynomial
|
// calculate table for reduction mod Polynomial
|
||||||
@ -140,8 +170,10 @@ func (c *Chunker) fill_tables() {
|
|||||||
// two parts: Part A contains the result of the modulus operation, part
|
// two parts: Part A contains the result of the modulus operation, part
|
||||||
// B is used to cancel out the 8 top bits so that one XOR operation is
|
// B is used to cancel out the 8 top bits so that one XOR operation is
|
||||||
// enough to reduce modulo Polynomial
|
// enough to reduce modulo Polynomial
|
||||||
c.mod_table[b] = mod(uint64(b)<<uint(k), uint64(c.pol)) | (uint64(b) << uint(k))
|
c.tables.mod[b] = mod(uint64(b)<<uint(k), uint64(c.pol)) | (uint64(b) << uint(k))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Next returns the position and length of the next chunk of data. If an error
|
// Next returns the position and length of the next chunk of data. If an error
|
||||||
@ -211,7 +243,7 @@ func (c *Chunker) Next() (*Chunk, error) {
|
|||||||
// inline c.slide(b) and append(b) to increase performance
|
// inline c.slide(b) and append(b) to increase performance
|
||||||
out := c.window[c.wpos]
|
out := c.window[c.wpos]
|
||||||
c.window[c.wpos] = b
|
c.window[c.wpos] = b
|
||||||
c.digest ^= c.out_table[out]
|
c.digest ^= c.tables.out[out]
|
||||||
c.wpos = (c.wpos + 1) % WindowSize
|
c.wpos = (c.wpos + 1) % WindowSize
|
||||||
|
|
||||||
// c.append(b)
|
// c.append(b)
|
||||||
@ -219,7 +251,7 @@ func (c *Chunker) Next() (*Chunk, error) {
|
|||||||
c.digest <<= 8
|
c.digest <<= 8
|
||||||
c.digest |= uint64(b)
|
c.digest |= uint64(b)
|
||||||
|
|
||||||
c.digest ^= c.mod_table[index]
|
c.digest ^= c.tables.mod[index]
|
||||||
// end inline
|
// end inline
|
||||||
|
|
||||||
add++
|
add++
|
||||||
@ -289,13 +321,13 @@ func (c *Chunker) append(b byte) {
|
|||||||
c.digest <<= 8
|
c.digest <<= 8
|
||||||
c.digest |= uint64(b)
|
c.digest |= uint64(b)
|
||||||
|
|
||||||
c.digest ^= c.mod_table[index]
|
c.digest ^= c.tables.mod[index]
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Chunker) slide(b byte) {
|
func (c *Chunker) slide(b byte) {
|
||||||
out := c.window[c.wpos]
|
out := c.window[c.wpos]
|
||||||
c.window[c.wpos] = b
|
c.window[c.wpos] = b
|
||||||
c.digest ^= c.out_table[out]
|
c.digest ^= c.tables.out[out]
|
||||||
c.wpos = (c.wpos + 1) % WindowSize
|
c.wpos = (c.wpos + 1) % WindowSize
|
||||||
|
|
||||||
c.append(b)
|
c.append(b)
|
||||||
|
@ -289,10 +289,8 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
|
|||||||
rd = bytes.NewReader(get_random(23, size))
|
rd = bytes.NewReader(get_random(23, size))
|
||||||
}
|
}
|
||||||
|
|
||||||
t1 := time.Now()
|
|
||||||
ch, err := chunker.New(rd, testPol, *testBufSize, hash)
|
ch, err := chunker.New(rd, testPol, *testBufSize, hash)
|
||||||
ok(b, err)
|
ok(b, err)
|
||||||
b.Logf("generating tables took %v", time.Since(t1))
|
|
||||||
|
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
b.SetBytes(int64(size))
|
b.SetBytes(int64(size))
|
||||||
@ -333,3 +331,15 @@ func BenchmarkChunkerWithMD5(b *testing.B) {
|
|||||||
func BenchmarkChunker(b *testing.B) {
|
func BenchmarkChunker(b *testing.B) {
|
||||||
benchmarkChunker(b, nil)
|
benchmarkChunker(b, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkNewChunker(b *testing.B) {
|
||||||
|
p, err := chunker.RandomPolynomial()
|
||||||
|
ok(b, err)
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
_, err := chunker.New(bytes.NewBuffer(nil), p, *testBufSize, nil)
|
||||||
|
ok(b, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user