From b2307cafa2a1fda8e77f361fa2a5b2a0148ca833 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 6 Apr 2015 00:22:19 +0200 Subject: [PATCH] Seed chunker with random per-repository polynomial --- archiver.go | 2 +- archiver_test.go | 5 +++-- chunker/chunker.go | 43 ++++++++++++++++++++---------------------- chunker/polynomials.go | 22 +++++++++++++++++++++ key.go | 28 ++++++++++++++++++++++++--- pools.go | 4 ++-- server.go | 6 ++++++ 7 files changed, 79 insertions(+), 31 deletions(-) diff --git a/archiver.go b/archiver.go index a5ead7f9c..6b58a609d 100644 --- a/archiver.go +++ b/archiver.go @@ -201,7 +201,7 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) (Blobs, error) { // store all chunks chnker := GetChunker("archiver.SaveFile") - chnker.Reset(file) + chnker.Reset(file, arch.s.ChunkerPolynomial()) chans := [](<-chan Blob){} defer FreeChunker("archiver.SaveFile", chnker) diff --git a/archiver_test.go b/archiver_test.go index 23346c591..e550a0dd6 100644 --- a/archiver_test.go +++ b/archiver_test.go @@ -13,6 +13,7 @@ import ( ) var benchArchiveDirectory = flag.String("test.benchdir", ".", "benchmark archiving a real directory (default: .)") +var testPol = chunker.Pol(0x3DA3358B4DC173) func get_random(seed, count int) []byte { buf := make([]byte, count) @@ -39,7 +40,7 @@ type Rdr interface { func benchmarkChunkEncrypt(b testing.TB, buf []byte, rd Rdr, key *restic.Key) { ch := restic.GetChunker("BenchmarkChunkEncrypt") rd.Seek(0, 0) - ch.Reset(rd) + ch.Reset(rd, testPol) for { chunk, err := ch.Next() @@ -86,7 +87,7 @@ func BenchmarkChunkEncrypt(b *testing.B) { func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *restic.Key) { ch := restic.GetChunker("BenchmarkChunkEncryptP") rd.Seek(0, 0) - ch.Reset(rd) + ch.Reset(rd, testPol) for { chunk, err := ch.Next() diff --git a/chunker/chunker.go b/chunker/chunker.go index 6c024e93c..e9d583937 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -80,27 +80,21 @@ type Chunker struct { // New returns a new Chunker based on polynomial p that reads from data from rd // with bufsize and pass all data to hash along the way. -func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) (*Chunker, error) { +func New(rd io.Reader, p Pol, bufsize int, hash hash.Hash) *Chunker { c := &Chunker{ buf: make([]byte, bufsize), h: hash, } - if err := c.Reset(rd, p); err != nil { - return nil, err - } - - return c, nil + c.Reset(rd, p) + return c } // Reset restarts a chunker so that it can be reused with a different // polynomial and reader. -func (c *Chunker) Reset(rd io.Reader, p Pol) error { +func (c *Chunker) Reset(rd io.Reader, p Pol) { c.pol = p c.pol_shift = uint(p.Deg() - 8) - if err := c.fill_tables(); err != nil { - return err - } - + c.fill_tables() c.rd = rd for i := 0; i < WindowSize; i++ { @@ -112,7 +106,10 @@ func (c *Chunker) Reset(rd io.Reader, p Pol) error { c.pos = 0 c.start = 0 c.count = 0 - c.slide(1) + + if p != 0 { + c.slide(1) + } if c.h != nil { c.h.Reset() @@ -120,30 +117,28 @@ func (c *Chunker) Reset(rd io.Reader, p Pol) error { // do not start a new chunk unless at least MinSize bytes have been read c.pre = MinSize - WindowSize - - return nil } // Calculate out_table and mod_table for optimization. Must be called only // once. This implementation uses a cache in the global variable cache. -func (c *Chunker) fill_tables() error { +func (c *Chunker) fill_tables() { + // if polynomial hasn't been specified, do not compute anything for now + if c.pol == 0 { + return + } + // test if the tables are cached for this polynomial cache.Lock() defer cache.Unlock() if t, ok := cache.entries[c.pol]; ok { c.tables = t - return nil + return } // else create a new entry c.tables = &tables{} cache.entries[c.pol] = c.tables - // test irreducibility of p - if !c.pol.Irreducible() { - return errors.New("invalid polynomial") - } - // calculate table for sliding out bytes. The byte to slide out is used as // the index for the table, the value contains the following: // out_table[b] = Hash(b || 0 || ... || 0) @@ -177,8 +172,6 @@ func (c *Chunker) fill_tables() error { // enough to reduce modulo Polynomial c.tables.mod[b] = mod(uint64(b)<= c.bmax { n, err := io.ReadFull(c.rd, c.buf[:]) diff --git a/chunker/polynomials.go b/chunker/polynomials.go index 4961f39b5..65fe1e931 100644 --- a/chunker/polynomials.go +++ b/chunker/polynomials.go @@ -6,6 +6,8 @@ import ( "errors" "fmt" "strconv" + + "github.com/davecgh/go-spew/spew" ) // Pol is a polynomial from F_2[X]. @@ -255,3 +257,23 @@ func qp(p uint, g Pol) Pol { // add x return res.Add(2).Mod(g) } + +func (p Pol) MarshalJSON() ([]byte, error) { + buf := strconv.AppendUint([]byte{'"'}, uint64(p), 16) + buf = append(buf, '"') + spew.Dump(buf) + return buf, nil +} + +func (p *Pol) UnmarshalJSON(data []byte) error { + if len(data) < 2 { + return errors.New("invalid string for polynomial") + } + n, err := strconv.ParseUint(string(data[1:len(data)-1]), 16, 64) + if err != nil { + return err + } + *p = Pol(n) + + return nil +} diff --git a/key.go b/key.go index a3c298a6f..7f8de363f 100644 --- a/key.go +++ b/key.go @@ -13,6 +13,7 @@ import ( "github.com/restic/restic/backend" "github.com/restic/restic/chunker" + "github.com/restic/restic/debug" "golang.org/x/crypto/poly1305" ) @@ -62,10 +63,12 @@ type Key struct { // MasterKeys holds signing and encryption keys for a repository. It is stored // encrypted and signed as a JSON data structure in the Data field of the Key -// structure. +// structure. For the master key, the secret random polynomial used for content +// defined chunking is included. type MasterKeys struct { - Sign MACKey `json:"sign"` - Encrypt AESKey `json:"encrypt"` + Sign MACKey `json:"sign"` + Encrypt AESKey `json:"encrypt"` + ChunkerPolynomial chunker.Pol `json:"chunker_polynomial,omitempty"` } // CreateKey initializes a master key in the given backend and encrypts it with @@ -106,6 +109,17 @@ func OpenKey(s Server, name string, password string) (*Key, error) { } k.name = name + // test if polynomial is valid and irreducible + if k.master.ChunkerPolynomial == 0 { + return nil, errors.New("Polynomial for content defined chunking is zero") + } + + if !k.master.ChunkerPolynomial.Irreducible() { + return nil, errors.New("Polynomial for content defined chunking is invalid") + } + + debug.Log("OpenKey", "Master keys loaded, polynomial %v", k.master.ChunkerPolynomial) + return k, nil } @@ -184,6 +198,14 @@ func AddKey(s Server, password string, template *Key) (*Key, error) { if template == nil { // generate new random master keys newkey.master = generateRandomKeys() + // generate random polynomial for cdc + p, err := chunker.RandomPolynomial() + if err != nil { + debug.Log("AddKey", "error generating new polynomial for cdc: %v", err) + return nil, err + } + debug.Log("AddKey", "generated new polynomial for cdc: %v", p) + newkey.master.ChunkerPolynomial = p } else { // copy master keys from old key newkey.master = template.master diff --git a/pools.go b/pools.go index a99f631ec..10e573a88 100644 --- a/pools.go +++ b/pools.go @@ -88,8 +88,8 @@ func newChunker() interface{} { defer chunkStats.m.Unlock() chunkStats.new++ - // create a new chunker with a nil reader - return chunker.New(nil, chunkerBufSize, sha256.New()) + // create a new chunker with a nil reader and null polynomial + return chunker.New(nil, 0, chunkerBufSize, sha256.New()) } func GetChunkBuf(s string) []byte { diff --git a/server.go b/server.go index 0251d1ac6..e4ebbeea6 100644 --- a/server.go +++ b/server.go @@ -10,6 +10,7 @@ import ( "sync" "github.com/restic/restic/backend" + "github.com/restic/restic/chunker" "github.com/restic/restic/debug" ) @@ -26,6 +27,11 @@ func (s *Server) SetKey(k *Key) { s.key = k } +// ChunkerPolynomial returns the secret polynomial used for content defined chunking. +func (s *Server) ChunkerPolynomial() chunker.Pol { + return chunker.Pol(s.key.Master().ChunkerPolynomial) +} + // Find loads the list of all blobs of type t and searches for names which start // with prefix. If none is found, nil and ErrNoIDPrefixFound is returned. If // more than one is found, nil and ErrMultipleIDMatches is returned.