From 055668758459d73e390ea5d23f2f21d8b7a550a0 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Thu, 12 Jan 2017 22:14:31 +0100 Subject: [PATCH 01/18] Reduce memory usage for prune --- src/restic/repository/master_index.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/restic/repository/master_index.go b/src/restic/repository/master_index.go index 165bf6024..8019d8781 100644 --- a/src/restic/repository/master_index.go +++ b/src/restic/repository/master_index.go @@ -46,9 +46,8 @@ func (mi *MasterIndex) LookupSize(id restic.ID, tpe restic.BlobType) (uint, erro defer mi.idxMutex.RUnlock() for _, idx := range mi.idx { - length, err := idx.LookupSize(id, tpe) - if err == nil { - return length, nil + if idx.Has(id, tpe) { + return idx.LookupSize(id, tpe) } } From 8734c2466ced4c72c83f36e253c797ed3670ed61 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 12:22:42 +0100 Subject: [PATCH 02/18] Fix call to debug.Log() --- src/restic/repository/master_index.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/restic/repository/master_index.go b/src/restic/repository/master_index.go index 8019d8781..ebd2cbef2 100644 --- a/src/restic/repository/master_index.go +++ b/src/restic/repository/master_index.go @@ -30,8 +30,7 @@ func (mi *MasterIndex) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic for _, idx := range mi.idx { blobs, err = idx.Lookup(id, tpe) if err == nil { - debug.Log("MasterIndex.Lookup", - "found id %v: %v", id.Str(), blobs) + debug.Log("found id %v: %v", id.Str(), blobs) return } } From 215af5c60acb6247dc15de45973ecc7f94fb92fc Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 12:20:37 +0100 Subject: [PATCH 03/18] Add LoadBlob benchmark --- src/restic/repository/repository_test.go | 46 ++++++++++++++++++++---- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/src/restic/repository/repository_test.go b/src/restic/repository/repository_test.go index 
5934d4778..34e642abb 100644 --- a/src/restic/repository/repository_test.go +++ b/src/restic/repository/repository_test.go @@ -2,12 +2,12 @@ package repository_test import ( "bytes" - "crypto/rand" "crypto/sha256" "io" - mrand "math/rand" + "math/rand" "path/filepath" "testing" + "time" "restic" "restic/archiver" @@ -17,13 +17,15 @@ import ( var testSizes = []int{5, 23, 2<<18 + 23, 1 << 20} +var rnd = rand.New(rand.NewSource(time.Now().UnixNano())) + func TestSave(t *testing.T) { repo, cleanup := repository.TestRepository(t) defer cleanup() for _, size := range testSizes { data := make([]byte, size) - _, err := io.ReadFull(rand.Reader, data) + _, err := io.ReadFull(rnd, data) OK(t, err) id := restic.Hash(data) @@ -59,7 +61,7 @@ func TestSaveFrom(t *testing.T) { for _, size := range testSizes { data := make([]byte, size) - _, err := io.ReadFull(rand.Reader, data) + _, err := io.ReadFull(rnd, data) OK(t, err) id := restic.Hash(data) @@ -94,7 +96,7 @@ func BenchmarkSaveAndEncrypt(t *testing.B) { size := 4 << 20 // 4MiB data := make([]byte, size) - _, err := io.ReadFull(rand.Reader, data) + _, err := io.ReadFull(rnd, data) OK(t, err) id := restic.ID(sha256.Sum256(data)) @@ -145,6 +147,36 @@ func BenchmarkLoadTree(t *testing.B) { } } +func BenchmarkLoadBlob(b *testing.B) { + repo, cleanup := repository.TestRepository(b) + defer cleanup() + + length := 1000000 + buf := make([]byte, length) + _, err := io.ReadFull(rnd, buf) + OK(b, err) + + id, err := repo.SaveBlob(restic.DataBlob, buf, restic.ID{}) + OK(b, err) + OK(b, repo.Flush()) + + b.ResetTimer() + b.SetBytes(int64(length)) + + for i := 0; i < b.N; i++ { + n, err := repo.LoadBlob(restic.DataBlob, id, buf) + OK(b, err) + if n != length { + b.Errorf("wanted %d bytes, got %d", length, n) + } + + id2 := restic.Hash(buf[:n]) + if !id.Equal(id2) { + b.Errorf("wrong data returned, wanted %v, got %v", id.Str(), id2.Str()) + } + } +} + func TestLoadJSONUnpacked(t *testing.T) { repo, cleanup := repository.TestRepository(t) 
defer cleanup() @@ -197,10 +229,10 @@ func BenchmarkLoadIndex(b *testing.B) { // saveRandomDataBlobs generates random data blobs and saves them to the repository. func saveRandomDataBlobs(t testing.TB, repo restic.Repository, num int, sizeMax int) { for i := 0; i < num; i++ { - size := mrand.Int() % sizeMax + size := rand.Int() % sizeMax buf := make([]byte, size) - _, err := io.ReadFull(rand.Reader, buf) + _, err := io.ReadFull(rnd, buf) OK(t, err) _, err = repo.SaveBlob(restic.DataBlob, buf, restic.ID{}) From 9a5b9253c4bce73a68b08e9f6bd6ac205d8ebc8f Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 12:33:06 +0100 Subject: [PATCH 04/18] LoadBlob: use buffer as scratch space benchmark old bytes new bytes delta BenchmarkLoadBlob-4 1010128 2256 -99.78% --- src/restic/repository/repository.go | 28 +++++++++--------------- src/restic/repository/repository_test.go | 3 ++- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/src/restic/repository/repository.go b/src/restic/repository/repository.go index 809afb187..cb1e3ad69 100644 --- a/src/restic/repository/repository.go +++ b/src/restic/repository/repository.go @@ -81,17 +81,6 @@ func (r *Repository) LoadAndDecrypt(t restic.FileType, id restic.ID) ([]byte, er func (r *Repository) loadBlob(id restic.ID, t restic.BlobType, plaintextBuf []byte) (int, error) { debug.Log("load %v with id %v (buf %p, len %d)", t, id.Str(), plaintextBuf, len(plaintextBuf)) - // lookup plaintext size of blob - size, err := r.idx.LookupSize(id, t) - if err != nil { - return 0, err - } - - // make sure the plaintext buffer is large enough, extend otherwise - if len(plaintextBuf) < int(size) { - return 0, errors.Errorf("buffer is too small: %d < %d", len(plaintextBuf), size) - } - // lookup packs blobs, err := r.idx.Lookup(id, t) if err != nil { @@ -109,8 +98,8 @@ func (r *Repository) loadBlob(id restic.ID, t restic.BlobType, plaintextBuf []by // load blob from pack h := restic.Handle{Type: restic.DataFile, Name: 
blob.PackID.String()} - ciphertextBuf := make([]byte, blob.Length) - n, err := r.be.Load(h, ciphertextBuf, int64(blob.Offset)) + plaintextBuf = plaintextBuf[:cap(plaintextBuf)] + n, err := r.be.Load(h, plaintextBuf, int64(blob.Offset)) if err != nil { debug.Log("error loading blob %v: %v", blob, err) lastError = err @@ -125,7 +114,7 @@ func (r *Repository) loadBlob(id restic.ID, t restic.BlobType, plaintextBuf []by } // decrypt - n, err = r.decryptTo(plaintextBuf, ciphertextBuf) + n, err = r.decryptTo(plaintextBuf, plaintextBuf) if err != nil { lastError = errors.Errorf("decrypting blob %v failed: %v", id, err) continue @@ -528,7 +517,9 @@ func (r *Repository) Close() error { return r.be.Close() } -// LoadBlob loads a blob of type t from the repository to the buffer. +// LoadBlob loads a blob of type t from the repository to the buffer. buf must +// be large enough to hold the encrypted blob, since it is used as scratch +// space. func (r *Repository) LoadBlob(t restic.BlobType, id restic.ID, buf []byte) (int, error) { debug.Log("load blob %v into buf %p", id.Str(), buf) size, err := r.idx.LookupSize(id, t) @@ -536,8 +527,9 @@ func (r *Repository) LoadBlob(t restic.BlobType, id restic.ID, buf []byte) (int, return 0, err } - if len(buf) < int(size) { - return 0, errors.Errorf("buffer is too small for data blob (%d < %d)", len(buf), size) + buf = buf[:cap(buf)] + if len(buf) < int(size)+crypto.Extension { + return 0, errors.Errorf("buffer is too small for data blob (%d < %d)", len(buf), size+crypto.Extension) } n, err := r.loadBlob(id, t, buf) @@ -571,7 +563,7 @@ func (r *Repository) LoadTree(id restic.ID) (*restic.Tree, error) { } debug.Log("size is %d, create buffer", size) - buf := make([]byte, size) + buf := make([]byte, size+crypto.Extension) n, err := r.loadBlob(id, restic.TreeBlob, buf) if err != nil { diff --git a/src/restic/repository/repository_test.go b/src/restic/repository/repository_test.go index 34e642abb..978537d7f 100644 --- 
a/src/restic/repository/repository_test.go +++ b/src/restic/repository/repository_test.go @@ -11,6 +11,7 @@ import ( "restic" "restic/archiver" + "restic/crypto" "restic/repository" . "restic/test" ) @@ -152,7 +153,7 @@ func BenchmarkLoadBlob(b *testing.B) { defer cleanup() length := 1000000 - buf := make([]byte, length) + buf := make([]byte, length, length+crypto.Extension) _, err := io.ReadFull(rnd, buf) OK(b, err) From 91dcb958e079fa064aca4475dbba6aef90fe00c6 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 12:40:03 +0100 Subject: [PATCH 05/18] Fix tests --- src/restic/archiver/archive_reader_test.go | 3 ++- src/restic/node.go | 3 ++- src/restic/repository/repository_test.go | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/restic/archiver/archive_reader_test.go b/src/restic/archiver/archive_reader_test.go index f2de24830..82a432f00 100644 --- a/src/restic/archiver/archive_reader_test.go +++ b/src/restic/archiver/archive_reader_test.go @@ -5,6 +5,7 @@ import ( "io" "math/rand" "restic" + "restic/crypto" "restic/repository" "testing" ) @@ -44,7 +45,7 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name t.Fatal(err) } - buf := make([]byte, int(size)) + buf := make([]byte, int(size), int(size)+crypto.Extension) n := loadBlob(t, repo, id, buf) if n != len(buf) { t.Errorf("wrong number of bytes read, want %d, got %d", len(buf), n) diff --git a/src/restic/node.go b/src/restic/node.go index e17215125..82de2bd6a 100644 --- a/src/restic/node.go +++ b/src/restic/node.go @@ -10,6 +10,7 @@ import ( "syscall" "time" + "restic/crypto" "restic/errors" "runtime" @@ -208,7 +209,7 @@ func (node Node) createFileAt(path string, repo Repository) error { buf = buf[:cap(buf)] if uint(len(buf)) < size { - buf = make([]byte, size) + buf = make([]byte, size, size+crypto.Extension) } n, err := repo.LoadBlob(DataBlob, id, buf) diff --git a/src/restic/repository/repository_test.go 
b/src/restic/repository/repository_test.go index 978537d7f..b2f04ab1d 100644 --- a/src/restic/repository/repository_test.go +++ b/src/restic/repository/repository_test.go @@ -41,7 +41,7 @@ func TestSave(t *testing.T) { // OK(t, repo.SaveIndex()) // read back - buf := make([]byte, size) + buf := make([]byte, size, size+crypto.Extension) n, err := repo.LoadBlob(restic.DataBlob, id, buf) OK(t, err) Equals(t, len(buf), n) @@ -75,7 +75,7 @@ func TestSaveFrom(t *testing.T) { OK(t, repo.Flush()) // read back - buf := make([]byte, size) + buf := make([]byte, size, size+crypto.Extension) n, err := repo.LoadBlob(restic.DataBlob, id, buf) OK(t, err) Equals(t, len(buf), n) From 32a5c2c1f6d656d122d25517ec2a1c81437222db Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 12:57:05 +0100 Subject: [PATCH 06/18] Add a few functions to calculate Blob buffer len --- src/restic/archiver/archive_reader_test.go | 3 +-- src/restic/buffer.go | 21 +++++++++++++++++++++ src/restic/node.go | 3 +-- src/restic/pack/pack.go | 8 ++++---- src/restic/pack/pack_test.go | 7 +++---- src/restic/repository/index.go | 9 ++++----- src/restic/repository/repository.go | 8 ++++---- src/restic/repository/repository_test.go | 7 +++---- 8 files changed, 41 insertions(+), 25 deletions(-) create mode 100644 src/restic/buffer.go diff --git a/src/restic/archiver/archive_reader_test.go b/src/restic/archiver/archive_reader_test.go index 82a432f00..c24a0be5e 100644 --- a/src/restic/archiver/archive_reader_test.go +++ b/src/restic/archiver/archive_reader_test.go @@ -5,7 +5,6 @@ import ( "io" "math/rand" "restic" - "restic/crypto" "restic/repository" "testing" ) @@ -45,7 +44,7 @@ func checkSavedFile(t *testing.T, repo restic.Repository, treeID restic.ID, name t.Fatal(err) } - buf := make([]byte, int(size), int(size)+crypto.Extension) + buf := restic.NewBlobBuffer(int(size)) n := loadBlob(t, repo, id, buf) if n != len(buf) { t.Errorf("wrong number of bytes read, want %d, got %d", len(buf), n) diff 
--git a/src/restic/buffer.go b/src/restic/buffer.go new file mode 100644 index 000000000..d822fced9 --- /dev/null +++ b/src/restic/buffer.go @@ -0,0 +1,21 @@ +package restic + +import "restic/crypto" + +// NewBlobBuffer returns a buffer that is large enough to hold a blob of size +// plaintext bytes, including the crypto overhead. +func NewBlobBuffer(size int) []byte { + return make([]byte, size, size+crypto.Extension) +} + +// PlaintextLength returns the plaintext length of a blob with ciphertextSize +// bytes. +func PlaintextLength(ciphertextSize int) int { + return ciphertextSize - crypto.Extension +} + +// CiphertextLength returns the encrypted length of a blob with plaintextSize +// bytes. +func CiphertextLength(plaintextSize int) int { + return plaintextSize + crypto.Extension +} diff --git a/src/restic/node.go b/src/restic/node.go index 82de2bd6a..bf41f4201 100644 --- a/src/restic/node.go +++ b/src/restic/node.go @@ -10,7 +10,6 @@ import ( "syscall" "time" - "restic/crypto" "restic/errors" "runtime" @@ -209,7 +208,7 @@ func (node Node) createFileAt(path string, repo Repository) error { buf = buf[:cap(buf)] if uint(len(buf)) < size { - buf = make([]byte, size, size+crypto.Extension) + buf = NewBlobBuffer(int(size)) } n, err := repo.LoadBlob(DataBlob, id, buf) diff --git a/src/restic/pack/pack.go b/src/restic/pack/pack.go index 17f79b09a..be4fcc087 100644 --- a/src/restic/pack/pack.go +++ b/src/restic/pack/pack.go @@ -85,15 +85,15 @@ func (p *Packer) Finalize() (uint, error) { return 0, errors.Wrap(err, "Write") } - hdrBytes := bytesHeader + crypto.Extension - if uint(n) != hdrBytes { + hdrBytes := restic.CiphertextLength(int(bytesHeader)) + if n != hdrBytes { return 0, errors.New("wrong number of bytes written") } - bytesWritten += hdrBytes + bytesWritten += uint(hdrBytes) // write length - err = binary.Write(p.wr, binary.LittleEndian, uint32(uint(len(p.blobs))*entrySize+crypto.Extension)) + err = binary.Write(p.wr, binary.LittleEndian, 
uint32(restic.CiphertextLength(len(p.blobs)*int(entrySize)))) if err != nil { return 0, errors.Wrap(err, "binary.Write") } diff --git a/src/restic/pack/pack_test.go b/src/restic/pack/pack_test.go index f90d1a426..c797cb1c0 100644 --- a/src/restic/pack/pack_test.go +++ b/src/restic/pack/pack_test.go @@ -54,10 +54,9 @@ func verifyBlobs(t testing.TB, bufs []Buf, k *crypto.Key, rd io.ReaderAt, packSi } // header length written += binary.Size(uint32(0)) - // header - written += len(bufs) * (binary.Size(restic.BlobType(0)) + binary.Size(uint32(0)) + len(restic.ID{})) - // header crypto - written += crypto.Extension + // header + header crypto + headerSize := len(bufs) * (binary.Size(restic.BlobType(0)) + binary.Size(uint32(0)) + len(restic.ID{})) + written += restic.CiphertextLength(headerSize) // check length Equals(t, uint(written), packSize) diff --git a/src/restic/repository/index.go b/src/restic/repository/index.go index 1ca9525d0..9aeedfb32 100644 --- a/src/restic/repository/index.go +++ b/src/restic/repository/index.go @@ -10,7 +10,6 @@ import ( "restic/errors" - "restic/crypto" "restic/debug" ) @@ -177,15 +176,15 @@ func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool { return false } -// LookupSize returns the length of the cleartext content behind the -// given id -func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (cleartextLength uint, err error) { +// LookupSize returns the length of the plaintext content of the blob with the +// given id. +func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, err error) { blobs, err := idx.Lookup(id, tpe) if err != nil { return 0, err } - return blobs[0].Length - crypto.Extension, nil + return uint(restic.PlaintextLength(int(blobs[0].Length))), nil } // Supersedes returns the list of indexes this index supersedes, if any. 
diff --git a/src/restic/repository/repository.go b/src/restic/repository/repository.go index cb1e3ad69..c71c75e41 100644 --- a/src/restic/repository/repository.go +++ b/src/restic/repository/repository.go @@ -213,7 +213,7 @@ func (r *Repository) SaveJSONUnpacked(t restic.FileType, item interface{}) (rest // SaveUnpacked encrypts data and stores it in the backend. Returned is the // storage hash. func (r *Repository) SaveUnpacked(t restic.FileType, p []byte) (id restic.ID, err error) { - ciphertext := make([]byte, len(p)+crypto.Extension) + ciphertext := restic.NewBlobBuffer(len(p)) ciphertext, err = r.Encrypt(ciphertext, p) if err != nil { return restic.ID{}, err @@ -528,8 +528,8 @@ func (r *Repository) LoadBlob(t restic.BlobType, id restic.ID, buf []byte) (int, } buf = buf[:cap(buf)] - if len(buf) < int(size)+crypto.Extension { - return 0, errors.Errorf("buffer is too small for data blob (%d < %d)", len(buf), size+crypto.Extension) + if len(buf) < restic.CiphertextLength(int(size)) { + return 0, errors.Errorf("buffer is too small for data blob (%d < %d)", len(buf), restic.CiphertextLength(int(size))) } n, err := r.loadBlob(id, t, buf) @@ -563,7 +563,7 @@ func (r *Repository) LoadTree(id restic.ID) (*restic.Tree, error) { } debug.Log("size is %d, create buffer", size) - buf := make([]byte, size+crypto.Extension) + buf := restic.NewBlobBuffer(int(size)) n, err := r.loadBlob(id, restic.TreeBlob, buf) if err != nil { diff --git a/src/restic/repository/repository_test.go b/src/restic/repository/repository_test.go index b2f04ab1d..6cb1defd6 100644 --- a/src/restic/repository/repository_test.go +++ b/src/restic/repository/repository_test.go @@ -11,7 +11,6 @@ import ( "restic" "restic/archiver" - "restic/crypto" "restic/repository" . 
"restic/test" ) @@ -41,7 +40,7 @@ func TestSave(t *testing.T) { // OK(t, repo.SaveIndex()) // read back - buf := make([]byte, size, size+crypto.Extension) + buf := restic.NewBlobBuffer(size) n, err := repo.LoadBlob(restic.DataBlob, id, buf) OK(t, err) Equals(t, len(buf), n) @@ -75,7 +74,7 @@ func TestSaveFrom(t *testing.T) { OK(t, repo.Flush()) // read back - buf := make([]byte, size, size+crypto.Extension) + buf := restic.NewBlobBuffer(size) n, err := repo.LoadBlob(restic.DataBlob, id, buf) OK(t, err) Equals(t, len(buf), n) @@ -153,7 +152,7 @@ func BenchmarkLoadBlob(b *testing.B) { defer cleanup() length := 1000000 - buf := make([]byte, length, length+crypto.Extension) + buf := restic.NewBlobBuffer(length) _, err := io.ReadFull(rnd, buf) OK(b, err) From 710499cf468727cb4475f707bf2dbb65f73ed661 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 20:56:50 +0100 Subject: [PATCH 07/18] Add benchmark for LoadAndDecrypt --- src/restic/repository/repository_test.go | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/restic/repository/repository_test.go b/src/restic/repository/repository_test.go index 6cb1defd6..7809bd3a6 100644 --- a/src/restic/repository/repository_test.go +++ b/src/restic/repository/repository_test.go @@ -177,6 +177,38 @@ func BenchmarkLoadBlob(b *testing.B) { } } +func BenchmarkLoadAndDecrypt(b *testing.B) { + repo, cleanup := repository.TestRepository(b) + defer cleanup() + + length := 1000000 + buf := restic.NewBlobBuffer(length) + _, err := io.ReadFull(rnd, buf) + OK(b, err) + + dataID := restic.Hash(buf) + + storageID, err := repo.SaveUnpacked(restic.DataFile, buf) + OK(b, err) + // OK(b, repo.Flush()) + + b.ResetTimer() + b.SetBytes(int64(length)) + + for i := 0; i < b.N; i++ { + data, err := repo.LoadAndDecrypt(restic.DataFile, storageID) + OK(b, err) + if len(data) != length { + b.Errorf("wanted %d bytes, got %d", length, len(data)) + } + + id2 := restic.Hash(data) + if !dataID.Equal(id2) { + 
b.Errorf("wrong data returned, wanted %v, got %v", storageID.Str(), id2.Str()) + } + } +} + func TestLoadJSONUnpacked(t *testing.T) { repo, cleanup := repository.TestRepository(t) defer cleanup() From e571b6a6567a1a5fdc24741621f27b68d2f49103 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 21:23:47 +0100 Subject: [PATCH 08/18] Use the same buffer for decryption --- src/restic/repository/repository.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/restic/repository/repository.go b/src/restic/repository/repository.go index c71c75e41..948301cc5 100644 --- a/src/restic/repository/repository.go +++ b/src/restic/repository/repository.go @@ -64,15 +64,13 @@ func (r *Repository) LoadAndDecrypt(t restic.FileType, id restic.ID) ([]byte, er return nil, errors.New("invalid data returned") } - plain := make([]byte, len(buf)) - // decrypt - n, err := r.decryptTo(plain, buf) + n, err := r.decryptTo(buf, buf) if err != nil { return nil, err } - return plain[:n], nil + return buf[:n], nil } // loadBlob tries to load and decrypt content identified by t and id from a From e463587bad54661298d339d97f570bbc22566837 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 21:27:37 +0100 Subject: [PATCH 09/18] Add BenchmarkDecodeIndex --- src/restic/repository/index_test.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/restic/repository/index_test.go b/src/restic/repository/index_test.go index 986f9efc7..7691bd953 100644 --- a/src/restic/repository/index_test.go +++ b/src/restic/repository/index_test.go @@ -326,6 +326,19 @@ func TestIndexUnserialize(t *testing.T) { } } +func BenchmarkDecodeIndex(b *testing.B) { + rd := bytes.NewReader(docExample) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, err := rd.Seek(0, 0) + OK(b, err) + _, err = repository.DecodeIndex(rd) + OK(b, err) + } +} + func TestIndexUnserializeOld(t *testing.T) { idx, err := 
repository.DecodeOldIndex(bytes.NewReader(docOldExample)) OK(t, err) From 73e7a2bea8b975df15b76ecc0dbb3cc361152fdd Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 21:39:40 +0100 Subject: [PATCH 10/18] Add BenchmarkLoadIndex --- src/restic/repository/repository_test.go | 31 +++++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/restic/repository/repository_test.go b/src/restic/repository/repository_test.go index 7809bd3a6..efa8fc35d 100644 --- a/src/restic/repository/repository_test.go +++ b/src/restic/repository/repository_test.go @@ -246,15 +246,38 @@ func TestRepositoryLoadIndex(t *testing.T) { } func BenchmarkLoadIndex(b *testing.B) { - repodir, cleanup := Env(b, repoFixture) + repository.TestUseLowSecurityKDFParameters(b) + + repo, cleanup := repository.TestRepository(b) defer cleanup() - repo := repository.TestOpenLocal(b, repodir) + idx := repository.NewIndex() + + for i := 0; i < 5000; i++ { + idx.Store(restic.PackedBlob{ + Blob: restic.Blob{ + Type: restic.DataBlob, + Length: 1234, + ID: restic.NewRandomID(), + Offset: 1235, + }, + PackID: restic.NewRandomID(), + }) + } + + id, err := repository.SaveIndex(repo, idx) + OK(b, err) + + b.Logf("index saved as %v (%v entries)", id.Str(), idx.Count(restic.DataBlob)) + fi, err := repo.Backend().Stat(restic.Handle{Type: restic.IndexFile, Name: id.String()}) + OK(b, err) + b.Logf("filesize is %v", fi.Size) + b.ResetTimer() for i := 0; i < b.N; i++ { - repo.SetIndex(repository.NewMasterIndex()) - OK(b, repo.LoadIndex()) + _, err := repository.LoadIndex(repo, id) + OK(b, err) } } From caabc4ec44414d23e504ceb7b8a2a9d10e861a8d Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 13 Jan 2017 22:05:34 +0100 Subject: [PATCH 11/18] Reduce memory usage while decoding index --- src/restic/repository/index.go | 17 +++++++---------- src/restic/repository/index_test.go | 14 +++++--------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git 
a/src/restic/repository/index.go b/src/restic/repository/index.go index 9aeedfb32..4257c7dfa 100644 --- a/src/restic/repository/index.go +++ b/src/restic/repository/index.go @@ -1,7 +1,6 @@ package repository import ( - "bytes" "encoding/json" "io" "restic" @@ -451,12 +450,11 @@ func isErrOldIndex(err error) bool { var ErrOldIndexFormat = errors.New("index has old format") // DecodeIndex loads and unserializes an index from rd. -func DecodeIndex(rd io.Reader) (idx *Index, err error) { +func DecodeIndex(buf []byte) (idx *Index, err error) { debug.Log("Start decoding index") - idxJSON := jsonIndex{} + idxJSON := &jsonIndex{} - dec := json.NewDecoder(rd) - err = dec.Decode(&idxJSON) + err = json.Unmarshal(buf, idxJSON) if err != nil { debug.Log("Error %v", err) @@ -490,12 +488,11 @@ func DecodeIndex(rd io.Reader) (idx *Index, err error) { } // DecodeOldIndex loads and unserializes an index in the old format from rd. -func DecodeOldIndex(rd io.Reader) (idx *Index, err error) { +func DecodeOldIndex(buf []byte) (idx *Index, err error) { debug.Log("Start decoding old index") list := []*packJSON{} - dec := json.NewDecoder(rd) - err = dec.Decode(&list) + err = json.Unmarshal(buf, &list) if err != nil { debug.Log("Error %#v", err) return nil, errors.Wrap(err, "Decode") @@ -522,7 +519,7 @@ func DecodeOldIndex(rd io.Reader) (idx *Index, err error) { } // LoadIndexWithDecoder loads the index and decodes it with fn. 
-func LoadIndexWithDecoder(repo restic.Repository, id restic.ID, fn func(io.Reader) (*Index, error)) (idx *Index, err error) { +func LoadIndexWithDecoder(repo restic.Repository, id restic.ID, fn func([]byte) (*Index, error)) (idx *Index, err error) { debug.Log("Loading index %v", id.Str()) buf, err := repo.LoadAndDecrypt(restic.IndexFile, id) @@ -530,7 +527,7 @@ func LoadIndexWithDecoder(repo restic.Repository, id restic.ID, fn func(io.Reade return nil, err } - idx, err = fn(bytes.NewReader(buf)) + idx, err = fn(buf) if err != nil { debug.Log("error while decoding index %v: %v", id, err) return nil, err diff --git a/src/restic/repository/index_test.go b/src/restic/repository/index_test.go index 7691bd953..892b2b4fa 100644 --- a/src/restic/repository/index_test.go +++ b/src/restic/repository/index_test.go @@ -54,7 +54,7 @@ func TestIndexSerialize(t *testing.T) { err := idx.Encode(wr) OK(t, err) - idx2, err := repository.DecodeIndex(wr) + idx2, err := repository.DecodeIndex(wr.Bytes()) OK(t, err) Assert(t, idx2 != nil, "nil returned for decoded index") @@ -136,7 +136,7 @@ func TestIndexSerialize(t *testing.T) { Assert(t, id2.Equal(id), "wrong ID returned: want %v, got %v", id, id2) - idx3, err := repository.DecodeIndex(wr3) + idx3, err := repository.DecodeIndex(wr3.Bytes()) OK(t, err) Assert(t, idx3 != nil, "nil returned for decoded index") @@ -288,7 +288,7 @@ var exampleLookupTest = struct { func TestIndexUnserialize(t *testing.T) { oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")} - idx, err := repository.DecodeIndex(bytes.NewReader(docExample)) + idx, err := repository.DecodeIndex(docExample) OK(t, err) for _, test := range exampleTests { @@ -327,20 +327,16 @@ func TestIndexUnserialize(t *testing.T) { } func BenchmarkDecodeIndex(b *testing.B) { - rd := bytes.NewReader(docExample) - b.ResetTimer() for i := 0; i < b.N; i++ { - _, err := rd.Seek(0, 0) - OK(b, err) - _, err = repository.DecodeIndex(rd) + _, 
err := repository.DecodeIndex(docExample) OK(b, err) } } func TestIndexUnserializeOld(t *testing.T) { - idx, err := repository.DecodeOldIndex(bytes.NewReader(docOldExample)) + idx, err := repository.DecodeOldIndex(docOldExample) OK(t, err) for _, test := range exampleTests { From cd9b5262038f2ca29312fef805218aa8b95bec95 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 15 Jan 2017 15:27:58 +0100 Subject: [PATCH 12/18] Preallocate pack entries list --- src/restic/pack/pack.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/restic/pack/pack.go b/src/restic/pack/pack.go index be4fcc087..6666d886a 100644 --- a/src/restic/pack/pack.go +++ b/src/restic/pack/pack.go @@ -233,6 +233,8 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, err hdrRd := bytes.NewReader(buf) + entries = make([]restic.Blob, 0, uint(n)/entrySize) + pos := uint(0) for { e := headerEntry{} From d40f566e419fed34d108338e263af3221ec78d57 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 15 Jan 2017 15:45:52 +0100 Subject: [PATCH 13/18] Index: Use slices instead of maps, reduce data --- src/cmds/restic/cmd_prune.go | 21 +++++---- src/restic/index/index.go | 80 ++++++++-------------------------- src/restic/index/index_test.go | 6 +-- 3 files changed, 33 insertions(+), 74 deletions(-) diff --git a/src/cmds/restic/cmd_prune.go b/src/cmds/restic/cmd_prune.go index 98c32f784..00f74e559 100644 --- a/src/cmds/restic/cmd_prune.go +++ b/src/cmds/restic/cmd_prune.go @@ -103,11 +103,13 @@ func runPrune(gopts GlobalOptions) error { return err } + blobs := 0 for _, pack := range idx.Packs { stats.bytes += pack.Size + blobs += len(pack.Entries) } Verbosef("repository contains %v packs (%v blobs) with %v bytes\n", - len(idx.Packs), len(idx.Blobs), formatBytes(uint64(stats.bytes))) + len(idx.Packs), blobs, formatBytes(uint64(stats.bytes))) blobCount := make(map[restic.BlobHandle]int) duplicateBlobs := 0 @@ -164,14 +166,17 @@ func runPrune(gopts GlobalOptions) error 
{ // find packs that need a rewrite rewritePacks := restic.NewIDSet() - for h, blob := range idx.Blobs { - if !usedBlobs.Has(h) { - rewritePacks.Merge(blob.Packs) - continue - } + for _, pack := range idx.Packs { + for _, blob := range pack.Entries { + h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} + if !usedBlobs.Has(h) { + rewritePacks.Insert(pack.ID) + continue + } - if blobCount[h] > 1 { - rewritePacks.Merge(blob.Packs) + if blobCount[h] > 1 { + rewritePacks.Insert(pack.ID) + } } } diff --git a/src/restic/index/index.go b/src/restic/index/index.go index 4481d0d5d..7d8b72836 100644 --- a/src/restic/index/index.go +++ b/src/restic/index/index.go @@ -14,27 +14,20 @@ import ( // Pack contains information about the contents of a pack. type Pack struct { + ID restic.ID Size int64 Entries []restic.Blob } -// Blob contains information about a blob. -type Blob struct { - Size int64 - Packs restic.IDSet -} - // Index contains information about blobs and packs stored in a repo. type Index struct { Packs map[restic.ID]Pack - Blobs map[restic.BlobHandle]Blob IndexIDs restic.IDSet } func newIndex() *Index { return &Index{ Packs: make(map[restic.ID]Pack), - Blobs: make(map[restic.BlobHandle]Blob), IndexIDs: restic.NewIDSet(), } } @@ -70,7 +63,7 @@ func New(repo restic.Repository, p *restic.Progress) (*Index, error) { return nil, err } - p := Pack{Entries: j.Entries(), Size: j.Size()} + p := Pack{ID: packID, Entries: j.Entries(), Size: j.Size()} idx.Packs[packID] = p } @@ -181,18 +174,6 @@ func (idx *Index) AddPack(id restic.ID, size int64, entries []restic.Blob) error idx.Packs[id] = Pack{Size: size, Entries: entries} - for _, entry := range entries { - h := restic.BlobHandle{ID: entry.ID, Type: entry.Type} - if _, ok := idx.Blobs[h]; !ok { - idx.Blobs[h] = Blob{ - Size: int64(entry.Length), - Packs: restic.NewIDSet(), - } - } - - idx.Blobs[h].Packs.Insert(id) - } - return nil } @@ -202,15 +183,6 @@ func (idx *Index) RemovePack(id restic.ID) error { return 
errors.Errorf("pack %v not found in the index", id.Str()) } - for _, blob := range idx.Packs[id].Entries { - h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} - idx.Blobs[h].Packs.Delete(id) - - if len(idx.Blobs[h].Packs) == 0 { - delete(idx.Blobs, h) - } - } - delete(idx.Packs, id) return nil @@ -239,14 +211,11 @@ func (idx *Index) DuplicateBlobs() (dups restic.BlobSet) { func (idx *Index) PacksForBlobs(blobs restic.BlobSet) (packs restic.IDSet) { packs = restic.NewIDSet() - for h := range blobs { - blob, ok := idx.Blobs[h] - if !ok { - continue - } - - for id := range blob.Packs { - packs.Insert(id) + for id, p := range idx.Packs { + for _, entry := range p.Entries { + if blobs.Has(restic.BlobHandle{ID: entry.ID, Type: entry.Type}) { + packs.Insert(id) + } } } @@ -264,31 +233,20 @@ type Location struct { var ErrBlobNotFound = errors.New("blob not found in index") // FindBlob returns a list of packs and positions the blob can be found in. -func (idx *Index) FindBlob(h restic.BlobHandle) ([]Location, error) { - blob, ok := idx.Blobs[h] - if !ok { - return nil, ErrBlobNotFound +func (idx *Index) FindBlob(h restic.BlobHandle) (result []Location, err error) { + for id, p := range idx.Packs { + for _, entry := range p.Entries { + if entry.ID.Equal(h.ID) && entry.Type == h.Type { + result = append(result, Location{ + PackID: id, + Blob: entry, + }) + } + } } - result := make([]Location, 0, len(blob.Packs)) - for packID := range blob.Packs { - pack, ok := idx.Packs[packID] - if !ok { - return nil, errors.Errorf("pack %v not found in index", packID.Str()) - } - - for _, entry := range pack.Entries { - if entry.Type != h.Type { - continue - } - - if !entry.ID.Equal(h.ID) { - continue - } - - loc := Location{PackID: packID, Blob: entry} - result = append(result, loc) - } + if len(result) == 0 { + return nil, ErrBlobNotFound } return result, nil diff --git a/src/restic/index/index_test.go b/src/restic/index/index_test.go index 7905f7368..a7de094d4 100644 --- 
a/src/restic/index/index_test.go +++ b/src/restic/index/index_test.go @@ -151,7 +151,7 @@ func TestIndexDuplicateBlobs(t *testing.T) { if len(dups) == 0 { t.Errorf("no duplicate blobs found") } - t.Logf("%d packs, %d unique blobs", len(idx.Packs), len(idx.Blobs)) + t.Logf("%d packs, %d duplicate blobs", len(idx.Packs), len(dups)) packs := idx.PacksForBlobs(dups) if len(packs) == 0 { @@ -249,10 +249,6 @@ func TestIndexAddRemovePack(t *testing.T) { if err == nil { t.Errorf("removed blob %v found in index", h) } - - if _, ok := idx.Blobs[h]; ok { - t.Errorf("removed blob %v found in index.Blobs", h) - } } } From 36276c41b25ec5cba9e8b0f1f761e3a67e541ced Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 15 Jan 2017 16:16:09 +0100 Subject: [PATCH 14/18] Add Benchmark for IndexSave --- src/restic/index/index_test.go | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/restic/index/index_test.go b/src/restic/index/index_test.go index a7de094d4..913af2a82 100644 --- a/src/restic/index/index_test.go +++ b/src/restic/index/index_test.go @@ -4,6 +4,7 @@ import ( "math/rand" "restic" "restic/repository" + "restic/test" "testing" "time" ) @@ -135,6 +136,26 @@ func BenchmarkIndexNew(b *testing.B) { if idx == nil { b.Fatalf("New() returned nil index") } + b.Logf("idx %v packs", len(idx.Packs)) + } +} + +func BenchmarkIndexSave(b *testing.B) { + repo, cleanup := createFilledRepo(b, 3, 0) + defer cleanup() + + idx, err := New(repo, nil) + test.OK(b, err) + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + id, err := idx.Save(repo, nil) + if err != nil { + b.Fatalf("New() returned error %v", err) + } + + b.Logf("saved as %v", id.Str()) } } @@ -250,7 +271,6 @@ func TestIndexAddRemovePack(t *testing.T) { t.Errorf("removed blob %v found in index", h) } } - } // example index serialization from doc/Design.md From 73ad3d418d49ba12c219a3063a0164a6bec0f5bc Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 17 Jan 2017 12:46:41 +0100 
Subject: [PATCH 15/18] Index: Remove unneeded allocation --- src/restic/index/index.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/restic/index/index.go b/src/restic/index/index.go index 7d8b72836..6a122e67a 100644 --- a/src/restic/index/index.go +++ b/src/restic/index/index.go @@ -62,9 +62,6 @@ func New(repo restic.Repository, p *restic.Progress) (*Index, error) { if err != nil { return nil, err } - - p := Pack{ID: packID, Entries: j.Entries(), Size: j.Size()} - idx.Packs[packID] = p } return idx, nil From c4f44c7bcb8dbd6be326a2fb4156c41909781bba Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 17 Jan 2017 12:56:20 +0100 Subject: [PATCH 16/18] Reduce memory consumption of TestCreateSnapshot --- src/restic/testing.go | 25 +++++++++++++++++-------- src/restic/testing_test.go | 11 +++++++++++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/restic/testing.go b/src/restic/testing.go index 49a848965..719ff336d 100644 --- a/src/restic/testing.go +++ b/src/restic/testing.go @@ -23,16 +23,26 @@ type fakeFileSystem struct { repo Repository knownBlobs IDSet duplication float32 + buf []byte + chunker *chunker.Chunker } // saveFile reads from rd and saves the blobs in the repository. The list of // IDs is returned. 
-func (fs fakeFileSystem) saveFile(rd io.Reader) (blobs IDs) { - blobs = IDs{} - ch := chunker.New(rd, fs.repo.Config().ChunkerPolynomial) +func (fs *fakeFileSystem) saveFile(rd io.Reader) (blobs IDs) { + if fs.buf == nil { + fs.buf = make([]byte, chunker.MaxSize) + } + if fs.chunker == nil { + fs.chunker = chunker.New(rd, fs.repo.Config().ChunkerPolynomial) + } else { + fs.chunker.Reset(rd, fs.repo.Config().ChunkerPolynomial) + } + + blobs = IDs{} for { - chunk, err := ch.Next(getBuf()) + chunk, err := fs.chunker.Next(fs.buf) if errors.Cause(err) == io.EOF { break } @@ -50,7 +60,6 @@ func (fs fakeFileSystem) saveFile(rd io.Reader) (blobs IDs) { fs.knownBlobs.Insert(id) } - freeBuf(chunk.Data) blobs = append(blobs, id) } @@ -64,7 +73,7 @@ const ( maxNodes = 32 ) -func (fs fakeFileSystem) treeIsKnown(tree *Tree) (bool, []byte, ID) { +func (fs *fakeFileSystem) treeIsKnown(tree *Tree) (bool, []byte, ID) { data, err := json.Marshal(tree) if err != nil { fs.t.Fatalf("json.Marshal(tree) returned error: %v", err) @@ -76,7 +85,7 @@ func (fs fakeFileSystem) treeIsKnown(tree *Tree) (bool, []byte, ID) { return fs.blobIsKnown(id, TreeBlob), data, id } -func (fs fakeFileSystem) blobIsKnown(id ID, t BlobType) bool { +func (fs *fakeFileSystem) blobIsKnown(id ID, t BlobType) bool { if rand.Float32() < fs.duplication { return false } @@ -94,7 +103,7 @@ func (fs fakeFileSystem) blobIsKnown(id ID, t BlobType) bool { } // saveTree saves a tree of fake files in the repo and returns the ID. 
-func (fs fakeFileSystem) saveTree(seed int64, depth int) ID { +func (fs *fakeFileSystem) saveTree(seed int64, depth int) ID { rnd := rand.NewSource(seed) numNodes := int(rnd.Int63() % maxNodes) diff --git a/src/restic/testing_test.go b/src/restic/testing_test.go index 1258bf208..86b18a001 100644 --- a/src/restic/testing_test.go +++ b/src/restic/testing_test.go @@ -47,3 +47,14 @@ func TestCreateSnapshot(t *testing.T) { checker.TestCheckRepo(t, repo) } + +func BenchmarkTestCreateSnapshot(t *testing.B) { + repo, cleanup := repository.TestRepository(t) + defer cleanup() + + t.ResetTimer() + + for i := 0; i < t.N; i++ { + restic.TestCreateSnapshot(t, repo, testSnapshotTime.Add(time.Duration(i)*time.Second), testDepth, 0) + } +} From dac18e3bf882e5be7f57afbe738f1e854ce03264 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 17 Jan 2017 13:00:59 +0100 Subject: [PATCH 17/18] Improve BenchmarkIndexSave --- src/restic/index/index_test.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/restic/index/index_test.go b/src/restic/index/index_test.go index 913af2a82..8b86b7d53 100644 --- a/src/restic/index/index_test.go +++ b/src/restic/index/index_test.go @@ -141,12 +141,26 @@ func BenchmarkIndexNew(b *testing.B) { } func BenchmarkIndexSave(b *testing.B) { - repo, cleanup := createFilledRepo(b, 3, 0) + repo, cleanup := repository.TestRepository(b) defer cleanup() idx, err := New(repo, nil) test.OK(b, err) + for i := 0; i < 8000; i++ { + entries := make([]restic.Blob, 0, 200) + for j := 0; j < len(entries); j++ { + entries = append(entries, restic.Blob{ + ID: restic.NewRandomID(), + Length: 1000, + Offset: 5, + Type: restic.DataBlob, + }) + } + + idx.AddPack(restic.NewRandomID(), 10000, entries) + } + b.ResetTimer() for i := 0; i < b.N; i++ { From 8dd7fe82ff7b1e35f0619dc0c36b0553f5641047 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Fri, 20 Jan 2017 14:46:14 +0100 Subject: [PATCH 18/18] Add TestIndexSave --- 
src/restic/index/index_test.go | 38 +++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/restic/index/index_test.go b/src/restic/index/index_test.go index 8b86b7d53..27aba6491 100644 --- a/src/restic/index/index_test.go +++ b/src/restic/index/index_test.go @@ -3,6 +3,7 @@ package index import ( "math/rand" "restic" + "restic/checker" "restic/repository" "restic/test" "testing" @@ -204,7 +205,7 @@ func loadIndex(t testing.TB, repo restic.Repository) *Index { return idx } -func TestIndexSave(t *testing.T) { +func TestSave(t *testing.T) { repo, cleanup := createFilledRepo(t, 3, 0) defer cleanup() @@ -254,6 +255,41 @@ func TestIndexSave(t *testing.T) { } } +func TestIndexSave(t *testing.T) { + repo, cleanup := createFilledRepo(t, 3, 0) + defer cleanup() + + idx := loadIndex(t, repo) + + id, err := idx.Save(repo, idx.IndexIDs.List()) + if err != nil { + t.Fatalf("unable to save new index: %v", err) + } + + t.Logf("new index saved as %v", id.Str()) + + for id := range idx.IndexIDs { + t.Logf("remove index %v", id.Str()) + err = repo.Backend().Remove(restic.IndexFile, id.String()) + if err != nil { + t.Errorf("error removing index %v: %v", id, err) + } + } + + idx2 := loadIndex(t, repo) + t.Logf("load new index with %d packs", len(idx2.Packs)) + + checker := checker.New(repo) + hints, errs := checker.LoadIndex() + for _, h := range hints { + t.Logf("hint: %v\n", h) + } + + for _, err := range errs { + t.Errorf("checker found error: %v", err) + } +} + func TestIndexAddRemovePack(t *testing.T) { repo, cleanup := createFilledRepo(t, 3, 0) defer cleanup()