2
2
mirror of https://github.com/octoleo/restic.git synced 2025-01-03 15:17:28 +00:00
restic/internal/repository/index_test.go
Michael Eischer 1ebd57247a repository: optimize MasterIndex.Each
Sending data through a channel at very high frequency is extremely
inefficient. Thus use simple callbacks instead of channels.

> name                old time/op  new time/op  delta
> MasterIndexEach-16   6.68s ±24%   0.96s ± 2%  -85.64%  (p=0.008 n=5+5)
2022-09-24 12:21:59 +02:00

650 lines
16 KiB
Go

package repository_test
import (
"bytes"
"context"
"math/rand"
"sync"
"testing"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
func TestIndexSerialize(t *testing.T) {
tests := []restic.PackedBlob{}
idx := repository.NewIndex()
// create 50 packs with 20 blobs each
for i := 0; i < 50; i++ {
packID := restic.NewRandomID()
var blobs []restic.Blob
pos := uint(0)
for j := 0; j < 20; j++ {
length := uint(i*100 + j)
uncompressedLength := uint(0)
if i >= 25 {
// test a mix of compressed and uncompressed packs
uncompressedLength = 2 * length
}
pb := restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
UncompressedLength: uncompressedLength,
},
PackID: packID,
}
blobs = append(blobs, pb.Blob)
tests = append(tests, pb)
pos += length
}
idx.StorePack(packID, blobs)
}
wr := bytes.NewBuffer(nil)
err := idx.Encode(wr)
rtest.OK(t, err)
idx2ID := restic.NewRandomID()
idx2, oldFormat, err := repository.DecodeIndex(wr.Bytes(), idx2ID)
rtest.OK(t, err)
rtest.Assert(t, idx2 != nil,
"nil returned for decoded index")
rtest.Assert(t, !oldFormat, "new index format recognized as old format")
indexID, err := idx2.IDs()
rtest.OK(t, err)
rtest.Equals(t, indexID, restic.IDs{idx2ID})
wr2 := bytes.NewBuffer(nil)
err = idx2.Encode(wr2)
rtest.OK(t, err)
for _, testBlob := range tests {
list := idx.Lookup(testBlob.BlobHandle, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list), list)
}
result := list[0]
rtest.Equals(t, testBlob, result)
list2 := idx2.Lookup(testBlob.BlobHandle, nil)
if len(list2) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list2), list2)
}
result2 := list2[0]
rtest.Equals(t, testBlob, result2)
}
// add more blobs to idx
newtests := []restic.PackedBlob{}
for i := 0; i < 10; i++ {
packID := restic.NewRandomID()
var blobs []restic.Blob
pos := uint(0)
for j := 0; j < 10; j++ {
length := uint(i*100 + j)
pb := restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
},
PackID: packID,
}
blobs = append(blobs, pb.Blob)
newtests = append(newtests, pb)
pos += length
}
idx.StorePack(packID, blobs)
}
// finalize; serialize idx, unserialize to idx3
idx.Finalize()
wr3 := bytes.NewBuffer(nil)
err = idx.Encode(wr3)
rtest.OK(t, err)
rtest.Assert(t, idx.Final(),
"index not final after encoding")
id := restic.NewRandomID()
rtest.OK(t, idx.SetID(id))
ids, err := idx.IDs()
rtest.OK(t, err)
rtest.Equals(t, restic.IDs{id}, ids)
idx3, oldFormat, err := repository.DecodeIndex(wr3.Bytes(), id)
rtest.OK(t, err)
rtest.Assert(t, idx3 != nil,
"nil returned for decoded index")
rtest.Assert(t, idx3.Final(),
"decoded index is not final")
rtest.Assert(t, !oldFormat, "new index format recognized as old format")
// all new blobs must be in the index
for _, testBlob := range newtests {
list := idx3.Lookup(testBlob.BlobHandle, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list), list)
}
blob := list[0]
rtest.Equals(t, testBlob, blob)
}
}
func TestIndexSize(t *testing.T) {
idx := repository.NewIndex()
packs := 200
blobCount := 100
for i := 0; i < packs; i++ {
packID := restic.NewRandomID()
var blobs []restic.Blob
pos := uint(0)
for j := 0; j < blobCount; j++ {
length := uint(i*100 + j)
blobs = append(blobs, restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
})
pos += length
}
idx.StorePack(packID, blobs)
}
wr := bytes.NewBuffer(nil)
err := idx.Encode(wr)
rtest.OK(t, err)
t.Logf("Index file size for %d blobs in %d packs is %d", blobCount*packs, packs, wr.Len())
}
// example index serialization from doc/Design.rst
var docExampleV1 = []byte(`
{
"supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
],
"packs": [
{
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 38
},{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 112
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123
}
]
}
]
}
`)
var docExampleV2 = []byte(`
{
"supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
],
"packs": [
{
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 38
},
{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 112,
"uncompressed_length": 511
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123,
"uncompressed_length": 234
}
]
}
]
}
`)
var docOldExample = []byte(`
[ {
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 38
},{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 112
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123
}
]
} ]
`)
var exampleTests = []struct {
id, packID restic.ID
tpe restic.BlobType
offset, length uint
uncompressedLength uint
}{
{
restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.DataBlob, 0, 38, 0,
}, {
restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.TreeBlob, 38, 112, 511,
}, {
restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.DataBlob, 150, 123, 234,
},
}
var exampleLookupTest = struct {
packID restic.ID
blobs map[restic.ID]restic.BlobType
}{
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
map[restic.ID]restic.BlobType{
restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"): restic.DataBlob,
restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"): restic.TreeBlob,
restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"): restic.DataBlob,
},
}
func TestIndexUnserialize(t *testing.T) {
for _, task := range []struct {
idxBytes []byte
version int
}{
{docExampleV1, 1},
{docExampleV2, 2},
} {
oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")}
idx, oldFormat, err := repository.DecodeIndex(task.idxBytes, restic.NewRandomID())
rtest.OK(t, err)
rtest.Assert(t, !oldFormat, "new index format recognized as old format")
for _, test := range exampleTests {
list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
}
blob := list[0]
t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
rtest.Equals(t, test.packID, blob.PackID)
rtest.Equals(t, test.tpe, blob.Type)
rtest.Equals(t, test.offset, blob.Offset)
rtest.Equals(t, test.length, blob.Length)
if task.version == 1 {
rtest.Equals(t, uint(0), blob.UncompressedLength)
} else if task.version == 2 {
rtest.Equals(t, test.uncompressedLength, blob.UncompressedLength)
} else {
t.Fatal("Invalid index version")
}
}
rtest.Equals(t, oldIdx, idx.Supersedes())
blobs := listPack(idx, exampleLookupTest.packID)
if len(blobs) != len(exampleLookupTest.blobs) {
t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
}
for _, blob := range blobs {
b, ok := exampleLookupTest.blobs[blob.ID]
if !ok {
t.Errorf("unexpected blob %v found", blob.ID.Str())
}
if blob.Type != b {
t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
}
}
}
}
func listPack(idx *repository.Index, id restic.ID) (pbs []restic.PackedBlob) {
idx.Each(context.TODO(), func(pb restic.PackedBlob) {
if pb.PackID.Equal(id) {
pbs = append(pbs, pb)
}
})
return pbs
}
var (
benchmarkIndexJSON []byte
benchmarkIndexJSONOnce sync.Once
)
func initBenchmarkIndexJSON() {
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
var buf bytes.Buffer
err := idx.Encode(&buf)
if err != nil {
panic(err)
}
benchmarkIndexJSON = buf.Bytes()
}
func BenchmarkDecodeIndex(b *testing.B) {
benchmarkIndexJSONOnce.Do(initBenchmarkIndexJSON)
id := restic.NewRandomID()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _, err := repository.DecodeIndex(benchmarkIndexJSON, id)
rtest.OK(b, err)
}
}
func BenchmarkDecodeIndexParallel(b *testing.B) {
benchmarkIndexJSONOnce.Do(initBenchmarkIndexJSON)
id := restic.NewRandomID()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, _, err := repository.DecodeIndex(benchmarkIndexJSON, id)
rtest.OK(b, err)
}
})
}
func TestIndexUnserializeOld(t *testing.T) {
idx, oldFormat, err := repository.DecodeIndex(docOldExample, restic.NewRandomID())
rtest.OK(t, err)
rtest.Assert(t, oldFormat, "old index format recognized as new format")
for _, test := range exampleTests {
list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
}
blob := list[0]
rtest.Equals(t, test.packID, blob.PackID)
rtest.Equals(t, test.tpe, blob.Type)
rtest.Equals(t, test.offset, blob.Offset)
rtest.Equals(t, test.length, blob.Length)
}
rtest.Equals(t, 0, len(idx.Supersedes()))
}
func TestIndexPacks(t *testing.T) {
idx := repository.NewIndex()
packs := restic.NewIDSet()
for i := 0; i < 20; i++ {
packID := restic.NewRandomID()
idx.StorePack(packID, []restic.Blob{
{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: 0,
Length: 23,
},
})
packs.Insert(packID)
}
idxPacks := idx.Packs()
rtest.Assert(t, packs.Equals(idxPacks), "packs in index do not match packs added to index")
}
const maxPackSize = 16 * 1024 * 1024
// This function generates a (insecure) random ID, similar to NewRandomID
func NewRandomTestID(rng *rand.Rand) restic.ID {
id := restic.ID{}
rng.Read(id[:])
return id
}
func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lookupBh restic.BlobHandle) {
idx = repository.NewIndex()
// create index with given number of pack files
for i := 0; i < packfiles; i++ {
packID := NewRandomTestID(rng)
var blobs []restic.Blob
offset := 0
for offset < maxPackSize {
size := 2000 + rng.Intn(4*1024*1024)
id := NewRandomTestID(rng)
blobs = append(blobs, restic.Blob{
BlobHandle: restic.BlobHandle{
Type: restic.DataBlob,
ID: id,
},
Length: uint(size),
UncompressedLength: uint(2 * size),
Offset: uint(offset),
})
offset += size
}
idx.StorePack(packID, blobs)
if i == 0 {
lookupBh = restic.BlobHandle{
Type: restic.DataBlob,
ID: blobs[rng.Intn(len(blobs))].ID,
}
}
}
return idx, lookupBh
}
func BenchmarkIndexHasUnknown(b *testing.B) {
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
lookupBh := restic.NewRandomBlobHandle()
b.ResetTimer()
for i := 0; i < b.N; i++ {
idx.Has(lookupBh)
}
}
func BenchmarkIndexHasKnown(b *testing.B) {
idx, lookupBh := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
b.ResetTimer()
for i := 0; i < b.N; i++ {
idx.Has(lookupBh)
}
}
func BenchmarkIndexAlloc(b *testing.B) {
rng := rand.New(rand.NewSource(0))
b.ReportAllocs()
for i := 0; i < b.N; i++ {
createRandomIndex(rng, 200000)
}
}
func BenchmarkIndexAllocParallel(b *testing.B) {
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
rng := rand.New(rand.NewSource(0))
for pb.Next() {
createRandomIndex(rng, 200000)
}
})
}
func TestIndexHas(t *testing.T) {
tests := []restic.PackedBlob{}
idx := repository.NewIndex()
// create 50 packs with 20 blobs each
for i := 0; i < 50; i++ {
packID := restic.NewRandomID()
var blobs []restic.Blob
pos := uint(0)
for j := 0; j < 20; j++ {
length := uint(i*100 + j)
uncompressedLength := uint(0)
if i >= 25 {
// test a mix of compressed and uncompressed packs
uncompressedLength = 2 * length
}
pb := restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
UncompressedLength: uncompressedLength,
},
PackID: packID,
}
blobs = append(blobs, pb.Blob)
tests = append(tests, pb)
pos += length
}
idx.StorePack(packID, blobs)
}
for _, testBlob := range tests {
rtest.Assert(t, idx.Has(testBlob.BlobHandle), "Index reports not having data blob added to it")
}
rtest.Assert(t, !idx.Has(restic.NewRandomBlobHandle()), "Index reports having a data blob not added to it")
rtest.Assert(t, !idx.Has(restic.BlobHandle{ID: tests[0].ID, Type: restic.TreeBlob}), "Index reports having a tree blob added to it with the same id as a data blob")
}
func TestMixedEachByPack(t *testing.T) {
idx := repository.NewIndex()
expected := make(map[restic.ID]int)
// create 50 packs with 2 blobs each
for i := 0; i < 50; i++ {
packID := restic.NewRandomID()
expected[packID] = 1
blobs := []restic.Blob{
{
BlobHandle: restic.BlobHandle{Type: restic.DataBlob, ID: restic.NewRandomID()},
Offset: 0,
Length: 42,
},
{
BlobHandle: restic.BlobHandle{Type: restic.TreeBlob, ID: restic.NewRandomID()},
Offset: 42,
Length: 43,
},
}
idx.StorePack(packID, blobs)
}
reported := make(map[restic.ID]int)
for bp := range idx.EachByPack(context.TODO(), restic.NewIDSet()) {
reported[bp.PackID]++
rtest.Equals(t, 2, len(bp.Blobs)) // correct blob count
if bp.Blobs[0].Offset > bp.Blobs[1].Offset {
bp.Blobs[1], bp.Blobs[0] = bp.Blobs[0], bp.Blobs[1]
}
b0 := bp.Blobs[0]
rtest.Assert(t, b0.Type == restic.DataBlob && b0.Offset == 0 && b0.Length == 42, "wrong blob", b0)
b1 := bp.Blobs[1]
rtest.Assert(t, b1.Type == restic.TreeBlob && b1.Offset == 42 && b1.Length == 43, "wrong blob", b1)
}
rtest.Equals(t, expected, reported)
}
func TestEachByPackIgnoes(t *testing.T) {
idx := repository.NewIndex()
ignores := restic.NewIDSet()
expected := make(map[restic.ID]int)
// create 50 packs with one blob each
for i := 0; i < 50; i++ {
packID := restic.NewRandomID()
if i < 3 {
ignores.Insert(packID)
} else {
expected[packID] = 1
}
blobs := []restic.Blob{
{
BlobHandle: restic.BlobHandle{Type: restic.DataBlob, ID: restic.NewRandomID()},
Offset: 0,
Length: 42,
},
}
idx.StorePack(packID, blobs)
}
idx.Finalize()
reported := make(map[restic.ID]int)
for bp := range idx.EachByPack(context.TODO(), ignores) {
reported[bp.PackID]++
rtest.Equals(t, 1, len(bp.Blobs)) // correct blob count
b0 := bp.Blobs[0]
rtest.Assert(t, b0.Type == restic.DataBlob && b0.Offset == 0 && b0.Length == 42, "wrong blob", b0)
}
rtest.Equals(t, expected, reported)
}