2
2
mirror of https://github.com/octoleo/restic.git synced 2024-12-23 11:28:54 +00:00
restic/internal/repository/index_test.go
Michael Eischer 04c23fa95d rebuild-index: correctly rebuild index for mixed packs
For mixed packs, data and tree blobs were stored in separate index
entries. This results in warning from the check command and maybe other
problems.
2022-07-02 19:24:02 +02:00

616 lines
15 KiB
Go

package repository_test
import (
"bytes"
"context"
"math/rand"
"sync"
"testing"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
rtest "github.com/restic/restic/internal/test"
)
func TestIndexSerialize(t *testing.T) {
tests := []restic.PackedBlob{}
idx := repository.NewIndex()
// create 50 packs with 20 blobs each
for i := 0; i < 50; i++ {
packID := restic.NewRandomID()
var blobs []restic.Blob
pos := uint(0)
for j := 0; j < 20; j++ {
length := uint(i*100 + j)
uncompressedLength := uint(0)
if i >= 25 {
// test a mix of compressed and uncompressed packs
uncompressedLength = 2 * length
}
pb := restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
UncompressedLength: uncompressedLength,
},
PackID: packID,
}
blobs = append(blobs, pb.Blob)
tests = append(tests, pb)
pos += length
}
idx.StorePack(packID, blobs)
}
wr := bytes.NewBuffer(nil)
err := idx.Encode(wr)
rtest.OK(t, err)
idx2ID := restic.NewRandomID()
idx2, oldFormat, err := repository.DecodeIndex(wr.Bytes(), idx2ID)
rtest.OK(t, err)
rtest.Assert(t, idx2 != nil,
"nil returned for decoded index")
rtest.Assert(t, !oldFormat, "new index format recognized as old format")
indexID, err := idx2.IDs()
rtest.OK(t, err)
rtest.Equals(t, indexID, restic.IDs{idx2ID})
wr2 := bytes.NewBuffer(nil)
err = idx2.Encode(wr2)
rtest.OK(t, err)
for _, testBlob := range tests {
list := idx.Lookup(testBlob.BlobHandle, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list), list)
}
result := list[0]
rtest.Equals(t, testBlob, result)
list2 := idx2.Lookup(testBlob.BlobHandle, nil)
if len(list2) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list2), list2)
}
result2 := list2[0]
rtest.Equals(t, testBlob, result2)
}
// add more blobs to idx
newtests := []restic.PackedBlob{}
for i := 0; i < 10; i++ {
packID := restic.NewRandomID()
var blobs []restic.Blob
pos := uint(0)
for j := 0; j < 10; j++ {
length := uint(i*100 + j)
pb := restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
},
PackID: packID,
}
blobs = append(blobs, pb.Blob)
newtests = append(newtests, pb)
pos += length
}
idx.StorePack(packID, blobs)
}
// finalize; serialize idx, unserialize to idx3
idx.Finalize()
wr3 := bytes.NewBuffer(nil)
err = idx.Encode(wr3)
rtest.OK(t, err)
rtest.Assert(t, idx.Final(),
"index not final after encoding")
id := restic.NewRandomID()
rtest.OK(t, idx.SetID(id))
ids, err := idx.IDs()
rtest.OK(t, err)
rtest.Equals(t, restic.IDs{id}, ids)
idx3, oldFormat, err := repository.DecodeIndex(wr3.Bytes(), id)
rtest.OK(t, err)
rtest.Assert(t, idx3 != nil,
"nil returned for decoded index")
rtest.Assert(t, idx3.Final(),
"decoded index is not final")
rtest.Assert(t, !oldFormat, "new index format recognized as old format")
// all new blobs must be in the index
for _, testBlob := range newtests {
list := idx3.Lookup(testBlob.BlobHandle, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", testBlob.ID.Str(), len(list), list)
}
blob := list[0]
rtest.Equals(t, testBlob, blob)
}
}
func TestIndexSize(t *testing.T) {
idx := repository.NewIndex()
packs := 200
blobCount := 100
for i := 0; i < packs; i++ {
packID := restic.NewRandomID()
var blobs []restic.Blob
pos := uint(0)
for j := 0; j < blobCount; j++ {
length := uint(i*100 + j)
blobs = append(blobs, restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
})
pos += length
}
idx.StorePack(packID, blobs)
}
wr := bytes.NewBuffer(nil)
err := idx.Encode(wr)
rtest.OK(t, err)
t.Logf("Index file size for %d blobs in %d packs is %d", blobCount*packs, packs, wr.Len())
}
// example index serialization from doc/Design.rst
var docExampleV1 = []byte(`
{
"supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
],
"packs": [
{
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 38
},{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 112
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123
}
]
}
]
}
`)
var docExampleV2 = []byte(`
{
"supersedes": [
"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
],
"packs": [
{
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 38
},
{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 112,
"uncompressed_length": 511
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123,
"uncompressed_length": 234
}
]
}
]
}
`)
var docOldExample = []byte(`
[ {
"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
"blobs": [
{
"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
"type": "data",
"offset": 0,
"length": 38
},{
"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
"type": "tree",
"offset": 38,
"length": 112
},
{
"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
"type": "data",
"offset": 150,
"length": 123
}
]
} ]
`)
var exampleTests = []struct {
id, packID restic.ID
tpe restic.BlobType
offset, length uint
uncompressedLength uint
}{
{
restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.DataBlob, 0, 38, 0,
}, {
restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.TreeBlob, 38, 112, 511,
}, {
restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"),
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
restic.DataBlob, 150, 123, 234,
},
}
var exampleLookupTest = struct {
packID restic.ID
blobs map[restic.ID]restic.BlobType
}{
restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
map[restic.ID]restic.BlobType{
restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"): restic.DataBlob,
restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"): restic.TreeBlob,
restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"): restic.DataBlob,
},
}
func TestIndexUnserialize(t *testing.T) {
for _, task := range []struct {
idxBytes []byte
version int
}{
{docExampleV1, 1},
{docExampleV2, 2},
} {
oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")}
idx, oldFormat, err := repository.DecodeIndex(task.idxBytes, restic.NewRandomID())
rtest.OK(t, err)
rtest.Assert(t, !oldFormat, "new index format recognized as old format")
for _, test := range exampleTests {
list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
}
blob := list[0]
t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
rtest.Equals(t, test.packID, blob.PackID)
rtest.Equals(t, test.tpe, blob.Type)
rtest.Equals(t, test.offset, blob.Offset)
rtest.Equals(t, test.length, blob.Length)
if task.version == 1 {
rtest.Equals(t, uint(0), blob.UncompressedLength)
} else if task.version == 2 {
rtest.Equals(t, test.uncompressedLength, blob.UncompressedLength)
} else {
t.Fatal("Invalid index version")
}
}
rtest.Equals(t, oldIdx, idx.Supersedes())
blobs := listPack(idx, exampleLookupTest.packID)
if len(blobs) != len(exampleLookupTest.blobs) {
t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
}
for _, blob := range blobs {
b, ok := exampleLookupTest.blobs[blob.ID]
if !ok {
t.Errorf("unexpected blob %v found", blob.ID.Str())
}
if blob.Type != b {
t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
}
}
}
}
func listPack(idx *repository.Index, id restic.ID) (pbs []restic.PackedBlob) {
for pb := range idx.Each(context.TODO()) {
if pb.PackID.Equal(id) {
pbs = append(pbs, pb)
}
}
return pbs
}
var (
benchmarkIndexJSON []byte
benchmarkIndexJSONOnce sync.Once
)
func initBenchmarkIndexJSON() {
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
var buf bytes.Buffer
err := idx.Encode(&buf)
if err != nil {
panic(err)
}
benchmarkIndexJSON = buf.Bytes()
}
func BenchmarkDecodeIndex(b *testing.B) {
benchmarkIndexJSONOnce.Do(initBenchmarkIndexJSON)
id := restic.NewRandomID()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _, err := repository.DecodeIndex(benchmarkIndexJSON, id)
rtest.OK(b, err)
}
}
func BenchmarkDecodeIndexParallel(b *testing.B) {
benchmarkIndexJSONOnce.Do(initBenchmarkIndexJSON)
id := restic.NewRandomID()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, _, err := repository.DecodeIndex(benchmarkIndexJSON, id)
rtest.OK(b, err)
}
})
}
func TestIndexUnserializeOld(t *testing.T) {
idx, oldFormat, err := repository.DecodeIndex(docOldExample, restic.NewRandomID())
rtest.OK(t, err)
rtest.Assert(t, oldFormat, "old index format recognized as new format")
for _, test := range exampleTests {
list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
if len(list) != 1 {
t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
}
blob := list[0]
rtest.Equals(t, test.packID, blob.PackID)
rtest.Equals(t, test.tpe, blob.Type)
rtest.Equals(t, test.offset, blob.Offset)
rtest.Equals(t, test.length, blob.Length)
}
rtest.Equals(t, 0, len(idx.Supersedes()))
}
func TestIndexPacks(t *testing.T) {
idx := repository.NewIndex()
packs := restic.NewIDSet()
for i := 0; i < 20; i++ {
packID := restic.NewRandomID()
idx.StorePack(packID, []restic.Blob{
{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: 0,
Length: 23,
},
})
packs.Insert(packID)
}
idxPacks := idx.Packs()
rtest.Assert(t, packs.Equals(idxPacks), "packs in index do not match packs added to index")
}
const maxPackSize = 16 * 1024 * 1024
// This function generates a (insecure) random ID, similar to NewRandomID
func NewRandomTestID(rng *rand.Rand) restic.ID {
id := restic.ID{}
rng.Read(id[:])
return id
}
func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lookupBh restic.BlobHandle) {
idx = repository.NewIndex()
// create index with given number of pack files
for i := 0; i < packfiles; i++ {
packID := NewRandomTestID(rng)
var blobs []restic.Blob
offset := 0
for offset < maxPackSize {
size := 2000 + rng.Intn(4*1024*1024)
id := NewRandomTestID(rng)
blobs = append(blobs, restic.Blob{
BlobHandle: restic.BlobHandle{
Type: restic.DataBlob,
ID: id,
},
Length: uint(size),
UncompressedLength: uint(2 * size),
Offset: uint(offset),
})
offset += size
}
idx.StorePack(packID, blobs)
if i == 0 {
lookupBh = restic.BlobHandle{
Type: restic.DataBlob,
ID: blobs[rng.Intn(len(blobs))].ID,
}
}
}
return idx, lookupBh
}
func BenchmarkIndexHasUnknown(b *testing.B) {
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
lookupBh := restic.NewRandomBlobHandle()
b.ResetTimer()
for i := 0; i < b.N; i++ {
idx.Has(lookupBh)
}
}
func BenchmarkIndexHasKnown(b *testing.B) {
idx, lookupBh := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
b.ResetTimer()
for i := 0; i < b.N; i++ {
idx.Has(lookupBh)
}
}
func BenchmarkIndexAlloc(b *testing.B) {
rng := rand.New(rand.NewSource(0))
b.ReportAllocs()
for i := 0; i < b.N; i++ {
createRandomIndex(rng, 200000)
}
}
func BenchmarkIndexAllocParallel(b *testing.B) {
b.ReportAllocs()
b.RunParallel(func(pb *testing.PB) {
rng := rand.New(rand.NewSource(0))
for pb.Next() {
createRandomIndex(rng, 200000)
}
})
}
func TestIndexHas(t *testing.T) {
tests := []restic.PackedBlob{}
idx := repository.NewIndex()
// create 50 packs with 20 blobs each
for i := 0; i < 50; i++ {
packID := restic.NewRandomID()
var blobs []restic.Blob
pos := uint(0)
for j := 0; j < 20; j++ {
length := uint(i*100 + j)
uncompressedLength := uint(0)
if i >= 25 {
// test a mix of compressed and uncompressed packs
uncompressedLength = 2 * length
}
pb := restic.PackedBlob{
Blob: restic.Blob{
BlobHandle: restic.NewRandomBlobHandle(),
Offset: pos,
Length: length,
UncompressedLength: uncompressedLength,
},
PackID: packID,
}
blobs = append(blobs, pb.Blob)
tests = append(tests, pb)
pos += length
}
idx.StorePack(packID, blobs)
}
for _, testBlob := range tests {
rtest.Assert(t, idx.Has(testBlob.BlobHandle), "Index reports not having data blob added to it")
}
rtest.Assert(t, !idx.Has(restic.NewRandomBlobHandle()), "Index reports having a data blob not added to it")
rtest.Assert(t, !idx.Has(restic.BlobHandle{ID: tests[0].ID, Type: restic.TreeBlob}), "Index reports having a tree blob added to it with the same id as a data blob")
}
func TestMixedEachByPack(t *testing.T) {
idx := repository.NewIndex()
expected := make(map[restic.ID]int)
// create 50 packs with 2 blobs each
for i := 0; i < 50; i++ {
packID := restic.NewRandomID()
expected[packID] = 1
blobs := []restic.Blob{
{
BlobHandle: restic.BlobHandle{Type: restic.DataBlob, ID: restic.NewRandomID()},
Offset: 0,
Length: 42,
},
{
BlobHandle: restic.BlobHandle{Type: restic.TreeBlob, ID: restic.NewRandomID()},
Offset: 42,
Length: 43,
},
}
idx.StorePack(packID, blobs)
}
reported := make(map[restic.ID]int)
for bp := range idx.EachByPack(context.TODO(), restic.NewIDSet()) {
reported[bp.PackID]++
rtest.Equals(t, 2, len(bp.Blobs)) // correct blob count
if bp.Blobs[0].Offset > bp.Blobs[1].Offset {
bp.Blobs[1], bp.Blobs[0] = bp.Blobs[0], bp.Blobs[1]
}
b0 := bp.Blobs[0]
rtest.Assert(t, b0.Type == restic.DataBlob && b0.Offset == 0 && b0.Length == 42, "wrong blob", b0)
b1 := bp.Blobs[1]
rtest.Assert(t, b1.Type == restic.TreeBlob && b1.Offset == 42 && b1.Length == 43, "wrong blob", b1)
}
rtest.Equals(t, expected, reported)
}