mirror of
https://github.com/octoleo/restic.git
synced 2025-01-10 18:04:38 +00:00
Merge pull request #2818 from aawsome/merge-index-chaining
Merge index (based on chaining index implementation)
This commit is contained in:
commit
bcd47ec3a2
@ -190,6 +190,8 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
|
||||
}
|
||||
}
|
||||
|
||||
c.masterIndex.MergeFinalIndexes()
|
||||
|
||||
err = c.repo.SetIndex(c.masterIndex)
|
||||
if err != nil {
|
||||
debug.Log("SetIndex returned error: %v", err)
|
||||
|
@ -49,8 +49,8 @@ type Index struct {
|
||||
// only used by Store, StorePacks does not check for already saved packIDs
|
||||
packIDToIndex map[restic.ID]int
|
||||
|
||||
final bool // set to true for all indexes read from the backend ("finalized")
|
||||
id restic.ID // set to the ID of the index when it's finalized
|
||||
final bool // set to true for all indexes read from the backend ("finalized")
|
||||
ids restic.IDs // set to the IDs of the contained finalized indexes
|
||||
supersedes restic.IDs
|
||||
created time.Time
|
||||
}
|
||||
@ -393,17 +393,17 @@ func (idx *Index) Finalize() {
|
||||
idx.packIDToIndex = nil
|
||||
}
|
||||
|
||||
// ID returns the ID of the index, if available. If the index is not yet
|
||||
// IDs returns the IDs of the index, if available. If the index is not yet
|
||||
// finalized, an error is returned.
|
||||
func (idx *Index) ID() (restic.ID, error) {
|
||||
func (idx *Index) IDs() (restic.IDs, error) {
|
||||
idx.m.Lock()
|
||||
defer idx.m.Unlock()
|
||||
|
||||
if !idx.final {
|
||||
return restic.ID{}, errors.New("index not finalized")
|
||||
return nil, errors.New("index not finalized")
|
||||
}
|
||||
|
||||
return idx.id, nil
|
||||
return idx.ids, nil
|
||||
}
|
||||
|
||||
// SetID sets the ID the index has been written to. This requires that
|
||||
@ -416,12 +416,12 @@ func (idx *Index) SetID(id restic.ID) error {
|
||||
return errors.New("index is not final")
|
||||
}
|
||||
|
||||
if !idx.id.IsNull() {
|
||||
if len(idx.ids) > 0 {
|
||||
return errors.New("ID already set")
|
||||
}
|
||||
|
||||
debug.Log("ID set to %v", id)
|
||||
idx.id = id
|
||||
idx.ids = append(idx.ids, id)
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -462,6 +462,38 @@ func (idx *Index) TreePacks() restic.IDs {
|
||||
return idx.treePacks
|
||||
}
|
||||
|
||||
// merge() merges indexes, i.e. idx.merge(idx2) merges the contents of idx2 into idx.
|
||||
// idx2 is not changed by this method.
|
||||
func (idx *Index) merge(idx2 *Index) error {
|
||||
idx.m.Lock()
|
||||
defer idx.m.Unlock()
|
||||
idx2.m.Lock()
|
||||
defer idx2.m.Unlock()
|
||||
|
||||
if !idx2.final {
|
||||
return errors.New("index to merge is not final!")
|
||||
}
|
||||
|
||||
packlen := len(idx.packs)
|
||||
// copy all index entries of idx2 to idx
|
||||
for typ := range idx2.byType {
|
||||
m2 := &idx2.byType[typ]
|
||||
m := &idx.byType[typ]
|
||||
m2.foreach(func(entry *indexEntry) bool {
|
||||
// packIndex is changed as idx2.pack is appended to idx.pack, see below
|
||||
m.add(entry.id, entry.packIndex+packlen, entry.offset, entry.length)
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
idx.packs = append(idx.packs, idx2.packs...)
|
||||
idx.treePacks = append(idx.treePacks, idx2.treePacks...)
|
||||
idx.ids = append(idx.ids, idx2.ids...)
|
||||
idx.supersedes = append(idx.supersedes, idx2.supersedes...)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// isErrOldIndex returns true if the error may be caused by an old index
|
||||
// format.
|
||||
func isErrOldIndex(err error) bool {
|
||||
@ -581,7 +613,7 @@ func LoadIndexWithDecoder(ctx context.Context, repo restic.Repository, buf []byt
|
||||
return nil, buf[:0], err
|
||||
}
|
||||
|
||||
idx.id = id
|
||||
idx.ids = append(idx.ids, id)
|
||||
|
||||
return idx, buf, nil
|
||||
}
|
||||
|
@ -135,10 +135,9 @@ func TestIndexSerialize(t *testing.T) {
|
||||
|
||||
id := restic.NewRandomID()
|
||||
rtest.OK(t, idx.SetID(id))
|
||||
id2, err := idx.ID()
|
||||
ids, err := idx.IDs()
|
||||
rtest.OK(t, err)
|
||||
rtest.Assert(t, id2.Equal(id),
|
||||
"wrong ID returned: want %v, got %v", id, id2)
|
||||
rtest.Equals(t, restic.IDs{id}, ids)
|
||||
|
||||
idx3, err := repository.DecodeIndex(wr3.Bytes())
|
||||
rtest.OK(t, err)
|
||||
@ -336,7 +335,7 @@ var (
|
||||
)
|
||||
|
||||
func initBenchmarkIndexJSON() {
|
||||
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)))
|
||||
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
|
||||
var buf bytes.Buffer
|
||||
idx.Encode(&buf)
|
||||
benchmarkIndexJSON = buf.Bytes()
|
||||
@ -418,11 +417,11 @@ func NewRandomTestID(rng *rand.Rand) restic.ID {
|
||||
return id
|
||||
}
|
||||
|
||||
func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.ID) {
|
||||
func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lookupID restic.ID) {
|
||||
idx = repository.NewIndex()
|
||||
|
||||
// create index with 200k pack files
|
||||
for i := 0; i < 200000; i++ {
|
||||
// create index with given number of pack files
|
||||
for i := 0; i < packfiles; i++ {
|
||||
packID := NewRandomTestID(rng)
|
||||
var blobs []restic.Blob
|
||||
offset := 0
|
||||
@ -449,7 +448,7 @@ func createRandomIndex(rng *rand.Rand) (idx *repository.Index, lookupID restic.I
|
||||
}
|
||||
|
||||
func BenchmarkIndexHasUnknown(b *testing.B) {
|
||||
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)))
|
||||
idx, _ := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
|
||||
lookupID := restic.NewRandomID()
|
||||
|
||||
b.ResetTimer()
|
||||
@ -460,7 +459,7 @@ func BenchmarkIndexHasUnknown(b *testing.B) {
|
||||
}
|
||||
|
||||
func BenchmarkIndexHasKnown(b *testing.B) {
|
||||
idx, lookupID := createRandomIndex(rand.New(rand.NewSource(0)))
|
||||
idx, lookupID := createRandomIndex(rand.New(rand.NewSource(0)), 200000)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
@ -474,7 +473,7 @@ func BenchmarkIndexAlloc(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
createRandomIndex(rng)
|
||||
createRandomIndex(rng, 200000)
|
||||
}
|
||||
}
|
||||
|
||||
@ -484,7 +483,7 @@ func BenchmarkIndexAllocParallel(b *testing.B) {
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
rng := rand.New(rand.NewSource(0))
|
||||
for pb.Next() {
|
||||
createRandomIndex(rng)
|
||||
createRandomIndex(rng, 200000)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -18,7 +18,12 @@ type MasterIndex struct {
|
||||
|
||||
// NewMasterIndex creates a new master index.
|
||||
func NewMasterIndex() *MasterIndex {
|
||||
return &MasterIndex{pendingBlobs: restic.NewBlobSet()}
|
||||
// Always add an empty final index, such that MergeFinalIndexes can merge into this.
|
||||
// Note that removing this index could lead to a race condition in the rare
|
||||
// situation that only two indexes exist which are saved and merged concurrently.
|
||||
idx := []*Index{NewIndex()}
|
||||
idx[0].Finalize()
|
||||
return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()}
|
||||
}
|
||||
|
||||
// Lookup queries all known Indexes for the ID and returns the first match.
|
||||
@ -237,6 +242,31 @@ func (mi *MasterIndex) Each(ctx context.Context) <-chan restic.PackedBlob {
|
||||
return ch
|
||||
}
|
||||
|
||||
// MergeFinalIndexes merges all final indexes together.
|
||||
// After calling, there will be only one big final index in MasterIndex
|
||||
// containing all final index contents.
|
||||
// Indexes that are not final are left untouched.
|
||||
// This merging can only be called after all index files are loaded - as
|
||||
// removing of superseded index contents is only possible for unmerged indexes.
|
||||
func (mi *MasterIndex) MergeFinalIndexes() {
|
||||
mi.idxMutex.Lock()
|
||||
defer mi.idxMutex.Unlock()
|
||||
|
||||
// The first index is always final and the one to merge into
|
||||
newIdx := mi.idx[:1]
|
||||
for i := 1; i < len(mi.idx); i++ {
|
||||
idx := mi.idx[i]
|
||||
// clear reference in masterindex as it may become stale
|
||||
mi.idx[i] = nil
|
||||
if !idx.Final() {
|
||||
newIdx = append(newIdx, idx)
|
||||
} else {
|
||||
mi.idx[0].merge(idx)
|
||||
}
|
||||
}
|
||||
mi.idx = newIdx
|
||||
}
|
||||
|
||||
// RebuildIndex combines all known indexes to a new index, leaving out any
|
||||
// packs whose ID is contained in packBlacklist. The new index contains the IDs
|
||||
// of all known indexes in the "supersedes" field.
|
||||
@ -267,15 +297,15 @@ func (mi *MasterIndex) RebuildIndex(packBlacklist restic.IDSet) (*Index, error)
|
||||
continue
|
||||
}
|
||||
|
||||
id, err := idx.ID()
|
||||
ids, err := idx.IDs()
|
||||
if err != nil {
|
||||
debug.Log("index %d does not have an ID: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
debug.Log("adding index id %v to supersedes field", id)
|
||||
debug.Log("adding index ids %v to supersedes field", ids)
|
||||
|
||||
err = newIndex.AddToSupersedes(id)
|
||||
err = newIndex.AddToSupersedes(ids...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -57,11 +57,75 @@ func TestMasterIndexLookup(t *testing.T) {
|
||||
rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id")
|
||||
}
|
||||
|
||||
func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) {
|
||||
idx1, lookupID := createRandomIndex(rand.New(rand.NewSource(0)))
|
||||
func TestMasterMergeFinalIndexes(t *testing.T) {
|
||||
idInIdx1 := restic.NewRandomID()
|
||||
idInIdx2 := restic.NewRandomID()
|
||||
|
||||
blob1 := restic.PackedBlob{
|
||||
PackID: restic.NewRandomID(),
|
||||
Blob: restic.Blob{
|
||||
Type: restic.DataBlob,
|
||||
ID: idInIdx1,
|
||||
Length: 10,
|
||||
Offset: 0,
|
||||
},
|
||||
}
|
||||
|
||||
blob2 := restic.PackedBlob{
|
||||
PackID: restic.NewRandomID(),
|
||||
Blob: restic.Blob{
|
||||
Type: restic.DataBlob,
|
||||
ID: idInIdx2,
|
||||
Length: 100,
|
||||
Offset: 10,
|
||||
},
|
||||
}
|
||||
|
||||
idx1 := repository.NewIndex()
|
||||
idx1.Store(blob1)
|
||||
|
||||
idx2 := repository.NewIndex()
|
||||
idx2.Store(blob2)
|
||||
|
||||
mIdx := repository.NewMasterIndex()
|
||||
mIdx.Insert(idx1)
|
||||
mIdx.Insert(idx2)
|
||||
|
||||
finalIndexes := mIdx.FinalizeNotFinalIndexes()
|
||||
rtest.Equals(t, []*repository.Index{idx1, idx2}, finalIndexes)
|
||||
|
||||
mIdx.MergeFinalIndexes()
|
||||
|
||||
blobs, found := mIdx.Lookup(idInIdx1, restic.DataBlob)
|
||||
rtest.Assert(t, found, "Expected to find blob id %v from index 1", idInIdx1)
|
||||
rtest.Equals(t, []restic.PackedBlob{blob1}, blobs)
|
||||
|
||||
blobs, found = mIdx.Lookup(idInIdx2, restic.DataBlob)
|
||||
rtest.Assert(t, found, "Expected to find blob id %v from index 2", idInIdx2)
|
||||
rtest.Equals(t, []restic.PackedBlob{blob2}, blobs)
|
||||
|
||||
blobs, found = mIdx.Lookup(restic.NewRandomID(), restic.DataBlob)
|
||||
rtest.Assert(t, !found, "Expected to not find a blob when fetching with a random id")
|
||||
rtest.Assert(t, blobs == nil, "Expected no blobs when fetching with a random id")
|
||||
}
|
||||
|
||||
func createRandomMasterIndex(rng *rand.Rand, num, size int) (*repository.MasterIndex, restic.ID) {
|
||||
mIdx := repository.NewMasterIndex()
|
||||
for i := 0; i < num-1; i++ {
|
||||
idx, _ := createRandomIndex(rng, size)
|
||||
mIdx.Insert(idx)
|
||||
}
|
||||
idx1, lookupID := createRandomIndex(rng, size)
|
||||
mIdx.Insert(idx1)
|
||||
|
||||
mIdx.FinalizeNotFinalIndexes()
|
||||
mIdx.MergeFinalIndexes()
|
||||
|
||||
return mIdx, lookupID
|
||||
}
|
||||
|
||||
func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) {
|
||||
mIdx, lookupID := createRandomMasterIndex(rand.New(rand.NewSource(0)), 1, 200000)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
@ -71,16 +135,7 @@ func BenchmarkMasterIndexLookupSingleIndex(b *testing.B) {
|
||||
}
|
||||
|
||||
func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) {
|
||||
rng := rand.New(rand.NewSource(0))
|
||||
mIdx := repository.NewMasterIndex()
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
idx, _ := createRandomIndex(rng)
|
||||
mIdx.Insert(idx)
|
||||
}
|
||||
|
||||
idx1, lookupID := createRandomIndex(rng)
|
||||
mIdx.Insert(idx1)
|
||||
mIdx, lookupID := createRandomMasterIndex(rand.New(rand.NewSource(0)), 100, 10000)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
@ -90,11 +145,9 @@ func BenchmarkMasterIndexLookupMultipleIndex(b *testing.B) {
|
||||
}
|
||||
|
||||
func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) {
|
||||
lookupID := restic.NewRandomID()
|
||||
idx1, _ := createRandomIndex(rand.New(rand.NewSource(0)))
|
||||
|
||||
mIdx := repository.NewMasterIndex()
|
||||
mIdx.Insert(idx1)
|
||||
lookupID := restic.NewRandomID()
|
||||
mIdx, _ := createRandomMasterIndex(rand.New(rand.NewSource(0)), 1, 200000)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
@ -104,14 +157,8 @@ func BenchmarkMasterIndexLookupSingleIndexUnknown(b *testing.B) {
|
||||
}
|
||||
|
||||
func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) {
|
||||
rng := rand.New(rand.NewSource(0))
|
||||
lookupID := restic.NewRandomID()
|
||||
mIdx := repository.NewMasterIndex()
|
||||
|
||||
for i := 0; i < 6; i++ {
|
||||
idx, _ := createRandomIndex(rng)
|
||||
mIdx.Insert(idx)
|
||||
}
|
||||
mIdx, _ := createRandomMasterIndex(rand.New(rand.NewSource(0)), 100, 10000)
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
@ -123,16 +170,12 @@ func BenchmarkMasterIndexLookupMultipleIndexUnknown(b *testing.B) {
|
||||
func BenchmarkMasterIndexLookupParallel(b *testing.B) {
|
||||
mIdx := repository.NewMasterIndex()
|
||||
|
||||
for _, numindices := range []int{5, 10, 20} {
|
||||
for _, numindices := range []int{25, 50, 100} {
|
||||
var lookupID restic.ID
|
||||
|
||||
b.StopTimer()
|
||||
rng := rand.New(rand.NewSource(0))
|
||||
for i := 0; i < numindices; i++ {
|
||||
var idx *repository.Index
|
||||
idx, lookupID = createRandomIndex(rng)
|
||||
mIdx.Insert(idx)
|
||||
}
|
||||
mIdx, lookupID = createRandomMasterIndex(rng, numindices, 10000)
|
||||
b.StartTimer()
|
||||
|
||||
name := fmt.Sprintf("known,indices=%d", numindices)
|
||||
|
@ -361,12 +361,14 @@ func (r *Repository) SetIndex(i restic.Index) error {
|
||||
|
||||
ids := restic.NewIDSet()
|
||||
for _, idx := range r.idx.All() {
|
||||
id, err := idx.ID()
|
||||
indexIDs, err := idx.IDs()
|
||||
if err != nil {
|
||||
debug.Log("not using index, ID() returned error %v", err)
|
||||
continue
|
||||
}
|
||||
ids.Insert(id)
|
||||
for _, id := range indexIDs {
|
||||
ids.Insert(id)
|
||||
}
|
||||
}
|
||||
|
||||
return r.PrepareCache(ids)
|
||||
@ -396,6 +398,7 @@ func (r *Repository) saveIndex(ctx context.Context, indexes ...*Index) error {
|
||||
|
||||
debug.Log("Saved index %d as %v", i, sid)
|
||||
}
|
||||
r.idx.MergeFinalIndexes()
|
||||
|
||||
return nil
|
||||
}
|
||||
@ -479,12 +482,16 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
|
||||
validIndex := restic.NewIDSet()
|
||||
wg.Go(func() error {
|
||||
for idx := range indexCh {
|
||||
id, err := idx.ID()
|
||||
ids, err := idx.IDs()
|
||||
if err == nil {
|
||||
validIndex.Insert(id)
|
||||
for _, id := range ids {
|
||||
validIndex.Insert(id)
|
||||
}
|
||||
}
|
||||
|
||||
r.idx.Insert(idx)
|
||||
}
|
||||
r.idx.MergeFinalIndexes()
|
||||
return nil
|
||||
})
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user