2015-10-12 22:34:12 +02:00
|
|
|
package repository
|
|
|
|
|
|
|
|
import (
|
2017-06-18 14:45:02 +02:00
|
|
|
"context"
|
2015-10-12 22:34:12 +02:00
|
|
|
"sync"
|
|
|
|
|
2017-07-23 14:21:03 +02:00
|
|
|
"github.com/restic/restic/internal/debug"
|
2020-11-16 04:18:55 +01:00
|
|
|
"github.com/restic/restic/internal/pack"
|
2020-11-04 14:11:29 +01:00
|
|
|
"github.com/restic/restic/internal/restic"
|
|
|
|
"github.com/restic/restic/internal/ui/progress"
|
2020-11-12 02:49:53 +01:00
|
|
|
"golang.org/x/sync/errgroup"
|
2015-10-12 22:34:12 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
// MasterIndex is a collection of indexes and IDs of chunks that are in the process of being saved.
|
|
|
|
type MasterIndex struct {
|
2020-06-06 22:20:44 +02:00
|
|
|
idx []*Index
|
|
|
|
pendingBlobs restic.BlobSet
|
|
|
|
idxMutex sync.RWMutex
|
2015-10-12 22:34:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewMasterIndex creates a new master index.
|
|
|
|
func NewMasterIndex() *MasterIndex {
|
2020-07-04 07:06:14 +02:00
|
|
|
// Always add an empty final index, such that MergeFinalIndexes can merge into this.
|
|
|
|
// Note that removing this index could lead to a race condition in the rare
|
|
|
|
// sitation that only two indexes exist which are saved and merged concurrently.
|
|
|
|
idx := []*Index{NewIndex()}
|
|
|
|
idx[0].Finalize()
|
|
|
|
return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()}
|
2015-10-12 22:34:12 +02:00
|
|
|
}
|
|
|
|
|
2020-06-14 13:26:10 +02:00
|
|
|
// Lookup queries all known Indexes for the ID and returns all matches.
|
2020-11-05 22:18:00 +01:00
|
|
|
func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) {
|
2015-10-12 22:34:12 +02:00
|
|
|
mi.idxMutex.RLock()
|
|
|
|
defer mi.idxMutex.RUnlock()
|
|
|
|
|
|
|
|
for _, idx := range mi.idx {
|
2020-11-05 22:18:00 +01:00
|
|
|
pbs = idx.Lookup(bh, pbs)
|
2015-10-12 22:34:12 +02:00
|
|
|
}
|
|
|
|
|
2020-11-05 22:00:41 +01:00
|
|
|
return pbs
|
2015-10-12 22:34:12 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// LookupSize queries all known Indexes for the ID and returns the first match.
|
2020-11-05 22:18:00 +01:00
|
|
|
func (mi *MasterIndex) LookupSize(bh restic.BlobHandle) (uint, bool) {
|
2015-10-12 22:34:12 +02:00
|
|
|
mi.idxMutex.RLock()
|
|
|
|
defer mi.idxMutex.RUnlock()
|
|
|
|
|
|
|
|
for _, idx := range mi.idx {
|
2020-11-05 22:18:00 +01:00
|
|
|
if size, found := idx.LookupSize(bh); found {
|
2018-01-12 01:20:12 -05:00
|
|
|
return size, found
|
2015-10-12 22:34:12 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-12 01:20:12 -05:00
|
|
|
return 0, false
|
2015-10-12 22:34:12 +02:00
|
|
|
}
|
|
|
|
|
2020-06-06 22:20:44 +02:00
|
|
|
// AddPending adds a given blob to list of pending Blobs
|
|
|
|
// Before doing so it checks if this blob is already known.
|
|
|
|
// Returns true if adding was successful and false if the blob
|
|
|
|
// was already known
|
2020-11-05 22:18:00 +01:00
|
|
|
func (mi *MasterIndex) addPending(bh restic.BlobHandle) bool {
|
2020-06-06 22:20:44 +02:00
|
|
|
|
|
|
|
mi.idxMutex.Lock()
|
|
|
|
defer mi.idxMutex.Unlock()
|
|
|
|
|
|
|
|
// Check if blob is pending or in index
|
2020-11-05 22:18:00 +01:00
|
|
|
if mi.pendingBlobs.Has(bh) {
|
2020-06-06 22:20:44 +02:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, idx := range mi.idx {
|
2020-11-05 22:18:00 +01:00
|
|
|
if idx.Has(bh) {
|
2020-06-06 22:20:44 +02:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// really not known -> insert
|
2020-11-05 22:18:00 +01:00
|
|
|
mi.pendingBlobs.Insert(bh)
|
2020-06-06 22:20:44 +02:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2015-10-12 22:34:12 +02:00
|
|
|
// Has queries all known Indexes for the ID and returns the first match.
|
2020-06-06 22:20:44 +02:00
|
|
|
// Also returns true if the ID is pending.
|
2020-11-05 22:18:00 +01:00
|
|
|
func (mi *MasterIndex) Has(bh restic.BlobHandle) bool {
|
2015-10-12 22:34:12 +02:00
|
|
|
mi.idxMutex.RLock()
|
|
|
|
defer mi.idxMutex.RUnlock()
|
|
|
|
|
2020-06-06 22:20:44 +02:00
|
|
|
// also return true if blob is pending
|
2020-11-05 22:18:00 +01:00
|
|
|
if mi.pendingBlobs.Has(bh) {
|
2020-06-06 22:20:44 +02:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2015-10-12 22:34:12 +02:00
|
|
|
for _, idx := range mi.idx {
|
2020-11-05 22:18:00 +01:00
|
|
|
if idx.Has(bh) {
|
2015-10-12 22:34:12 +02:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2020-10-10 07:42:22 +02:00
|
|
|
// Packs returns all packs that are covered by the index.
|
|
|
|
func (mi *MasterIndex) Packs() restic.IDSet {
|
|
|
|
mi.idxMutex.RLock()
|
|
|
|
defer mi.idxMutex.RUnlock()
|
|
|
|
|
|
|
|
packs := restic.NewIDSet()
|
|
|
|
for _, idx := range mi.idx {
|
|
|
|
packs.Merge(idx.Packs())
|
|
|
|
}
|
|
|
|
|
|
|
|
return packs
|
|
|
|
}
|
|
|
|
|
2020-11-16 04:18:55 +01:00
|
|
|
// PackSize returns the size of all packs computed by index information.
|
|
|
|
// If onlyHdr is set to true, only the size of the header is returned
|
|
|
|
// Note that this function only gives correct sizes, if there are no
|
|
|
|
// duplicates in the index.
|
|
|
|
func (mi *MasterIndex) PackSize(ctx context.Context, onlyHdr bool) map[restic.ID]int64 {
|
|
|
|
packSize := make(map[restic.ID]int64)
|
|
|
|
|
|
|
|
for blob := range mi.Each(ctx) {
|
|
|
|
size, ok := packSize[blob.PackID]
|
|
|
|
if !ok {
|
|
|
|
size = pack.HeaderSize
|
|
|
|
}
|
|
|
|
if !onlyHdr {
|
|
|
|
size += int64(blob.Length)
|
|
|
|
}
|
|
|
|
packSize[blob.PackID] = size + int64(pack.EntrySize)
|
|
|
|
}
|
|
|
|
|
|
|
|
return packSize
|
|
|
|
}
|
|
|
|
|
2015-10-12 22:34:12 +02:00
|
|
|
// Count returns the number of blobs of type t in the index.
|
2016-08-31 20:58:57 +02:00
|
|
|
func (mi *MasterIndex) Count(t restic.BlobType) (n uint) {
|
2015-10-12 22:34:12 +02:00
|
|
|
mi.idxMutex.RLock()
|
|
|
|
defer mi.idxMutex.RUnlock()
|
|
|
|
|
|
|
|
var sum uint
|
|
|
|
for _, idx := range mi.idx {
|
|
|
|
sum += idx.Count(t)
|
|
|
|
}
|
|
|
|
|
|
|
|
return sum
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert adds a new index to the MasterIndex.
|
|
|
|
func (mi *MasterIndex) Insert(idx *Index) {
|
|
|
|
mi.idxMutex.Lock()
|
|
|
|
defer mi.idxMutex.Unlock()
|
|
|
|
|
|
|
|
mi.idx = append(mi.idx, idx)
|
|
|
|
}
|
|
|
|
|
2020-07-28 22:24:43 +02:00
|
|
|
// StorePack remembers the id and pack in the index.
|
2020-06-06 22:20:44 +02:00
|
|
|
func (mi *MasterIndex) StorePack(id restic.ID, blobs []restic.Blob) {
|
2017-10-07 05:11:42 -07:00
|
|
|
mi.idxMutex.Lock()
|
|
|
|
defer mi.idxMutex.Unlock()
|
|
|
|
|
2020-06-06 22:20:44 +02:00
|
|
|
// delete blobs from pending
|
|
|
|
for _, blob := range blobs {
|
|
|
|
mi.pendingBlobs.Delete(restic.BlobHandle{Type: blob.Type, ID: blob.ID})
|
|
|
|
}
|
|
|
|
|
2015-10-12 22:34:12 +02:00
|
|
|
for _, idx := range mi.idx {
|
|
|
|
if !idx.Final() {
|
2020-06-06 22:20:44 +02:00
|
|
|
idx.StorePack(id, blobs)
|
2017-01-02 14:14:51 +01:00
|
|
|
return
|
2015-10-12 22:34:12 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
newIdx := NewIndex()
|
2020-06-06 22:20:44 +02:00
|
|
|
newIdx.StorePack(id, blobs)
|
2015-10-12 22:34:12 +02:00
|
|
|
mi.idx = append(mi.idx, newIdx)
|
|
|
|
}
|
|
|
|
|
2020-06-06 22:20:44 +02:00
|
|
|
// FinalizeNotFinalIndexes finalizes all indexes that
|
|
|
|
// have not yet been saved and returns that list
|
|
|
|
func (mi *MasterIndex) FinalizeNotFinalIndexes() []*Index {
|
2015-10-12 22:34:12 +02:00
|
|
|
mi.idxMutex.Lock()
|
|
|
|
defer mi.idxMutex.Unlock()
|
|
|
|
|
|
|
|
var list []*Index
|
|
|
|
|
|
|
|
for _, idx := range mi.idx {
|
|
|
|
if !idx.Final() {
|
2020-06-06 22:20:44 +02:00
|
|
|
idx.Finalize()
|
2015-10-12 22:34:12 +02:00
|
|
|
list = append(list, idx)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-27 22:35:08 +02:00
|
|
|
debug.Log("return %d indexes", len(list))
|
2015-10-12 23:59:17 +02:00
|
|
|
return list
|
|
|
|
}
|
|
|
|
|
2020-06-06 22:20:44 +02:00
|
|
|
// FinalizeFullIndexes finalizes all indexes that are full and returns that list.
|
|
|
|
func (mi *MasterIndex) FinalizeFullIndexes() []*Index {
|
2015-10-12 23:59:17 +02:00
|
|
|
mi.idxMutex.Lock()
|
|
|
|
defer mi.idxMutex.Unlock()
|
|
|
|
|
|
|
|
var list []*Index
|
|
|
|
|
2016-09-27 22:35:08 +02:00
|
|
|
debug.Log("checking %d indexes", len(mi.idx))
|
2015-10-12 23:59:17 +02:00
|
|
|
for _, idx := range mi.idx {
|
|
|
|
if idx.Final() {
|
2016-09-27 22:35:08 +02:00
|
|
|
debug.Log("index %p is final", idx)
|
2015-10-12 23:59:17 +02:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2015-10-25 15:05:22 +01:00
|
|
|
if IndexFull(idx) {
|
2016-09-27 22:35:08 +02:00
|
|
|
debug.Log("index %p is full", idx)
|
2020-06-06 22:20:44 +02:00
|
|
|
idx.Finalize()
|
2015-10-12 23:59:17 +02:00
|
|
|
list = append(list, idx)
|
|
|
|
} else {
|
2016-09-27 22:35:08 +02:00
|
|
|
debug.Log("index %p not full", idx)
|
2015-10-12 23:59:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-27 22:35:08 +02:00
|
|
|
debug.Log("return %d indexes", len(list))
|
2015-10-12 22:34:12 +02:00
|
|
|
return list
|
|
|
|
}
|
|
|
|
|
|
|
|
// All returns all indexes.
|
|
|
|
func (mi *MasterIndex) All() []*Index {
|
|
|
|
mi.idxMutex.Lock()
|
|
|
|
defer mi.idxMutex.Unlock()
|
|
|
|
|
|
|
|
return mi.idx
|
|
|
|
}
|
2015-11-02 19:28:30 +01:00
|
|
|
|
2017-06-18 14:45:02 +02:00
|
|
|
// Each returns a channel that yields all blobs known to the index. When the
|
|
|
|
// context is cancelled, the background goroutine terminates. This blocks any
|
|
|
|
// modification of the index.
|
|
|
|
func (mi *MasterIndex) Each(ctx context.Context) <-chan restic.PackedBlob {
|
|
|
|
mi.idxMutex.RLock()
|
|
|
|
|
|
|
|
ch := make(chan restic.PackedBlob)
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
defer mi.idxMutex.RUnlock()
|
|
|
|
defer func() {
|
|
|
|
close(ch)
|
|
|
|
}()
|
|
|
|
|
|
|
|
for _, idx := range mi.idx {
|
|
|
|
idxCh := idx.Each(ctx)
|
|
|
|
for pb := range idxCh {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
case ch <- pb:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2020-07-04 07:06:14 +02:00
|
|
|
// MergeFinalIndexes merges all final indexes together.
|
|
|
|
// After calling, there will be only one big final index in MasterIndex
|
|
|
|
// containing all final index contents.
|
|
|
|
// Indexes that are not final are left untouched.
|
|
|
|
// This merging can only be called after all index files are loaded - as
|
|
|
|
// removing of superseded index contents is only possible for unmerged indexes.
|
|
|
|
func (mi *MasterIndex) MergeFinalIndexes() {
|
|
|
|
mi.idxMutex.Lock()
|
|
|
|
defer mi.idxMutex.Unlock()
|
|
|
|
|
|
|
|
// The first index is always final and the one to merge into
|
|
|
|
newIdx := mi.idx[:1]
|
|
|
|
for i := 1; i < len(mi.idx); i++ {
|
|
|
|
idx := mi.idx[i]
|
|
|
|
// clear reference in masterindex as it may become stale
|
|
|
|
mi.idx[i] = nil
|
|
|
|
if !idx.Final() {
|
|
|
|
newIdx = append(newIdx, idx)
|
|
|
|
} else {
|
|
|
|
mi.idx[0].merge(idx)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
mi.idx = newIdx
|
|
|
|
}
|
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
// saveIndexParallelism is the value Save passes to RunWorkers when saving
// index files; presumably the number of concurrently running save workers
// (confirm against RunWorkers).
const saveIndexParallelism = 4
|
|
|
|
|
2020-10-10 07:42:22 +02:00
|
|
|
// Save saves all known indexes to index files, leaving out any
|
2020-11-12 02:49:53 +01:00
|
|
|
// packs whose ID is contained in packBlacklist from finalized indexes.
|
|
|
|
// The new index contains the IDs of all known indexes in the "supersedes"
|
|
|
|
// field. The IDs are also returned in the IDSet obsolete.
|
2020-10-10 07:42:22 +02:00
|
|
|
// After calling this function, you should remove the obsolete index files.
|
2020-10-18 09:24:34 +02:00
|
|
|
func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBlacklist restic.IDSet, extraObsolete restic.IDs, p *progress.Counter) (obsolete restic.IDSet, err error) {
|
2015-11-02 19:28:30 +01:00
|
|
|
mi.idxMutex.Lock()
|
|
|
|
defer mi.idxMutex.Unlock()
|
|
|
|
|
2016-09-27 22:35:08 +02:00
|
|
|
debug.Log("start rebuilding index of %d indexes, pack blacklist: %v", len(mi.idx), packBlacklist)
|
2015-11-02 19:28:30 +01:00
|
|
|
|
|
|
|
newIndex := NewIndex()
|
2020-10-10 07:42:22 +02:00
|
|
|
obsolete = restic.NewIDSet()
|
2017-06-18 14:45:02 +02:00
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
// track spawned goroutines using wg, create a new context which is
|
|
|
|
// cancelled as soon as an error occurs.
|
|
|
|
wg, ctx := errgroup.WithContext(ctx)
|
2015-11-02 19:28:30 +01:00
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
ch := make(chan *Index)
|
|
|
|
|
|
|
|
wg.Go(func() error {
|
|
|
|
defer close(ch)
|
|
|
|
for i, idx := range mi.idx {
|
|
|
|
if idx.Final() {
|
|
|
|
ids, err := idx.IDs()
|
|
|
|
if err != nil {
|
|
|
|
debug.Log("index %d does not have an ID: %v", err)
|
|
|
|
return err
|
|
|
|
}
|
2015-11-02 19:28:30 +01:00
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
debug.Log("adding index ids %v to supersedes field", ids)
|
|
|
|
|
|
|
|
err = newIndex.AddToSupersedes(ids...)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
obsolete.Merge(restic.NewIDSet(ids...))
|
|
|
|
} else {
|
|
|
|
debug.Log("index %d isn't final, don't add to supersedes field", i)
|
|
|
|
}
|
|
|
|
|
|
|
|
debug.Log("adding index %d", i)
|
|
|
|
|
|
|
|
for pbs := range idx.EachByPack(ctx, packBlacklist) {
|
|
|
|
newIndex.StorePack(pbs.packID, pbs.blobs)
|
|
|
|
p.Add(1)
|
|
|
|
if IndexFull(newIndex) {
|
|
|
|
select {
|
|
|
|
case ch <- newIndex:
|
|
|
|
case <-ctx.Done():
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
newIndex = NewIndex()
|
|
|
|
}
|
2020-10-10 07:42:22 +02:00
|
|
|
}
|
2015-11-02 19:28:30 +01:00
|
|
|
}
|
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
err = newIndex.AddToSupersedes(extraObsolete...)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
obsolete.Merge(restic.NewIDSet(extraObsolete...))
|
2015-11-02 19:28:30 +01:00
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
select {
|
|
|
|
case ch <- newIndex:
|
|
|
|
case <-ctx.Done():
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
|
|
|
|
// a worker receives an index from ch, and saves the index
|
|
|
|
worker := func() error {
|
|
|
|
for idx := range ch {
|
|
|
|
idx.Finalize()
|
|
|
|
if _, err := SaveIndex(ctx, repo, idx); err != nil {
|
|
|
|
return err
|
2020-10-10 07:42:22 +02:00
|
|
|
}
|
2015-11-02 19:28:30 +01:00
|
|
|
}
|
2020-11-12 02:49:53 +01:00
|
|
|
return nil
|
2015-11-02 19:28:30 +01:00
|
|
|
}
|
2020-10-18 09:24:34 +02:00
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
// run workers on ch
|
|
|
|
wg.Go(func() error {
|
|
|
|
return RunWorkers(saveIndexParallelism, worker)
|
|
|
|
})
|
2020-10-18 09:24:34 +02:00
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
err = wg.Wait()
|
2015-11-02 19:28:30 +01:00
|
|
|
|
2020-11-12 02:49:53 +01:00
|
|
|
return obsolete, err
|
2015-11-02 19:28:30 +01:00
|
|
|
}
|