// Package index contains various data structures for indexing content in a repository or backend.
package index

import (
	"fmt"
	"os"
	"restic"
	"restic/backend"
	"restic/debug"
	"restic/list"
	"restic/pack"
	"restic/types"
	"restic/worker"

	"github.com/pkg/errors"
)
// Pack contains information about the contents of a pack.
type Pack struct {
	Size    int64
	Entries []pack.Blob
}

// Blob contains information about a blob.
type Blob struct {
	Size  int64
	Packs backend.IDSet
}

// Index contains information about blobs and packs stored in a repo.
type Index struct {
	Packs    map[backend.ID]Pack
	Blobs    map[pack.Handle]Blob
	IndexIDs backend.IDSet
}

func newIndex() *Index {
	return &Index{
		Packs:    make(map[backend.ID]Pack),
		Blobs:    make(map[pack.Handle]Blob),
		IndexIDs: backend.NewIDSet(),
	}
}
// New creates a new index for repo from scratch.
func New(repo types.Repository, p *restic.Progress) (*Index, error) {
	done := make(chan struct{})
	defer close(done)

	p.Start()
	defer p.Done()

	ch := make(chan worker.Job)
	go list.AllPacks(repo, ch, done)

	idx := newIndex()

	for job := range ch {
		p.Report(restic.Stat{Blobs: 1})

		packID := job.Data.(backend.ID)
		if job.Error != nil {
			fmt.Fprintf(os.Stderr, "unable to list pack %v: %v\n", packID.Str(), job.Error)
			continue
		}

		j := job.Result.(list.Result)

		debug.Log("Index.New", "pack %v contains %d blobs", packID.Str(), len(j.Entries()))

		err := idx.AddPack(packID, j.Size(), j.Entries())
		if err != nil {
			return nil, err
		}

		p := Pack{Entries: j.Entries(), Size: j.Size()}
		idx.Packs[packID] = p
	}

	return idx, nil
}
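
// exampleNew is an illustrative usage sketch added by the editor; it is not
// part of the original file. It assumes the caller already has a
// types.Repository and a *restic.Progress; both parameter names are
// placeholders.
func exampleNew(repo types.Repository, p *restic.Progress) (*Index, error) {
	idx, err := New(repo, p)
	if err != nil {
		return nil, err
	}

	// The resulting index maps pack IDs to their contents and blob handles
	// to the packs that contain them.
	fmt.Fprintf(os.Stderr, "scanned %d packs, %d distinct blobs\n", len(idx.Packs), len(idx.Blobs))
	return idx, nil
}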
const loadIndexParallelism = 20

type packJSON struct {
	ID    backend.ID `json:"id"`
	Blobs []blobJSON `json:"blobs"`
}

type blobJSON struct {
	ID     backend.ID    `json:"id"`
	Type   pack.BlobType `json:"type"`
	Offset uint          `json:"offset"`
	Length uint          `json:"length"`
}

type indexJSON struct {
	Supersedes backend.IDs `json:"supersedes,omitempty"`
	Packs      []*packJSON `json:"packs"`
}

func loadIndexJSON(repo types.Repository, id backend.ID) (*indexJSON, error) {
	debug.Log("index.loadIndexJSON", "process index %v\n", id.Str())

	var idx indexJSON
	err := repo.LoadJSONUnpacked(backend.Index, id, &idx)
	if err != nil {
		return nil, err
	}

	return &idx, nil
}
// Load creates an index by loading all index files from the repo.
func Load(repo types.Repository, p *restic.Progress) (*Index, error) {
	debug.Log("index.Load", "loading indexes")

	p.Start()
	defer p.Done()

	done := make(chan struct{})
	defer close(done)

	supersedes := make(map[backend.ID]backend.IDSet)
	results := make(map[backend.ID]map[backend.ID]Pack)

	index := newIndex()

	for id := range repo.List(backend.Index, done) {
		p.Report(restic.Stat{Blobs: 1})

		debug.Log("index.Load", "Load index %v", id.Str())
		idx, err := loadIndexJSON(repo, id)
		if err != nil {
			return nil, err
		}

		res := make(map[backend.ID]Pack)
		supersedes[id] = backend.NewIDSet()
		for _, sid := range idx.Supersedes {
			debug.Log("index.Load", " index %v supersedes %v", id.Str(), sid)
			supersedes[id].Insert(sid)
		}

		for _, jpack := range idx.Packs {
			entries := make([]pack.Blob, 0, len(jpack.Blobs))
			for _, blob := range jpack.Blobs {
				entry := pack.Blob{
					ID:     blob.ID,
					Type:   blob.Type,
					Offset: blob.Offset,
					Length: blob.Length,
				}
				entries = append(entries, entry)
			}

			if err = index.AddPack(jpack.ID, 0, entries); err != nil {
				return nil, err
			}
		}

		results[id] = res
		index.IndexIDs.Insert(id)
	}

	for superID, list := range supersedes {
		for indexID := range list {
			if _, ok := results[indexID]; !ok {
				continue
			}

			debug.Log("index.Load", " removing index %v, superseded by %v", indexID.Str(), superID.Str())
			fmt.Fprintf(os.Stderr, "index %v can be removed, superseded by index %v\n", indexID.Str(), superID.Str())
			delete(results, indexID)
		}
	}

	return index, nil
}
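
// exampleLoad is an illustrative sketch added by the editor, not part of the
// original file: it loads all index files and reports which index IDs were
// read. Both parameters are assumed to be supplied by the caller.
func exampleLoad(repo types.Repository, p *restic.Progress) (*Index, error) {
	idx, err := Load(repo, p)
	if err != nil {
		return nil, err
	}

	// IndexIDs records every index file that contributed to this in-memory index.
	for id := range idx.IndexIDs {
		fmt.Fprintf(os.Stderr, "loaded index file %v\n", id.Str())
	}
	return idx, nil
}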
// AddPack adds a pack to the index. If this pack is already in the index, an
// error is returned.
func (idx *Index) AddPack(id backend.ID, size int64, entries []pack.Blob) error {
	if _, ok := idx.Packs[id]; ok {
		return errors.Errorf("pack %v already present in the index", id.Str())
	}

	idx.Packs[id] = Pack{Size: size, Entries: entries}

	for _, entry := range entries {
		h := pack.Handle{ID: entry.ID, Type: entry.Type}
		if _, ok := idx.Blobs[h]; !ok {
			idx.Blobs[h] = Blob{
				Size:  int64(entry.Length),
				Packs: backend.NewIDSet(),
			}
		}

		idx.Blobs[h].Packs.Insert(id)
	}

	return nil
}
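
// exampleAddPack is an illustrative sketch added by the editor, not part of
// the original file. It builds a small in-memory index from a single pack
// whose ID, size and blob entries are assumed to have been obtained by
// listing the pack elsewhere.
func exampleAddPack(packID backend.ID, size int64, entries []pack.Blob) (*Index, error) {
	idx := newIndex()
	if err := idx.AddPack(packID, size, entries); err != nil {
		return nil, err
	}

	// Adding the same pack a second time is rejected with an error.
	if err := idx.AddPack(packID, size, entries); err == nil {
		return nil, errors.New("expected duplicate pack to be rejected")
	}

	return idx, nil
}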
// RemovePack deletes a pack from the index.
func (idx *Index) RemovePack(id backend.ID) error {
	if _, ok := idx.Packs[id]; !ok {
		return errors.Errorf("pack %v not found in the index", id.Str())
	}

	for _, blob := range idx.Packs[id].Entries {
		h := pack.Handle{ID: blob.ID, Type: blob.Type}
		idx.Blobs[h].Packs.Delete(id)

		if len(idx.Blobs[h].Packs) == 0 {
			delete(idx.Blobs, h)
		}
	}

	delete(idx.Packs, id)

	return nil
}
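
// exampleRemovePack is an illustrative sketch added by the editor, not part of
// the original file: removing a pack also drops any blob handle that is no
// longer referenced by a remaining pack.
func exampleRemovePack(idx *Index, packID backend.ID) error {
	before := len(idx.Blobs)
	if err := idx.RemovePack(packID); err != nil {
		return err
	}

	fmt.Fprintf(os.Stderr, "blobs in index: %d -> %d\n", before, len(idx.Blobs))
	return nil
}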
// DuplicateBlobs returns a list of blobs that are stored more than once in the
// repo.
func (idx *Index) DuplicateBlobs() (dups pack.BlobSet) {
	dups = pack.NewBlobSet()
	seen := pack.NewBlobSet()

	for _, p := range idx.Packs {
		for _, entry := range p.Entries {
			h := pack.Handle{ID: entry.ID, Type: entry.Type}
			if seen.Has(h) {
				dups.Insert(h)
			}
			seen.Insert(h)
		}
	}

	return dups
}
// PacksForBlobs returns the set of packs in which the blobs are contained.
func (idx *Index) PacksForBlobs(blobs pack.BlobSet) (packs backend.IDSet) {
	packs = backend.NewIDSet()

	for h := range blobs {
		blob, ok := idx.Blobs[h]
		if !ok {
			continue
		}

		for id := range blob.Packs {
			packs.Insert(id)
		}
	}

	return packs
}
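
// exampleDuplicates is an illustrative sketch added by the editor, not part of
// the original file: it combines DuplicateBlobs and PacksForBlobs to find the
// packs that hold blobs stored more than once.
func exampleDuplicates(idx *Index) backend.IDSet {
	dups := idx.DuplicateBlobs()
	packs := idx.PacksForBlobs(dups)

	fmt.Fprintf(os.Stderr, "%d duplicate blobs spread over %d packs\n", len(dups), len(packs))
	return packs
}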
// Location describes the location of a blob in a pack.
type Location struct {
	PackID backend.ID
	pack.Blob
}

// ErrBlobNotFound is returned by FindBlob when the blob could not be found in
// the index.
var ErrBlobNotFound = errors.New("blob not found in index")

// FindBlob returns a list of packs and positions the blob can be found in.
func (idx *Index) FindBlob(h pack.Handle) ([]Location, error) {
	blob, ok := idx.Blobs[h]
	if !ok {
		return nil, ErrBlobNotFound
	}

	result := make([]Location, 0, len(blob.Packs))
	for packID := range blob.Packs {
		pack, ok := idx.Packs[packID]
		if !ok {
			return nil, errors.Errorf("pack %v not found in index", packID.Str())
		}

		for _, entry := range pack.Entries {
			if entry.Type != h.Type {
				continue
			}

			if !entry.ID.Equal(h.ID) {
				continue
			}

			loc := Location{PackID: packID, Blob: entry}
			result = append(result, loc)
		}
	}

	return result, nil
}
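
// exampleFindBlob is an illustrative sketch added by the editor, not part of
// the original file: it looks up a blob by handle and distinguishes "not
// indexed" from other lookup errors.
func exampleFindBlob(idx *Index, h pack.Handle) {
	locs, err := idx.FindBlob(h)
	if err == ErrBlobNotFound {
		fmt.Fprintf(os.Stderr, "blob %v is not in the index\n", h)
		return
	}
	if err != nil {
		fmt.Fprintf(os.Stderr, "lookup failed: %v\n", err)
		return
	}

	for _, loc := range locs {
		fmt.Fprintf(os.Stderr, "blob %v found in pack %v at offset %d\n", h, loc.PackID.Str(), loc.Offset)
	}
}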
// Save writes the complete index to the repo.
func (idx *Index) Save(repo types.Repository, supersedes backend.IDs) (backend.ID, error) {
	packs := make(map[backend.ID][]pack.Blob, len(idx.Packs))
	for id, p := range idx.Packs {
		packs[id] = p.Entries
	}

	return Save(repo, packs, supersedes)
}
// Save writes a new index containing the given packs.
func Save(repo types.Repository, packs map[backend.ID][]pack.Blob, supersedes backend.IDs) (backend.ID, error) {
	idx := &indexJSON{
		Supersedes: supersedes,
		Packs:      make([]*packJSON, 0, len(packs)),
	}

	for packID, blobs := range packs {
		b := make([]blobJSON, 0, len(blobs))
		for _, blob := range blobs {
			b = append(b, blobJSON{
				ID:     blob.ID,
				Type:   blob.Type,
				Offset: blob.Offset,
				Length: blob.Length,
			})
		}

		p := &packJSON{
			ID:    packID,
			Blobs: b,
		}

		idx.Packs = append(idx.Packs, p)
	}

	return repo.SaveJSONUnpacked(backend.Index, idx)
}
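
// exampleSaveMerged is an illustrative sketch added by the editor, not part of
// the original file: it writes the in-memory index back to the repository and
// marks every index file it was built from as superseded. Whether this is the
// intended rebuild workflow is an assumption.
func exampleSaveMerged(repo types.Repository, idx *Index) (backend.ID, error) {
	var supersedes backend.IDs
	for id := range idx.IndexIDs {
		supersedes = append(supersedes, id)
	}

	return idx.Save(repo, supersedes)
}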