2
2
mirror of https://github.com/octoleo/restic.git synced 2024-06-01 00:20:48 +00:00
restic/internal/repository/index.go

599 lines
13 KiB
Go
Raw Normal View History

package repository
2015-04-26 15:10:31 +00:00
import (
2017-06-04 09:16:55 +00:00
"context"
2015-04-26 15:10:31 +00:00
"encoding/json"
"io"
"sync"
2015-10-12 21:59:17 +00:00
"time"
2015-04-26 15:10:31 +00:00
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/errors"
2017-07-24 15:42:25 +00:00
"github.com/restic/restic/internal/restic"
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/debug"
2015-04-26 15:10:31 +00:00
)
// In large repositories, millions of blobs are stored in the repository
// and restic needs to store an index entry for each blob in memory for
// most operations.
// Hence the index data structure defined here is one of the main contributions
// to the total memory requirements of restic.
//
// We use a map to store each index entry.
// The key of the map is a BlobHandle
// The entries of the maps are slices which contain the actual index entries.
//
// To compute the needed amount of memory, we need some assumptions.
// Maps need an overhead of allocated but not needed elements.
// For computations, we assume an overhead of 50% and use OF=1.5 (overhead factor)
//
// We have the following sizes:
// key: 32 + 1 = 33 bytes
// slice: 24 bytes (pointer, len and cap)
// indexEntry: 32 + 8 + 8 = 48 bytes
//
// To save N index entries, we therefore need:
// N * OF * (33 + 24) bytes + N * 48 bytes = N * 133.5 bytes, i.e. about N * 134 bytes
2015-04-26 15:10:31 +00:00
// Index holds a lookup table for id -> pack.
type Index struct {
2017-09-23 12:47:40 +00:00
m sync.Mutex
pack map[restic.BlobHandle][]indexEntry
treePacks restic.IDs
2016-08-31 18:29:54 +00:00
final bool // set to true for all indexes read from the backend ("finalized")
id restic.ID // set to the ID of the index when it's finalized
supersedes restic.IDs
2015-10-12 21:59:17 +00:00
created time.Time
2015-04-26 15:10:31 +00:00
}
type indexEntry struct {
2016-08-31 18:29:54 +00:00
packID restic.ID
2015-04-26 15:10:31 +00:00
offset uint
length uint
}
// NewIndex returns a new index.
func NewIndex() *Index {
return &Index{
2016-08-31 18:58:57 +00:00
pack: make(map[restic.BlobHandle][]indexEntry),
2015-10-12 21:59:17 +00:00
created: time.Now(),
2015-04-26 15:10:31 +00:00
}
}
2016-08-31 20:39:36 +00:00
func (idx *Index) store(blob restic.PackedBlob) {
newEntry := indexEntry{
packID: blob.PackID,
offset: blob.Offset,
length: blob.Length,
}
2016-08-31 18:58:57 +00:00
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
idx.pack[h] = append(idx.pack[h], newEntry)
2015-04-26 15:10:31 +00:00
}
2015-10-12 20:34:12 +00:00
// Final reports whether the index has been finalized, i.e. it has already
// been written to the repository or was read from it.
func (idx *Index) Final() bool {
	idx.m.Lock()
	isFinal := idx.final
	idx.m.Unlock()

	return isFinal
}
2015-10-12 21:59:17 +00:00
// Thresholds consulted by IndexFull.
const (
	// indexMaxBlobs is the number of blob handles from which on an index is
	// considered full.
	indexMaxBlobs = 50000

	// indexMaxAge is the age from which on an index is considered full,
	// regardless of how many blobs it holds.
	indexMaxAge = 10 * time.Minute
)
// IndexFull returns true iff the index is "full enough" to be saved as a preliminary index.
var IndexFull = func(idx *Index) bool {
2015-10-12 21:59:17 +00:00
idx.m.Lock()
defer idx.m.Unlock()
2016-09-27 20:35:08 +00:00
debug.Log("checking whether index %p is full", idx)
2015-10-12 21:59:17 +00:00
2020-05-24 07:19:42 +00:00
blobs := len(idx.pack)
2015-10-12 21:59:17 +00:00
age := time.Now().Sub(idx.created)
2020-05-24 07:19:42 +00:00
switch {
case age >= indexMaxAge:
2016-09-27 20:35:08 +00:00
debug.Log("index %p is old enough", idx, age)
2015-10-12 21:59:17 +00:00
return true
2020-05-24 07:19:42 +00:00
case blobs >= indexMaxBlobs:
debug.Log("index %p has %d blobs", idx, blobs)
2015-10-12 21:59:17 +00:00
return true
}
2020-05-24 07:19:42 +00:00
debug.Log("index %p only has %d blobs and is too young (%v)", idx, blobs, age)
2015-10-12 21:59:17 +00:00
return false
2020-05-24 07:19:42 +00:00
2015-10-12 21:59:17 +00:00
}
// Store remembers the id and pack in the index.
2016-08-31 20:39:36 +00:00
func (idx *Index) Store(blob restic.PackedBlob) {
2015-04-26 15:10:31 +00:00
idx.m.Lock()
defer idx.m.Unlock()
2015-10-12 20:34:12 +00:00
if idx.final {
panic("store new item in finalized index")
}
2016-09-27 20:35:08 +00:00
debug.Log("%v", blob)
2015-04-26 15:10:31 +00:00
idx.store(blob)
2015-04-26 15:10:31 +00:00
}
// StorePack records every blob in blobs as being stored in the pack id.
// It panics when the index has already been finalized.
func (idx *Index) StorePack(id restic.ID, blobs []restic.Blob) {
	idx.m.Lock()
	defer idx.m.Unlock()

	if idx.final {
		panic("store new item in finalized index")
	}

	debug.Log("%v", blobs)

	for i := range blobs {
		idx.store(restic.PackedBlob{Blob: blobs[i], PackID: id})
	}
}
2016-08-31 20:39:36 +00:00
// Lookup queries the index for the blob ID and returns a restic.PackedBlob.
func (idx *Index) Lookup(id restic.ID, tpe restic.BlobType) (blobs []restic.PackedBlob, found bool) {
2015-04-26 15:10:31 +00:00
idx.m.Lock()
defer idx.m.Unlock()
2016-08-31 18:58:57 +00:00
h := restic.BlobHandle{ID: id, Type: tpe}
if packs, ok := idx.pack[h]; ok {
2016-08-31 20:39:36 +00:00
blobs = make([]restic.PackedBlob, 0, len(packs))
for _, p := range packs {
2016-08-31 20:39:36 +00:00
blob := restic.PackedBlob{
Blob: restic.Blob{
Type: tpe,
Length: p.length,
ID: id,
Offset: p.offset,
},
PackID: p.packID,
}
blobs = append(blobs, blob)
}
return blobs, true
2015-04-26 15:10:31 +00:00
}
return nil, false
2015-04-26 15:10:31 +00:00
}
2015-11-01 21:32:28 +00:00
// ListPack returns a list of blobs contained in a pack.
2016-08-31 20:39:36 +00:00
func (idx *Index) ListPack(id restic.ID) (list []restic.PackedBlob) {
2015-11-01 21:32:28 +00:00
idx.m.Lock()
defer idx.m.Unlock()
for h, packList := range idx.pack {
2016-08-03 18:03:52 +00:00
for _, entry := range packList {
if entry.packID == id {
2016-08-31 20:39:36 +00:00
list = append(list, restic.PackedBlob{
Blob: restic.Blob{
ID: h.ID,
Type: h.Type,
Length: entry.length,
Offset: entry.offset,
},
2016-08-03 18:03:52 +00:00
PackID: entry.packID,
})
}
2015-11-01 21:32:28 +00:00
}
}
return list
}
2015-04-26 15:10:31 +00:00
// Has returns true iff the id is listed in the index.
2016-08-31 18:58:57 +00:00
func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool {
idx.m.Lock()
defer idx.m.Unlock()
2015-04-26 15:10:31 +00:00
h := restic.BlobHandle{ID: id, Type: tpe}
_, ok := idx.pack[h]
return ok
2015-04-26 15:10:31 +00:00
}
// LookupSize returns the length of the plaintext content of the blob with the
// given id.
func (idx *Index) LookupSize(id restic.ID, tpe restic.BlobType) (plaintextLength uint, found bool) {
blobs, found := idx.Lookup(id, tpe)
if !found {
return 0, found
2015-07-19 13:16:16 +00:00
}
return uint(restic.PlaintextLength(int(blobs[0].Length))), true
2015-07-19 13:16:16 +00:00
}
// Supersedes returns the list of indexes this index supersedes, if any.
2016-08-31 18:29:54 +00:00
func (idx *Index) Supersedes() restic.IDs {
return idx.supersedes
}
2015-10-25 16:06:56 +00:00
// AddToSupersedes adds the ids to the list of indexes superseded by this
// index. If the index has already been finalized, an error is returned.
2016-08-31 18:29:54 +00:00
func (idx *Index) AddToSupersedes(ids ...restic.ID) error {
2015-10-25 16:06:56 +00:00
idx.m.Lock()
defer idx.m.Unlock()
if idx.final {
return errors.New("index already finalized")
}
idx.supersedes = append(idx.supersedes, ids...)
return nil
}
2017-06-18 12:45:02 +00:00
// Each returns a channel that yields all blobs known to the index. When the
// context is cancelled, the background goroutine terminates. This blocks any
// modification of the index.
func (idx *Index) Each(ctx context.Context) <-chan restic.PackedBlob {
2015-04-26 15:10:31 +00:00
idx.m.Lock()
2016-08-31 20:39:36 +00:00
ch := make(chan restic.PackedBlob)
2015-04-26 15:10:31 +00:00
go func() {
defer idx.m.Unlock()
defer func() {
close(ch)
}()
for h, packs := range idx.pack {
2016-08-03 18:03:52 +00:00
for _, blob := range packs {
select {
2017-06-18 12:45:02 +00:00
case <-ctx.Done():
2016-08-03 18:03:52 +00:00
return
2016-08-31 20:39:36 +00:00
case ch <- restic.PackedBlob{
Blob: restic.Blob{
ID: h.ID,
Type: h.Type,
Offset: blob.offset,
Length: blob.length,
},
2016-08-03 18:03:52 +00:00
PackID: blob.packID,
}:
}
2015-04-26 15:10:31 +00:00
}
}
}()
return ch
}
2015-10-25 14:28:01 +00:00
// Packs returns all packs in this index
2016-08-31 18:29:54 +00:00
func (idx *Index) Packs() restic.IDSet {
2015-10-25 14:28:01 +00:00
idx.m.Lock()
defer idx.m.Unlock()
2016-08-31 18:29:54 +00:00
packs := restic.NewIDSet()
2016-08-03 18:03:52 +00:00
for _, list := range idx.pack {
for _, entry := range list {
packs.Insert(entry.packID)
}
2015-10-25 14:28:01 +00:00
}
return packs
}
// Count returns the number of blobs of type t in the index.
2016-08-31 18:58:57 +00:00
func (idx *Index) Count(t restic.BlobType) (n uint) {
2016-09-27 20:35:08 +00:00
debug.Log("counting blobs of type %v", t)
idx.m.Lock()
defer idx.m.Unlock()
for h, list := range idx.pack {
if h.Type != t {
continue
}
n += uint(len(list))
}
return
}
2015-04-26 15:10:31 +00:00
type packJSON struct {
2016-08-31 18:29:54 +00:00
ID restic.ID `json:"id"`
2015-04-26 15:10:31 +00:00
Blobs []blobJSON `json:"blobs"`
}
type blobJSON struct {
2016-08-31 18:58:57 +00:00
ID restic.ID `json:"id"`
Type restic.BlobType `json:"type"`
Offset uint `json:"offset"`
Length uint `json:"length"`
2015-04-26 15:10:31 +00:00
}
// generatePackList returns a list of packs.
func (idx *Index) generatePackList() ([]*packJSON, error) {
2015-04-26 15:10:31 +00:00
list := []*packJSON{}
2016-08-31 18:29:54 +00:00
packs := make(map[restic.ID]*packJSON)
2015-04-26 15:10:31 +00:00
for h, packedBlobs := range idx.pack {
2016-08-03 18:03:52 +00:00
for _, blob := range packedBlobs {
if blob.packID.IsNull() {
panic("null pack id")
}
2016-09-27 20:35:08 +00:00
debug.Log("handle blob %v", h)
2016-08-03 18:03:52 +00:00
if blob.packID.IsNull() {
2016-09-27 20:35:08 +00:00
debug.Log("blob %v has no packID! (offset %v, length %v)",
h, blob.offset, blob.length)
return nil, errors.Errorf("unable to serialize index: pack for blob %v hasn't been written yet", h)
2016-08-03 18:03:52 +00:00
}
2016-08-03 18:03:52 +00:00
// see if pack is already in map
p, ok := packs[blob.packID]
if !ok {
// else create new pack
p = &packJSON{ID: blob.packID}
2015-04-26 15:10:31 +00:00
2016-08-03 18:03:52 +00:00
// and append it to the list and map
list = append(list, p)
packs[p.ID] = p
}
2015-04-26 15:10:31 +00:00
2016-08-03 18:03:52 +00:00
// add blob
p.Blobs = append(p.Blobs, blobJSON{
ID: h.ID,
Type: h.Type,
2016-08-03 18:03:52 +00:00
Offset: blob.offset,
Length: blob.length,
})
}
2015-04-26 15:10:31 +00:00
}
2016-09-27 20:35:08 +00:00
debug.Log("done")
2015-05-16 12:05:19 +00:00
return list, nil
}
2015-07-25 22:40:00 +00:00
type jsonIndex struct {
2016-08-31 18:29:54 +00:00
Supersedes restic.IDs `json:"supersedes,omitempty"`
Packs []*packJSON `json:"packs"`
2015-07-25 22:40:00 +00:00
}
2015-10-12 20:34:12 +00:00
// Encode writes the JSON serialization of the index to the writer w.
func (idx *Index) Encode(w io.Writer) error {
2016-09-27 20:35:08 +00:00
debug.Log("encoding index")
2015-10-12 20:34:12 +00:00
idx.m.Lock()
defer idx.m.Unlock()
return idx.encode(w)
}
// encode writes the JSON serialization of the index to the writer w.
func (idx *Index) encode(w io.Writer) error {
2016-09-27 20:35:08 +00:00
debug.Log("encoding index")
2015-10-12 20:34:12 +00:00
list, err := idx.generatePackList()
2015-05-16 12:05:19 +00:00
if err != nil {
return err
}
2015-04-26 15:10:31 +00:00
enc := json.NewEncoder(w)
2015-07-25 22:40:00 +00:00
idxJSON := jsonIndex{
2015-10-12 20:34:12 +00:00
Supersedes: idx.supersedes,
2015-07-25 22:40:00 +00:00
Packs: list,
}
return enc.Encode(idxJSON)
2015-04-26 15:10:31 +00:00
}
// Finalize sets the index to final.
func (idx *Index) Finalize() {
debug.Log("finalizing index")
2015-05-16 12:05:19 +00:00
idx.m.Lock()
defer idx.m.Unlock()
2015-10-12 20:34:12 +00:00
idx.final = true
2015-05-16 12:05:19 +00:00
}
2015-11-02 17:51:45 +00:00
// ID returns the ID of the index, if available. If the index is not yet
// finalized, an error is returned.
2016-08-31 18:29:54 +00:00
func (idx *Index) ID() (restic.ID, error) {
2015-11-02 17:51:45 +00:00
idx.m.Lock()
defer idx.m.Unlock()
if !idx.final {
2016-08-31 18:29:54 +00:00
return restic.ID{}, errors.New("index not finalized")
2015-11-02 17:51:45 +00:00
}
return idx.id, nil
}
// SetID sets the ID the index has been written to. This requires that
// Finalize() has been called before, otherwise an error is returned.
2016-08-31 18:29:54 +00:00
func (idx *Index) SetID(id restic.ID) error {
2015-11-02 17:51:45 +00:00
idx.m.Lock()
defer idx.m.Unlock()
if !idx.final {
2017-02-08 23:43:10 +00:00
return errors.New("index is not final")
2015-11-02 17:51:45 +00:00
}
if !idx.id.IsNull() {
return errors.New("ID already set")
}
2018-01-25 19:49:41 +00:00
debug.Log("ID set to %v", id)
2015-11-02 17:51:45 +00:00
idx.id = id
return nil
}
2015-05-16 12:05:19 +00:00
// Dump writes the pretty-printed JSON representation of the index to w.
func (idx *Index) Dump(w io.Writer) error {
2016-09-27 20:35:08 +00:00
debug.Log("dumping index")
2015-05-16 12:05:19 +00:00
idx.m.Lock()
defer idx.m.Unlock()
list, err := idx.generatePackList()
2015-05-16 12:05:19 +00:00
if err != nil {
return err
}
2015-11-02 18:28:30 +00:00
outer := jsonIndex{
Supersedes: idx.Supersedes(),
Packs: list,
}
buf, err := json.MarshalIndent(outer, "", " ")
2015-05-16 12:05:19 +00:00
if err != nil {
return err
}
_, err = w.Write(append(buf, '\n'))
if err != nil {
2016-08-29 20:16:58 +00:00
return errors.Wrap(err, "Write")
2015-05-16 12:05:19 +00:00
}
2016-09-27 20:35:08 +00:00
debug.Log("done")
2015-05-16 12:05:19 +00:00
return nil
}
2017-09-23 12:47:40 +00:00
// TreePacks returns the list of packs that contain only tree blobs, as
// collected while the index was decoded. The field is read without acquiring
// idx.m, and the slice returned is the index's own slice rather than a copy.
func (idx *Index) TreePacks() restic.IDs {
	return idx.treePacks
}
2015-07-26 19:58:03 +00:00
// isErrOldIndex reports whether err may be caused by an old index format:
// it is a *json.UnmarshalTypeError raised because a JSON array was found
// where another type was expected.
func isErrOldIndex(err error) bool {
	typeErr, ok := err.(*json.UnmarshalTypeError)
	return ok && typeErr.Value == "array"
}
// ErrOldIndexFormat means an index with the old format was detected.
// DecodeIndex returns it (wrapped) in place of the JSON error when
// isErrOldIndex reports that the data looks like the old format.
var ErrOldIndexFormat = errors.New("index has old format")
2015-04-26 15:10:31 +00:00
// DecodeIndex loads and unserializes an index from rd.
func DecodeIndex(buf []byte) (idx *Index, err error) {
2016-09-27 20:35:08 +00:00
debug.Log("Start decoding index")
idxJSON := &jsonIndex{}
2015-07-25 22:40:00 +00:00
err = json.Unmarshal(buf, idxJSON)
2015-07-25 22:40:00 +00:00
if err != nil {
2016-09-27 20:35:08 +00:00
debug.Log("Error %v", err)
2015-07-26 19:58:03 +00:00
if isErrOldIndex(err) {
2016-09-27 20:35:08 +00:00
debug.Log("index is probably old format, trying that")
2015-07-26 19:58:03 +00:00
err = ErrOldIndexFormat
}
2016-08-29 20:16:58 +00:00
return nil, errors.Wrap(err, "Decode")
2015-07-25 22:40:00 +00:00
}
idx = NewIndex()
2015-07-25 22:40:00 +00:00
for _, pack := range idxJSON.Packs {
2017-09-23 12:47:40 +00:00
var data, tree bool
2015-07-25 22:40:00 +00:00
for _, blob := range pack.Blobs {
2016-08-31 20:39:36 +00:00
idx.store(restic.PackedBlob{
Blob: restic.Blob{
Type: blob.Type,
ID: blob.ID,
Offset: blob.Offset,
Length: blob.Length,
},
PackID: pack.ID,
})
2017-09-23 12:47:40 +00:00
switch blob.Type {
case restic.DataBlob:
data = true
case restic.TreeBlob:
tree = true
}
}
if !data && tree {
idx.treePacks = append(idx.treePacks, pack.ID)
2015-07-25 22:40:00 +00:00
}
}
idx.supersedes = idxJSON.Supersedes
2015-10-12 20:34:12 +00:00
idx.final = true
2015-07-25 22:40:00 +00:00
2016-09-27 20:35:08 +00:00
debug.Log("done")
2016-08-29 20:16:58 +00:00
return idx, nil
2015-07-25 22:40:00 +00:00
}
// DecodeOldIndex loads and unserializes an index in the old format from rd.
func DecodeOldIndex(buf []byte) (idx *Index, err error) {
2016-09-27 20:35:08 +00:00
debug.Log("Start decoding old index")
2015-04-26 15:10:31 +00:00
list := []*packJSON{}
err = json.Unmarshal(buf, &list)
2015-04-26 15:10:31 +00:00
if err != nil {
2016-09-27 20:35:08 +00:00
debug.Log("Error %#v", err)
2016-08-29 20:16:58 +00:00
return nil, errors.Wrap(err, "Decode")
2015-04-26 15:10:31 +00:00
}
idx = NewIndex()
2015-04-26 15:10:31 +00:00
for _, pack := range list {
2017-09-23 12:47:40 +00:00
var data, tree bool
2015-04-26 15:10:31 +00:00
for _, blob := range pack.Blobs {
2016-08-31 20:39:36 +00:00
idx.store(restic.PackedBlob{
Blob: restic.Blob{
Type: blob.Type,
ID: blob.ID,
Offset: blob.Offset,
Length: blob.Length,
},
PackID: pack.ID,
})
2017-09-23 12:47:40 +00:00
switch blob.Type {
case restic.DataBlob:
data = true
case restic.TreeBlob:
tree = true
}
}
if !data && tree {
idx.treePacks = append(idx.treePacks, pack.ID)
2015-04-26 15:10:31 +00:00
}
}
idx.final = true
2015-04-26 15:10:31 +00:00
2016-09-27 20:35:08 +00:00
debug.Log("done")
2016-08-29 20:16:58 +00:00
return idx, nil
}
2015-11-02 17:51:24 +00:00
// LoadIndexWithDecoder loads the index and decodes it with fn.
2019-03-24 21:12:38 +00:00
func LoadIndexWithDecoder(ctx context.Context, repo restic.Repository, buf []byte, id restic.ID, fn func([]byte) (*Index, error)) (*Index, []byte, error) {
2018-01-25 19:49:41 +00:00
debug.Log("Loading index %v", id)
2015-11-02 17:51:24 +00:00
2019-03-24 21:12:38 +00:00
buf, err := repo.LoadAndDecrypt(ctx, buf[:0], restic.IndexFile, id)
2015-11-02 17:51:24 +00:00
if err != nil {
2019-03-24 21:12:38 +00:00
return nil, buf[:0], err
2015-11-02 17:51:24 +00:00
}
2019-03-24 21:12:38 +00:00
idx, err := fn(buf)
2015-11-02 17:51:24 +00:00
if err != nil {
2016-09-27 20:35:08 +00:00
debug.Log("error while decoding index %v: %v", id, err)
2019-03-24 21:12:38 +00:00
return nil, buf[:0], err
2015-11-02 17:51:24 +00:00
}
idx.id = id
2015-11-02 17:51:24 +00:00
2019-03-24 21:12:38 +00:00
return idx, buf, nil
2015-11-02 17:51:24 +00:00
}