2018-04-08 08:02:30 -04:00
|
|
|
package restorer
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2018-09-14 20:18:37 -04:00
|
|
|
"path/filepath"
|
2019-11-27 07:22:38 -05:00
|
|
|
"sync"
|
2018-04-08 08:02:30 -04:00
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
|
2018-04-08 08:02:30 -04:00
|
|
|
"github.com/restic/restic/internal/crypto"
|
|
|
|
"github.com/restic/restic/internal/debug"
|
|
|
|
"github.com/restic/restic/internal/errors"
|
2021-08-20 12:12:38 +02:00
|
|
|
"github.com/restic/restic/internal/repository"
|
2018-04-08 08:02:30 -04:00
|
|
|
"github.com/restic/restic/internal/restic"
|
|
|
|
)
|
|
|
|
|
|
|
|
// TODO if a blob is corrupt, there may be good blob copies in other packs
|
|
|
|
// TODO evaluate if it makes sense to split download and processing workers
|
|
|
|
// pro: can (slowly) read network and decrypt/write files concurrently
|
|
|
|
// con: each worker needs to keep one pack in memory
|
|
|
|
|
|
|
|
// largeFileBlobCount is the threshold used by restoreFiles: a file with more
// blobs than this has its blob list replaced by a per-pack index
// (map[restic.ID][]fileBlobInfo), while smaller files keep the plain
// restic.IDs list.
const largeFileBlobCount = 25
|
|
|
|
|
|
|
|
// fileInfo holds the restore state of a single regular file.
type fileInfo struct {
	// lock guards inProgress; downloadPack holds it while the first blob of
	// the file is written.
	lock sync.Mutex
	// inProgress is set by the first writer; while it is false the next
	// write passes size to the files writer as the creation size.
	inProgress bool
	// size is the size of the file as given to addFile.
	size int64
	// location is the file on the local filesystem, relative to the
	// restorer basedir.
	location string
	// blobs holds the blobs of the file: restic.IDs as registered by
	// addFile, or map[restic.ID][]fileBlobInfo once restoreFiles has
	// indexed a large file by pack.
	blobs interface{}
}
|
|
|
|
|
|
|
|
type fileBlobInfo struct {
|
|
|
|
id restic.ID // the blob id
|
|
|
|
offset int64 // blob offset in the file
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// information about a data pack required to restore one or more files
|
|
|
|
type packInfo struct {
|
2019-11-27 07:22:38 -05:00
|
|
|
id restic.ID // the pack id
|
|
|
|
files map[*fileInfo]struct{} // set of files that use blobs from this pack
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// fileRestorer restores set of files
|
|
|
|
type fileRestorer struct {
|
|
|
|
key *crypto.Key
|
2020-11-05 22:18:00 +01:00
|
|
|
idx func(restic.BlobHandle) []restic.PackedBlob
|
2021-08-20 12:12:38 +02:00
|
|
|
packLoader repository.BackendLoadFn
|
2018-04-08 08:02:30 -04:00
|
|
|
|
2021-08-08 00:38:17 +02:00
|
|
|
workerCount int
|
2019-11-27 07:22:38 -05:00
|
|
|
filesWriter *filesWriter
|
2018-04-08 08:02:30 -04:00
|
|
|
|
2018-09-14 20:18:37 -04:00
|
|
|
dst string
|
2018-04-08 08:02:30 -04:00
|
|
|
files []*fileInfo
|
2021-01-04 19:20:04 +01:00
|
|
|
Error func(string, error) error
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
|
2019-11-27 07:22:38 -05:00
|
|
|
func newFileRestorer(dst string,
|
2021-08-20 12:12:38 +02:00
|
|
|
packLoader repository.BackendLoadFn,
|
2019-11-27 07:22:38 -05:00
|
|
|
key *crypto.Key,
|
2021-08-08 00:38:17 +02:00
|
|
|
idx func(restic.BlobHandle) []restic.PackedBlob,
|
|
|
|
connections uint) *fileRestorer {
|
|
|
|
|
|
|
|
// as packs are streamed the concurrency is limited by IO
|
|
|
|
workerCount := int(connections)
|
2019-11-27 07:22:38 -05:00
|
|
|
|
2018-04-08 08:02:30 -04:00
|
|
|
return &fileRestorer{
|
|
|
|
key: key,
|
|
|
|
idx: idx,
|
2019-11-27 07:22:38 -05:00
|
|
|
packLoader: packLoader,
|
|
|
|
filesWriter: newFilesWriter(workerCount),
|
2021-08-08 00:38:17 +02:00
|
|
|
workerCount: workerCount,
|
2018-09-14 20:18:37 -04:00
|
|
|
dst: dst,
|
2021-01-04 19:20:04 +01:00
|
|
|
Error: restorerAbortOnAllErrors,
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-15 17:45:05 +02:00
|
|
|
func (r *fileRestorer) addFile(location string, content restic.IDs, size int64) {
|
|
|
|
r.files = append(r.files, &fileInfo{location: location, blobs: content, size: size})
|
2018-09-14 20:18:37 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
func (r *fileRestorer) targetPath(location string) string {
|
|
|
|
return filepath.Join(r.dst, location)
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
|
2019-11-27 07:22:38 -05:00
|
|
|
func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID, packBlob restic.Blob)) error {
|
|
|
|
if len(blobIDs) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
2018-04-08 08:02:30 -04:00
|
|
|
|
2019-11-27 07:22:38 -05:00
|
|
|
for _, blobID := range blobIDs {
|
2020-11-05 22:18:00 +01:00
|
|
|
packs := r.idx(restic.BlobHandle{ID: blobID, Type: restic.DataBlob})
|
2020-06-14 13:26:10 +02:00
|
|
|
if len(packs) == 0 {
|
2019-11-27 07:22:38 -05:00
|
|
|
return errors.Errorf("Unknown blob %s", blobID.String())
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
2019-11-27 07:22:38 -05:00
|
|
|
fn(packs[0].PackID, packs[0].Blob)
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
|
2019-11-27 07:22:38 -05:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *fileRestorer) restoreFiles(ctx context.Context) error {
|
|
|
|
|
|
|
|
packs := make(map[restic.ID]*packInfo) // all packs
|
2020-08-15 17:41:55 +02:00
|
|
|
// Process packs in order of first access. While this cannot guarantee
|
|
|
|
// that file chunks are restored sequentially, it offers a good enough
|
|
|
|
// approximation to shorten restore times by up to 19% in some test.
|
|
|
|
var packOrder restic.IDs
|
2018-04-08 08:02:30 -04:00
|
|
|
|
2019-11-27 07:22:38 -05:00
|
|
|
// create packInfo from fileInfo
|
|
|
|
for _, file := range r.files {
|
|
|
|
fileBlobs := file.blobs.(restic.IDs)
|
|
|
|
largeFile := len(fileBlobs) > largeFileBlobCount
|
|
|
|
var packsMap map[restic.ID][]fileBlobInfo
|
|
|
|
if largeFile {
|
|
|
|
packsMap = make(map[restic.ID][]fileBlobInfo)
|
|
|
|
}
|
|
|
|
fileOffset := int64(0)
|
|
|
|
err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) {
|
|
|
|
if largeFile {
|
|
|
|
packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
|
2022-02-13 17:24:09 +01:00
|
|
|
fileOffset += int64(blob.DataLength())
|
2019-11-27 07:22:38 -05:00
|
|
|
}
|
|
|
|
pack, ok := packs[packID]
|
|
|
|
if !ok {
|
|
|
|
pack = &packInfo{
|
|
|
|
id: packID,
|
|
|
|
files: make(map[*fileInfo]struct{}),
|
|
|
|
}
|
|
|
|
packs[packID] = pack
|
2020-08-15 17:41:55 +02:00
|
|
|
packOrder = append(packOrder, packID)
|
2019-11-27 07:22:38 -05:00
|
|
|
}
|
|
|
|
pack.files[file] = struct{}{}
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
// repository index is messed up, can't do anything
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if largeFile {
|
|
|
|
file.blobs = packsMap
|
|
|
|
}
|
|
|
|
}
|
2018-04-08 08:02:30 -04:00
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
wg, ctx := errgroup.WithContext(ctx)
|
2019-11-27 07:22:38 -05:00
|
|
|
downloadCh := make(chan *packInfo)
|
2021-01-04 19:20:04 +01:00
|
|
|
|
|
|
|
worker := func() error {
|
|
|
|
for pack := range downloadCh {
|
|
|
|
if err := r.downloadPack(ctx, pack); err != nil {
|
|
|
|
return err
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
}
|
2021-01-04 19:20:04 +01:00
|
|
|
return nil
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
2021-08-08 00:38:17 +02:00
|
|
|
for i := 0; i < r.workerCount; i++ {
|
2021-01-04 19:20:04 +01:00
|
|
|
wg.Go(worker)
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// the main restore loop
|
2021-01-04 19:20:04 +01:00
|
|
|
wg.Go(func() error {
|
|
|
|
for _, id := range packOrder {
|
|
|
|
pack := packs[id]
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
case downloadCh <- pack:
|
|
|
|
debug.Log("Scheduled download pack %s", pack.id.Str())
|
|
|
|
}
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
2021-01-04 19:20:04 +01:00
|
|
|
close(downloadCh)
|
|
|
|
return nil
|
|
|
|
})
|
2019-11-27 07:22:38 -05:00
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
return wg.Wait()
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
|
2018-04-08 08:02:30 -04:00
|
|
|
|
2022-02-13 11:43:09 +01:00
|
|
|
// calculate blob->[]files->[]offsets mappings
|
2019-11-27 07:22:38 -05:00
|
|
|
blobs := make(map[restic.ID]struct {
|
2021-08-20 12:12:38 +02:00
|
|
|
files map[*fileInfo][]int64 // file -> offsets (plural!) of the blob in the file
|
2019-11-27 07:22:38 -05:00
|
|
|
})
|
2021-08-20 12:12:38 +02:00
|
|
|
var blobList []restic.Blob
|
2018-04-08 08:02:30 -04:00
|
|
|
for file := range pack.files {
|
2019-11-27 07:22:38 -05:00
|
|
|
addBlob := func(blob restic.Blob, fileOffset int64) {
|
|
|
|
blobInfo, ok := blobs[blob.ID]
|
|
|
|
if !ok {
|
|
|
|
blobInfo.files = make(map[*fileInfo][]int64)
|
2021-08-20 12:12:38 +02:00
|
|
|
blobList = append(blobList, blob)
|
2019-11-27 07:22:38 -05:00
|
|
|
blobs[blob.ID] = blobInfo
|
|
|
|
}
|
|
|
|
blobInfo.files[file] = append(blobInfo.files[file], fileOffset)
|
|
|
|
}
|
|
|
|
if fileBlobs, ok := file.blobs.(restic.IDs); ok {
|
|
|
|
fileOffset := int64(0)
|
2021-01-31 18:04:45 +01:00
|
|
|
err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) {
|
2019-11-27 07:22:38 -05:00
|
|
|
if packID.Equal(pack.id) {
|
|
|
|
addBlob(blob, fileOffset)
|
|
|
|
}
|
2022-02-13 17:24:09 +01:00
|
|
|
fileOffset += int64(blob.DataLength())
|
2019-11-27 07:22:38 -05:00
|
|
|
})
|
2021-01-31 18:04:45 +01:00
|
|
|
if err != nil {
|
|
|
|
// restoreFiles should have caught this error before
|
|
|
|
panic(err)
|
|
|
|
}
|
2019-11-27 07:22:38 -05:00
|
|
|
} else if packsMap, ok := file.blobs.(map[restic.ID][]fileBlobInfo); ok {
|
|
|
|
for _, blob := range packsMap[pack.id] {
|
2020-11-05 22:18:00 +01:00
|
|
|
idxPacks := r.idx(restic.BlobHandle{ID: blob.id, Type: restic.DataBlob})
|
2020-06-14 13:26:10 +02:00
|
|
|
for _, idxPack := range idxPacks {
|
|
|
|
if idxPack.PackID.Equal(pack.id) {
|
|
|
|
addBlob(idxPack.Blob, blob.offset)
|
|
|
|
break
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-11-27 07:22:38 -05:00
|
|
|
}
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
sanitizeError := func(file *fileInfo, err error) error {
|
|
|
|
if err != nil {
|
|
|
|
err = r.Error(file.location, err)
|
2019-11-27 07:22:38 -05:00
|
|
|
}
|
2021-01-04 19:20:04 +01:00
|
|
|
return err
|
2019-11-27 07:22:38 -05:00
|
|
|
}
|
|
|
|
|
2021-08-20 12:12:38 +02:00
|
|
|
err := repository.StreamPack(ctx, r.packLoader, r.key, pack.id, blobList, func(h restic.BlobHandle, blobData []byte, err error) error {
|
|
|
|
blob := blobs[h.ID]
|
|
|
|
if err != nil {
|
|
|
|
for file := range blob.files {
|
|
|
|
if errFile := sanitizeError(file, err); errFile != nil {
|
|
|
|
return errFile
|
2020-11-18 12:36:06 +01:00
|
|
|
}
|
|
|
|
}
|
2021-08-20 12:12:38 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
for file, offsets := range blob.files {
|
|
|
|
for _, offset := range offsets {
|
|
|
|
writeToFile := func() error {
|
|
|
|
// this looks overly complicated and needs explanation
|
|
|
|
// two competing requirements:
|
|
|
|
// - must create the file once and only once
|
|
|
|
// - should allow concurrent writes to the file
|
|
|
|
// so write the first blob while holding file lock
|
|
|
|
// write other blobs after releasing the lock
|
|
|
|
createSize := int64(-1)
|
|
|
|
file.lock.Lock()
|
|
|
|
if file.inProgress {
|
|
|
|
file.lock.Unlock()
|
|
|
|
} else {
|
|
|
|
defer file.lock.Unlock()
|
|
|
|
file.inProgress = true
|
|
|
|
createSize = file.size
|
2020-11-18 12:36:06 +01:00
|
|
|
}
|
2021-08-20 12:12:38 +02:00
|
|
|
return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize)
|
|
|
|
}
|
|
|
|
err := sanitizeError(file, writeToFile())
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
|
|
|
}
|
2019-11-27 07:22:38 -05:00
|
|
|
}
|
2020-11-18 12:36:06 +01:00
|
|
|
return nil
|
|
|
|
})
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
for file := range pack.files {
|
2021-01-04 19:20:04 +01:00
|
|
|
if errFile := sanitizeError(file, err); errFile != nil {
|
|
|
|
return errFile
|
|
|
|
}
|
2020-11-18 12:36:06 +01:00
|
|
|
}
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|
2020-11-18 12:36:06 +01:00
|
|
|
|
2021-01-04 19:20:04 +01:00
|
|
|
return nil
|
2018-04-08 08:02:30 -04:00
|
|
|
}
|