package restorer

import (
	"context"
	"io"
	"math"
	"path/filepath"

	"github.com/restic/restic/internal/crypto"
	"github.com/restic/restic/internal/debug"
	"github.com/restic/restic/internal/errors"
	"github.com/restic/restic/internal/restic"
)

// TODO if a blob is corrupt, there may be good blob copies in other packs
// TODO evaluate if it makes sense to split download and processing workers
//      pro: can (slowly) read network and decrypt/write files concurrently
//      con: each worker needs to keep one pack in memory
// TODO evaluate memory footprint for larger repositories, say 10M packs/10M files
// TODO consider replacing pack file cache with blob cache
// TODO avoid decrypting the same blob multiple times
// TODO evaluate disabled debug logging overhead for large repositories

const (
	workerCount = 8

	// max number of open output file handles
	filesWriterCount = 32

	// estimated average pack size, used to calculate the pack cache capacity
	averagePackSize = 5 * 1024 * 1024

	// the pack cache capacity should allow at least one cached pack per worker,
	// plus space for an extra 5 packs for actual caching
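	// (with workerCount = 8 and averagePackSize = 5 MiB this comes to
	// (8+5) * 5 MiB = 65 MiB)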
	packCacheCapacity = (workerCount + 5) * averagePackSize
)

// information about a regular file being restored
type fileInfo struct {
	location string      // file on local filesystem relative to restorer basedir
	blobs    []restic.ID // remaining blobs of the file
}

// information about a data pack required to restore one or more files
type packInfo struct {
	// the pack id
	id restic.ID

	// set of files that use blobs from this pack
	files map[*fileInfo]struct{}

	// number of other packs that must be downloaded before all blobs in this pack can be used
	cost int

	// used by packHeap
	index int
}

// fileRestorer restores a set of files
type fileRestorer struct {
	key        *crypto.Key
	idx        filePackTraverser
	packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error

	packCache   *packCache   // pack cache
	filesWriter *filesWriter // file writer

	dst   string
	files []*fileInfo
}

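// newFileRestorer returns a fileRestorer that writes restored files under dst,
// using packLoader to read pack bytes from the repository and idx to map file
// blobs to the packs that contain them.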
func newFileRestorer(dst string, packLoader func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error, key *crypto.Key, idx filePackTraverser) *fileRestorer {
	return &fileRestorer{
		packLoader:  packLoader,
		key:         key,
		idx:         idx,
		filesWriter: newFilesWriter(filesWriterCount),
		packCache:   newPackCache(packCacheCapacity),
		dst:         dst,
	}
}

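// addFile schedules the file at the given location (relative to the restorer
// basedir) for restore; content lists the IDs of the file's blobs in order.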
func (r *fileRestorer) addFile(location string, content restic.IDs) {
	r.files = append(r.files, &fileInfo{location: location, blobs: content})
}

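// targetPath returns the local filesystem path of the file, i.e. its location
// joined to the restorer's dst directory.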
func (r *fileRestorer) targetPath(location string) string {
	return filepath.Join(r.dst, location)
}

// used to pass information among workers (wish Go channels allowed multiple values)
type processingInfo struct {
	pack  *packInfo
	files map[*fileInfo]error
}

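// restoreFiles restores all added files, scheduling pack downloads across
// workerCount worker goroutines. onError is invoked for each file that could
// not be restored; a canceled context aborts the restore.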
func (r *fileRestorer) restoreFiles(ctx context.Context, onError func(path string, err error)) error {
	// TODO conditionally enable when debug log is on
	// for _, file := range r.files {
	// 	dbgmsg := file.location + ": "
	// 	r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
	// 		if packIdx > 0 {
	// 			dbgmsg += ", "
	// 		}
	// 		dbgmsg += "pack{id=" + packID.Str() + ", blobs: "
	// 		for blobIdx, blob := range packBlobs {
	// 			if blobIdx > 0 {
	// 				dbgmsg += ", "
	// 			}
	// 			dbgmsg += blob.ID.Str()
	// 		}
	// 		dbgmsg += "}"
	// 		return true // keep going
	// 	})
	// 	debug.Log(dbgmsg)
	// }

	inprogress := make(map[*fileInfo]struct{})
	queue, err := newPackQueue(r.idx, r.files, func(files map[*fileInfo]struct{}) bool {
		for file := range files {
			if _, found := inprogress[file]; found {
				return true
			}
		}
		return false
	})
	if err != nil {
		return err
	}

	// workers
	downloadCh := make(chan processingInfo)
	feedbackCh := make(chan processingInfo)

	defer close(downloadCh)
	defer close(feedbackCh)

	worker := func() {
		for {
			select {
			case <-ctx.Done():
				return
			case request, ok := <-downloadCh:
				if !ok {
					return // channel closed
				}
				rd, err := r.downloadPack(ctx, request.pack)
				if err == nil {
					r.processPack(ctx, request, rd)
				} else {
					// mark all files as failed
					for file := range request.files {
						request.files[file] = err
					}
				}
				feedbackCh <- request
			}
		}
	}
	for i := 0; i < workerCount; i++ {
		go worker()
	}

	processFeedback := func(pack *packInfo, ferrors map[*fileInfo]error) {
		// update the files' remaining blob lists
		// must do this here to avoid a race between worker and feedback-processing goroutines
		var success []*fileInfo
		var failure []*fileInfo
		for file, ferr := range ferrors {
			target := r.targetPath(file.location)
			if ferr != nil {
				onError(file.location, ferr)
				r.filesWriter.close(target)
				delete(inprogress, file)
				failure = append(failure, file)
			} else {
				r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
					file.blobs = file.blobs[len(packBlobs):]
					return false // only interested in the first pack
				})
				if len(file.blobs) == 0 {
					r.filesWriter.close(target)
					delete(inprogress, file)
				}
				success = append(success, file)
			}
		}
		// update the queue and requeue the pack as necessary
		if !queue.requeuePack(pack, success, failure) {
			r.packCache.remove(pack.id)
			debug.Log("Purged used-up pack %s from pack cache", pack.id.Str())
		}
	}

	// the main restore loop
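	// Note: the select below both feeds workers and drains their feedback;
	// handling feedbackCh while waiting to send on downloadCh keeps busy
	// workers from blocking forever on their result send.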
	for !queue.isEmpty() {
		debug.Log("-----------------------------------")
		pack, files := queue.nextPack()
		if pack != nil {
			ferrors := make(map[*fileInfo]error)
			for _, file := range files {
				ferrors[file] = nil
				inprogress[file] = struct{}{}
			}
			select {
			case <-ctx.Done():
				return ctx.Err()
			case downloadCh <- processingInfo{pack: pack, files: ferrors}:
				debug.Log("Scheduled download pack %s (%d files)", pack.id.Str(), len(files))
			case feedback := <-feedbackCh:
				queue.requeuePack(pack, []*fileInfo{}, []*fileInfo{}) // didn't use the pack during this iteration
				processFeedback(feedback.pack, feedback.files)
			}
		} else {
			select {
			case <-ctx.Done():
				return ctx.Err()
			case feedback := <-feedbackCh:
				processFeedback(feedback.pack, feedback.files)
			}
		}
	}

	return nil
}

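// downloadPack returns a reader for the byte range of the pack that covers all
// blobs still needed from it, fetching that range through the pack cache.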
func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) (readerAtCloser, error) {
	// calculate pack byte range
	start, end := int64(math.MaxInt64), int64(0)
	for file := range pack.files {
		r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
			if packID.Equal(pack.id) {
				for _, blob := range packBlobs {
					if start > int64(blob.Offset) {
						start = int64(blob.Offset)
					}
					if end < int64(blob.Offset+blob.Length) {
						end = int64(blob.Offset + blob.Length)
					}
				}
			}

			return true // keep going
		})
	}

	packReader, err := r.packCache.get(pack.id, start, int(end-start), func(offset int64, length int, wr io.WriteSeeker) error {
		h := restic.Handle{Type: restic.DataFile, Name: pack.id.String()}
		return r.packLoader(ctx, h, length, offset, func(rd io.Reader) error {
			// reset the file in case of a download retry
			_, err := wr.Seek(0, io.SeekStart)
			if err != nil {
				return err
			}

			n, err := io.Copy(wr, rd)
			if err != nil {
				return err
			}
			if n != int64(length) {
				return errors.Errorf("unexpected pack size: expected %d but got %d", length, n)
			}

			return nil
		})
	})
	if err != nil {
		return nil, err
	}

	return packReader, nil
}

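// processPack writes the blobs of the first remaining pack of each requested
// file to that file's target path; per-file errors are recorded in
// request.files for the feedback loop.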
func (r *fileRestorer) processPack(ctx context.Context, request processingInfo, rd readerAtCloser) {
	defer rd.Close()

	for file := range request.files {
		target := r.targetPath(file.location)
		r.idx.forEachFilePack(file, func(packIdx int, packID restic.ID, packBlobs []restic.Blob) bool {
			for _, blob := range packBlobs {
				debug.Log("Writing blob %s (%d bytes) from pack %s to %s", blob.ID.Str(), blob.Length, packID.Str(), file.location)
				buf, err := r.loadBlob(rd, blob)
				if err == nil {
					err = r.filesWriter.writeToFile(target, buf)
				}
				if err != nil {
					request.files[file] = err
					break // could not restore the file
				}
			}
			return false // only process the first pack
		})
	}
}

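// loadBlob reads the encrypted blob from rd at the blob's pack offset,
// decrypts it, and verifies the plaintext hash against the blob ID.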
func (r *fileRestorer) loadBlob(rd io.ReaderAt, blob restic.Blob) ([]byte, error) {
	// TODO reconcile with Repository#loadBlob implementation

	buf := make([]byte, blob.Length)

	n, err := rd.ReadAt(buf, int64(blob.Offset))
	if err != nil {
		return nil, err
	}

	if n != int(blob.Length) {
		return nil, errors.Errorf("error loading blob %v: wrong length returned, want %d, got %d", blob.ID.Str(), blob.Length, n)
	}

	// decrypt
	nonce, ciphertext := buf[:r.key.NonceSize()], buf[r.key.NonceSize():]
	plaintext, err := r.key.Open(ciphertext[:0], nonce, ciphertext, nil)
	if err != nil {
		return nil, errors.Errorf("decrypting blob %v failed: %v", blob.ID, err)
	}

	// check hash
	if !restic.Hash(plaintext).Equal(blob.ID) {
		return nil, errors.Errorf("blob %v returned invalid hash", blob.ID)
	}

	return plaintext, nil
}
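
// A hypothetical usage sketch (the repository accessors shown here are
// assumptions for illustration, not part of this file):
//
//	idx := filePackTraverser{lookup: repo.Index().Lookup}
//	r := newFileRestorer(dst, repo.Backend().Load, repo.Key(), idx)
//	for location, content := range filesToRestore {
//		r.addFile(location, content)
//	}
//	err := r.restoreFiles(ctx, func(location string, err error) {
//		debug.Log("error restoring %s: %v", location, err)
//	})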