2
2
mirror of https://github.com/octoleo/restic.git synced 2024-11-23 21:27:34 +00:00
restic/internal/repository/repository.go
Michael Eischer 666a0b0bdb repository: streamPack: replace streaming with chunked download
Due to the interface of streamPack, we cannot guarantee that operations
progress fast enough that the underlying connections remains open. This
introduces partial failures which massively complicate the error
handling.

Switch to a simpler approach that retrieves the pack in chunks of 32MB.
If a blob is larger than this limit, then it is downloaded separately.

To avoid multiple copies in memory, an auxiliary interface
`discardReader` is introduced that allows directly accessing the
downloaded byte slices, while still supporting the streaming used by the
`check` command.
2024-04-22 21:21:23 +02:00

1215 lines
32 KiB
Go

package repository
import (
"bytes"
"context"
"fmt"
"io"
"os"
"runtime"
"sort"
"sync"
"github.com/klauspost/compress/zstd"
"github.com/restic/chunker"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/backend/dryrun"
"github.com/restic/restic/internal/cache"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/index"
"github.com/restic/restic/internal/pack"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/ui/progress"
"golang.org/x/sync/errgroup"
)
const MinPackSize = 4 * 1024 * 1024
const DefaultPackSize = 16 * 1024 * 1024
const MaxPackSize = 128 * 1024 * 1024
// Repository is used to access a repository in a backend.
type Repository struct {
be backend.Backend
cfg restic.Config
key *crypto.Key
keyID restic.ID
idx *index.MasterIndex
Cache *cache.Cache
opts Options
noAutoIndexUpdate bool
packerWg *errgroup.Group
uploader *packerUploader
treePM *packerManager
dataPM *packerManager
allocEnc sync.Once
allocDec sync.Once
enc *zstd.Encoder
dec *zstd.Decoder
}
type Options struct {
Compression CompressionMode
PackSize uint
NoExtraVerify bool
}
// CompressionMode configures if data should be compressed.
type CompressionMode uint
// Constants for the different compression levels.
const (
CompressionAuto CompressionMode = 0
CompressionOff CompressionMode = 1
CompressionMax CompressionMode = 2
CompressionInvalid CompressionMode = 3
)
// Set implements the method needed for pflag command flag parsing.
func (c *CompressionMode) Set(s string) error {
switch s {
case "auto":
*c = CompressionAuto
case "off":
*c = CompressionOff
case "max":
*c = CompressionMax
default:
*c = CompressionInvalid
return fmt.Errorf("invalid compression mode %q, must be one of (auto|off|max)", s)
}
return nil
}
func (c *CompressionMode) String() string {
switch *c {
case CompressionAuto:
return "auto"
case CompressionOff:
return "off"
case CompressionMax:
return "max"
default:
return "invalid"
}
}
func (c *CompressionMode) Type() string {
return "mode"
}
// New returns a new repository with backend be.
func New(be backend.Backend, opts Options) (*Repository, error) {
if opts.Compression == CompressionInvalid {
return nil, errors.New("invalid compression mode")
}
if opts.PackSize == 0 {
opts.PackSize = DefaultPackSize
}
if opts.PackSize > MaxPackSize {
return nil, fmt.Errorf("pack size larger than limit of %v MiB", MaxPackSize/1024/1024)
} else if opts.PackSize < MinPackSize {
return nil, fmt.Errorf("pack size smaller than minimum of %v MiB", MinPackSize/1024/1024)
}
repo := &Repository{
be: be,
opts: opts,
idx: index.NewMasterIndex(),
}
return repo, nil
}
// DisableAutoIndexUpdate deactives the automatic finalization and upload of new
// indexes once these are full
func (r *Repository) DisableAutoIndexUpdate() {
r.noAutoIndexUpdate = true
}
// setConfig assigns the given config and updates the repository parameters accordingly
func (r *Repository) setConfig(cfg restic.Config) {
r.cfg = cfg
if r.cfg.Version >= 2 {
r.idx.MarkCompressed()
}
}
// Config returns the repository configuration.
func (r *Repository) Config() restic.Config {
return r.cfg
}
// PackSize return the target size of a pack file when uploading
func (r *Repository) PackSize() uint {
return r.opts.PackSize
}
// UseCache replaces the backend with the wrapped cache.
func (r *Repository) UseCache(c *cache.Cache) {
if c == nil {
return
}
debug.Log("using cache")
r.Cache = c
r.be = c.Wrap(r.be)
}
// SetDryRun sets the repo backend into dry-run mode.
func (r *Repository) SetDryRun() {
r.be = dryrun.New(r.be)
}
// LoadUnpacked loads and decrypts the file with the given type and ID.
func (r *Repository) LoadUnpacked(ctx context.Context, t restic.FileType, id restic.ID) ([]byte, error) {
debug.Log("load %v with id %v", t, id)
if t == restic.ConfigFile {
id = restic.ID{}
}
ctx, cancel := context.WithCancel(ctx)
h := backend.Handle{Type: t, Name: id.String()}
retriedInvalidData := false
var dataErr error
wr := new(bytes.Buffer)
err := r.be.Load(ctx, h, 0, 0, func(rd io.Reader) error {
// make sure this call is idempotent, in case an error occurs
wr.Reset()
_, cerr := io.Copy(wr, rd)
if cerr != nil {
return cerr
}
buf := wr.Bytes()
if t != restic.ConfigFile && !restic.Hash(buf).Equal(id) {
debug.Log("retry loading broken blob %v", h)
if !retriedInvalidData {
retriedInvalidData = true
} else {
// with a canceled context there is not guarantee which error will
// be returned by `be.Load`.
dataErr = fmt.Errorf("load(%v): %w", h, restic.ErrInvalidData)
cancel()
}
return restic.ErrInvalidData
}
return nil
})
if dataErr != nil {
return nil, dataErr
}
if err != nil {
return nil, err
}
buf := wr.Bytes()
nonce, ciphertext := buf[:r.key.NonceSize()], buf[r.key.NonceSize():]
plaintext, err := r.key.Open(ciphertext[:0], nonce, ciphertext, nil)
if err != nil {
return nil, err
}
if t != restic.ConfigFile {
return r.decompressUnpacked(plaintext)
}
return plaintext, nil
}
type haver interface {
Has(backend.Handle) bool
}
// sortCachedPacksFirst moves all cached pack files to the front of blobs.
func sortCachedPacksFirst(cache haver, blobs []restic.PackedBlob) {
if cache == nil {
return
}
// no need to sort a list with one element
if len(blobs) == 1 {
return
}
cached := blobs[:0]
noncached := make([]restic.PackedBlob, 0, len(blobs)/2)
for _, blob := range blobs {
if cache.Has(backend.Handle{Type: restic.PackFile, Name: blob.PackID.String()}) {
cached = append(cached, blob)
continue
}
noncached = append(noncached, blob)
}
copy(blobs[len(cached):], noncached)
}
// LoadBlob loads a blob of type t from the repository.
// It may use all of buf[:cap(buf)] as scratch space.
func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) {
debug.Log("load %v with id %v (buf len %v, cap %d)", t, id, len(buf), cap(buf))
// lookup packs
blobs := r.idx.Lookup(restic.BlobHandle{ID: id, Type: t})
if len(blobs) == 0 {
debug.Log("id %v not found in index", id)
return nil, errors.Errorf("id %v not found in repository", id)
}
// try cached pack files first
sortCachedPacksFirst(r.Cache, blobs)
var lastError error
for _, blob := range blobs {
debug.Log("blob %v/%v found: %v", t, id, blob)
if blob.Type != t {
debug.Log("blob %v has wrong block type, want %v", blob, t)
}
// load blob from pack
h := backend.Handle{Type: restic.PackFile, Name: blob.PackID.String(), IsMetadata: t.IsMetadata()}
switch {
case cap(buf) < int(blob.Length):
buf = make([]byte, blob.Length)
case len(buf) != int(blob.Length):
buf = buf[:blob.Length]
}
n, err := backend.ReadAt(ctx, r.be, h, int64(blob.Offset), buf)
if err != nil {
debug.Log("error loading blob %v: %v", blob, err)
lastError = err
continue
}
if uint(n) != blob.Length {
lastError = errors.Errorf("error loading blob %v: wrong length returned, want %d, got %d",
id.Str(), blob.Length, uint(n))
debug.Log("lastError: %v", lastError)
continue
}
// decrypt
nonce, ciphertext := buf[:r.key.NonceSize()], buf[r.key.NonceSize():]
plaintext, err := r.key.Open(ciphertext[:0], nonce, ciphertext, nil)
if err != nil {
lastError = errors.Errorf("decrypting blob %v failed: %v", id, err)
continue
}
if blob.IsCompressed() {
plaintext, err = r.getZstdDecoder().DecodeAll(plaintext, make([]byte, 0, blob.DataLength()))
if err != nil {
lastError = errors.Errorf("decompressing blob %v failed: %v", id, err)
continue
}
}
// check hash
if !restic.Hash(plaintext).Equal(id) {
lastError = errors.Errorf("blob %v returned invalid hash", id)
continue
}
if len(plaintext) > cap(buf) {
return plaintext, nil
}
// move decrypted data to the start of the buffer
buf = buf[:len(plaintext)]
copy(buf, plaintext)
return buf, nil
}
if lastError != nil {
return nil, lastError
}
return nil, errors.Errorf("loading blob %v from %v packs failed", id.Str(), len(blobs))
}
// LookupBlobSize returns the size of blob id.
func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bool) {
return r.idx.LookupSize(restic.BlobHandle{ID: id, Type: tpe})
}
func (r *Repository) getZstdEncoder() *zstd.Encoder {
r.allocEnc.Do(func() {
level := zstd.SpeedDefault
if r.opts.Compression == CompressionMax {
level = zstd.SpeedBestCompression
}
opts := []zstd.EOption{
// Set the compression level configured.
zstd.WithEncoderLevel(level),
// Disable CRC, we have enough checks in place, makes the
// compressed data four bytes shorter.
zstd.WithEncoderCRC(false),
// Set a window of 512kbyte, so we have good lookbehind for usual
// blob sizes.
zstd.WithWindowSize(512 * 1024),
}
enc, err := zstd.NewWriter(nil, opts...)
if err != nil {
panic(err)
}
r.enc = enc
})
return r.enc
}
func (r *Repository) getZstdDecoder() *zstd.Decoder {
r.allocDec.Do(func() {
opts := []zstd.DOption{
// Use all available cores.
zstd.WithDecoderConcurrency(0),
// Limit the maximum decompressed memory. Set to a very high,
// conservative value.
zstd.WithDecoderMaxMemory(16 * 1024 * 1024 * 1024),
}
dec, err := zstd.NewReader(nil, opts...)
if err != nil {
panic(err)
}
r.dec = dec
})
return r.dec
}
// saveAndEncrypt encrypts data and stores it to the backend as type t. If data
// is small enough, it will be packed together with other small blobs. The
// caller must ensure that the id matches the data. Returned is the size data
// occupies in the repo (compressed or not, including the encryption overhead).
func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) (size int, err error) {
debug.Log("save id %v (%v, %d bytes)", id, t, len(data))
uncompressedLength := 0
if r.cfg.Version > 1 {
// we have a repo v2, so compression is available. if the user opts to
// not compress, we won't compress any data, but everything else is
// compressed.
if r.opts.Compression != CompressionOff || t != restic.DataBlob {
uncompressedLength = len(data)
data = r.getZstdEncoder().EncodeAll(data, nil)
}
}
nonce := crypto.NewRandomNonce()
ciphertext := make([]byte, 0, crypto.CiphertextLength(len(data)))
ciphertext = append(ciphertext, nonce...)
// encrypt blob
ciphertext = r.key.Seal(ciphertext, nonce, data, nil)
if err := r.verifyCiphertext(ciphertext, uncompressedLength, id); err != nil {
//nolint:revive // ignore linter warnings about error message spelling
return 0, fmt.Errorf("Detected data corruption while saving blob %v: %w\nCorrupted blobs are either caused by hardware issues or software bugs. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting.", id, err)
}
// find suitable packer and add blob
var pm *packerManager
switch t {
case restic.TreeBlob:
pm = r.treePM
case restic.DataBlob:
pm = r.dataPM
default:
panic(fmt.Sprintf("invalid type: %v", t))
}
return pm.SaveBlob(ctx, t, id, ciphertext, uncompressedLength)
}
func (r *Repository) verifyCiphertext(buf []byte, uncompressedLength int, id restic.ID) error {
if r.opts.NoExtraVerify {
return nil
}
nonce, ciphertext := buf[:r.key.NonceSize()], buf[r.key.NonceSize():]
plaintext, err := r.key.Open(nil, nonce, ciphertext, nil)
if err != nil {
return fmt.Errorf("decryption failed: %w", err)
}
if uncompressedLength != 0 {
// DecodeAll will allocate a slice if it is not large enough since it
// knows the decompressed size (because we're using EncodeAll)
plaintext, err = r.getZstdDecoder().DecodeAll(plaintext, nil)
if err != nil {
return fmt.Errorf("decompression failed: %w", err)
}
}
if !restic.Hash(plaintext).Equal(id) {
return errors.New("hash mismatch")
}
return nil
}
func (r *Repository) compressUnpacked(p []byte) ([]byte, error) {
// compression is only available starting from version 2
if r.cfg.Version < 2 {
return p, nil
}
// version byte
out := []byte{2}
out = r.getZstdEncoder().EncodeAll(p, out)
return out, nil
}
func (r *Repository) decompressUnpacked(p []byte) ([]byte, error) {
// compression is only available starting from version 2
if r.cfg.Version < 2 {
return p, nil
}
if len(p) == 0 {
// too short for version header
return p, nil
}
if p[0] == '[' || p[0] == '{' {
// probably raw JSON
return p, nil
}
// version
if p[0] != 2 {
return nil, errors.New("not supported encoding format")
}
return r.getZstdDecoder().DecodeAll(p[1:], nil)
}
// SaveUnpacked encrypts data and stores it in the backend. Returned is the
// storage hash.
func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, buf []byte) (id restic.ID, err error) {
p := buf
if t != restic.ConfigFile {
p, err = r.compressUnpacked(p)
if err != nil {
return restic.ID{}, err
}
}
ciphertext := crypto.NewBlobBuffer(len(p))
ciphertext = ciphertext[:0]
nonce := crypto.NewRandomNonce()
ciphertext = append(ciphertext, nonce...)
ciphertext = r.key.Seal(ciphertext, nonce, p, nil)
if err := r.verifyUnpacked(ciphertext, t, buf); err != nil {
//nolint:revive // ignore linter warnings about error message spelling
return restic.ID{}, fmt.Errorf("Detected data corruption while saving file of type %v: %w\nCorrupted data is either caused by hardware issues or software bugs. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting.", t, err)
}
if t == restic.ConfigFile {
id = restic.ID{}
} else {
id = restic.Hash(ciphertext)
}
h := backend.Handle{Type: t, Name: id.String()}
err = r.be.Save(ctx, h, backend.NewByteReader(ciphertext, r.be.Hasher()))
if err != nil {
debug.Log("error saving blob %v: %v", h, err)
return restic.ID{}, err
}
debug.Log("blob %v saved", h)
return id, nil
}
func (r *Repository) verifyUnpacked(buf []byte, t restic.FileType, expected []byte) error {
if r.opts.NoExtraVerify {
return nil
}
nonce, ciphertext := buf[:r.key.NonceSize()], buf[r.key.NonceSize():]
plaintext, err := r.key.Open(nil, nonce, ciphertext, nil)
if err != nil {
return fmt.Errorf("decryption failed: %w", err)
}
if t != restic.ConfigFile {
plaintext, err = r.decompressUnpacked(plaintext)
if err != nil {
return fmt.Errorf("decompression failed: %w", err)
}
}
if !bytes.Equal(plaintext, expected) {
return errors.New("data mismatch")
}
return nil
}
// Flush saves all remaining packs and the index
func (r *Repository) Flush(ctx context.Context) error {
if err := r.flushPacks(ctx); err != nil {
return err
}
// Save index after flushing only if noAutoIndexUpdate is not set
if r.noAutoIndexUpdate {
return nil
}
return r.idx.SaveIndex(ctx, r)
}
func (r *Repository) StartPackUploader(ctx context.Context, wg *errgroup.Group) {
if r.packerWg != nil {
panic("uploader already started")
}
innerWg, ctx := errgroup.WithContext(ctx)
r.packerWg = innerWg
r.uploader = newPackerUploader(ctx, innerWg, r, r.be.Connections())
r.treePM = newPackerManager(r.key, restic.TreeBlob, r.PackSize(), r.uploader.QueuePacker)
r.dataPM = newPackerManager(r.key, restic.DataBlob, r.PackSize(), r.uploader.QueuePacker)
wg.Go(func() error {
return innerWg.Wait()
})
}
// FlushPacks saves all remaining packs.
func (r *Repository) flushPacks(ctx context.Context) error {
if r.packerWg == nil {
return nil
}
err := r.treePM.Flush(ctx)
if err != nil {
return err
}
err = r.dataPM.Flush(ctx)
if err != nil {
return err
}
r.uploader.TriggerShutdown()
err = r.packerWg.Wait()
r.treePM = nil
r.dataPM = nil
r.uploader = nil
r.packerWg = nil
return err
}
// Backend returns the backend for the repository.
func (r *Repository) Backend() backend.Backend {
return r.be
}
func (r *Repository) Connections() uint {
return r.be.Connections()
}
// Index returns the currently used MasterIndex.
func (r *Repository) Index() restic.MasterIndex {
return r.idx
}
// SetIndex instructs the repository to use the given index.
func (r *Repository) SetIndex(i restic.MasterIndex) error {
r.idx = i.(*index.MasterIndex)
return r.prepareCache()
}
// LoadIndex loads all index files from the backend in parallel and stores them
func (r *Repository) LoadIndex(ctx context.Context, p *progress.Counter) error {
debug.Log("Loading index")
indexList, err := restic.MemorizeList(ctx, r, restic.IndexFile)
if err != nil {
return err
}
if p != nil {
var numIndexFiles uint64
err := indexList.List(ctx, restic.IndexFile, func(_ restic.ID, _ int64) error {
numIndexFiles++
return nil
})
if err != nil {
return err
}
p.SetMax(numIndexFiles)
defer p.Done()
}
err = index.ForAllIndexes(ctx, indexList, r, func(_ restic.ID, idx *index.Index, _ bool, err error) error {
if err != nil {
return err
}
r.idx.Insert(idx)
if p != nil {
p.Add(1)
}
return nil
})
if err != nil {
return err
}
err = r.idx.MergeFinalIndexes()
if err != nil {
return err
}
// Trigger GC to reset garbage collection threshold
runtime.GC()
if r.cfg.Version < 2 {
// sanity check
ctx, cancel := context.WithCancel(ctx)
defer cancel()
invalidIndex := false
r.idx.Each(ctx, func(blob restic.PackedBlob) {
if blob.IsCompressed() {
invalidIndex = true
}
})
if invalidIndex {
return errors.New("index uses feature not supported by repository version 1")
}
}
// remove index files from the cache which have been removed in the repo
return r.prepareCache()
}
// CreateIndexFromPacks creates a new index by reading all given pack files (with sizes).
// The index is added to the MasterIndex but not marked as finalized.
// Returned is the list of pack files which could not be read.
func (r *Repository) CreateIndexFromPacks(ctx context.Context, packsize map[restic.ID]int64, p *progress.Counter) (invalid restic.IDs, err error) {
var m sync.Mutex
debug.Log("Loading index from pack files")
// track spawned goroutines using wg, create a new context which is
// cancelled as soon as an error occurs.
wg, ctx := errgroup.WithContext(ctx)
type FileInfo struct {
restic.ID
Size int64
}
ch := make(chan FileInfo)
// send list of pack files through ch, which is closed afterwards
wg.Go(func() error {
defer close(ch)
for id, size := range packsize {
select {
case <-ctx.Done():
return ctx.Err()
case ch <- FileInfo{id, size}:
}
}
return nil
})
// a worker receives an pack ID from ch, reads the pack contents, and adds them to idx
worker := func() error {
for fi := range ch {
entries, _, err := r.ListPack(ctx, fi.ID, fi.Size)
if err != nil {
debug.Log("unable to list pack file %v", fi.ID.Str())
m.Lock()
invalid = append(invalid, fi.ID)
m.Unlock()
}
r.idx.StorePack(fi.ID, entries)
p.Add(1)
}
return nil
}
// decoding the pack header is usually quite fast, thus we are primarily IO-bound
workerCount := int(r.Connections())
// run workers on ch
for i := 0; i < workerCount; i++ {
wg.Go(worker)
}
err = wg.Wait()
if err != nil {
return invalid, err
}
return invalid, nil
}
// prepareCache initializes the local cache. indexIDs is the list of IDs of
// index files still present in the repo.
func (r *Repository) prepareCache() error {
if r.Cache == nil {
return nil
}
indexIDs := r.idx.IDs()
debug.Log("prepare cache with %d index files", len(indexIDs))
// clear old index files
err := r.Cache.Clear(restic.IndexFile, indexIDs)
if err != nil {
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
}
packs := r.idx.Packs(restic.NewIDSet())
// clear old packs
err = r.Cache.Clear(restic.PackFile, packs)
if err != nil {
fmt.Fprintf(os.Stderr, "error clearing pack files in cache: %v\n", err)
}
return nil
}
// SearchKey finds a key with the supplied password, afterwards the config is
// read and parsed. It tries at most maxKeys key files in the repo.
func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int, keyHint string) error {
key, err := SearchKey(ctx, r, password, maxKeys, keyHint)
if err != nil {
return err
}
oldKey := r.key
oldKeyID := r.keyID
r.key = key.master
r.keyID = key.ID()
cfg, err := restic.LoadConfig(ctx, r)
if err != nil {
r.key = oldKey
r.keyID = oldKeyID
if err == crypto.ErrUnauthenticated {
return fmt.Errorf("config or key %v is damaged: %w", key.ID(), err)
}
return fmt.Errorf("config cannot be loaded: %w", err)
}
r.setConfig(cfg)
return nil
}
// Init creates a new master key with the supplied password, initializes and
// saves the repository config.
func (r *Repository) Init(ctx context.Context, version uint, password string, chunkerPolynomial *chunker.Pol) error {
if version > restic.MaxRepoVersion {
return fmt.Errorf("repository version %v too high", version)
}
if version < restic.MinRepoVersion {
return fmt.Errorf("repository version %v too low", version)
}
_, err := r.be.Stat(ctx, backend.Handle{Type: restic.ConfigFile})
if err != nil && !r.be.IsNotExist(err) {
return err
}
if err == nil {
return errors.New("repository master key and config already initialized")
}
cfg, err := restic.CreateConfig(version)
if err != nil {
return err
}
if chunkerPolynomial != nil {
cfg.ChunkerPolynomial = *chunkerPolynomial
}
return r.init(ctx, password, cfg)
}
// init creates a new master key with the supplied password and uses it to save
// the config into the repo.
func (r *Repository) init(ctx context.Context, password string, cfg restic.Config) error {
key, err := createMasterKey(ctx, r, password)
if err != nil {
return err
}
r.key = key.master
r.keyID = key.ID()
r.setConfig(cfg)
return restic.SaveConfig(ctx, r, cfg)
}
// Key returns the current master key.
func (r *Repository) Key() *crypto.Key {
return r.key
}
// KeyID returns the id of the current key in the backend.
func (r *Repository) KeyID() restic.ID {
return r.keyID
}
// List runs fn for all files of type t in the repo.
func (r *Repository) List(ctx context.Context, t restic.FileType, fn func(restic.ID, int64) error) error {
return r.be.List(ctx, t, func(fi backend.FileInfo) error {
id, err := restic.ParseID(fi.Name)
if err != nil {
debug.Log("unable to parse %v as an ID", fi.Name)
return nil
}
return fn(id, fi.Size)
})
}
// ListPack returns the list of blobs saved in the pack id and the length of
// the pack header.
func (r *Repository) ListPack(ctx context.Context, id restic.ID, size int64) ([]restic.Blob, uint32, error) {
h := backend.Handle{Type: restic.PackFile, Name: id.String()}
return pack.List(r.Key(), backend.ReaderAt(ctx, r.Backend(), h), size)
}
// Delete calls backend.Delete() if implemented, and returns an error
// otherwise.
func (r *Repository) Delete(ctx context.Context) error {
return r.be.Delete(ctx)
}
// Close closes the repository by closing the backend.
func (r *Repository) Close() error {
return r.be.Close()
}
// SaveBlob saves a blob of type t into the repository.
// It takes care that no duplicates are saved; this can be overwritten
// by setting storeDuplicate to true.
// If id is the null id, it will be computed and returned.
// Also returns if the blob was already known before.
// If the blob was not known before, it returns the number of bytes the blob
// occupies in the repo (compressed or not, including encryption overhead).
func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte, id restic.ID, storeDuplicate bool) (newID restic.ID, known bool, size int, err error) {
// compute plaintext hash if not already set
if id.IsNull() {
// Special case the hash calculation for all zero chunks. This is especially
// useful for sparse files containing large all zero regions. For these we can
// process chunks as fast as we can read the from disk.
if len(buf) == chunker.MinSize && restic.ZeroPrefixLen(buf) == chunker.MinSize {
newID = ZeroChunk()
} else {
newID = restic.Hash(buf)
}
} else {
newID = id
}
// first try to add to pending blobs; if not successful, this blob is already known
known = !r.idx.AddPending(restic.BlobHandle{ID: newID, Type: t})
// only save when needed or explicitly told
if !known || storeDuplicate {
size, err = r.saveAndEncrypt(ctx, t, buf, newID)
}
return newID, known, size, err
}
type backendLoadFn func(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error
type loadBlobFn func(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error)
// Skip sections with more than 4MB unused blobs
const maxUnusedRange = 4 * 1024 * 1024
// LoadBlobsFromPack loads the listed blobs from the specified pack file. The plaintext blob is passed to
// the handleBlobFn callback or an error if decryption failed or the blob hash does not match.
// handleBlobFn is called at most once for each blob. If the callback returns an error,
// then LoadBlobsFromPack will abort and not retry it. The buf passed to the callback is only valid within
// this specific call. The callback must not keep a reference to buf.
func (r *Repository) LoadBlobsFromPack(ctx context.Context, packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error {
return streamPack(ctx, r.Backend().Load, r.LoadBlob, r.key, packID, blobs, handleBlobFn)
}
func streamPack(ctx context.Context, beLoad backendLoadFn, loadBlobFn loadBlobFn, key *crypto.Key, packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error {
if len(blobs) == 0 {
// nothing to do
return nil
}
sort.Slice(blobs, func(i, j int) bool {
return blobs[i].Offset < blobs[j].Offset
})
lowerIdx := 0
lastPos := blobs[0].Offset
const maxChunkSize = 2 * DefaultPackSize
for i := 0; i < len(blobs); i++ {
if blobs[i].Offset < lastPos {
// don't wait for streamPackPart to fail
return errors.Errorf("overlapping blobs in pack %v", packID)
}
chunkSizeAfter := (blobs[i].Offset + blobs[i].Length) - blobs[lowerIdx].Offset
split := false
// split if the chunk would become larger than maxChunkSize. Oversized chunks are
// handled by the requirement that the chunk contains at least one blob (i > lowerIdx)
if i > lowerIdx && chunkSizeAfter >= maxChunkSize {
split = true
}
// skip too large gaps as a new request is typically much cheaper than data transfers
if blobs[i].Offset-lastPos > maxUnusedRange {
split = true
}
if split {
// load everything up to the skipped file section
err := streamPackPart(ctx, beLoad, loadBlobFn, key, packID, blobs[lowerIdx:i], handleBlobFn)
if err != nil {
return err
}
lowerIdx = i
}
lastPos = blobs[i].Offset + blobs[i].Length
}
// load remainder
return streamPackPart(ctx, beLoad, loadBlobFn, key, packID, blobs[lowerIdx:], handleBlobFn)
}
func streamPackPart(ctx context.Context, beLoad backendLoadFn, loadBlobFn loadBlobFn, key *crypto.Key, packID restic.ID, blobs []restic.Blob, handleBlobFn func(blob restic.BlobHandle, buf []byte, err error) error) error {
h := backend.Handle{Type: restic.PackFile, Name: packID.String(), IsMetadata: false}
dataStart := blobs[0].Offset
dataEnd := blobs[len(blobs)-1].Offset + blobs[len(blobs)-1].Length
debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs))
dec, err := zstd.NewReader(nil)
if err != nil {
panic(dec)
}
defer dec.Close()
data := make([]byte, int(dataEnd-dataStart))
err = beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
_, cerr := io.ReadFull(rd, data)
return cerr
})
// prevent callbacks after cancellation
if ctx.Err() != nil {
return ctx.Err()
}
if err != nil {
// the context is only still valid if handleBlobFn never returned an error
if loadBlobFn != nil {
// check whether we can get the remaining blobs somewhere else
for _, entry := range blobs {
buf, ierr := loadBlobFn(ctx, entry.Type, entry.ID, nil)
err = handleBlobFn(entry.BlobHandle, buf, ierr)
if err != nil {
break
}
}
}
return errors.Wrap(err, "StreamPack")
}
it := NewPackBlobIterator(packID, newByteReader(data), dataStart, blobs, key, dec)
for {
val, err := it.Next()
if err == ErrPackEOF {
break
} else if err != nil {
return err
}
if val.Err != nil && loadBlobFn != nil {
var ierr error
// check whether we can get a valid copy somewhere else
buf, ierr := loadBlobFn(ctx, val.Handle.Type, val.Handle.ID, nil)
if ierr == nil {
// success
val.Plaintext = buf
val.Err = nil
}
}
err = handleBlobFn(val.Handle, val.Plaintext, val.Err)
if err != nil {
return err
}
// ensure that each blob is only passed once to handleBlobFn
blobs = blobs[1:]
}
return errors.Wrap(err, "StreamPack")
}
// discardReader allows the PackBlobIterator to perform zero copy
// reads if the underlying data source is a byte slice.
type discardReader interface {
Discard(n int) (discarded int, err error)
// ReadFull reads the next n bytes into a byte slice. The caller must not
// retain a reference to the byte. Modifications are only allowed within
// the boundaries of the returned slice.
ReadFull(n int) (buf []byte, err error)
}
type byteReader struct {
buf []byte
}
func newByteReader(buf []byte) *byteReader {
return &byteReader{
buf: buf,
}
}
func (b *byteReader) Discard(n int) (discarded int, err error) {
if len(b.buf) < n {
return 0, io.ErrUnexpectedEOF
}
b.buf = b.buf[n:]
return n, nil
}
func (b *byteReader) ReadFull(n int) (buf []byte, err error) {
if len(b.buf) < n {
return nil, io.ErrUnexpectedEOF
}
buf = b.buf[:n]
b.buf = b.buf[n:]
return buf, nil
}
type PackBlobIterator struct {
packID restic.ID
rd discardReader
currentOffset uint
blobs []restic.Blob
key *crypto.Key
dec *zstd.Decoder
decode []byte
}
type PackBlobValue struct {
Handle restic.BlobHandle
Plaintext []byte
Err error
}
var ErrPackEOF = errors.New("reached EOF of pack file")
func NewPackBlobIterator(packID restic.ID, rd discardReader, currentOffset uint,
blobs []restic.Blob, key *crypto.Key, dec *zstd.Decoder) *PackBlobIterator {
return &PackBlobIterator{
packID: packID,
rd: rd,
currentOffset: currentOffset,
blobs: blobs,
key: key,
dec: dec,
}
}
// Next returns the next blob, an error or ErrPackEOF if all blobs were read
func (b *PackBlobIterator) Next() (PackBlobValue, error) {
if len(b.blobs) == 0 {
return PackBlobValue{}, ErrPackEOF
}
entry := b.blobs[0]
b.blobs = b.blobs[1:]
skipBytes := int(entry.Offset - b.currentOffset)
if skipBytes < 0 {
return PackBlobValue{}, fmt.Errorf("overlapping blobs in pack %v", b.packID)
}
_, err := b.rd.Discard(skipBytes)
if err != nil {
return PackBlobValue{}, err
}
b.currentOffset = entry.Offset
h := restic.BlobHandle{ID: entry.ID, Type: entry.Type}
debug.Log(" process blob %v, skipped %d, %v", h, skipBytes, entry)
buf, err := b.rd.ReadFull(int(entry.Length))
if err != nil {
debug.Log(" read error %v", err)
return PackBlobValue{}, fmt.Errorf("readFull: %w", err)
}
b.currentOffset = entry.Offset + entry.Length
if int(entry.Length) <= b.key.NonceSize() {
debug.Log("%v", b.blobs)
return PackBlobValue{}, fmt.Errorf("invalid blob length %v", entry)
}
// decryption errors are likely permanent, give the caller a chance to skip them
nonce, ciphertext := buf[:b.key.NonceSize()], buf[b.key.NonceSize():]
plaintext, err := b.key.Open(ciphertext[:0], nonce, ciphertext, nil)
if err != nil {
err = fmt.Errorf("decrypting blob %v from %v failed: %w", h, b.packID.Str(), err)
}
if err == nil && entry.IsCompressed() {
// DecodeAll will allocate a slice if it is not large enough since it
// knows the decompressed size (because we're using EncodeAll)
b.decode, err = b.dec.DecodeAll(plaintext, b.decode[:0])
plaintext = b.decode
if err != nil {
err = fmt.Errorf("decompressing blob %v from %v failed: %w", h, b.packID.Str(), err)
}
}
if err == nil {
id := restic.Hash(plaintext)
if !id.Equal(entry.ID) {
debug.Log("read blob %v/%v from %v: wrong data returned, hash is %v",
h.Type, h.ID, b.packID.Str(), id)
err = fmt.Errorf("read blob %v from %v: wrong data returned, hash is %v",
h, b.packID.Str(), id)
}
}
return PackBlobValue{entry.BlobHandle, plaintext, err}, nil
}
var zeroChunkOnce sync.Once
var zeroChunkID restic.ID
// ZeroChunk computes and returns (cached) the ID of an all-zero chunk with size chunker.MinSize
func ZeroChunk() restic.ID {
zeroChunkOnce.Do(func() {
zeroChunkID = restic.Hash(make([]byte, chunker.MinSize))
})
return zeroChunkID
}