2015-05-09 23:52:03 +02:00
package repository
2014-12-21 17:02:49 +01:00
import (
2015-04-26 17:44:38 +02:00
"bytes"
2017-06-04 11:16:55 +02:00
"context"
2015-01-10 23:40:10 +01:00
"fmt"
2018-02-11 22:41:59 -05:00
"io"
2024-04-19 20:48:43 +02:00
"math"
2024-08-17 00:18:13 +05:30
"os"
2023-06-02 21:56:14 +02:00
"runtime"
2021-08-20 23:21:05 +02:00
"sort"
2020-10-10 21:31:40 +02:00
"sync"
2014-12-21 17:02:49 +01:00
2022-02-13 00:12:40 +01:00
"github.com/klauspost/compress/zstd"
2020-09-19 12:41:52 +02:00
"github.com/restic/chunker"
2022-07-17 13:47:54 +02:00
"github.com/restic/restic/internal/backend"
2024-05-24 23:04:06 +02:00
"github.com/restic/restic/internal/backend/cache"
backup: add --dry-run/-n flag to show what would happen.
This can be used to check how large a backup is or validate exclusions.
It does not actually write any data to the underlying backend. This is
implemented as a simple overlay backend that accepts writes without
forwarding them, passes through reads, and generally does the minimal
necessary to pretend that progress is actually happening.
Fixes #1542
Example usage:
$ restic -vv --dry-run . | grep add
new /changelog/unreleased/issue-1542, saved in 0.000s (350 B added)
modified /cmd/restic/cmd_backup.go, saved in 0.000s (16.543 KiB added)
modified /cmd/restic/global.go, saved in 0.000s (0 B added)
new /internal/backend/dry/dry_backend_test.go, saved in 0.000s (3.866 KiB added)
new /internal/backend/dry/dry_backend.go, saved in 0.000s (3.744 KiB added)
modified /internal/backend/test/tests.go, saved in 0.000s (0 B added)
modified /internal/repository/repository.go, saved in 0.000s (20.707 KiB added)
modified /internal/ui/backup.go, saved in 0.000s (9.110 KiB added)
modified /internal/ui/jsonstatus/status.go, saved in 0.001s (11.055 KiB added)
modified /restic, saved in 0.131s (25.542 MiB added)
Would add to the repo: 25.892 MiB
2019-06-12 20:39:13 -07:00
"github.com/restic/restic/internal/backend/dryrun"
2018-10-28 21:12:15 +01:00
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug"
2017-07-23 14:21:03 +02:00
"github.com/restic/restic/internal/errors"
2024-05-24 23:06:44 +02:00
"github.com/restic/restic/internal/repository/index"
2024-05-24 23:09:58 +02:00
"github.com/restic/restic/internal/repository/pack"
2018-10-28 21:12:15 +01:00
"github.com/restic/restic/internal/restic"
2020-10-10 21:31:40 +02:00
"github.com/restic/restic/internal/ui/progress"
2020-03-19 11:27:19 +01:00
2019-03-24 21:27:28 +01:00
"golang.org/x/sync/errgroup"
2014-12-21 17:02:49 +01:00
)
2022-07-02 23:30:26 +02:00
// Bounds and default for the target size of a pack file, in bytes.
const (
	// MinPackSize is the smallest pack size accepted by New (4 MiB).
	MinPackSize = 4 << 20
	// DefaultPackSize is used by New when no pack size is configured (16 MiB).
	DefaultPackSize = 16 << 20
	// MaxPackSize is the largest pack size accepted by New (128 MiB).
	MaxPackSize = 128 << 20
)
2015-05-09 23:59:58 +02:00
// Repository is used to access a repository in a backend.
type Repository struct {
2023-10-01 11:40:12 +02:00
be backend . Backend
2022-10-15 16:01:38 +02:00
cfg restic . Config
key * crypto . Key
keyID restic . ID
idx * index . MasterIndex
Cache * cache . Cache
2020-03-02 18:27:52 +01:00
2022-04-13 20:34:05 +02:00
opts Options
2021-08-07 22:52:05 +02:00
packerWg * errgroup . Group
uploader * packerUploader
treePM * packerManager
dataPM * packerManager
2022-02-13 00:12:40 +01:00
2022-02-19 21:15:31 +01:00
allocEnc sync . Once
allocDec sync . Once
enc * zstd . Encoder
dec * zstd . Decoder
2014-12-21 17:02:49 +01:00
}
2022-04-13 20:34:05 +02:00
type Options struct {
2024-02-04 16:50:50 +01:00
Compression CompressionMode
PackSize uint
NoExtraVerify bool
2022-04-13 20:34:05 +02:00
}
// CompressionMode configures if data should be compressed.
type CompressionMode uint

// Constants for the different compression levels.
const (
	CompressionAuto CompressionMode = iota
	CompressionOff
	CompressionMax
	CompressionInvalid
)

// Set implements the method needed for pflag command flag parsing.
// Accepted values are "auto", "off" and "max". Any other value stores
// CompressionInvalid in c and yields an error.
func (c *CompressionMode) Set(s string) error {
	mode := CompressionInvalid
	switch s {
	case "auto":
		mode = CompressionAuto
	case "off":
		mode = CompressionOff
	case "max":
		mode = CompressionMax
	}

	*c = mode
	if mode == CompressionInvalid {
		return fmt.Errorf("invalid compression mode %q, must be one of (auto|off|max)", s)
	}
	return nil
}

// String returns the name of the compression mode as accepted by Set, or
// "invalid" for any other value.
func (c *CompressionMode) String() string {
	switch mode := *c; mode {
	case CompressionMax:
		return "max"
	case CompressionOff:
		return "off"
	case CompressionAuto:
		return "auto"
	}
	return "invalid"
}

// Type returns the name of the flag value type, which is always "mode".
func (c *CompressionMode) Type() string {
	return "mode"
}
2015-07-02 22:53:03 +02:00
// New returns a new repository with backend be.
2023-10-01 11:40:12 +02:00
func New ( be backend . Backend , opts Options ) ( * Repository , error ) {
2022-10-29 22:03:39 +02:00
if opts . Compression == CompressionInvalid {
2023-05-13 22:43:42 +02:00
return nil , errors . New ( "invalid compression mode" )
2022-10-29 22:03:39 +02:00
}
2022-07-02 23:30:26 +02:00
if opts . PackSize == 0 {
opts . PackSize = DefaultPackSize
}
if opts . PackSize > MaxPackSize {
2023-05-13 22:43:42 +02:00
return nil , fmt . Errorf ( "pack size larger than limit of %v MiB" , MaxPackSize / 1024 / 1024 )
2022-07-02 23:30:26 +02:00
} else if opts . PackSize < MinPackSize {
2023-05-13 22:43:42 +02:00
return nil , fmt . Errorf ( "pack size smaller than minimum of %v MiB" , MinPackSize / 1024 / 1024 )
2022-07-02 23:30:26 +02:00
}
2016-03-06 12:26:25 +01:00
repo := & Repository {
2021-08-07 22:52:05 +02:00
be : be ,
opts : opts ,
2022-06-12 14:43:43 +02:00
idx : index . NewMasterIndex ( ) ,
2016-03-06 12:26:25 +01:00
}
2022-07-02 23:30:26 +02:00
return repo , nil
2014-12-21 17:02:49 +01:00
}
2022-04-29 23:12:43 +02:00
// setConfig stores cfg as the configuration of the repository.
func (r *Repository) setConfig(cfg restic.Config) {
	r.cfg = cfg
}
2016-08-31 22:51:35 +02:00
// Config returns the repository configuration.
2016-08-31 22:39:36 +02:00
func ( r * Repository ) Config ( ) restic . Config {
return r . cfg
}
2024-05-19 12:51:58 +02:00
// packSize return the target size of a pack file when uploading
func ( r * Repository ) packSize ( ) uint {
2022-07-02 23:30:26 +02:00
return r . opts . PackSize
}
2017-06-10 13:10:08 +02:00
// UseCache replaces the backend with the wrapped cache.
2020-03-02 18:27:52 +01:00
func ( r * Repository ) UseCache ( c * cache . Cache ) {
2017-06-10 13:10:08 +02:00
if c == nil {
return
}
debug . Log ( "using cache" )
r . Cache = c
r . be = c . Wrap ( r . be )
}
backup: add --dry-run/-n flag to show what would happen.
This can be used to check how large a backup is or validate exclusions.
It does not actually write any data to the underlying backend. This is
implemented as a simple overlay backend that accepts writes without
forwarding them, passes through reads, and generally does the minimal
necessary to pretend that progress is actually happening.
Fixes #1542
Example usage:
$ restic -vv --dry-run . | grep add
new /changelog/unreleased/issue-1542, saved in 0.000s (350 B added)
modified /cmd/restic/cmd_backup.go, saved in 0.000s (16.543 KiB added)
modified /cmd/restic/global.go, saved in 0.000s (0 B added)
new /internal/backend/dry/dry_backend_test.go, saved in 0.000s (3.866 KiB added)
new /internal/backend/dry/dry_backend.go, saved in 0.000s (3.744 KiB added)
modified /internal/backend/test/tests.go, saved in 0.000s (0 B added)
modified /internal/repository/repository.go, saved in 0.000s (20.707 KiB added)
modified /internal/ui/backup.go, saved in 0.000s (9.110 KiB added)
modified /internal/ui/jsonstatus/status.go, saved in 0.001s (11.055 KiB added)
modified /restic, saved in 0.131s (25.542 MiB added)
Would add to the repo: 25.892 MiB
2019-06-12 20:39:13 -07:00
// SetDryRun switches the repository into dry-run mode by wrapping the
// current backend in a dryrun backend.
func (r *Repository) SetDryRun() {
	wrapped := dryrun.New(r.be)
	r.be = wrapped
}
2023-01-27 15:01:54 +01:00
// LoadUnpacked loads and decrypts the file with the given type and ID.
func ( r * Repository ) LoadUnpacked ( ctx context . Context , t restic . FileType , id restic . ID ) ( [ ] byte , error ) {
2018-01-25 20:49:41 +01:00
debug . Log ( "load %v with id %v" , t , id )
2015-04-26 17:44:38 +02:00
2019-12-05 14:29:34 +01:00
if t == restic . ConfigFile {
id = restic . ID { }
}
2024-05-08 22:42:37 +02:00
buf , err := r . LoadRaw ( ctx , t , id )
2015-03-28 11:50:23 +01:00
if err != nil {
return nil , err
}
2017-10-29 11:33:57 +01:00
nonce , ciphertext := buf [ : r . key . NonceSize ( ) ] , buf [ r . key . NonceSize ( ) : ]
plaintext , err := r . key . Open ( ciphertext [ : 0 ] , nonce , ciphertext , nil )
2015-04-26 17:44:38 +02:00
if err != nil {
return nil , err
2015-01-10 23:40:10 +01:00
}
2022-02-13 00:12:40 +01:00
if t != restic . ConfigFile {
return r . decompressUnpacked ( plaintext )
}
2015-01-10 23:40:10 +01:00
2017-10-29 11:33:57 +01:00
return plaintext , nil
2015-04-26 17:44:38 +02:00
}
2020-03-06 09:17:33 +01:00
type haver interface {
2023-10-01 11:40:12 +02:00
Has ( backend . Handle ) bool
2020-03-06 09:17:33 +01:00
}
// sortCachedPacksFirst moves all cached pack files to the front of blobs.
2020-03-10 15:56:08 +01:00
func sortCachedPacksFirst ( cache haver , blobs [ ] restic . PackedBlob ) {
2020-03-06 09:17:33 +01:00
if cache == nil {
2020-03-10 15:56:08 +01:00
return
2017-07-16 21:06:43 +02:00
}
2019-07-06 17:42:29 +02:00
// no need to sort a list with one element
if len ( blobs ) == 1 {
2020-03-10 15:56:08 +01:00
return
2019-07-06 17:42:29 +02:00
}
2020-03-06 09:18:38 +01:00
cached := blobs [ : 0 ]
2017-07-16 21:06:43 +02:00
noncached := make ( [ ] restic . PackedBlob , 0 , len ( blobs ) / 2 )
for _ , blob := range blobs {
2023-10-01 11:40:12 +02:00
if cache . Has ( backend . Handle { Type : restic . PackFile , Name : blob . PackID . String ( ) } ) {
2017-07-16 21:06:43 +02:00
cached = append ( cached , blob )
continue
}
noncached = append ( noncached , blob )
}
2020-03-10 15:56:08 +01:00
copy ( blobs [ len ( cached ) : ] , noncached )
2017-07-16 21:06:43 +02:00
}
2020-03-10 17:52:14 +01:00
// LoadBlob loads a blob of type t from the repository.
// It may use all of buf[:cap(buf)] as scratch space.
func ( r * Repository ) LoadBlob ( ctx context . Context , t restic . BlobType , id restic . ID , buf [ ] byte ) ( [ ] byte , error ) {
debug . Log ( "load %v with id %v (buf len %v, cap %d)" , t , id , len ( buf ) , cap ( buf ) )
2016-08-03 22:38:05 +02:00
// lookup packs
2020-11-05 22:18:00 +01:00
blobs := r . idx . Lookup ( restic . BlobHandle { ID : id , Type : t } )
2020-06-14 13:26:10 +02:00
if len ( blobs ) == 0 {
2018-01-25 20:49:41 +01:00
debug . Log ( "id %v not found in index" , id )
2020-03-10 17:52:14 +01:00
return nil , errors . Errorf ( "id %v not found in repository" , id )
2015-04-26 17:44:38 +02:00
}
2017-07-16 21:06:43 +02:00
// try cached pack files first
2020-03-10 15:56:08 +01:00
sortCachedPacksFirst ( r . Cache , blobs )
2017-07-16 21:06:43 +02:00
2024-05-09 15:44:05 +02:00
buf , err := r . loadBlob ( ctx , blobs , buf )
if err != nil {
if r . Cache != nil {
for _ , blob := range blobs {
h := backend . Handle { Type : restic . PackFile , Name : blob . PackID . String ( ) , IsMetadata : blob . Type . IsMetadata ( ) }
// ignore errors as there's not much we can do here
_ = r . Cache . Forget ( h )
}
}
buf , err = r . loadBlob ( ctx , blobs , buf )
}
return buf , err
}
func ( r * Repository ) loadBlob ( ctx context . Context , blobs [ ] restic . PackedBlob , buf [ ] byte ) ( [ ] byte , error ) {
2016-08-28 22:18:02 +02:00
var lastError error
2016-08-03 22:38:05 +02:00
for _ , blob := range blobs {
2024-05-09 15:44:05 +02:00
debug . Log ( "blob %v found: %v" , blob . BlobHandle , blob )
2016-08-03 22:38:05 +02:00
// load blob from pack
2024-05-09 15:44:05 +02:00
h := backend . Handle { Type : restic . PackFile , Name : blob . PackID . String ( ) , IsMetadata : blob . Type . IsMetadata ( ) }
2017-01-24 11:27:36 +01:00
2020-03-10 17:52:14 +01:00
switch {
case cap ( buf ) < int ( blob . Length ) :
buf = make ( [ ] byte , blob . Length )
case len ( buf ) != int ( blob . Length ) :
buf = buf [ : blob . Length ]
2017-01-24 11:27:36 +01:00
}
2024-05-08 23:26:41 +02:00
_ , err := backend . ReadAt ( ctx , r . be , h , int64 ( blob . Offset ) , buf )
2016-08-03 22:38:05 +02:00
if err != nil {
2016-09-27 22:35:08 +02:00
debug . Log ( "error loading blob %v: %v" , blob , err )
2016-08-28 22:18:02 +02:00
lastError = err
2016-08-03 22:38:05 +02:00
continue
}
2024-05-10 16:29:48 +02:00
it := newPackBlobIterator ( blob . PackID , newByteReader ( buf ) , uint ( blob . Offset ) , [ ] restic . Blob { blob . Blob } , r . key , r . getZstdDecoder ( ) )
2024-05-08 23:26:41 +02:00
pbv , err := it . Next ( )
2016-08-03 22:38:05 +02:00
2024-05-08 23:26:41 +02:00
if err == nil {
err = pbv . Err
2016-08-03 22:38:05 +02:00
}
2024-05-08 23:26:41 +02:00
if err != nil {
debug . Log ( "error decoding blob %v: %v" , blob , err )
lastError = err
2016-08-03 22:38:05 +02:00
continue
}
2024-05-08 23:26:41 +02:00
plaintext := pbv . Plaintext
2022-02-13 17:24:09 +01:00
if len ( plaintext ) > cap ( buf ) {
return plaintext , nil
}
2020-03-10 17:52:14 +01:00
// move decrypted data to the start of the buffer
2022-06-06 16:26:38 +02:00
buf = buf [ : len ( plaintext ) ]
2020-03-10 17:52:14 +01:00
copy ( buf , plaintext )
2022-06-06 16:26:38 +02:00
return buf , nil
2015-04-26 17:44:38 +02:00
}
2015-01-10 23:40:10 +01:00
2016-08-28 22:18:02 +02:00
if lastError != nil {
2020-03-10 17:52:14 +01:00
return nil , lastError
2016-08-28 22:18:02 +02:00
}
2024-05-09 15:44:05 +02:00
return nil , errors . Errorf ( "loading %v from %v packs failed" , blobs [ 0 ] . BlobHandle , len ( blobs ) )
2015-01-10 23:40:10 +01:00
}
2022-02-19 21:15:31 +01:00
func ( r * Repository ) getZstdEncoder ( ) * zstd . Encoder {
r . allocEnc . Do ( func ( ) {
2022-04-13 20:34:05 +02:00
level := zstd . SpeedDefault
if r . opts . Compression == CompressionMax {
level = zstd . SpeedBestCompression
}
2022-04-20 20:46:11 +02:00
opts := [ ] zstd . EOption {
// Set the compression level configured.
zstd . WithEncoderLevel ( level ) ,
// Disable CRC, we have enough checks in place, makes the
// compressed data four bytes shorter.
zstd . WithEncoderCRC ( false ) ,
// Set a window of 512kbyte, so we have good lookbehind for usual
// blob sizes.
zstd . WithWindowSize ( 512 * 1024 ) ,
}
enc , err := zstd . NewWriter ( nil , opts ... )
2022-02-19 21:15:31 +01:00
if err != nil {
panic ( err )
}
r . enc = enc
} )
return r . enc
}
func ( r * Repository ) getZstdDecoder ( ) * zstd . Decoder {
r . allocDec . Do ( func ( ) {
2022-04-20 20:46:11 +02:00
opts := [ ] zstd . DOption {
// Use all available cores.
zstd . WithDecoderConcurrency ( 0 ) ,
// Limit the maximum decompressed memory. Set to a very high,
// conservative value.
zstd . WithDecoderMaxMemory ( 16 * 1024 * 1024 * 1024 ) ,
}
dec , err := zstd . NewReader ( nil , opts ... )
2022-02-19 21:15:31 +01:00
if err != nil {
panic ( err )
}
r . dec = dec
} )
return r . dec
}
2022-02-13 00:05:14 +01:00
// saveAndEncrypt encrypts data and stores it to the backend as type t. If data
2022-05-01 14:26:57 +02:00
// is small enough, it will be packed together with other small blobs. The
// caller must ensure that the id matches the data. Returned is the size data
// occupies in the repo (compressed or not, including the encryption overhead).
func ( r * Repository ) saveAndEncrypt ( ctx context . Context , t restic . BlobType , data [ ] byte , id restic . ID ) ( size int , err error ) {
2018-01-25 20:49:41 +01:00
debug . Log ( "save id %v (%v, %d bytes)" , id , t , len ( data ) )
2015-01-10 23:40:10 +01:00
2022-02-13 17:24:09 +01:00
uncompressedLength := 0
if r . cfg . Version > 1 {
2022-04-13 20:34:05 +02:00
// we have a repo v2, so compression is available. if the user opts to
// not compress, we won't compress any data, but everything else is
// compressed.
if r . opts . Compression != CompressionOff || t != restic . DataBlob {
uncompressedLength = len ( data )
data = r . getZstdEncoder ( ) . EncodeAll ( data , nil )
}
2022-02-13 17:24:09 +01:00
}
2017-10-29 11:33:57 +01:00
nonce := crypto . NewRandomNonce ( )
2020-02-26 23:26:11 +01:00
2022-06-12 14:48:30 +02:00
ciphertext := make ( [ ] byte , 0 , crypto . CiphertextLength ( len ( data ) ) )
2017-10-29 11:33:57 +01:00
ciphertext = append ( ciphertext , nonce ... )
2015-01-10 23:40:10 +01:00
// encrypt blob
2017-10-29 11:33:57 +01:00
ciphertext = r . key . Seal ( ciphertext , nonce , data , nil )
2015-01-10 23:40:10 +01:00
2024-02-02 21:15:39 +01:00
if err := r . verifyCiphertext ( ciphertext , uncompressedLength , id ) ; err != nil {
2024-02-04 18:09:32 +01:00
//nolint:revive // ignore linter warnings about error message spelling
return 0 , fmt . Errorf ( "Detected data corruption while saving blob %v: %w\nCorrupted blobs are either caused by hardware issues or software bugs. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting." , id , err )
2024-02-02 21:15:39 +01:00
}
2015-04-26 17:44:38 +02:00
// find suitable packer and add blob
2017-07-16 20:16:02 +02:00
var pm * packerManager
switch t {
case restic . TreeBlob :
pm = r . treePM
case restic . DataBlob :
pm = r . dataPM
default :
panic ( fmt . Sprintf ( "invalid type: %v" , t ) )
}
2021-08-07 22:52:05 +02:00
return pm . SaveBlob ( ctx , t , id , ciphertext , uncompressedLength )
2015-04-26 17:44:38 +02:00
}
2024-02-02 21:15:39 +01:00
func ( r * Repository ) verifyCiphertext ( buf [ ] byte , uncompressedLength int , id restic . ID ) error {
2024-02-04 16:50:50 +01:00
if r . opts . NoExtraVerify {
2024-02-03 17:47:48 +01:00
return nil
}
2024-02-02 21:15:39 +01:00
nonce , ciphertext := buf [ : r . key . NonceSize ( ) ] , buf [ r . key . NonceSize ( ) : ]
plaintext , err := r . key . Open ( nil , nonce , ciphertext , nil )
if err != nil {
return fmt . Errorf ( "decryption failed: %w" , err )
}
if uncompressedLength != 0 {
// DecodeAll will allocate a slice if it is not large enough since it
// knows the decompressed size (because we're using EncodeAll)
plaintext , err = r . getZstdDecoder ( ) . DecodeAll ( plaintext , nil )
if err != nil {
return fmt . Errorf ( "decompression failed: %w" , err )
}
}
if ! restic . Hash ( plaintext ) . Equal ( id ) {
return errors . New ( "hash mismatch" )
}
return nil
}
2022-02-13 00:12:40 +01:00
func ( r * Repository ) compressUnpacked ( p [ ] byte ) ( [ ] byte , error ) {
// compression is only available starting from version 2
if r . cfg . Version < 2 {
return p , nil
}
// version byte
out := [ ] byte { 2 }
2022-02-19 21:15:31 +01:00
out = r . getZstdEncoder ( ) . EncodeAll ( p , out )
2022-02-13 00:12:40 +01:00
return out , nil
}
func ( r * Repository ) decompressUnpacked ( p [ ] byte ) ( [ ] byte , error ) {
// compression is only available starting from version 2
if r . cfg . Version < 2 {
return p , nil
}
2022-04-16 21:05:15 +02:00
if len ( p ) == 0 {
2022-02-13 00:12:40 +01:00
// too short for version header
return p , nil
}
if p [ 0 ] == '[' || p [ 0 ] == '{' {
// probably raw JSON
return p , nil
}
// version
if p [ 0 ] != 2 {
return nil , errors . New ( "not supported encoding format" )
}
2022-02-19 21:15:31 +01:00
return r . getZstdDecoder ( ) . DecodeAll ( p [ 1 : ] , nil )
2022-02-13 00:12:40 +01:00
}
2016-01-24 18:52:11 +01:00
// SaveUnpacked encrypts data and stores it in the backend. Returned is the
// storage hash.
2024-02-03 17:30:58 +01:00
func ( r * Repository ) SaveUnpacked ( ctx context . Context , t restic . FileType , buf [ ] byte ) ( id restic . ID , err error ) {
p := buf
2022-02-13 00:12:40 +01:00
if t != restic . ConfigFile {
p , err = r . compressUnpacked ( p )
if err != nil {
return restic . ID { } , err
}
}
2022-06-12 14:48:30 +02:00
ciphertext := crypto . NewBlobBuffer ( len ( p ) )
2017-10-29 11:33:57 +01:00
ciphertext = ciphertext [ : 0 ]
nonce := crypto . NewRandomNonce ( )
ciphertext = append ( ciphertext , nonce ... )
ciphertext = r . key . Seal ( ciphertext , nonce , p , nil )
2015-02-15 17:26:08 +01:00
2024-02-03 17:30:58 +01:00
if err := r . verifyUnpacked ( ciphertext , t , buf ) ; err != nil {
2024-02-04 18:09:32 +01:00
//nolint:revive // ignore linter warnings about error message spelling
return restic . ID { } , fmt . Errorf ( "Detected data corruption while saving file of type %v: %w\nCorrupted data is either caused by hardware issues or software bugs. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting." , t , err )
2024-02-03 17:30:58 +01:00
}
2019-12-05 14:29:34 +01:00
if t == restic . ConfigFile {
id = restic . ID { }
} else {
id = restic . Hash ( ciphertext )
}
2023-10-01 11:40:12 +02:00
h := backend . Handle { Type : t , Name : id . String ( ) }
2015-04-26 17:44:38 +02:00
2023-10-01 11:40:12 +02:00
err = r . be . Save ( ctx , h , backend . NewByteReader ( ciphertext , r . be . Hasher ( ) ) )
2015-02-15 17:26:08 +01:00
if err != nil {
2016-09-27 22:35:08 +02:00
debug . Log ( "error saving blob %v: %v" , h , err )
2016-08-31 20:29:54 +02:00
return restic . ID { } , err
2015-02-15 17:26:08 +01:00
}
2016-09-27 22:35:08 +02:00
debug . Log ( "blob %v saved" , h )
2016-01-24 18:50:41 +01:00
return id , nil
2015-04-26 17:44:38 +02:00
}
2024-02-03 17:30:58 +01:00
func ( r * Repository ) verifyUnpacked ( buf [ ] byte , t restic . FileType , expected [ ] byte ) error {
2024-02-04 16:50:50 +01:00
if r . opts . NoExtraVerify {
2024-02-03 17:47:48 +01:00
return nil
}
2024-02-03 17:30:58 +01:00
nonce , ciphertext := buf [ : r . key . NonceSize ( ) ] , buf [ r . key . NonceSize ( ) : ]
plaintext , err := r . key . Open ( nil , nonce , ciphertext , nil )
if err != nil {
return fmt . Errorf ( "decryption failed: %w" , err )
}
if t != restic . ConfigFile {
plaintext , err = r . decompressUnpacked ( plaintext )
if err != nil {
return fmt . Errorf ( "decompression failed: %w" , err )
}
}
if ! bytes . Equal ( plaintext , expected ) {
return errors . New ( "data mismatch" )
}
return nil
}
2024-05-10 01:16:23 +02:00
// RemoveUnpacked removes the file with the given type and ID from the
// backend.
func (r *Repository) RemoveUnpacked(ctx context.Context, t restic.FileType, id restic.ID) error {
	// TODO prevent everything except removing snapshots for non-repository code
	h := backend.Handle{Type: t, Name: id.String()}
	return r.be.Remove(ctx, h)
}
2020-06-06 22:20:44 +02:00
// Flush saves all remaining packs and the index
2017-11-22 06:27:29 -05:00
func ( r * Repository ) Flush ( ctx context . Context ) error {
2022-05-26 13:30:52 +02:00
if err := r . flushPacks ( ctx ) ; err != nil {
2020-06-06 22:20:44 +02:00
return err
}
2022-05-26 12:38:18 +02:00
return r . idx . SaveIndex ( ctx , r )
2020-06-06 22:20:44 +02:00
}
2021-08-07 22:52:05 +02:00
func ( r * Repository ) StartPackUploader ( ctx context . Context , wg * errgroup . Group ) {
if r . packerWg != nil {
panic ( "uploader already started" )
2017-07-16 20:24:37 +02:00
}
2021-08-07 22:52:05 +02:00
innerWg , ctx := errgroup . WithContext ( ctx )
r . packerWg = innerWg
r . uploader = newPackerUploader ( ctx , innerWg , r , r . be . Connections ( ) )
2024-05-19 12:51:58 +02:00
r . treePM = newPackerManager ( r . key , restic . TreeBlob , r . packSize ( ) , r . uploader . QueuePacker )
r . dataPM = newPackerManager ( r . key , restic . DataBlob , r . packSize ( ) , r . uploader . QueuePacker )
2017-07-16 20:16:02 +02:00
2021-08-07 22:52:05 +02:00
wg . Go ( func ( ) error {
return innerWg . Wait ( )
} )
}
// FlushPacks saves all remaining packs.
func ( r * Repository ) flushPacks ( ctx context . Context ) error {
if r . packerWg == nil {
return nil
2015-04-26 17:44:38 +02:00
}
2021-08-07 22:52:05 +02:00
err := r . treePM . Flush ( ctx )
if err != nil {
return err
}
err = r . dataPM . Flush ( ctx )
if err != nil {
return err
}
r . uploader . TriggerShutdown ( )
err = r . packerWg . Wait ( )
r . treePM = nil
r . dataPM = nil
r . uploader = nil
r . packerWg = nil
return err
2015-04-26 17:44:38 +02:00
}
2021-08-08 00:38:17 +02:00
// Connections returns the number of connections reported by the backend.
func (r *Repository) Connections() uint {
	n := r.be.Connections()
	return n
}
2024-05-19 14:56:17 +02:00
func ( r * Repository ) LookupBlob ( tpe restic . BlobType , id restic . ID ) [ ] restic . PackedBlob {
return r . idx . Lookup ( restic . BlobHandle { Type : tpe , ID : id } )
2024-05-19 12:41:56 +02:00
}
// LookupBlobSize returns the size of blob id.
2024-05-19 14:54:50 +02:00
func ( r * Repository ) LookupBlobSize ( tpe restic . BlobType , id restic . ID ) ( uint , bool ) {
return r . idx . LookupSize ( restic . BlobHandle { Type : tpe , ID : id } )
2024-05-19 12:41:56 +02:00
}
// ListBlobs runs fn on all blobs known to the index. When the context is cancelled,
// the index iteration returns immediately with ctx.Err(). This blocks any modification of the index.
func (r *Repository) ListBlobs(ctx context.Context, fn func(restic.PackedBlob)) error {
	err := r.idx.Each(ctx, fn)
	return err
}
func ( r * Repository ) ListPacksFromIndex ( ctx context . Context , packs restic . IDSet ) <- chan restic . PackBlobs {
return r . idx . ListPacks ( ctx , packs )
2015-04-26 17:44:38 +02:00
}
2015-05-09 13:25:52 +02:00
// SetIndex instructs the repository to use the given index.
2024-08-17 00:18:13 +05:30
func ( r * Repository ) SetIndex ( i restic . MasterIndex ) error {
2022-06-12 14:43:43 +02:00
r . idx = i . ( * index . MasterIndex )
2024-08-17 00:18:13 +05:30
return r . prepareCache ( )
2015-02-08 22:54:45 +01:00
}
2024-05-19 12:51:58 +02:00
func ( r * Repository ) clearIndex ( ) {
2024-04-14 13:46:21 +02:00
r . idx = index . NewMasterIndex ( )
}
2015-10-12 22:34:12 +02:00
// LoadIndex loads all index files from the backend in parallel and stores them
2023-07-15 22:48:30 -04:00
func ( r * Repository ) LoadIndex ( ctx context . Context , p * progress . Counter ) error {
2016-09-27 22:35:08 +02:00
debug . Log ( "Loading index" )
2015-04-26 17:44:38 +02:00
2024-04-14 13:46:21 +02:00
// reset in-memory index before loading it from the repository
2024-05-19 12:51:58 +02:00
r . clearIndex ( )
2024-04-14 13:46:21 +02:00
2024-05-19 15:37:54 +02:00
err := r . idx . Load ( ctx , r , p , nil )
2021-01-30 16:35:05 +01:00
if err != nil {
return err
}
2020-11-07 18:50:19 +01:00
2023-06-02 21:56:14 +02:00
// Trigger GC to reset garbage collection threshold
runtime . GC ( )
2022-02-13 17:24:09 +01:00
if r . cfg . Version < 2 {
// sanity check
ctx , cancel := context . WithCancel ( ctx )
defer cancel ( )
2022-08-19 20:04:39 +02:00
invalidIndex := false
2024-04-05 22:20:14 +02:00
err := r . idx . Each ( ctx , func ( blob restic . PackedBlob ) {
2022-02-13 17:24:09 +01:00
if blob . IsCompressed ( ) {
2022-08-19 20:04:39 +02:00
invalidIndex = true
2022-02-13 17:24:09 +01:00
}
2022-08-19 20:04:39 +02:00
} )
2024-04-05 22:20:14 +02:00
if err != nil {
return err
}
2022-08-19 20:04:39 +02:00
if invalidIndex {
2023-05-13 22:43:42 +02:00
return errors . New ( "index uses feature not supported by repository version 1" )
2022-02-13 17:24:09 +01:00
}
}
2024-08-17 00:18:13 +05:30
if ctx . Err ( ) != nil {
return ctx . Err ( )
}
2022-02-13 17:24:09 +01:00
2024-08-17 00:18:13 +05:30
// remove index files from the cache which have been removed in the repo
return r . prepareCache ( )
2018-03-31 09:50:45 +02:00
}
2017-07-18 23:16:50 +02:00
2024-05-19 16:14:31 +02:00
// createIndexFromPacks creates a new index by reading all given pack files (with sizes).
2020-10-10 21:31:40 +02:00
// The index is added to the MasterIndex but not marked as finalized.
// Returned is the list of pack files which could not be read.
2024-05-19 16:14:31 +02:00
func ( r * Repository ) createIndexFromPacks ( ctx context . Context , packsize map [ restic . ID ] int64 , p * progress . Counter ) ( invalid restic . IDs , err error ) {
2020-10-10 21:31:40 +02:00
var m sync . Mutex
debug . Log ( "Loading index from pack files" )
// track spawned goroutines using wg, create a new context which is
// cancelled as soon as an error occurs.
wg , ctx := errgroup . WithContext ( ctx )
type FileInfo struct {
restic . ID
Size int64
}
ch := make ( chan FileInfo )
// send list of pack files through ch, which is closed afterwards
wg . Go ( func ( ) error {
defer close ( ch )
for id , size := range packsize {
select {
case <- ctx . Done ( ) :
2022-05-10 22:17:50 +02:00
return ctx . Err ( )
2020-10-10 21:31:40 +02:00
case ch <- FileInfo { id , size } :
}
}
return nil
} )
// a worker receives an pack ID from ch, reads the pack contents, and adds them to idx
worker := func ( ) error {
for fi := range ch {
entries , _ , err := r . ListPack ( ctx , fi . ID , fi . Size )
if err != nil {
debug . Log ( "unable to list pack file %v" , fi . ID . Str ( ) )
m . Lock ( )
invalid = append ( invalid , fi . ID )
m . Unlock ( )
}
2022-05-26 16:13:41 +02:00
r . idx . StorePack ( fi . ID , entries )
2020-10-10 21:31:40 +02:00
p . Add ( 1 )
}
return nil
}
2021-08-08 00:38:17 +02:00
// decoding the pack header is usually quite fast, thus we are primarily IO-bound
workerCount := int ( r . Connections ( ) )
2020-10-10 21:31:40 +02:00
// run workers on ch
2021-08-08 00:38:17 +02:00
for i := 0 ; i < workerCount ; i ++ {
2022-05-10 22:17:50 +02:00
wg . Go ( worker )
}
2020-10-10 21:31:40 +02:00
err = wg . Wait ( )
if err != nil {
2023-05-13 22:43:42 +02:00
return invalid , err
2020-10-10 21:31:40 +02:00
}
return invalid , nil
}
2024-08-17 00:18:13 +05:30
// prepareCache initializes the local cache. indexIDs is the list of IDs of
// index files still present in the repo.
func ( r * Repository ) prepareCache ( ) error {
if r . Cache == nil {
return nil
}
packs := r . idx . Packs ( restic . NewIDSet ( ) )
// clear old packs
2024-08-17 00:21:49 +05:30
err := r . Cache . Clear ( restic . PackFile , packs )
2024-08-17 00:18:13 +05:30
if err != nil {
fmt . Fprintf ( os . Stderr , "error clearing pack files in cache: %v\n" , err )
}
return nil
}
2015-05-04 20:39:45 +02:00
// SearchKey finds a key with the supplied password, afterwards the config is
2016-08-21 13:09:31 +02:00
// read and parsed. It tries at most maxKeys key files in the repo.
2018-11-25 09:10:45 -05:00
func ( r * Repository ) SearchKey ( ctx context . Context , password string , maxKeys int , keyHint string ) error {
key , err := SearchKey ( ctx , r , password , maxKeys , keyHint )
2014-12-21 18:10:19 +01:00
if err != nil {
return err
}
2024-01-21 20:38:42 +01:00
oldKey := r . key
oldKeyID := r . keyID
2015-07-02 21:52:57 +02:00
r . key = key . master
2022-10-15 16:01:38 +02:00
r . keyID = key . ID ( )
2022-04-29 23:12:43 +02:00
cfg , err := restic . LoadConfig ( ctx , r )
2024-01-21 20:38:42 +01:00
if err != nil {
r . key = oldKey
r . keyID = oldKeyID
if err == crypto . ErrUnauthenticated {
return fmt . Errorf ( "config or key %v is damaged: %w" , key . ID ( ) , err )
}
2023-05-13 22:43:42 +02:00
return fmt . Errorf ( "config cannot be loaded: %w" , err )
2018-03-09 21:05:14 +01:00
}
2022-04-29 23:12:43 +02:00
r . setConfig ( cfg )
2018-03-09 21:05:14 +01:00
return nil
2015-05-03 16:36:52 +02:00
}
2014-12-21 18:10:19 +01:00
2015-07-02 22:36:31 +02:00
// Init creates a new master key with the supplied password, initializes and
// saves the repository config.
2022-02-13 00:52:03 +01:00
func ( r * Repository ) Init ( ctx context . Context , version uint , password string , chunkerPolynomial * chunker . Pol ) error {
if version > restic . MaxRepoVersion {
2022-05-07 22:23:59 +02:00
return fmt . Errorf ( "repository version %v too high" , version )
2022-02-13 00:52:03 +01:00
}
if version < restic . MinRepoVersion {
2022-05-07 22:23:59 +02:00
return fmt . Errorf ( "repository version %v too low" , version )
2022-02-13 00:52:03 +01:00
}
2023-10-01 11:40:12 +02:00
_ , err := r . be . Stat ( ctx , backend . Handle { Type : restic . ConfigFile } )
2022-12-03 11:28:10 +01:00
if err != nil && ! r . be . IsNotExist ( err ) {
2015-05-03 17:46:18 +02:00
return err
}
2022-12-03 11:28:10 +01:00
if err == nil {
2015-05-03 17:46:18 +02:00
return errors . New ( "repository master key and config already initialized" )
}
2024-06-05 22:46:34 +02:00
// double check to make sure that a repository is not accidentally reinitialized
// if the backend somehow fails to stat the config file. An initialized repository
// must always contain at least one key file.
if err := r . List ( ctx , restic . KeyFile , func ( _ restic . ID , _ int64 ) error {
return errors . New ( "repository already contains keys" )
} ) ; err != nil {
return err
}
2024-06-13 20:00:20 +02:00
// Also check for snapshots to detect repositories with a misconfigured retention
// policy that deletes files older than x days. For such repositories usually the
// config and key files are removed first and therefore the check would not detect
// the old repository.
if err := r . List ( ctx , restic . SnapshotFile , func ( _ restic . ID , _ int64 ) error {
return errors . New ( "repository already contains snapshots" )
} ) ; err != nil {
return err
}
2015-05-03 17:46:18 +02:00
2022-02-13 00:52:03 +01:00
cfg , err := restic . CreateConfig ( version )
2016-07-31 16:27:36 +02:00
if err != nil {
return err
}
2020-09-19 12:41:52 +02:00
if chunkerPolynomial != nil {
cfg . ChunkerPolynomial = * chunkerPolynomial
}
2016-07-31 16:27:36 +02:00
2017-06-04 11:16:55 +02:00
return r . init ( ctx , password , cfg )
2016-07-31 16:27:36 +02:00
}
// init creates a new master key with the supplied password and uses it to save
// the config into the repo.
2017-06-04 11:16:55 +02:00
func ( r * Repository ) init ( ctx context . Context , password string , cfg restic . Config ) error {
2020-04-10 11:37:39 +02:00
key , err := createMasterKey ( ctx , r , password )
2015-05-03 16:36:52 +02:00
if err != nil {
return err
}
2015-07-02 21:52:57 +02:00
r . key = key . master
2022-10-15 16:01:38 +02:00
r . keyID = key . ID ( )
2022-04-29 23:12:43 +02:00
r . setConfig ( cfg )
2022-06-12 14:38:19 +02:00
return restic . SaveConfig ( ctx , r , cfg )
2014-12-21 18:10:19 +01:00
}
2015-07-02 22:53:03 +02:00
// Key returns the current master key.
2015-07-02 21:52:57 +02:00
func ( r * Repository ) Key ( ) * crypto . Key {
return r . key
2014-12-21 18:10:19 +01:00
}
2022-10-15 16:01:38 +02:00
// KeyID returns the id of the current key in the backend.
func ( r * Repository ) KeyID ( ) restic . ID {
return r . keyID
2015-05-03 18:04:13 +02:00
}
2018-01-21 17:25:36 +01:00
// List runs fn for all files of type t in the repo.
func ( r * Repository ) List ( ctx context . Context , t restic . FileType , fn func ( restic . ID , int64 ) error ) error {
2023-10-01 11:40:12 +02:00
return r . be . List ( ctx , t , func ( fi backend . FileInfo ) error {
2018-01-21 17:25:36 +01:00
id , err := restic . ParseID ( fi . Name )
if err != nil {
debug . Log ( "unable to parse %v as an ID" , fi . Name )
2018-02-26 20:53:38 +01:00
return nil
2017-03-06 22:19:38 +01:00
}
2018-01-21 17:25:36 +01:00
return fn ( id , fi . Size )
} )
2014-12-21 17:02:49 +01:00
}
2016-08-07 21:56:42 +02:00
// ListPack returns the list of blobs saved in the pack id and the length of
2023-10-27 18:56:00 +02:00
// the pack header.
2020-11-16 04:03:45 +01:00
func ( r * Repository ) ListPack ( ctx context . Context , id restic . ID , size int64 ) ( [ ] restic . Blob , uint32 , error ) {
2023-10-01 11:40:12 +02:00
h := backend . Handle { Type : restic . PackFile , Name : id . String ( ) }
2016-08-07 21:56:42 +02:00
2024-05-10 17:05:16 +02:00
entries , hdrSize , err := pack . List ( r . Key ( ) , backend . ReaderAt ( ctx , r . be , h ) , size )
2024-05-09 15:44:27 +02:00
if err != nil {
if r . Cache != nil {
// ignore error as there is not much we can do here
_ = r . Cache . Forget ( h )
}
// retry on error
2024-05-10 17:05:16 +02:00
entries , hdrSize , err = pack . List ( r . Key ( ) , backend . ReaderAt ( ctx , r . be , h ) , size )
2024-05-09 15:44:27 +02:00
}
return entries , hdrSize , err
2016-05-08 13:51:21 +02:00
}
2015-07-02 22:53:03 +02:00
// Delete calls backend.Delete() if implemented, and returns an error
// otherwise.
2017-06-04 11:16:55 +02:00
func ( r * Repository ) Delete ( ctx context . Context ) error {
2017-10-14 15:56:38 +02:00
return r . be . Delete ( ctx )
2014-12-21 17:02:49 +01:00
}
2015-03-14 11:56:45 +01:00
2015-07-02 22:53:03 +02:00
// Close closes the repository by closing the backend.
2015-07-02 21:52:57 +02:00
func ( r * Repository ) Close ( ) error {
return r . be . Close ( )
2015-03-28 11:50:23 +01:00
}
2016-09-03 11:22:01 +02:00
2020-06-06 22:20:44 +02:00
// SaveBlob saves a blob of type t into the repository.
// It takes care that no duplicates are saved; this can be overwritten
// by setting storeDuplicate to true.
// If id is the null id, it will be computed and returned.
2022-05-01 14:26:57 +02:00
// Also returns if the blob was already known before.
// If the blob was not known before, it returns the number of bytes the blob
// occupies in the repo (compressed or not, including encryption overhead).
func ( r * Repository ) SaveBlob ( ctx context . Context , t restic . BlobType , buf [ ] byte , id restic . ID , storeDuplicate bool ) ( newID restic . ID , known bool , size int , err error ) {
2020-06-06 22:20:44 +02:00
2024-04-19 20:48:43 +02:00
if int64 ( len ( buf ) ) > math . MaxUint32 {
return restic . ID { } , false , 0 , fmt . Errorf ( "blob is larger than 4GB" )
}
2020-06-06 22:20:44 +02:00
// compute plaintext hash if not already set
if id . IsNull ( ) {
2022-09-04 10:49:16 +02:00
// Special case the hash calculation for all zero chunks. This is especially
// useful for sparse files containing large all zero regions. For these we can
// process chunks as fast as we can read the from disk.
if len ( buf ) == chunker . MinSize && restic . ZeroPrefixLen ( buf ) == chunker . MinSize {
newID = ZeroChunk ( )
} else {
newID = restic . Hash ( buf )
}
2020-06-06 22:20:44 +02:00
} else {
newID = id
}
// first try to add to pending blobs; if not successful, this blob is already known
2022-06-12 14:43:43 +02:00
known = ! r . idx . AddPending ( restic . BlobHandle { ID : newID , Type : t } )
2020-06-06 22:20:44 +02:00
2020-10-05 23:13:38 +02:00
// only save when needed or explicitly told
2020-06-06 22:20:44 +02:00
if ! known || storeDuplicate {
2022-05-01 14:26:57 +02:00
size , err = r . saveAndEncrypt ( ctx , t , buf , newID )
2016-09-03 20:55:22 +02:00
}
2020-06-06 22:20:44 +02:00
2022-05-01 14:26:57 +02:00
return newID , known , size , err
2016-09-03 20:55:22 +02:00
}
2023-12-31 12:07:19 +01:00
type backendLoadFn func ( ctx context . Context , h backend . Handle , length int , offset int64 , fn func ( rd io . Reader ) error ) error
2023-12-31 15:27:36 +01:00
type loadBlobFn func ( ctx context . Context , t restic . BlobType , id restic . ID , buf [ ] byte ) ( [ ] byte , error )
2021-08-20 23:21:05 +02:00
2024-04-22 21:12:20 +02:00
// maxUnusedRange is the largest gap (1 MiB) of unrequested data between two
// blobs that streamPack still downloads; a larger gap starts a new request.
const maxUnusedRange = 1 << 20
2022-07-23 22:40:15 +02:00
2023-12-31 12:07:19 +01:00
// LoadBlobsFromPack loads the listed blobs from the specified pack file. The plaintext blob is passed to
2023-12-30 21:40:41 +01:00
// the handleBlobFn callback or an error if decryption failed or the blob hash does not match.
2023-12-31 00:18:41 +01:00
// handleBlobFn is called at most once for each blob. If the callback returns an error,
2024-04-22 20:53:31 +02:00
// then LoadBlobsFromPack will abort and not retry it. The buf passed to the callback is only valid within
// this specific call. The callback must not keep a reference to buf.
2023-12-31 12:07:19 +01:00
func ( r * Repository ) LoadBlobsFromPack ( ctx context . Context , packID restic . ID , blobs [ ] restic . Blob , handleBlobFn func ( blob restic . BlobHandle , buf [ ] byte , err error ) error ) error {
2024-05-10 17:05:16 +02:00
return streamPack ( ctx , r . be . Load , r . LoadBlob , r . getZstdDecoder ( ) , r . key , packID , blobs , handleBlobFn )
2023-12-31 12:07:19 +01:00
}
2024-04-22 21:11:52 +02:00
func streamPack ( ctx context . Context , beLoad backendLoadFn , loadBlobFn loadBlobFn , dec * zstd . Decoder , key * crypto . Key , packID restic . ID , blobs [ ] restic . Blob , handleBlobFn func ( blob restic . BlobHandle , buf [ ] byte , err error ) error ) error {
2021-08-20 23:21:05 +02:00
if len ( blobs ) == 0 {
// nothing to do
return nil
}
sort . Slice ( blobs , func ( i , j int ) bool {
return blobs [ i ] . Offset < blobs [ j ] . Offset
} )
2022-07-23 22:40:15 +02:00
lowerIdx := 0
lastPos := blobs [ 0 ] . Offset
2024-04-22 20:53:31 +02:00
const maxChunkSize = 2 * DefaultPackSize
2022-07-23 22:40:15 +02:00
for i := 0 ; i < len ( blobs ) ; i ++ {
if blobs [ i ] . Offset < lastPos {
// don't wait for streamPackPart to fail
return errors . Errorf ( "overlapping blobs in pack %v" , packID )
}
2024-04-22 20:53:31 +02:00
chunkSizeAfter := ( blobs [ i ] . Offset + blobs [ i ] . Length ) - blobs [ lowerIdx ] . Offset
split := false
// split if the chunk would become larger than maxChunkSize. Oversized chunks are
// handled by the requirement that the chunk contains at least one blob (i > lowerIdx)
if i > lowerIdx && chunkSizeAfter >= maxChunkSize {
split = true
}
// skip too large gaps as a new request is typically much cheaper than data transfers
2022-07-23 22:40:15 +02:00
if blobs [ i ] . Offset - lastPos > maxUnusedRange {
2024-04-22 20:53:31 +02:00
split = true
}
if split {
2022-07-23 22:40:15 +02:00
// load everything up to the skipped file section
2024-04-22 21:11:52 +02:00
err := streamPackPart ( ctx , beLoad , loadBlobFn , dec , key , packID , blobs [ lowerIdx : i ] , handleBlobFn )
2022-07-23 22:40:15 +02:00
if err != nil {
return err
}
lowerIdx = i
}
lastPos = blobs [ i ] . Offset + blobs [ i ] . Length
}
// load remainder
2024-04-22 21:11:52 +02:00
return streamPackPart ( ctx , beLoad , loadBlobFn , dec , key , packID , blobs [ lowerIdx : ] , handleBlobFn )
2022-07-23 22:40:15 +02:00
}
2024-04-22 21:11:52 +02:00
func streamPackPart ( ctx context . Context , beLoad backendLoadFn , loadBlobFn loadBlobFn , dec * zstd . Decoder , key * crypto . Key , packID restic . ID , blobs [ ] restic . Blob , handleBlobFn func ( blob restic . BlobHandle , buf [ ] byte , err error ) error ) error {
2024-05-09 18:30:04 +02:00
h := backend . Handle { Type : restic . PackFile , Name : packID . String ( ) , IsMetadata : blobs [ 0 ] . Type . IsMetadata ( ) }
2021-08-20 23:21:05 +02:00
dataStart := blobs [ 0 ] . Offset
dataEnd := blobs [ len ( blobs ) - 1 ] . Offset + blobs [ len ( blobs ) - 1 ] . Length
debug . Log ( "streaming pack %v (%d to %d bytes), blobs: %v" , packID , dataStart , dataEnd , len ( blobs ) )
2024-04-22 20:53:31 +02:00
data := make ( [ ] byte , int ( dataEnd - dataStart ) )
2024-04-22 21:11:52 +02:00
err := beLoad ( ctx , h , int ( dataEnd - dataStart ) , int64 ( dataStart ) , func ( rd io . Reader ) error {
2024-04-22 20:53:31 +02:00
_ , cerr := io . ReadFull ( rd , data )
return cerr
} )
// prevent callbacks after cancellation
if ctx . Err ( ) != nil {
return ctx . Err ( )
}
2022-02-13 17:24:09 +01:00
if err != nil {
2024-04-22 20:53:31 +02:00
// the context is only still valid if handleBlobFn never returned an error
if loadBlobFn != nil {
// check whether we can get the remaining blobs somewhere else
for _ , entry := range blobs {
buf , ierr := loadBlobFn ( ctx , entry . Type , entry . ID , nil )
err = handleBlobFn ( entry . BlobHandle , buf , ierr )
if err != nil {
break
2023-12-31 15:27:36 +01:00
}
}
2024-04-22 20:53:31 +02:00
}
return errors . Wrap ( err , "StreamPack" )
2022-02-13 17:24:09 +01:00
}
2024-05-10 16:29:48 +02:00
it := newPackBlobIterator ( packID , newByteReader ( data ) , dataStart , blobs , key , dec )
2024-04-22 20:53:31 +02:00
for {
2024-07-31 19:30:47 +02:00
if ctx . Err ( ) != nil {
return ctx . Err ( )
}
2024-04-22 20:53:31 +02:00
val , err := it . Next ( )
2024-05-10 16:29:48 +02:00
if err == errPackEOF {
2024-04-22 20:53:31 +02:00
break
} else if err != nil {
return err
2021-08-20 23:21:05 +02:00
}
2024-04-22 20:53:31 +02:00
if val . Err != nil && loadBlobFn != nil {
var ierr error
// check whether we can get a valid copy somewhere else
buf , ierr := loadBlobFn ( ctx , val . Handle . Type , val . Handle . ID , nil )
if ierr == nil {
// success
val . Plaintext = buf
val . Err = nil
2021-08-20 23:21:05 +02:00
}
}
2024-04-22 20:53:31 +02:00
err = handleBlobFn ( val . Handle , val . Plaintext , val . Err )
if err != nil {
return err
}
// ensure that each blob is only passed once to handleBlobFn
blobs = blobs [ 1 : ]
2023-12-31 15:27:36 +01:00
}
2021-08-20 23:21:05 +02:00
return errors . Wrap ( err , "StreamPack" )
}
2022-09-04 10:49:16 +02:00
2024-04-22 20:53:31 +02:00
// discardReader is the data source consumed by packBlobIterator. It lets the
// iterator perform zero copy reads if the underlying data source is a byte
// slice.
type discardReader interface {
	// Discard skips the next n bytes and reports how many were skipped.
	Discard(n int) (int, error)
	// ReadFull reads the next n bytes into a byte slice. The caller must not
	// retain a reference to the byte slice. Modifications are only allowed
	// within the boundaries of the returned slice.
	ReadFull(n int) ([]byte, error)
}
// byteReader implements discardReader on top of an in-memory byte slice. The
// slices it returns alias the slice passed to newByteReader; nothing is
// copied.
type byteReader struct {
	buf []byte // data that has not been consumed yet
}

// newByteReader returns a byteReader that consumes buf from its beginning.
func newByteReader(buf []byte) *byteReader {
	return &byteReader{
		buf: buf,
	}
}

// Discard skips the next n bytes. If fewer than n bytes are left it returns
// io.ErrUnexpectedEOF and consumes nothing. A negative n is rejected with an
// error instead of triggering a slice bounds panic.
func (b *byteReader) Discard(n int) (discarded int, err error) {
	if n < 0 {
		return 0, fmt.Errorf("byteReader: negative count %d", n)
	}
	if len(b.buf) < n {
		return 0, io.ErrUnexpectedEOF
	}
	b.buf = b.buf[n:]
	return n, nil
}

// ReadFull returns the next n bytes as a sub-slice of the underlying data and
// advances past them. If fewer than n bytes are left it returns
// io.ErrUnexpectedEOF and consumes nothing. A negative n is rejected with an
// error instead of triggering a slice bounds panic.
func (b *byteReader) ReadFull(n int) (buf []byte, err error) {
	if n < 0 {
		return nil, fmt.Errorf("byteReader: negative count %d", n)
	}
	if len(b.buf) < n {
		return nil, io.ErrUnexpectedEOF
	}
	buf = b.buf[:n]
	b.buf = b.buf[n:]
	return buf, nil
}
2024-05-10 16:29:48 +02:00
type packBlobIterator struct {
2023-12-31 00:18:41 +01:00
packID restic . ID
2024-04-22 20:53:31 +02:00
rd discardReader
2023-12-31 00:18:41 +01:00
currentOffset uint
blobs [ ] restic . Blob
key * crypto . Key
dec * zstd . Decoder
decode [ ] byte
}
2024-05-10 16:29:48 +02:00
type packBlobValue struct {
2023-12-31 00:18:41 +01:00
Handle restic . BlobHandle
Plaintext [ ] byte
Err error
}
2024-05-10 16:29:48 +02:00
// errPackEOF is returned by packBlobIterator.Next once all blobs were read.
var errPackEOF = errors.New("reached EOF of pack file")
2023-12-31 00:18:41 +01:00
2024-05-10 16:29:48 +02:00
func newPackBlobIterator ( packID restic . ID , rd discardReader , currentOffset uint ,
blobs [ ] restic . Blob , key * crypto . Key , dec * zstd . Decoder ) * packBlobIterator {
return & packBlobIterator {
2023-12-31 00:18:41 +01:00
packID : packID ,
rd : rd ,
currentOffset : currentOffset ,
blobs : blobs ,
key : key ,
dec : dec ,
}
}
// Next returns the next blob, an error or ErrPackEOF if all blobs were read
2024-05-10 16:29:48 +02:00
func ( b * packBlobIterator ) Next ( ) ( packBlobValue , error ) {
2023-12-31 00:18:41 +01:00
if len ( b . blobs ) == 0 {
2024-05-10 16:29:48 +02:00
return packBlobValue { } , errPackEOF
2023-12-31 00:18:41 +01:00
}
entry := b . blobs [ 0 ]
b . blobs = b . blobs [ 1 : ]
skipBytes := int ( entry . Offset - b . currentOffset )
if skipBytes < 0 {
2024-05-10 16:29:48 +02:00
return packBlobValue { } , fmt . Errorf ( "overlapping blobs in pack %v" , b . packID )
2023-12-31 00:18:41 +01:00
}
_ , err := b . rd . Discard ( skipBytes )
if err != nil {
2024-05-10 16:29:48 +02:00
return packBlobValue { } , err
2023-12-31 00:18:41 +01:00
}
b . currentOffset = entry . Offset
h := restic . BlobHandle { ID : entry . ID , Type : entry . Type }
debug . Log ( " process blob %v, skipped %d, %v" , h , skipBytes , entry )
2024-04-22 20:53:31 +02:00
buf , err := b . rd . ReadFull ( int ( entry . Length ) )
2023-12-31 00:18:41 +01:00
if err != nil {
debug . Log ( " read error %v" , err )
2024-05-10 16:29:48 +02:00
return packBlobValue { } , fmt . Errorf ( "readFull: %w" , err )
2023-12-31 00:18:41 +01:00
}
b . currentOffset = entry . Offset + entry . Length
if int ( entry . Length ) <= b . key . NonceSize ( ) {
debug . Log ( "%v" , b . blobs )
2024-05-10 16:29:48 +02:00
return packBlobValue { } , fmt . Errorf ( "invalid blob length %v" , entry )
2023-12-31 00:18:41 +01:00
}
// decryption errors are likely permanent, give the caller a chance to skip them
2024-04-22 20:53:31 +02:00
nonce , ciphertext := buf [ : b . key . NonceSize ( ) ] , buf [ b . key . NonceSize ( ) : ]
2023-12-31 00:18:41 +01:00
plaintext , err := b . key . Open ( ciphertext [ : 0 ] , nonce , ciphertext , nil )
2024-02-17 19:38:01 +01:00
if err != nil {
err = fmt . Errorf ( "decrypting blob %v from %v failed: %w" , h , b . packID . Str ( ) , err )
}
2023-12-31 00:18:41 +01:00
if err == nil && entry . IsCompressed ( ) {
// DecodeAll will allocate a slice if it is not large enough since it
// knows the decompressed size (because we're using EncodeAll)
b . decode , err = b . dec . DecodeAll ( plaintext , b . decode [ : 0 ] )
plaintext = b . decode
if err != nil {
2024-02-17 19:38:01 +01:00
err = fmt . Errorf ( "decompressing blob %v from %v failed: %w" , h , b . packID . Str ( ) , err )
2023-12-31 00:18:41 +01:00
}
}
if err == nil {
id := restic . Hash ( plaintext )
if ! id . Equal ( entry . ID ) {
debug . Log ( "read blob %v/%v from %v: wrong data returned, hash is %v" ,
h . Type , h . ID , b . packID . Str ( ) , id )
2024-02-17 19:37:32 +01:00
err = fmt . Errorf ( "read blob %v from %v: wrong data returned, hash is %v" ,
2023-12-31 00:18:41 +01:00
h , b . packID . Str ( ) , id )
}
}
2024-05-10 16:29:48 +02:00
return packBlobValue { entry . BlobHandle , plaintext , err } , nil
2023-12-31 00:18:41 +01:00
}
2022-09-04 10:49:16 +02:00
var (
	// zeroChunkOnce guards the one-time computation of zeroChunkID.
	zeroChunkOnce sync.Once
	// zeroChunkID caches the result of ZeroChunk.
	zeroChunkID restic.ID
)

// ZeroChunk returns the ID of an all-zero chunk with size chunker.MinSize. The
// ID is computed on the first call and cached for all later calls.
func ZeroChunk() restic.ID {
	zeroChunkOnce.Do(func() {
		zeros := make([]byte, chunker.MinSize)
		zeroChunkID = restic.Hash(zeros)
	})
	return zeroChunkID
}