2015-04-26 13:36:49 +00:00
package pack
import (
2024-02-04 10:58:29 +00:00
"bytes"
2022-02-13 13:25:38 +00:00
"context"
2015-04-26 13:36:49 +00:00
"encoding/binary"
"fmt"
"io"
"sync"
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
2017-07-24 15:42:25 +00:00
"github.com/restic/restic/internal/restic"
2017-07-23 12:21:03 +00:00
"github.com/restic/restic/internal/crypto"
2015-04-26 13:36:49 +00:00
)
// Packer is used to create a new Pack.
type Packer struct {
2016-08-31 20:39:36 +00:00
blobs [ ] restic . Blob
2015-04-26 13:36:49 +00:00
bytes uint
k * crypto . Key
2016-03-06 11:26:25 +00:00
wr io . Writer
2015-04-26 13:36:49 +00:00
m sync . Mutex
}
2020-03-09 13:25:55 +00:00
// NewPacker returns a new Packer that can be used to pack blobs together.
2016-03-06 11:26:25 +00:00
func NewPacker ( k * crypto . Key , wr io . Writer ) * Packer {
return & Packer { k : k , wr : wr }
2015-04-26 13:36:49 +00:00
}
// Add saves the data read from rd as a new blob to the packer. Returned is the
2022-06-05 09:39:57 +00:00
// number of bytes written to the pack plus the pack header entry size.
2022-02-13 16:24:09 +00:00
func ( p * Packer ) Add ( t restic . BlobType , id restic . ID , data [ ] byte , uncompressedLength int ) ( int , error ) {
2015-04-26 13:36:49 +00:00
p . m . Lock ( )
defer p . m . Unlock ( )
2020-11-05 20:52:34 +00:00
c := restic . Blob { BlobHandle : restic . BlobHandle { Type : t , ID : id } }
2015-04-26 13:36:49 +00:00
2016-03-06 11:26:25 +00:00
n , err := p . wr . Write ( data )
2015-08-08 11:47:08 +00:00
c . Length = uint ( n )
2015-04-26 13:36:49 +00:00
c . Offset = p . bytes
2022-02-13 16:24:09 +00:00
c . UncompressedLength = uint ( uncompressedLength )
2015-04-26 13:36:49 +00:00
p . bytes += uint ( n )
p . blobs = append ( p . blobs , c )
2022-06-05 09:39:57 +00:00
n += CalculateEntrySize ( c )
2015-04-26 13:36:49 +00:00
2016-08-29 20:16:58 +00:00
return n , errors . Wrap ( err , "Write" )
2015-04-26 13:36:49 +00:00
}
2022-02-13 16:24:09 +00:00
// Sizes of a single serialized pack header entry. Each length field occupies
// headerLengthSize bytes.
var (
	// plainEntrySize is the size of an entry for an uncompressed blob:
	// type, length and ID.
	plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
	// entrySize is the size of an entry for a compressed blob, which
	// additionally carries the uncompressed length.
	entrySize = plainEntrySize + headerLengthSize
)
2015-04-26 13:36:49 +00:00
2020-07-22 16:46:55 +00:00
// headerEntry describes the format of header entries. It serves only as
// documentation.
2015-04-26 13:36:49 +00:00
type headerEntry struct {
2022-04-29 21:41:03 +00:00
Type uint8
Length uint32
ID restic . ID
}
// compressedHeaderEntry describes the format of header entries for compressed blobs.
// It serves only as documentation.
type compressedHeaderEntry struct {
Type uint8
Length uint32
UncompressedLength uint32
ID restic . ID
2015-04-26 13:36:49 +00:00
}
// Finalize writes the header for all added blobs and finalizes the pack.
2021-08-07 20:52:05 +00:00
func ( p * Packer ) Finalize ( ) error {
2015-04-26 13:36:49 +00:00
p . m . Lock ( )
defer p . m . Unlock ( )
2024-02-04 10:58:29 +00:00
header , err := makeHeader ( p . blobs )
2015-04-29 22:36:36 +00:00
if err != nil {
2021-08-07 20:52:05 +00:00
return err
2015-04-26 13:36:49 +00:00
}
2022-06-12 12:48:30 +00:00
encryptedHeader := make ( [ ] byte , 0 , crypto . CiphertextLength ( len ( header ) ) )
2017-10-29 10:33:57 +00:00
nonce := crypto . NewRandomNonce ( )
encryptedHeader = append ( encryptedHeader , nonce ... )
2020-07-22 16:46:55 +00:00
encryptedHeader = p . k . Seal ( encryptedHeader , nonce , header , nil )
2024-02-04 10:58:29 +00:00
encryptedHeader = binary . LittleEndian . AppendUint32 ( encryptedHeader , uint32 ( len ( encryptedHeader ) ) )
if err := verifyHeader ( p . k , encryptedHeader , p . blobs ) ; err != nil {
2024-02-04 17:09:32 +00:00
//nolint:revive // ignore linter warnings about error message spelling
return fmt . Errorf ( "Detected data corruption while writing pack-file header: %w\nCorrupted data is either caused by hardware issues or software bugs. Please open an issue at https://github.com/restic/restic/issues/new/choose for further troubleshooting." , err )
2024-02-04 10:58:29 +00:00
}
2015-04-29 22:41:11 +00:00
2016-01-24 18:30:14 +00:00
// append the header
2016-03-06 11:26:25 +00:00
n , err := p . wr . Write ( encryptedHeader )
2015-04-26 13:36:49 +00:00
if err != nil {
2021-08-07 20:52:05 +00:00
return errors . Wrap ( err , "Write" )
2016-01-24 18:30:14 +00:00
}
2024-02-04 10:58:29 +00:00
if n != len ( encryptedHeader ) {
2021-08-07 20:52:05 +00:00
return errors . New ( "wrong number of bytes written" )
2015-04-26 13:36:49 +00:00
}
2024-02-04 10:58:29 +00:00
p . bytes += uint ( len ( encryptedHeader ) )
return nil
}
2015-04-26 13:36:49 +00:00
2024-02-04 10:58:29 +00:00
func verifyHeader ( k * crypto . Key , header [ ] byte , expected [ ] restic . Blob ) error {
// do not offer a way to skip the pack header verification, as pack headers are usually small enough
// to not result in a significant performance impact
decoded , hdrSize , err := List ( k , bytes . NewReader ( header ) , int64 ( len ( header ) ) )
2015-04-26 13:36:49 +00:00
if err != nil {
2024-02-04 10:58:29 +00:00
return fmt . Errorf ( "header decoding failed: %w" , err )
}
if hdrSize != uint32 ( len ( header ) ) {
return fmt . Errorf ( "unexpected header size %v instead of %v" , hdrSize , len ( header ) )
}
if len ( decoded ) != len ( expected ) {
return fmt . Errorf ( "pack header size mismatch" )
}
for i := 0 ; i < len ( decoded ) ; i ++ {
if decoded [ i ] != expected [ i ] {
return fmt . Errorf ( "pack header entry mismatch got %v instead of %v" , decoded [ i ] , expected [ i ] )
}
2015-04-26 13:36:49 +00:00
}
2021-08-07 20:52:05 +00:00
return nil
}
// HeaderOverhead returns an estimate of the number of bytes written by a call to Finalize.
func ( p * Packer ) HeaderOverhead ( ) int {
2022-06-12 12:48:30 +00:00
return crypto . CiphertextLength ( 0 ) + binary . Size ( uint32 ( 0 ) )
2015-04-26 13:36:49 +00:00
}
2020-07-22 16:46:55 +00:00
// makeHeader constructs the header for p.
2024-02-04 10:58:29 +00:00
func makeHeader ( blobs [ ] restic . Blob ) ( [ ] byte , error ) {
buf := make ( [ ] byte , 0 , len ( blobs ) * int ( entrySize ) )
2015-04-29 22:36:36 +00:00
2024-02-04 10:58:29 +00:00
for _ , b := range blobs {
2022-02-13 16:24:09 +00:00
switch {
case b . Type == restic . DataBlob && b . UncompressedLength == 0 :
2020-07-22 16:46:55 +00:00
buf = append ( buf , 0 )
2022-02-13 16:24:09 +00:00
case b . Type == restic . TreeBlob && b . UncompressedLength == 0 :
2020-07-22 16:46:55 +00:00
buf = append ( buf , 1 )
2022-02-13 16:24:09 +00:00
case b . Type == restic . DataBlob && b . UncompressedLength != 0 :
buf = append ( buf , 2 )
case b . Type == restic . TreeBlob && b . UncompressedLength != 0 :
buf = append ( buf , 3 )
2016-08-04 16:40:31 +00:00
default :
2020-07-22 16:46:55 +00:00
return nil , errors . Errorf ( "invalid blob type %v" , b . Type )
2015-04-29 22:36:36 +00:00
}
2020-07-22 16:46:55 +00:00
var lenLE [ 4 ] byte
binary . LittleEndian . PutUint32 ( lenLE [ : ] , uint32 ( b . Length ) )
buf = append ( buf , lenLE [ : ] ... )
2022-02-13 16:24:09 +00:00
if b . UncompressedLength != 0 {
binary . LittleEndian . PutUint32 ( lenLE [ : ] , uint32 ( b . UncompressedLength ) )
buf = append ( buf , lenLE [ : ] ... )
}
2020-07-22 16:46:55 +00:00
buf = append ( buf , b . ID [ : ] ... )
2015-04-29 22:36:36 +00:00
}
2020-07-22 16:46:55 +00:00
return buf , nil
2015-04-29 22:36:36 +00:00
}
2015-04-26 13:36:49 +00:00
// Size returns the number of bytes written so far.
func (p *Packer) Size() uint {
	p.m.Lock()
	written := p.bytes
	p.m.Unlock()

	return written
}
// Count returns the number of blobs in this packer.
func (p *Packer) Count() int {
	p.m.Lock()
	count := len(p.blobs)
	p.m.Unlock()

	return count
}
2022-04-30 22:05:20 +00:00
// HeaderFull returns true if the pack header is full, i.e. if adding one more
// entry of the larger (compressed) entry size would exceed MaxHeaderSize.
func (p *Packer) HeaderFull() bool {
	p.m.Lock()
	defer p.m.Unlock()

	// header size if one more entry were added
	next := headerSize + uint(len(p.blobs)+1)*entrySize
	return next > MaxHeaderSize
}
2015-04-26 13:36:49 +00:00
// Blobs returns the slice of blobs that have been written.
2016-08-31 20:39:36 +00:00
func ( p * Packer ) Blobs ( ) [ ] restic . Blob {
2015-04-26 13:36:49 +00:00
p . m . Lock ( )
defer p . m . Unlock ( )
return p . blobs
}
// String returns a short human-readable summary of the packer containing the
// number of blobs and the number of bytes written so far.
func (p *Packer) String() string {
	// take the lock like the other accessors do, otherwise reading blobs and
	// bytes races with concurrent calls to Add or Finalize
	p.m.Lock()
	defer p.m.Unlock()

	return fmt.Sprintf("<Packer %d blobs, %d bytes>", len(p.blobs), p.bytes)
}
2018-02-21 23:22:18 +00:00
var (
// we require at least one entry in the header, and one blob for a pack file
2022-02-13 16:24:09 +00:00
minFileSize = plainEntrySize + crypto . Extension + uint ( headerLengthSize )
2018-02-21 23:22:18 +00:00
)
const (
2020-05-01 20:56:34 +00:00
// size of the header-length field at the end of the file; it is a uint32
headerLengthSize = 4
2022-06-30 13:27:34 +00:00
// headerSize is the header's constant overhead (independent of #entries)
headerSize = headerLengthSize + crypto . Extension
2020-05-01 20:56:34 +00:00
2021-08-20 14:15:40 +00:00
// MaxHeaderSize is the max size of header including header-length field
MaxHeaderSize = 16 * 1024 * 1024 + headerLengthSize
2023-12-06 12:11:55 +00:00
// number of header entries to download as part of header-length request
2018-02-21 23:22:18 +00:00
eagerEntries = 15
)
2022-02-13 16:24:09 +00:00
// readRecords reads up to bufsize bytes from the underlying ReaderAt, returning
// the raw header, the total number of bytes in the header, and any error.
// If the header contains fewer than bufsize bytes, the header is truncated to
2018-02-22 17:37:10 +00:00
// the appropriate size.
2022-02-13 16:24:09 +00:00
func readRecords ( rd io . ReaderAt , size int64 , bufsize int ) ( [ ] byte , int , error ) {
2018-02-21 23:22:18 +00:00
if bufsize > int ( size ) {
bufsize = int ( size )
}
2016-08-25 19:51:07 +00:00
2018-02-21 23:22:18 +00:00
b := make ( [ ] byte , bufsize )
off := size - int64 ( bufsize )
if _ , err := rd . ReadAt ( b , off ) ; err != nil {
return nil , 0 , err
}
2018-02-22 17:37:10 +00:00
hlen := binary . LittleEndian . Uint32 ( b [ len ( b ) - headerLengthSize : ] )
2018-02-21 23:22:18 +00:00
b = b [ : len ( b ) - headerLengthSize ]
2018-02-22 17:37:10 +00:00
debug . Log ( "header length: %v" , hlen )
2018-02-21 23:22:18 +00:00
var err error
switch {
2018-02-22 17:37:10 +00:00
case hlen == 0 :
2018-02-21 23:22:18 +00:00
err = InvalidFileError { Message : "header length is zero" }
2018-02-22 17:37:10 +00:00
case hlen < crypto . Extension :
2024-05-11 19:36:16 +00:00
err = InvalidFileError { Message : "header length is too short" }
2018-02-22 17:37:10 +00:00
case int64 ( hlen ) > size - int64 ( headerLengthSize ) :
2018-02-21 23:22:18 +00:00
err = InvalidFileError { Message : "header is larger than file" }
2021-08-20 14:15:40 +00:00
case int64 ( hlen ) > MaxHeaderSize - int64 ( headerLengthSize ) :
2018-02-21 23:22:18 +00:00
err = InvalidFileError { Message : "header is larger than maxHeaderSize" }
}
if err != nil {
return nil , 0 , errors . Wrap ( err , "readHeader" )
}
2018-02-21 02:28:37 +00:00
2022-02-13 16:24:09 +00:00
total := int ( hlen + headerLengthSize )
if total < bufsize {
2018-02-22 17:37:10 +00:00
// truncate to the beginning of the pack header
b = b [ len ( b ) - int ( hlen ) : ]
2018-02-21 23:22:18 +00:00
}
2017-06-08 18:40:12 +00:00
2018-02-22 17:37:10 +00:00
return b , total , nil
2018-02-21 23:22:18 +00:00
}
2018-01-23 23:51:22 +00:00
2016-08-25 19:51:07 +00:00
// readHeader reads the header at the end of rd. size is the length of the
// whole data accessible in rd.
func readHeader ( rd io . ReaderAt , size int64 ) ( [ ] byte , error ) {
2017-06-08 19:04:07 +00:00
debug . Log ( "size: %v" , size )
2017-06-08 18:40:12 +00:00
if size < int64 ( minFileSize ) {
2024-05-11 19:36:16 +00:00
err := InvalidFileError { Message : "file is too short" }
2017-06-08 18:40:12 +00:00
return nil , errors . Wrap ( err , "readHeader" )
}
2018-01-23 23:51:22 +00:00
// assuming extra request is significantly slower than extra bytes download,
// eagerly download eagerEntries header entries as part of header-length request.
// only make second request if actual number of entries is greater than eagerEntries
2022-06-30 13:27:34 +00:00
eagerSize := eagerEntries * int ( entrySize ) + headerSize
2022-02-13 16:24:09 +00:00
b , c , err := readRecords ( rd , size , eagerSize )
2016-08-07 11:12:52 +00:00
if err != nil {
2016-08-25 19:51:07 +00:00
return nil , err
2016-08-07 11:12:52 +00:00
}
2022-02-13 16:24:09 +00:00
if c <= eagerSize {
2018-02-21 23:22:18 +00:00
// eager read sufficed, return what we got
return b , nil
2016-08-23 20:21:29 +00:00
}
2018-02-21 23:22:18 +00:00
b , _ , err = readRecords ( rd , size , c )
if err != nil {
return nil , err
2016-08-07 11:12:52 +00:00
}
2018-02-21 23:22:18 +00:00
return b , nil
2016-08-25 19:51:07 +00:00
}
2017-06-08 18:40:12 +00:00
// InvalidFileError is returned when a file is found that is not a pack file.
type InvalidFileError struct {
	// Message describes why the file was rejected.
	Message string
}

// Error implements the error interface and returns the message unchanged.
func (e InvalidFileError) Error() string {
	msg := e.Message
	return msg
}
2020-11-16 03:03:45 +00:00
// List returns the list of entries found in a pack file and the length of the
// header (including header size and crypto overhead)
func List ( k * crypto . Key , rd io . ReaderAt , size int64 ) ( entries [ ] restic . Blob , hdrSize uint32 , err error ) {
2016-08-25 19:51:07 +00:00
buf , err := readHeader ( rd , size )
if err != nil {
2020-11-16 03:03:45 +00:00
return nil , 0 , err
2016-08-25 19:51:07 +00:00
}
2015-04-26 13:36:49 +00:00
2022-06-12 12:48:30 +00:00
if len ( buf ) < crypto . CiphertextLength ( 0 ) {
2024-05-11 19:36:16 +00:00
return nil , 0 , errors . New ( "invalid header, too short" )
2017-10-29 10:33:57 +00:00
}
2020-11-16 03:03:45 +00:00
hdrSize = headerLengthSize + uint32 ( len ( buf ) )
2017-10-29 10:33:57 +00:00
nonce , buf := buf [ : k . NonceSize ( ) ] , buf [ k . NonceSize ( ) : ]
buf , err = k . Open ( buf [ : 0 ] , nonce , buf , nil )
2015-04-26 13:36:49 +00:00
if err != nil {
2020-11-16 03:03:45 +00:00
return nil , 0 , err
2015-04-26 13:36:49 +00:00
}
2022-02-13 16:24:09 +00:00
// might over allocate a bit if all blobs have EntrySize but only by a few percent
entries = make ( [ ] restic . Blob , 0 , uint ( len ( buf ) ) / plainEntrySize )
2017-01-15 14:27:58 +00:00
2015-10-25 17:07:51 +00:00
pos := uint ( 0 )
2020-07-22 16:46:55 +00:00
for len ( buf ) > 0 {
2022-02-13 16:24:09 +00:00
entry , headerSize , err := parseHeaderEntry ( buf )
2015-10-25 17:07:51 +00:00
if err != nil {
2020-11-16 03:03:45 +00:00
return nil , 0 , err
2016-08-04 16:40:31 +00:00
}
2020-07-22 16:46:55 +00:00
entry . Offset = pos
2016-08-04 16:40:31 +00:00
entries = append ( entries , entry )
2020-07-22 16:46:55 +00:00
pos += entry . Length
2022-02-13 16:24:09 +00:00
buf = buf [ headerSize : ]
2015-04-26 13:36:49 +00:00
}
2020-11-16 03:03:45 +00:00
return entries , hdrSize , nil
2015-04-26 13:36:49 +00:00
}
2020-05-01 20:56:34 +00:00
2022-02-13 16:24:09 +00:00
func parseHeaderEntry ( p [ ] byte ) ( b restic . Blob , size uint , err error ) {
l := uint ( len ( p ) )
size = plainEntrySize
if l < plainEntrySize {
2020-07-22 16:46:55 +00:00
err = errors . Errorf ( "parseHeaderEntry: buffer of size %d too short" , len ( p ) )
2022-02-13 16:24:09 +00:00
return b , size , err
2020-07-22 16:46:55 +00:00
}
2022-02-13 16:24:09 +00:00
tpe := p [ 0 ]
2020-07-22 16:46:55 +00:00
2022-02-13 16:24:09 +00:00
switch tpe {
case 0 , 2 :
2020-07-22 16:46:55 +00:00
b . Type = restic . DataBlob
2022-02-13 16:24:09 +00:00
case 1 , 3 :
2020-07-22 16:46:55 +00:00
b . Type = restic . TreeBlob
default :
2022-02-13 16:24:09 +00:00
return b , size , errors . Errorf ( "invalid type %d" , tpe )
2020-07-22 16:46:55 +00:00
}
b . Length = uint ( binary . LittleEndian . Uint32 ( p [ 1 : 5 ] ) )
2022-02-13 16:24:09 +00:00
p = p [ 5 : ]
if tpe == 2 || tpe == 3 {
size = entrySize
if l < entrySize {
err = errors . Errorf ( "parseHeaderEntry: buffer of size %d too short" , len ( p ) )
return b , size , err
}
b . UncompressedLength = uint ( binary . LittleEndian . Uint32 ( p [ 0 : 4 ] ) )
p = p [ 4 : ]
}
2020-07-22 16:46:55 +00:00
2022-02-13 16:24:09 +00:00
copy ( b . ID [ : ] , p [ : ] )
return b , size , nil
}
func CalculateEntrySize ( blob restic . Blob ) int {
if blob . UncompressedLength != 0 {
return int ( entrySize )
}
return int ( plainEntrySize )
2020-07-22 16:46:55 +00:00
}
2022-02-13 13:25:38 +00:00
func CalculateHeaderSize ( blobs [ ] restic . Blob ) int {
2022-06-30 13:27:34 +00:00
size := headerSize
2022-02-13 16:24:09 +00:00
for _ , blob := range blobs {
size += CalculateEntrySize ( blob )
}
return size
2022-02-13 13:25:38 +00:00
}
// Size returns the size of all packs computed by index information.
2022-06-30 13:27:34 +00:00
// If onlyHdr is set to true, only the size of the header is returned
2022-02-13 13:25:38 +00:00
// Note that this function only gives correct sizes, if there are no
// duplicates in the index.
2024-05-19 10:41:56 +00:00
func Size ( ctx context . Context , mi restic . ListBlobser , onlyHdr bool ) ( map [ restic . ID ] int64 , error ) {
2022-02-13 13:25:38 +00:00
packSize := make ( map [ restic . ID ] int64 )
2024-05-19 10:41:56 +00:00
err := mi . ListBlobs ( ctx , func ( blob restic . PackedBlob ) {
2022-02-13 13:25:38 +00:00
size , ok := packSize [ blob . PackID ]
if ! ok {
2022-06-30 13:27:34 +00:00
size = headerSize
}
if ! onlyHdr {
size += int64 ( blob . Length )
2022-02-13 13:25:38 +00:00
}
2022-02-13 16:24:09 +00:00
packSize [ blob . PackID ] = size + int64 ( CalculateEntrySize ( blob . Blob ) )
2022-08-19 18:04:39 +00:00
} )
2022-02-13 13:25:38 +00:00
2024-04-05 20:20:14 +00:00
return packSize , err
2022-02-13 13:25:38 +00:00
}