archiver: reduce memory usage for large files
FutureBlob now uses a Take() method as a more memory-efficient way to retrieve the future's result. In addition, futures are now collected while saving the file. As only a limited number of blobs can be queued for uploading, nearly all FutureBlobs of a large file already have their result ready, so each such FutureBlob object just consumes memory.
commit 4a10ebed15
parent b817681a11
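The idea behind the change, reduced to a standalone sketch below (the result, future, save and pending names are placeholders, not restic's actual types): every queued blob gets a one-element buffered channel, Poll drains it without blocking, and Take blocks until the worker delivers or the context is cancelled. The caller drops each future as soon as its result has been collected, so completed-but-unread futures no longer pile up while a large file is being chunked.

// Minimal sketch of the future pattern this commit switches to; the names and
// sizes used here are illustrative, not restic's actual API.
package main

import (
    "context"
    "fmt"
    "time"
)

type result struct {
    id    int
    known bool
}

// future delivers exactly one result over a buffered channel.
type future struct {
    ch <-chan result
}

// Poll returns the result if it is already available, nil otherwise; it never blocks.
func (f *future) Poll() *result {
    select {
    case res, ok := <-f.ch:
        if ok {
            return &res
        }
    default:
    }
    return nil
}

// Take blocks until the result is available or the context is cancelled.
func (f *future) Take(ctx context.Context) result {
    select {
    case res, ok := <-f.ch:
        if ok {
            return res
        }
    case <-ctx.Done():
    }
    return result{}
}

// save simulates handing a blob to a background worker: the buffered channel
// lets the worker deliver its result without waiting for the caller.
func save(id int) future {
    ch := make(chan result, 1)
    go func() {
        time.Sleep(time.Millisecond) // pretend to upload the blob
        ch <- result{id: id}
    }()
    return future{ch: ch}
}

func main() {
    ctx := context.Background()
    var pending []future
    done := 0

    for i := 0; i < 1000; i++ { // one iteration per chunk of a large file
        pending = append(pending, save(i))

        // Collect futures whose result is already ready, so the slice does
        // not keep one completed-but-unread future per chunk.
        for len(pending) > 0 {
            if pending[0].Poll() == nil {
                break
            }
            pending[0] = future{} // drop the reference early
            pending = pending[1:]
            done++
        }
    }

    // Wait for whatever is still outstanding.
    for i, f := range pending {
        pending[i] = future{}
        f.Take(ctx)
        done++
    }
    fmt.Println("collected", done, "results")
}

In the commit itself, FutureBlob plays the role of the future and SaveBlobResponse that of the result; saveFile uses the same drain loop, and Take replaces the former Wait/Known/Length/SizeInRepo accessors, so the future no longer needs to cache its response.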
@@ -184,17 +184,17 @@ func (arch *Archiver) saveTree(ctx context.Context, t *restic.TreeJSONBuilder) (
     b := &Buffer{Data: buf}
     res := arch.blobSaver.Save(ctx, restic.TreeBlob, b)
 
-    res.Wait(ctx)
-    if !res.Known() {
+    sbr := res.Take(ctx)
+    if !sbr.known {
         s.TreeBlobs++
-        s.TreeSize += uint64(res.Length())
-        s.TreeSizeInRepo += uint64(res.SizeInRepo())
+        s.TreeSize += uint64(sbr.length)
+        s.TreeSizeInRepo += uint64(sbr.sizeInRepo)
     }
-    // The context was canceled in the meantime, res.ID() might be invalid
+    // The context was canceled in the meantime, id might be invalid
     if ctx.Err() != nil {
         return restic.ID{}, s, ctx.Err()
     }
-    return res.ID(), s, nil
+    return sbr.id, s, nil
 }
 
 // nodeFromFileInfo returns the restic node from an os.FileInfo.
@@ -44,9 +44,7 @@ func (s *BlobSaver) TriggerShutdown() {
 // Save stores a blob in the repo. It checks the index and the known blobs
 // before saving anything. It takes ownership of the buffer passed in.
 func (s *BlobSaver) Save(ctx context.Context, t restic.BlobType, buf *Buffer) FutureBlob {
-    // buf might be freed once the job was submitted, thus calculate the length now
-    length := len(buf.Data)
-    ch := make(chan saveBlobResponse, 1)
+    ch := make(chan SaveBlobResponse, 1)
     select {
     case s.ch <- saveBlobJob{BlobType: t, buf: buf, ch: ch}:
     case <-ctx.Done():
@@ -55,72 +53,62 @@ func (s *BlobSaver) Save(ctx context.Context, t restic.BlobType, buf *Buffer) Fu
         return FutureBlob{ch: ch}
     }
 
-    return FutureBlob{ch: ch, length: length}
+    return FutureBlob{ch: ch}
 }
 
 // FutureBlob is returned by SaveBlob and will return the data once it has been processed.
 type FutureBlob struct {
-    ch     <-chan saveBlobResponse
-    length int
-    res    saveBlobResponse
+    ch <-chan SaveBlobResponse
 }
 
-// Wait blocks until the result is available or the context is cancelled.
-func (s *FutureBlob) Wait(ctx context.Context) {
+func (s *FutureBlob) Poll() *SaveBlobResponse {
     select {
-    case <-ctx.Done():
-        return
     case res, ok := <-s.ch:
         if ok {
-            s.res = res
+            return &res
         }
+    default:
     }
+    return nil
 }
 
-// ID returns the ID of the blob after it has been saved.
-func (s *FutureBlob) ID() restic.ID {
-    return s.res.id
-}
-
-// Known returns whether or not the blob was already known.
-func (s *FutureBlob) Known() bool {
-    return s.res.known
-}
-
-// Length returns the raw length of the blob.
-func (s *FutureBlob) Length() int {
-    return s.length
-}
-
-// SizeInRepo returns the number of bytes added to the repo (including
-// compression and crypto overhead).
-func (s *FutureBlob) SizeInRepo() int {
-    return s.res.size
+// Take blocks until the result is available or the context is cancelled.
+func (s *FutureBlob) Take(ctx context.Context) SaveBlobResponse {
+    select {
+    case res, ok := <-s.ch:
+        if ok {
+            return res
+        }
+    case <-ctx.Done():
+    }
+    return SaveBlobResponse{}
 }
 
 type saveBlobJob struct {
     restic.BlobType
     buf *Buffer
-    ch  chan<- saveBlobResponse
+    ch  chan<- SaveBlobResponse
 }
 
-type saveBlobResponse struct {
-    id    restic.ID
-    known bool
-    size  int
+type SaveBlobResponse struct {
+    id         restic.ID
+    length     int
+    sizeInRepo int
+    known      bool
 }
 
-func (s *BlobSaver) saveBlob(ctx context.Context, t restic.BlobType, buf []byte) (saveBlobResponse, error) {
-    id, known, size, err := s.repo.SaveBlob(ctx, t, buf, restic.ID{}, false)
+func (s *BlobSaver) saveBlob(ctx context.Context, t restic.BlobType, buf []byte) (SaveBlobResponse, error) {
+    id, known, sizeInRepo, err := s.repo.SaveBlob(ctx, t, buf, restic.ID{}, false)
 
     if err != nil {
-        return saveBlobResponse{}, err
+        return SaveBlobResponse{}, err
     }
 
-    return saveBlobResponse{
-        id:    id,
-        known: known,
-        size:  size,
+    return SaveBlobResponse{
+        id:         id,
+        length:     len(buf),
+        sizeInRepo: sizeInRepo,
+        known:      known,
     }, nil
 }
@@ -54,8 +54,8 @@ func TestBlobSaver(t *testing.T) {
     }
 
     for i, blob := range results {
-        blob.Wait(ctx)
-        if blob.Known() {
+        sbr := blob.Take(ctx)
+        if sbr.known {
             t.Errorf("blob %v is known, that should not be the case", i)
         }
     }
@@ -129,6 +129,15 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat
     chnker.Reset(f, s.pol)
 
     var results []FutureBlob
+    complete := func(sbr SaveBlobResponse) {
+        if !sbr.known {
+            stats.DataBlobs++
+            stats.DataSize += uint64(sbr.length)
+            stats.DataSizeInRepo += uint64(sbr.sizeInRepo)
+        }
+
+        node.Content = append(node.Content, sbr.id)
+    }
+
     node.Content = []restic.ID{}
     var size uint64
@@ -168,6 +177,17 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat
         }
 
         s.CompleteBlob(f.Name(), uint64(len(chunk.Data)))
+
+        // collect already completed blobs
+        for len(results) > 0 {
+            sbr := results[0].Poll()
+            if sbr == nil {
+                break
+            }
+            results[0] = FutureBlob{}
+            results = results[1:]
+            complete(*sbr)
+        }
     }
 
     err = f.Close()
@@ -176,15 +196,10 @@ func (s *FileSaver) saveFile(ctx context.Context, chnker *chunker.Chunker, snPat
         return fnr
     }
 
-    for _, res := range results {
-        res.Wait(ctx)
-        if !res.Known() {
-            stats.DataBlobs++
-            stats.DataSize += uint64(res.Length())
-            stats.DataSizeInRepo += uint64(res.SizeInRepo())
-        }
-
-        node.Content = append(node.Content, res.ID())
+    for i, res := range results {
+        results[i] = FutureBlob{}
+        sbr := res.Take(ctx)
+        complete(sbr)
     }
 
     node.Size = size
@@ -34,7 +34,7 @@ func startFileSaver(ctx context.Context, t testing.TB) (*FileSaver, context.Cont
     wg, ctx := errgroup.WithContext(ctx)
 
     saveBlob := func(ctx context.Context, tpe restic.BlobType, buf *Buffer) FutureBlob {
-        ch := make(chan saveBlobResponse)
+        ch := make(chan SaveBlobResponse)
         close(ch)
         return FutureBlob{ch: ch}
     }