From e78be75d1efdf51051d1721e5bb48d041bc03ddb Mon Sep 17 00:00:00 2001
From: Michael Eischer
Date: Sun, 7 Jan 2024 12:17:35 +0100
Subject: [PATCH] restore: separately restore blobs that are frequently referenced

Writing these blobs to their files can take a long time and consequently
cause the backend connection to time out. Avoid that by retrieving these
blobs separately.
---
 internal/restorer/filerestorer.go      | 27 +++++++++++++++++++++++++-
 internal/restorer/filerestorer_test.go | 21 ++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go
index f2e2cf24a..99a460321 100644
--- a/internal/restorer/filerestorer.go
+++ b/internal/restorer/filerestorer.go
@@ -242,8 +242,33 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
 
 	// track already processed blobs for precise error reporting
 	processedBlobs := restic.NewBlobSet()
-	err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
+	for _, entry := range blobs {
+		occurrences := 0
+		for _, offsets := range entry.files {
+			occurrences += len(offsets)
+		}
+		// With a maximum blob size of 8MB, the normal blob streaming has to write
+		// at most 800MB for a single blob. This should be short enough to avoid
+		// network connection timeouts. Based on a quick test, a limit of 100 only
+		// selects a very small number of blobs (the number of references per blob
+		// - aka. `count` - seems to follow an exponential distribution)
+		if occurrences > 100 {
+			// process frequently referenced blobs first as these can take a long time to write
+			// which can cause backend connections to time out
+			delete(blobs, entry.blob.ID)
+			partialBlobs := blobToFileOffsetsMapping{entry.blob.ID: entry}
+			err := r.downloadBlobs(ctx, pack.id, partialBlobs, processedBlobs)
+			if err := r.reportError(blobs, processedBlobs, err); err != nil {
+				return err
+			}
+		}
+	}
+	if len(blobs) == 0 {
+		return nil
+	}
+
+	err := r.downloadBlobs(ctx, pack.id, blobs, processedBlobs)
 	return r.reportError(blobs, processedBlobs, err)
 }
 
diff --git a/internal/restorer/filerestorer_test.go b/internal/restorer/filerestorer_test.go
index 94b068159..c5bc3fe31 100644
--- a/internal/restorer/filerestorer_test.go
+++ b/internal/restorer/filerestorer_test.go
@@ -248,6 +248,27 @@ func TestFileRestorerPackSkip(t *testing.T) {
 	}
 }
 
+func TestFileRestorerFrequentBlob(t *testing.T) {
+	tempdir := rtest.TempDir(t)
+
+	for _, sparse := range []bool{false, true} {
+		blobs := []TestBlob{
+			{"data1-1", "pack1-1"},
+		}
+		for i := 0; i < 10000; i++ {
+			blobs = append(blobs, TestBlob{"a", "pack1-1"})
+		}
+		blobs = append(blobs, TestBlob{"end", "pack1-1"})
+
+		restoreAndVerify(t, tempdir, []TestFile{
+			{
+				name:  "file1",
+				blobs: blobs,
+			},
+		}, nil, sparse)
+	}
+}
+
 func TestErrorRestoreFiles(t *testing.T) {
 	tempdir := rtest.TempDir(t)
 	content := []TestFile{