From a0fa9c6e9fbf410fa61417d15f1ed03521c1455a Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Thu, 30 Jun 2022 15:27:34 +0200 Subject: [PATCH] Revert "restic prune: Merge three loops over the index" This reverts commit 8bdfcf779fb4e7260fc05649beb7c524d7518bbe. Should fix #3809. Also needed to make #3290 apply cleanly. --- cmd/restic/cmd_prune.go | 80 +++++++++++++++++++-------------- cmd/restic/cmd_rebuild_index.go | 2 +- internal/checker/checker.go | 2 +- internal/pack/pack.go | 17 ++++--- 4 files changed, 59 insertions(+), 42 deletions(-) diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index e0273122e..76801dea4 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -242,26 +242,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB Verbosef("searching used packs...\n") - indexPack := make(map[restic.ID]packInfo) keepBlobs := restic.NewBlobSet() + duplicateBlobs := restic.NewBlobSet() - // iterate over all blobs in index to generate packInfo and find duplicates + // iterate over all blobs in index to find out which blobs are duplicates for blob := range repo.Index().Each(ctx) { - ip, seen := indexPack[blob.PackID] - - if seen { - // mark mixed packs with "Invalid blob type" - if ip.tpe != blob.Type { - ip.tpe = restic.InvalidBlob - } - } else { - ip = packInfo{ - tpe: blob.Type, - usedSize: pack.HeaderSize, - } - } - ip.usedSize += uint64(pack.CalculateEntrySize(blob.Blob)) - bh := blob.BlobHandle size := uint64(blob.Length) switch { @@ -270,27 +255,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB keepBlobs.Insert(bh) stats.size.used += size stats.blobs.used++ - ip.usedSize += size - ip.usedBlobs++ - - case keepBlobs.Has(bh): // duplicate of a blob that we want to keep + case keepBlobs.Has(bh): // duplicate blob + duplicateBlobs.Insert(bh) stats.size.duplicate += size stats.blobs.duplicate++ - ip.usedSize += size - ip.duplicateBlobs++ - - default: // unused, don't care if it's a duplicate + default: stats.size.unused += size stats.blobs.unused++ - ip.unusedSize += size - ip.unusedBlobs++ } - - if !blob.IsCompressed() { - ip.uncompressed = true - } - // update indexPack - indexPack[blob.PackID] = ip } // Check if all used blobs have been found in index @@ -303,6 +275,48 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB return errorIndexIncomplete } + indexPack := make(map[restic.ID]packInfo) + + // save computed pack header size + for pid, hdrSize := range pack.Size(ctx, repo.Index(), true) { + // initialize tpe with NumBlobTypes to indicate it's not set + indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)} + } + + // iterate over all blobs in index to generate packInfo + for blob := range repo.Index().Each(ctx) { + ip := indexPack[blob.PackID] + + // Set blob type if not yet set + if ip.tpe == restic.NumBlobTypes { + ip.tpe = blob.Type + } + + // mark mixed packs with "Invalid blob type" + if ip.tpe != blob.Type { + ip.tpe = restic.InvalidBlob + } + + bh := blob.BlobHandle + size := uint64(blob.Length) + switch { + case duplicateBlobs.Has(bh): // duplicate blob + ip.usedSize += size + ip.duplicateBlobs++ + case keepBlobs.Has(bh): // used blob, not duplicate + ip.usedSize += size + ip.usedBlobs++ + default: // unused blob + ip.unusedSize += size + ip.unusedBlobs++ + } + if !blob.IsCompressed() { + ip.uncompressed = true + } + // update indexPack + indexPack[blob.PackID] = ip + } + Verbosef("collecting packs for deletion and repacking\n") removePacksFirst := restic.NewIDSet() removePacks := restic.NewIDSet() diff --git a/cmd/restic/cmd_rebuild_index.go b/cmd/restic/cmd_rebuild_index.go index 5611fa939..0b3274ec4 100644 --- a/cmd/restic/cmd_rebuild_index.go +++ b/cmd/restic/cmd_rebuild_index.go @@ -98,7 +98,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor if err != nil { return err } - packSizeFromIndex = pack.Size(ctx, repo.Index()) + packSizeFromIndex = pack.Size(ctx, repo.Index(), false) } Verbosef("getting pack files to read...\n") diff --git a/internal/checker/checker.go b/internal/checker/checker.go index e8b24b8c7..a31235fae 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -131,7 +131,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) { } // compute pack size using index entries - c.packs = pack.Size(ctx, c.masterIndex) + c.packs = pack.Size(ctx, c.masterIndex, false) debug.Log("checking for duplicate packs") for packID := range c.packs { diff --git a/internal/pack/pack.go b/internal/pack/pack.go index 196d882cd..2d7a5c3fb 100644 --- a/internal/pack/pack.go +++ b/internal/pack/pack.go @@ -177,8 +177,8 @@ var ( const ( // size of the header-length field at the end of the file; it is a uint32 headerLengthSize = 4 - // HeaderSize is the header's constant overhead (independent of #entries) - HeaderSize = headerLengthSize + crypto.Extension + // headerSize is the header's constant overhead (independent of #entries) + headerSize = headerLengthSize + crypto.Extension // MaxHeaderSize is the max size of header including header-length field MaxHeaderSize = 16*1024*1024 + headerLengthSize @@ -242,7 +242,7 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) { // eagerly download eagerEntries header entries as part of header-length request. // only make second request if actual number of entries is greater than eagerEntries - eagerSize := eagerEntries*int(entrySize) + HeaderSize + eagerSize := eagerEntries*int(entrySize) + headerSize b, c, err := readRecords(rd, size, eagerSize) if err != nil { return nil, err @@ -349,7 +349,7 @@ func CalculateEntrySize(blob restic.Blob) int { } func CalculateHeaderSize(blobs []restic.Blob) int { - size := HeaderSize + size := headerSize for _, blob := range blobs { size += CalculateEntrySize(blob) } @@ -357,17 +357,20 @@ func CalculateHeaderSize(blobs []restic.Blob) int { } // Size returns the size of all packs computed by index information. +// If onlyHdr is set to true, only the size of the header is returned // Note that this function only gives correct sizes, if there are no // duplicates in the index. -func Size(ctx context.Context, mi restic.MasterIndex) map[restic.ID]int64 { +func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.ID]int64 { packSize := make(map[restic.ID]int64) for blob := range mi.Each(ctx) { size, ok := packSize[blob.PackID] if !ok { - size = HeaderSize + size = headerSize + } + if !onlyHdr { + size += int64(blob.Length) } - size += int64(blob.Length) packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob)) }