Revert "restic prune: Merge three loops over the index"

This reverts commit 8bdfcf779fb4e7260fc05649beb7c524d7518bbe. The revert should fix #3809 and is also needed to make #3290 apply cleanly.
2025-01-22 14:48:24 +00:00 · 2022-06-30 15:27:34 +02:00 · 2022-06-30 15:27:34 +02:00 · a0fa9c6e9f
commit a0fa9c6e9f
parent bc96879d41
4 changed files with 59 additions and 42 deletions
--- a/cmd/restic/cmd_prune.go
+++ b/cmd/restic/cmd_prune.go
@@ -242,26 +242,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB

 	Verbosef("searching used packs...\n")

-	indexPack := make(map[restic.ID]packInfo)
 	keepBlobs := restic.NewBlobSet()
+	duplicateBlobs := restic.NewBlobSet()

-	// iterate over all blobs in index to generate packInfo and find duplicates
+	// iterate over all blobs in index to find out which blobs are duplicates
 	for blob := range repo.Index().Each(ctx) {
-		ip, seen := indexPack[blob.PackID]
-
-		if seen {
-			// mark mixed packs with "Invalid blob type"
-			if ip.tpe != blob.Type {
-				ip.tpe = restic.InvalidBlob
-			}
-		} else {
-			ip = packInfo{
-				tpe:      blob.Type,
-				usedSize: pack.HeaderSize,
-			}
-		}
-		ip.usedSize += uint64(pack.CalculateEntrySize(blob.Blob))
-
 		bh := blob.BlobHandle
 		size := uint64(blob.Length)
 		switch {
@@ -270,27 +255,14 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 			keepBlobs.Insert(bh)
 			stats.size.used += size
 			stats.blobs.used++
-			ip.usedSize += size
-			ip.usedBlobs++
-
-		case keepBlobs.Has(bh): // duplicate of a blob that we want to keep
+		case keepBlobs.Has(bh): // duplicate blob
+			duplicateBlobs.Insert(bh)
 			stats.size.duplicate += size
 			stats.blobs.duplicate++
-			ip.usedSize += size
-			ip.duplicateBlobs++
-
-		default: // unused, don't care if it's a duplicate
+		default:
 			stats.size.unused += size
 			stats.blobs.unused++
-			ip.unusedSize += size
-			ip.unusedBlobs++
 		}
-
-		if !blob.IsCompressed() {
-			ip.uncompressed = true
-		}
-		// update indexPack
-		indexPack[blob.PackID] = ip
 	}

 	// Check if all used blobs have been found in index
@@ -303,6 +275,48 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
 		return errorIndexIncomplete
 	}

+	indexPack := make(map[restic.ID]packInfo)
+
+	// save computed pack header size
+	for pid, hdrSize := range pack.Size(ctx, repo.Index(), true) {
+		// initialize tpe with NumBlobTypes to indicate it's not set
+		indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
+	}
+
+	// iterate over all blobs in index to generate packInfo
+	for blob := range repo.Index().Each(ctx) {
+		ip := indexPack[blob.PackID]
+
+		// Set blob type if not yet set
+		if ip.tpe == restic.NumBlobTypes {
+			ip.tpe = blob.Type
+		}
+
+		// mark mixed packs with "Invalid blob type"
+		if ip.tpe != blob.Type {
+			ip.tpe = restic.InvalidBlob
+		}
+
+		bh := blob.BlobHandle
+		size := uint64(blob.Length)
+		switch {
+		case duplicateBlobs.Has(bh): // duplicate blob
+			ip.usedSize += size
+			ip.duplicateBlobs++
+		case keepBlobs.Has(bh): // used blob, not duplicate
+			ip.usedSize += size
+			ip.usedBlobs++
+		default: // unused blob
+			ip.unusedSize += size
+			ip.unusedBlobs++
+		}
+		if !blob.IsCompressed() {
+			ip.uncompressed = true
+		}
+		// update indexPack
+		indexPack[blob.PackID] = ip
+	}
+
 	Verbosef("collecting packs for deletion and repacking\n")
 	removePacksFirst := restic.NewIDSet()
 	removePacks := restic.NewIDSet()
--- a/cmd/restic/cmd_rebuild_index.go
+++ b/cmd/restic/cmd_rebuild_index.go
@@ -98,7 +98,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor
 		if err != nil {
 			return err
 		}
-		packSizeFromIndex = pack.Size(ctx, repo.Index())
+		packSizeFromIndex = pack.Size(ctx, repo.Index(), false)
 	}

 	Verbosef("getting pack files to read...\n")
--- a/internal/checker/checker.go
+++ b/internal/checker/checker.go
@@ -131,7 +131,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
 	}

 	// compute pack size using index entries
-	c.packs = pack.Size(ctx, c.masterIndex)
+	c.packs = pack.Size(ctx, c.masterIndex, false)

 	debug.Log("checking for duplicate packs")
 	for packID := range c.packs {
--- a/internal/pack/pack.go
+++ b/internal/pack/pack.go
@@ -177,8 +177,8 @@ var (
 const (
 	// size of the header-length field at the end of the file; it is a uint32
 	headerLengthSize = 4
-	// HeaderSize is the header's constant overhead (independent of #entries)
-	HeaderSize = headerLengthSize + crypto.Extension
+	// headerSize is the header's constant overhead (independent of #entries)
+	headerSize = headerLengthSize + crypto.Extension

 	// MaxHeaderSize is the max size of header including header-length field
 	MaxHeaderSize = 16*1024*1024 + headerLengthSize
@@ -242,7 +242,7 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
 	// eagerly download eagerEntries header entries as part of header-length request.
 	// only make second request if actual number of entries is greater than eagerEntries

-	eagerSize := eagerEntries*int(entrySize) + HeaderSize
+	eagerSize := eagerEntries*int(entrySize) + headerSize
 	b, c, err := readRecords(rd, size, eagerSize)
 	if err != nil {
 		return nil, err
@@ -349,7 +349,7 @@ func CalculateEntrySize(blob restic.Blob) int {
 }

 func CalculateHeaderSize(blobs []restic.Blob) int {
-	size := HeaderSize
+	size := headerSize
 	for _, blob := range blobs {
 		size += CalculateEntrySize(blob)
 	}
@@ -357,17 +357,20 @@ func CalculateHeaderSize(blobs []restic.Blob) int {
 }

 // Size returns the size of all packs computed by index information.
+// If onlyHdr is set to true, only the size of the header is returned.
 // Note that this function only gives correct sizes, if there are no
 // duplicates in the index.
-func Size(ctx context.Context, mi restic.MasterIndex) map[restic.ID]int64 {
+func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.ID]int64 {
 	packSize := make(map[restic.ID]int64)

 	for blob := range mi.Each(ctx) {
 		size, ok := packSize[blob.PackID]
 		if !ok {
-			size = HeaderSize
+			size = headerSize
+		}
+		if !onlyHdr {
+			size += int64(blob.Length)
 		}
-		size += int64(blob.Length)
 		packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
 	}