diff --git a/changelog/unreleased/issue-3918 b/changelog/unreleased/issue-3918 new file mode 100644 index 000000000..2cc60bf0d --- /dev/null +++ b/changelog/unreleased/issue-3918 @@ -0,0 +1,12 @@ +Bugfix: Correct prune statistics for partially compressed repositories + +In a partially compressed repository, one data blob can exist both in an +uncompressed and a compressed version. This caused the prune statistics to +become inaccurate and for example report a too high value for the unused size: + +> unused size after prune: 16777215.991 TiB + +This has been fixed. + +https://github.com/restic/restic/issues/3918 +https://github.com/restic/restic/pull/3980 diff --git a/cmd/restic/cmd_prune.go b/cmd/restic/cmd_prune.go index b2344d3cd..08b847e27 100644 --- a/cmd/restic/cmd_prune.go +++ b/cmd/restic/cmd_prune.go @@ -307,7 +307,6 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re // Thus 0 == blob is missing, 1 == blob exists once, >= 2 == duplicates exist idx.Each(ctx, func(blob restic.PackedBlob) { bh := blob.BlobHandle - size := uint64(blob.Length) count, ok := usedBlobs[bh] if ok { if count < math.MaxUint8 { @@ -317,19 +316,7 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re count++ } - if count == 1 { - stats.size.used += size - stats.blobs.used++ - } else { - // duplicate if counted more than once - stats.size.duplicate += size - stats.blobs.duplicate++ - } - usedBlobs[bh] = count - } else { - stats.size.unused += size - stats.blobs.unused++ } }) @@ -383,12 +370,22 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re // mark as unused for now, we will later on select one copy ip.unusedSize += size ip.unusedBlobs++ + + // count as duplicate, will later on change one copy to be counted as used + stats.size.duplicate += size + stats.blobs.duplicate++ case dupCount == 1: // used blob, not duplicate ip.usedSize += size ip.usedBlobs++ + + stats.size.used += size + stats.blobs.used++ default: // unused blob ip.unusedSize += size ip.unusedBlobs++ + + stats.size.unused += size + stats.blobs.unused++ } if !blob.IsCompressed() { ip.uncompressed = true @@ -421,6 +418,11 @@ func packInfoFromIndex(ctx context.Context, idx restic.MasterIndex, usedBlobs re ip.usedBlobs++ ip.unusedSize -= size ip.unusedBlobs-- + // same for the global statistics + stats.size.used += size + stats.blobs.used++ + stats.size.duplicate -= size + stats.blobs.duplicate-- // let other occurences remain marked as unused usedBlobs[bh] = 1 default: @@ -686,6 +688,7 @@ func printPruneStats(gopts GlobalOptions, stats pruneStats) error { func doPrune(ctx context.Context, opts PruneOptions, gopts GlobalOptions, repo restic.Repository, plan prunePlan) (err error) { if opts.DryRun { if !gopts.JSON && gopts.verbosity >= 2 { + Printf("Repeated prune dry-runs can report slightly different amounts of data to keep or repack. This is expected behavior.\n\n") if len(plan.removePacksFirst) > 0 { Printf("Would have removed the following unreferenced packs:\n%v\n\n", plan.removePacksFirst) }