mirror of
https://github.com/octoleo/restic.git
synced 2024-12-22 10:58:55 +00:00
prune: handle very high duplication of some blobs
Suggested-By: Alexander Weiss <alex@weissfam.de>
This commit is contained in:
parent
7478cbf70e
commit
9be1bd2acc
@ -1,10 +1,10 @@
|
|||||||
Enhancement: Improve `prune` in presence of duplicate blobs
|
Enhancement: Optimize handling of duplicate blobs in `prune`
|
||||||
|
|
||||||
|
Restic `prune` always used to repack all data files containing duplicate
|
||||||
|
blobs. This effectively removed all duplicates during prune. However, as a
|
||||||
|
consequence all these data files were repacked even if the unused repository
|
||||||
|
space threshold could be reached with less work.
|
||||||
|
|
||||||
Restic `prune` always used to repack all pack files containing duplicate
|
|
||||||
blobs. This effectively removed all duplicates during prune. However, one
|
|
||||||
of the consequences was that all those pack files were downloaded and
|
|
||||||
duplicate blobs did not contribute to the threshold for unused repository
|
|
||||||
space.
|
|
||||||
This is now changed and `prune` works nice and fast also if there are lots
|
This is now changed and `prune` works nice and fast also if there are lots
|
||||||
of duplicates.
|
of duplicates.
|
||||||
|
|
||||||
|
@ -258,12 +258,11 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||||||
count, ok := duplicateBlobs[bh]
|
count, ok := duplicateBlobs[bh]
|
||||||
if !ok {
|
if !ok {
|
||||||
count = 2 // this one is already the second blob!
|
count = 2 // this one is already the second blob!
|
||||||
} else {
|
} else if count < math.MaxUint8 {
|
||||||
|
// don't overflow, but saturate count at 255
|
||||||
|
// this can lead to a non-optimal pack selection, but won't cause
|
||||||
|
// problems otherwise
|
||||||
count++
|
count++
|
||||||
if count == 0 {
|
|
||||||
// catch uint8 overflow
|
|
||||||
panic("too many duplicates, prune can only handly up to 255!")
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
duplicateBlobs[bh] = count
|
duplicateBlobs[bh] = count
|
||||||
stats.size.duplicate += size
|
stats.size.duplicate += size
|
||||||
@ -326,9 +325,9 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||||||
}
|
}
|
||||||
|
|
||||||
// if duplicate blobs exist, those will be set to either "used" or "unused":
|
// if duplicate blobs exist, those will be set to either "used" or "unused":
|
||||||
// - mark only one occurency of duplicate blobs as used
|
// - mark only one occurrence of duplicate blobs as used
|
||||||
// - if there are already some used blobs in a pack, possibly mark duplicates in this pack as "used"
|
// - if there are already some used blobs in a pack, possibly mark duplicates in this pack as "used"
|
||||||
// - if there are no used blobs in a pack, possibly mark duplicates as "usused"
|
// - if there are no used blobs in a pack, possibly mark duplicates as "unused"
|
||||||
if len(duplicateBlobs) > 0 {
|
if len(duplicateBlobs) > 0 {
|
||||||
// iterate again over all blobs in index (this is pretty cheap, all in-mem)
|
// iterate again over all blobs in index (this is pretty cheap, all in-mem)
|
||||||
for blob := range repo.Index().Each(ctx) {
|
for blob := range repo.Index().Each(ctx) {
|
||||||
|
Loading…
Reference in New Issue
Block a user