mirror of
https://github.com/octoleo/restic.git
synced 2025-01-03 07:12:28 +00:00
Merge pull request #3786 from greatroar/prune
restic prune: Merge three loops over the index
This commit is contained in:
commit
19581dbc18
@ -242,11 +242,26 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||||||
|
|
||||||
Verbosef("searching used packs...\n")
|
Verbosef("searching used packs...\n")
|
||||||
|
|
||||||
|
indexPack := make(map[restic.ID]packInfo)
|
||||||
keepBlobs := restic.NewBlobSet()
|
keepBlobs := restic.NewBlobSet()
|
||||||
duplicateBlobs := restic.NewBlobSet()
|
|
||||||
|
|
||||||
// iterate over all blobs in index to find out which blobs are duplicates
|
// iterate over all blobs in index to generate packInfo and find duplicates
|
||||||
for blob := range repo.Index().Each(ctx) {
|
for blob := range repo.Index().Each(ctx) {
|
||||||
|
ip, seen := indexPack[blob.PackID]
|
||||||
|
|
||||||
|
if seen {
|
||||||
|
// mark mixed packs with "Invalid blob type"
|
||||||
|
if ip.tpe != blob.Type {
|
||||||
|
ip.tpe = restic.InvalidBlob
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ip = packInfo{
|
||||||
|
tpe: blob.Type,
|
||||||
|
usedSize: pack.HeaderSize,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ip.usedSize += uint64(pack.CalculateEntrySize(blob.Blob))
|
||||||
|
|
||||||
bh := blob.BlobHandle
|
bh := blob.BlobHandle
|
||||||
size := uint64(blob.Length)
|
size := uint64(blob.Length)
|
||||||
switch {
|
switch {
|
||||||
@ -255,14 +270,27 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||||||
keepBlobs.Insert(bh)
|
keepBlobs.Insert(bh)
|
||||||
stats.size.used += size
|
stats.size.used += size
|
||||||
stats.blobs.used++
|
stats.blobs.used++
|
||||||
case keepBlobs.Has(bh): // duplicate blob
|
ip.usedSize += size
|
||||||
duplicateBlobs.Insert(bh)
|
ip.usedBlobs++
|
||||||
|
|
||||||
|
case keepBlobs.Has(bh): // duplicate of a blob that we want to keep
|
||||||
stats.size.duplicate += size
|
stats.size.duplicate += size
|
||||||
stats.blobs.duplicate++
|
stats.blobs.duplicate++
|
||||||
default:
|
ip.usedSize += size
|
||||||
|
ip.duplicateBlobs++
|
||||||
|
|
||||||
|
default: // unused, don't care if it's a duplicate
|
||||||
stats.size.unused += size
|
stats.size.unused += size
|
||||||
stats.blobs.unused++
|
stats.blobs.unused++
|
||||||
|
ip.unusedSize += size
|
||||||
|
ip.unusedBlobs++
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !blob.IsCompressed() {
|
||||||
|
ip.uncompressed = true
|
||||||
|
}
|
||||||
|
// update indexPack
|
||||||
|
indexPack[blob.PackID] = ip
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if all used blobs have been found in index
|
// Check if all used blobs have been found in index
|
||||||
@ -275,48 +303,6 @@ func prune(opts PruneOptions, gopts GlobalOptions, repo restic.Repository, usedB
|
|||||||
return errorIndexIncomplete
|
return errorIndexIncomplete
|
||||||
}
|
}
|
||||||
|
|
||||||
indexPack := make(map[restic.ID]packInfo)
|
|
||||||
|
|
||||||
// save computed pack header size
|
|
||||||
for pid, hdrSize := range pack.Size(ctx, repo.Index(), true) {
|
|
||||||
// initialize tpe with NumBlobTypes to indicate it's not set
|
|
||||||
indexPack[pid] = packInfo{tpe: restic.NumBlobTypes, usedSize: uint64(hdrSize)}
|
|
||||||
}
|
|
||||||
|
|
||||||
// iterate over all blobs in index to generate packInfo
|
|
||||||
for blob := range repo.Index().Each(ctx) {
|
|
||||||
ip := indexPack[blob.PackID]
|
|
||||||
|
|
||||||
// Set blob type if not yet set
|
|
||||||
if ip.tpe == restic.NumBlobTypes {
|
|
||||||
ip.tpe = blob.Type
|
|
||||||
}
|
|
||||||
|
|
||||||
// mark mixed packs with "Invalid blob type"
|
|
||||||
if ip.tpe != blob.Type {
|
|
||||||
ip.tpe = restic.InvalidBlob
|
|
||||||
}
|
|
||||||
|
|
||||||
bh := blob.BlobHandle
|
|
||||||
size := uint64(blob.Length)
|
|
||||||
switch {
|
|
||||||
case duplicateBlobs.Has(bh): // duplicate blob
|
|
||||||
ip.usedSize += size
|
|
||||||
ip.duplicateBlobs++
|
|
||||||
case keepBlobs.Has(bh): // used blob, not duplicate
|
|
||||||
ip.usedSize += size
|
|
||||||
ip.usedBlobs++
|
|
||||||
default: // unused blob
|
|
||||||
ip.unusedSize += size
|
|
||||||
ip.unusedBlobs++
|
|
||||||
}
|
|
||||||
if !blob.IsCompressed() {
|
|
||||||
ip.uncompressed = true
|
|
||||||
}
|
|
||||||
// update indexPack
|
|
||||||
indexPack[blob.PackID] = ip
|
|
||||||
}
|
|
||||||
|
|
||||||
Verbosef("collecting packs for deletion and repacking\n")
|
Verbosef("collecting packs for deletion and repacking\n")
|
||||||
removePacksFirst := restic.NewIDSet()
|
removePacksFirst := restic.NewIDSet()
|
||||||
removePacks := restic.NewIDSet()
|
removePacks := restic.NewIDSet()
|
||||||
|
@ -98,7 +98,7 @@ func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repositor
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
packSizeFromIndex = pack.Size(ctx, repo.Index(), false)
|
packSizeFromIndex = pack.Size(ctx, repo.Index())
|
||||||
}
|
}
|
||||||
|
|
||||||
Verbosef("getting pack files to read...\n")
|
Verbosef("getting pack files to read...\n")
|
||||||
|
@ -131,7 +131,7 @@ func (c *Checker) LoadIndex(ctx context.Context) (hints []error, errs []error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// compute pack size using index entries
|
// compute pack size using index entries
|
||||||
c.packs = pack.Size(ctx, c.masterIndex, false)
|
c.packs = pack.Size(ctx, c.masterIndex)
|
||||||
|
|
||||||
debug.Log("checking for duplicate packs")
|
debug.Log("checking for duplicate packs")
|
||||||
for packID := range c.packs {
|
for packID := range c.packs {
|
||||||
|
@ -177,8 +177,8 @@ var (
|
|||||||
const (
|
const (
|
||||||
// size of the header-length field at the end of the file; it is a uint32
|
// size of the header-length field at the end of the file; it is a uint32
|
||||||
headerLengthSize = 4
|
headerLengthSize = 4
|
||||||
// headerSize is the header's constant overhead (independent of #entries)
|
// HeaderSize is the header's constant overhead (independent of #entries)
|
||||||
headerSize = headerLengthSize + crypto.Extension
|
HeaderSize = headerLengthSize + crypto.Extension
|
||||||
|
|
||||||
// MaxHeaderSize is the max size of header including header-length field
|
// MaxHeaderSize is the max size of header including header-length field
|
||||||
MaxHeaderSize = 16*1024*1024 + headerLengthSize
|
MaxHeaderSize = 16*1024*1024 + headerLengthSize
|
||||||
@ -242,7 +242,7 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
|
|||||||
// eagerly download eagerEntries header entries as part of header-length request.
|
// eagerly download eagerEntries header entries as part of header-length request.
|
||||||
// only make second request if actual number of entries is greater than eagerEntries
|
// only make second request if actual number of entries is greater than eagerEntries
|
||||||
|
|
||||||
eagerSize := eagerEntries*int(entrySize) + headerSize
|
eagerSize := eagerEntries*int(entrySize) + HeaderSize
|
||||||
b, c, err := readRecords(rd, size, eagerSize)
|
b, c, err := readRecords(rd, size, eagerSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -349,7 +349,7 @@ func CalculateEntrySize(blob restic.Blob) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func CalculateHeaderSize(blobs []restic.Blob) int {
|
func CalculateHeaderSize(blobs []restic.Blob) int {
|
||||||
size := headerSize
|
size := HeaderSize
|
||||||
for _, blob := range blobs {
|
for _, blob := range blobs {
|
||||||
size += CalculateEntrySize(blob)
|
size += CalculateEntrySize(blob)
|
||||||
}
|
}
|
||||||
@ -357,20 +357,17 @@ func CalculateHeaderSize(blobs []restic.Blob) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Size returns the size of all packs computed by index information.
|
// Size returns the size of all packs computed by index information.
|
||||||
// If onlyHdr is set to true, only the size of the header is returned
|
|
||||||
// Note that this function only gives correct sizes, if there are no
|
// Note that this function only gives correct sizes, if there are no
|
||||||
// duplicates in the index.
|
// duplicates in the index.
|
||||||
func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.ID]int64 {
|
func Size(ctx context.Context, mi restic.MasterIndex) map[restic.ID]int64 {
|
||||||
packSize := make(map[restic.ID]int64)
|
packSize := make(map[restic.ID]int64)
|
||||||
|
|
||||||
for blob := range mi.Each(ctx) {
|
for blob := range mi.Each(ctx) {
|
||||||
size, ok := packSize[blob.PackID]
|
size, ok := packSize[blob.PackID]
|
||||||
if !ok {
|
if !ok {
|
||||||
size = headerSize
|
size = HeaderSize
|
||||||
}
|
}
|
||||||
if !onlyHdr {
|
|
||||||
size += int64(blob.Length)
|
size += int64(blob.Length)
|
||||||
}
|
|
||||||
packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
|
packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user