From a8fdcf79b7caae3d80a0ae03307b42264e78b00a Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 1 Oct 2023 23:24:42 +0200 Subject: [PATCH 1/2] restorer: Make hardlink index generic This will allow reusing it for the stats command without regressing the memory usage due to storing an unnecessary file path. --- internal/restorer/hardlinks_index.go | 22 +++++++++++----------- internal/restorer/hardlinks_index_test.go | 6 +++--- internal/restorer/restorer.go | 6 +++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/internal/restorer/hardlinks_index.go b/internal/restorer/hardlinks_index.go index 9cf45975a..208a38ad0 100644 --- a/internal/restorer/hardlinks_index.go +++ b/internal/restorer/hardlinks_index.go @@ -10,20 +10,20 @@ type HardlinkKey struct { } // HardlinkIndex contains a list of inodes, devices these inodes are one, and associated file names. -type HardlinkIndex struct { +type HardlinkIndex[T any] struct { m sync.Mutex - Index map[HardlinkKey]string + Index map[HardlinkKey]T } // NewHardlinkIndex create a new index for hard links -func NewHardlinkIndex() *HardlinkIndex { - return &HardlinkIndex{ - Index: make(map[HardlinkKey]string), +func NewHardlinkIndex[T any]() *HardlinkIndex[T] { + return &HardlinkIndex[T]{ + Index: make(map[HardlinkKey]T), } } // Has checks wether the link already exist in the index. -func (idx *HardlinkIndex) Has(inode uint64, device uint64) bool { +func (idx *HardlinkIndex[T]) Has(inode uint64, device uint64) bool { idx.m.Lock() defer idx.m.Unlock() _, ok := idx.Index[HardlinkKey{inode, device}] @@ -32,25 +32,25 @@ func (idx *HardlinkIndex) Has(inode uint64, device uint64) bool { } // Add adds a link to the index. -func (idx *HardlinkIndex) Add(inode uint64, device uint64, name string) { +func (idx *HardlinkIndex[T]) Add(inode uint64, device uint64, value T) { idx.m.Lock() defer idx.m.Unlock() _, ok := idx.Index[HardlinkKey{inode, device}] if !ok { - idx.Index[HardlinkKey{inode, device}] = name + idx.Index[HardlinkKey{inode, device}] = value } } -// GetFilename obtains the filename from the index. -func (idx *HardlinkIndex) GetFilename(inode uint64, device uint64) string { +// Value obtains the filename from the index. +func (idx *HardlinkIndex[T]) Value(inode uint64, device uint64) T { idx.m.Lock() defer idx.m.Unlock() return idx.Index[HardlinkKey{inode, device}] } // Remove removes a link from the index. -func (idx *HardlinkIndex) Remove(inode uint64, device uint64) { +func (idx *HardlinkIndex[T]) Remove(inode uint64, device uint64) { idx.m.Lock() defer idx.m.Unlock() delete(idx.Index, HardlinkKey{inode, device}) diff --git a/internal/restorer/hardlinks_index_test.go b/internal/restorer/hardlinks_index_test.go index 75a2b83ee..31ce938f9 100644 --- a/internal/restorer/hardlinks_index_test.go +++ b/internal/restorer/hardlinks_index_test.go @@ -10,15 +10,15 @@ import ( // TestHardLinks contains various tests for HardlinkIndex. func TestHardLinks(t *testing.T) { - idx := restorer.NewHardlinkIndex() + idx := restorer.NewHardlinkIndex[string]() idx.Add(1, 2, "inode1-file1-on-device2") idx.Add(2, 3, "inode2-file2-on-device3") - sresult := idx.GetFilename(1, 2) + sresult := idx.Value(1, 2) rtest.Equals(t, sresult, "inode1-file1-on-device2") - sresult = idx.GetFilename(2, 3) + sresult = idx.Value(2, 3) rtest.Equals(t, sresult, "inode2-file2-on-device3") bresult := idx.Has(1, 2) diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 3c60aca1b..e973316c0 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -230,7 +230,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { } } - idx := NewHardlinkIndex() + idx := NewHardlinkIndex[string]() filerestorer := newFileRestorer(dst, res.repo.Backend().Load, res.repo.Key(), res.repo.Index().Lookup, res.repo.Connections(), res.sparse, res.progress) filerestorer.Error = res.Error @@ -319,8 +319,8 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { return res.restoreEmptyFileAt(node, target, location) } - if idx.Has(node.Inode, node.DeviceID) && idx.GetFilename(node.Inode, node.DeviceID) != location { - return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.GetFilename(node.Inode, node.DeviceID)), target, location) + if idx.Has(node.Inode, node.DeviceID) && idx.Value(node.Inode, node.DeviceID) != location { + return res.restoreHardlinkAt(node, filerestorer.targetPath(idx.Value(node.Inode, node.DeviceID)), target, location) } return res.restoreNodeMetadataTo(node, target, location) From 731b3a43574f81ad3f9d6204ac4f7d0cfff65699 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sun, 1 Oct 2023 23:27:09 +0200 Subject: [PATCH 2/2] stats: fix hardlink tracking in a snapshot inodes are only unique within a device. Use the HardlinkIndex from the restorer instead of the custom (broken) hashmap to correctly account for both inode and deviceID. --- changelog/unreleased/pull-4503 | 7 +++++++ cmd/restic/cmd_stats.go | 11 ++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 changelog/unreleased/pull-4503 diff --git a/changelog/unreleased/pull-4503 b/changelog/unreleased/pull-4503 new file mode 100644 index 000000000..3ce5c48e8 --- /dev/null +++ b/changelog/unreleased/pull-4503 @@ -0,0 +1,7 @@ +Bugfix: Correct hardlink handling in `stats` command + +If files on different devices had the same inode id, then the `stats` command +did not correctly calculate the snapshot size. This has been fixed. + +https://github.com/restic/restic/pull/4503 +https://forum.restic.net/t/possible-bug-in-stats/6461/8 diff --git a/cmd/restic/cmd_stats.go b/cmd/restic/cmd_stats.go index 6e1c7c2c2..a3e0cefc7 100644 --- a/cmd/restic/cmd_stats.go +++ b/cmd/restic/cmd_stats.go @@ -11,6 +11,7 @@ import ( "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/restorer" "github.com/restic/restic/internal/ui" "github.com/restic/restic/internal/ui/table" "github.com/restic/restic/internal/walker" @@ -201,8 +202,8 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest return restic.FindUsedBlobs(ctx, repo, restic.IDs{*snapshot.Tree}, stats.blobs, nil) } - uniqueInodes := make(map[uint64]struct{}) - err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, opts, stats, uniqueInodes)) + hardLinkIndex := restorer.NewHardlinkIndex[struct{}]() + err := walker.Walk(ctx, repo, *snapshot.Tree, restic.NewIDSet(), statsWalkTree(repo, opts, stats, hardLinkIndex)) if err != nil { return fmt.Errorf("walking tree %s: %v", *snapshot.Tree, err) } @@ -210,7 +211,7 @@ func statsWalkSnapshot(ctx context.Context, snapshot *restic.Snapshot, repo rest return nil } -func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContainer, uniqueInodes map[uint64]struct{}) walker.WalkFunc { +func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContainer, hardLinkIndex *restorer.HardlinkIndex[struct{}]) walker.WalkFunc { return func(parentTreeID restic.ID, npath string, node *restic.Node, nodeErr error) (bool, error) { if nodeErr != nil { return true, nodeErr @@ -269,8 +270,8 @@ func statsWalkTree(repo restic.Repository, opts StatsOptions, stats *statsContai // if inodes are present, only count each inode once // (hard links do not increase restore size) - if _, ok := uniqueInodes[node.Inode]; !ok || node.Inode == 0 { - uniqueInodes[node.Inode] = struct{}{} + if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 { + hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{}) stats.TotalSize += node.Size }