From 04c23fa95d834c4b0e9b875ad256a91da7519158 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Tue, 24 May 2022 22:30:42 +0200 Subject: [PATCH] rebuild-index: correctly rebuild index for mixed packs For mixed packs, data and tree blobs were stored in separate index entries. This results in warning from the check command and maybe other problems. --- changelog/unreleased/pull-3772 | 13 ++++++++++ internal/repository/index.go | 32 +++++++++++++---------- internal/repository/index_test.go | 39 +++++++++++++++++++++++++++++ internal/repository/master_index.go | 2 +- 4 files changed, 72 insertions(+), 14 deletions(-) create mode 100644 changelog/unreleased/pull-3772 diff --git a/changelog/unreleased/pull-3772 b/changelog/unreleased/pull-3772 new file mode 100644 index 000000000..dfcb46c98 --- /dev/null +++ b/changelog/unreleased/pull-3772 @@ -0,0 +1,13 @@ +Bugfix: Correctly rebuild index for legacy repositories + +After running `rebuild-index` on a legacy repository containing mixed pack +files, that is pack files with store both metadata and file data, `check` +printed warnings like `pack 12345678 contained in several indexes: ...`. The +warning is not critical. It has been fixed by properly handling mixed pack +files while rebuilding the index. + +Running `prune` for such legacy repositories will also fix the warning by +reorganizing the pack files which cause the warning. + +https://github.com/restic/restic/pull/3772 +https://forum.restic.net/t/help-in-recovery-a-corrupted-repository/5044/13 diff --git a/internal/repository/index.go b/internal/repository/index.go index 6676d0635..a35f8952e 100644 --- a/internal/repository/index.go +++ b/internal/repository/index.go @@ -247,8 +247,8 @@ func (idx *Index) Each(ctx context.Context) <-chan restic.PackedBlob { } type EachByPackResult struct { - packID restic.ID - blobs []restic.Blob + PackID restic.ID + Blobs []restic.Blob } // EachByPack returns a channel that yields all blobs known to the index @@ -269,29 +269,35 @@ func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <- close(ch) }() + byPack := make(map[restic.ID][][]*indexEntry) + for typ := range idx.byType { - byPack := make(map[restic.ID][]*indexEntry) m := &idx.byType[typ] m.foreach(func(e *indexEntry) bool { packID := idx.packs[e.packIndex] + if _, ok := byPack[packID]; !ok { + byPack[packID] = make([][]*indexEntry, restic.NumBlobTypes) + } if !idx.final || !packBlacklist.Has(packID) { - byPack[packID] = append(byPack[packID], e) + byPack[packID][typ] = append(byPack[packID][typ], e) } return true }) + } - for packID, pack := range byPack { - var result EachByPackResult - result.packID = packID + for packID, packByType := range byPack { + var result EachByPackResult + result.PackID = packID + for typ, pack := range packByType { for _, e := range pack { - result.blobs = append(result.blobs, idx.toPackedBlob(e, restic.BlobType(typ)).Blob) - } - select { - case <-ctx.Done(): - return - case ch <- result: + result.Blobs = append(result.Blobs, idx.toPackedBlob(e, restic.BlobType(typ)).Blob) } } + select { + case <-ctx.Done(): + return + case ch <- result: + } } }() diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go index dab52a6bc..233dde030 100644 --- a/internal/repository/index_test.go +++ b/internal/repository/index_test.go @@ -574,3 +574,42 @@ func TestIndexHas(t *testing.T) { rtest.Assert(t, !idx.Has(restic.NewRandomBlobHandle()), "Index reports having a data blob not added to it") rtest.Assert(t, !idx.Has(restic.BlobHandle{ID: tests[0].ID, Type: restic.TreeBlob}), "Index reports having a tree blob added to it with the same id as a data blob") } + +func TestMixedEachByPack(t *testing.T) { + idx := repository.NewIndex() + + expected := make(map[restic.ID]int) + // create 50 packs with 2 blobs each + for i := 0; i < 50; i++ { + packID := restic.NewRandomID() + expected[packID] = 1 + blobs := []restic.Blob{ + { + BlobHandle: restic.BlobHandle{Type: restic.DataBlob, ID: restic.NewRandomID()}, + Offset: 0, + Length: 42, + }, + { + BlobHandle: restic.BlobHandle{Type: restic.TreeBlob, ID: restic.NewRandomID()}, + Offset: 42, + Length: 43, + }, + } + idx.StorePack(packID, blobs) + } + + reported := make(map[restic.ID]int) + for bp := range idx.EachByPack(context.TODO(), restic.NewIDSet()) { + reported[bp.PackID]++ + + rtest.Equals(t, 2, len(bp.Blobs)) // correct blob count + if bp.Blobs[0].Offset > bp.Blobs[1].Offset { + bp.Blobs[1], bp.Blobs[0] = bp.Blobs[0], bp.Blobs[1] + } + b0 := bp.Blobs[0] + rtest.Assert(t, b0.Type == restic.DataBlob && b0.Offset == 0 && b0.Length == 42, "wrong blob", b0) + b1 := bp.Blobs[1] + rtest.Assert(t, b1.Type == restic.TreeBlob && b1.Offset == 42 && b1.Length == 43, "wrong blob", b1) + } + rtest.Equals(t, expected, reported) +} diff --git a/internal/repository/master_index.go b/internal/repository/master_index.go index 5b1b4754c..d6f0962d3 100644 --- a/internal/repository/master_index.go +++ b/internal/repository/master_index.go @@ -339,7 +339,7 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.SaverUnpacked, pack debug.Log("adding index %d", i) for pbs := range idx.EachByPack(ctx, packBlacklist) { - newIndex.StorePack(pbs.packID, pbs.blobs) + newIndex.StorePack(pbs.PackID, pbs.Blobs) p.Add(1) if IndexFull(newIndex, mi.compress) { select {