Merge pull request #3772 from MichaelEischer/fix-mixed-index

rebuild-index: correctly rebuild index for mixed packs
2024-11-25 06:07:44 +00:00 · 2022-07-02 20:10:02 +02:00 · 2022-07-02 20:10:02 +02:00 · 621023a50b
commit 621023a50b
parent 90e9c5c4cc 04c23fa95d
4 changed files with 72 additions and 14 deletions
--- a/changelog/unreleased/pull-3772
+++ b/changelog/unreleased/pull-3772
@ -0,0 +1,13 @@
+Bugfix: Correctly rebuild index for legacy repositories
+
+After running `rebuild-index` on a legacy repository containing mixed pack
+files, that is pack files with store both metadata and file data, `check`
+printed warnings like `pack 12345678 contained in several indexes: ...`.  The
+warning is not critical. It has been fixed by properly handling mixed pack
+files while rebuilding the index.
+
+Running `prune` for such legacy repositories will also fix the warning by
+reorganizing the pack files which cause the warning.
+
+https://github.com/restic/restic/pull/3772
+https://forum.restic.net/t/help-in-recovery-a-corrupted-repository/5044/13
--- a/internal/repository/index.go
+++ b/internal/repository/index.go
@ -247,8 +247,8 @@ func (idx *Index) Each(ctx context.Context) <-chan restic.PackedBlob {
 }

 type EachByPackResult struct {
-	packID restic.ID
-	blobs  []restic.Blob
+	PackID restic.ID
+	Blobs  []restic.Blob
 }

 // EachByPack returns a channel that yields all blobs known to the index
@ -269,29 +269,35 @@ func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-
 			close(ch)
 		}()

+		byPack := make(map[restic.ID][][]*indexEntry)
+
 		for typ := range idx.byType {
-			byPack := make(map[restic.ID][]*indexEntry)
 			m := &idx.byType[typ]
 			m.foreach(func(e *indexEntry) bool {
 				packID := idx.packs[e.packIndex]
+				if _, ok := byPack[packID]; !ok {
+					byPack[packID] = make([][]*indexEntry, restic.NumBlobTypes)
+				}
 				if !idx.final || !packBlacklist.Has(packID) {
-					byPack[packID] = append(byPack[packID], e)
+					byPack[packID][typ] = append(byPack[packID][typ], e)
 				}
 				return true
 			})
+		}

-			for packID, pack := range byPack {
-				var result EachByPackResult
-				result.packID = packID
+		for packID, packByType := range byPack {
+			var result EachByPackResult
+			result.PackID = packID
+			for typ, pack := range packByType {
 				for _, e := range pack {
-					result.blobs = append(result.blobs, idx.toPackedBlob(e, restic.BlobType(typ)).Blob)
-				}
-				select {
-				case <-ctx.Done():
-					return
-				case ch <- result:
+					result.Blobs = append(result.Blobs, idx.toPackedBlob(e, restic.BlobType(typ)).Blob)
 				}
 			}
+			select {
+			case <-ctx.Done():
+				return
+			case ch <- result:
+			}
 		}
 	}()

--- a/internal/repository/index_test.go
+++ b/internal/repository/index_test.go
@ -574,3 +574,42 @@ func TestIndexHas(t *testing.T) {
 	rtest.Assert(t, !idx.Has(restic.NewRandomBlobHandle()), "Index reports having a data blob not added to it")
 	rtest.Assert(t, !idx.Has(restic.BlobHandle{ID: tests[0].ID, Type: restic.TreeBlob}), "Index reports having a tree blob added to it with the same id as a data blob")
 }
+
+func TestMixedEachByPack(t *testing.T) {
+	idx := repository.NewIndex()
+
+	expected := make(map[restic.ID]int)
+	// create 50 packs with 2 blobs each
+	for i := 0; i < 50; i++ {
+		packID := restic.NewRandomID()
+		expected[packID] = 1
+		blobs := []restic.Blob{
+			{
+				BlobHandle: restic.BlobHandle{Type: restic.DataBlob, ID: restic.NewRandomID()},
+				Offset:     0,
+				Length:     42,
+			},
+			{
+				BlobHandle: restic.BlobHandle{Type: restic.TreeBlob, ID: restic.NewRandomID()},
+				Offset:     42,
+				Length:     43,
+			},
+		}
+		idx.StorePack(packID, blobs)
+	}
+
+	reported := make(map[restic.ID]int)
+	for bp := range idx.EachByPack(context.TODO(), restic.NewIDSet()) {
+		reported[bp.PackID]++
+
+		rtest.Equals(t, 2, len(bp.Blobs)) // correct blob count
+		if bp.Blobs[0].Offset > bp.Blobs[1].Offset {
+			bp.Blobs[1], bp.Blobs[0] = bp.Blobs[0], bp.Blobs[1]
+		}
+		b0 := bp.Blobs[0]
+		rtest.Assert(t, b0.Type == restic.DataBlob && b0.Offset == 0 && b0.Length == 42, "wrong blob", b0)
+		b1 := bp.Blobs[1]
+		rtest.Assert(t, b1.Type == restic.TreeBlob && b1.Offset == 42 && b1.Length == 43, "wrong blob", b1)
+	}
+	rtest.Equals(t, expected, reported)
+}
--- a/internal/repository/master_index.go
+++ b/internal/repository/master_index.go
@ -339,7 +339,7 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.SaverUnpacked, pack
 			debug.Log("adding index %d", i)

 			for pbs := range idx.EachByPack(ctx, packBlacklist) {
-				newIndex.StorePack(pbs.packID, pbs.blobs)
+				newIndex.StorePack(pbs.PackID, pbs.Blobs)
 				p.Add(1)
 				if IndexFull(newIndex, mi.compress) {
 					select {