rebuild-index: correctly rebuild index for mixed packs

For mixed packs, data and tree blobs were stored in separate index entries. This results in warning from the check command and maybe other problems.
2025-02-03 20:38:28 +00:00 · 2022-05-24 22:30:42 +02:00 · 2022-05-24 22:30:42 +02:00 · 04c23fa95d
commit 04c23fa95d
parent bb5f196b09
4 changed files with 72 additions and 14 deletions
--- a/changelog/unreleased/pull-3772
+++ b/changelog/unreleased/pull-3772
@ -0,0 +1,13 @@
+Bugfix: Correctly rebuild index for legacy repositories
+
+After running `rebuild-index` on a legacy repository containing mixed pack
+files, that is pack files with store both metadata and file data, `check`
+printed warnings like `pack 12345678 contained in several indexes: ...`.  The
+warning is not critical. It has been fixed by properly handling mixed pack
+files while rebuilding the index.
+
+Running `prune` for such legacy repositories will also fix the warning by
+reorganizing the pack files which cause the warning.
+
+https://github.com/restic/restic/pull/3772
+https://forum.restic.net/t/help-in-recovery-a-corrupted-repository/5044/13
--- a/internal/repository/index.go
+++ b/internal/repository/index.go
@ -247,8 +247,8 @@ func (idx *Index) Each(ctx context.Context) <-chan restic.PackedBlob {
 }

 type EachByPackResult struct {
-	packID restic.ID
-	blobs  []restic.Blob
+	PackID restic.ID
+	Blobs  []restic.Blob
 }

 // EachByPack returns a channel that yields all blobs known to the index
@ -269,29 +269,35 @@ func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-
 			close(ch)
 		}()

+		byPack := make(map[restic.ID][][]*indexEntry)
+
 		for typ := range idx.byType {
-			byPack := make(map[restic.ID][]*indexEntry)
 			m := &idx.byType[typ]
 			m.foreach(func(e *indexEntry) bool {
 				packID := idx.packs[e.packIndex]
+				if _, ok := byPack[packID]; !ok {
+					byPack[packID] = make([][]*indexEntry, restic.NumBlobTypes)
+				}
 				if !idx.final || !packBlacklist.Has(packID) {
-					byPack[packID] = append(byPack[packID], e)
+					byPack[packID][typ] = append(byPack[packID][typ], e)
 				}
 				return true
 			})
+		}

-			for packID, pack := range byPack {
-				var result EachByPackResult
-				result.packID = packID
+		for packID, packByType := range byPack {
+			var result EachByPackResult
+			result.PackID = packID
+			for typ, pack := range packByType {
 				for _, e := range pack {
-					result.blobs = append(result.blobs, idx.toPackedBlob(e, restic.BlobType(typ)).Blob)
-				}
-				select {
-				case <-ctx.Done():
-					return
-				case ch <- result:
+					result.Blobs = append(result.Blobs, idx.toPackedBlob(e, restic.BlobType(typ)).Blob)
 				}
 			}
+			select {
+			case <-ctx.Done():
+				return
+			case ch <- result:
+			}
 		}
 	}()

--- a/internal/repository/index_test.go
+++ b/internal/repository/index_test.go
@ -574,3 +574,42 @@ func TestIndexHas(t *testing.T) {
 	rtest.Assert(t, !idx.Has(restic.NewRandomBlobHandle()), "Index reports having a data blob not added to it")
 	rtest.Assert(t, !idx.Has(restic.BlobHandle{ID: tests[0].ID, Type: restic.TreeBlob}), "Index reports having a tree blob added to it with the same id as a data blob")
 }
+
+func TestMixedEachByPack(t *testing.T) {
+	idx := repository.NewIndex()
+
+	expected := make(map[restic.ID]int)
+	// create 50 packs with 2 blobs each
+	for i := 0; i < 50; i++ {
+		packID := restic.NewRandomID()
+		expected[packID] = 1
+		blobs := []restic.Blob{
+			{
+				BlobHandle: restic.BlobHandle{Type: restic.DataBlob, ID: restic.NewRandomID()},
+				Offset:     0,
+				Length:     42,
+			},
+			{
+				BlobHandle: restic.BlobHandle{Type: restic.TreeBlob, ID: restic.NewRandomID()},
+				Offset:     42,
+				Length:     43,
+			},
+		}
+		idx.StorePack(packID, blobs)
+	}
+
+	reported := make(map[restic.ID]int)
+	for bp := range idx.EachByPack(context.TODO(), restic.NewIDSet()) {
+		reported[bp.PackID]++
+
+		rtest.Equals(t, 2, len(bp.Blobs)) // correct blob count
+		if bp.Blobs[0].Offset > bp.Blobs[1].Offset {
+			bp.Blobs[1], bp.Blobs[0] = bp.Blobs[0], bp.Blobs[1]
+		}
+		b0 := bp.Blobs[0]
+		rtest.Assert(t, b0.Type == restic.DataBlob && b0.Offset == 0 && b0.Length == 42, "wrong blob", b0)
+		b1 := bp.Blobs[1]
+		rtest.Assert(t, b1.Type == restic.TreeBlob && b1.Offset == 42 && b1.Length == 43, "wrong blob", b1)
+	}
+	rtest.Equals(t, expected, reported)
+}
--- a/internal/repository/master_index.go
+++ b/internal/repository/master_index.go
@ -339,7 +339,7 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.SaverUnpacked, pack
 			debug.Log("adding index %d", i)

 			for pbs := range idx.EachByPack(ctx, packBlacklist) {
-				newIndex.StorePack(pbs.packID, pbs.blobs)
+				newIndex.StorePack(pbs.PackID, pbs.Blobs)
 				p.Add(1)
 				if IndexFull(newIndex, mi.compress) {
 					select {