From 539599d1f108170911d330a5ff9f3a1163311637 Mon Sep 17 00:00:00 2001
From: Matthew Dawson <matthew@mjdsystems.ca>
Date: Mon, 8 Jan 2018 13:38:21 -0500
Subject: [PATCH] repository/index: Optimize index.Has()

When backing up several million files (>14M tested here) with few changes,
a large amount of time is spent failing to find an id in an index and creating
an error to signify this.  Since this is checked using the Has method,
which doesn't use this error, this time creating the error is wasted.

Instead, directly check if the given id and type are present in the index.
This also avoids reporting all the packs containing this blob, further
reducing cpu usage.
---
 internal/repository/index.go      | 11 +++----
 internal/repository/index_test.go | 49 +++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/internal/repository/index.go b/internal/repository/index.go
index 70664fad0..b42ea6baf 100644
--- a/internal/repository/index.go
+++ b/internal/repository/index.go
@@ -169,12 +169,13 @@ func (idx *Index) ListPack(id restic.ID) (list []restic.PackedBlob) {
 
 // Has returns true iff the id is listed in the index.
 func (idx *Index) Has(id restic.ID, tpe restic.BlobType) bool {
-	_, err := idx.Lookup(id, tpe)
-	if err == nil {
-		return true
-	}
+	idx.m.Lock()
+	defer idx.m.Unlock()
 
-	return false
+	h := restic.BlobHandle{ID: id, Type: tpe}
+
+	_, ok := idx.pack[h]
+	return ok
 }
 
 // LookupSize returns the length of the plaintext content of the blob with the
diff --git a/internal/repository/index_test.go b/internal/repository/index_test.go
index 38c3f57bf..f2e0746c0 100644
--- a/internal/repository/index_test.go
+++ b/internal/repository/index_test.go
@@ -434,3 +434,52 @@ func BenchmarkIndexHasKnown(b *testing.B) {
 		idx.Has(lookupID, restic.DataBlob)
 	}
 }
+
+func TestIndexHas(t *testing.T) {
+	type testEntry struct {
+		id             restic.ID
+		pack           restic.ID
+		tpe            restic.BlobType
+		offset, length uint
+	}
+	tests := []testEntry{}
+
+	idx := repository.NewIndex()
+
+	// create 50 packs with 20 blobs each
+	for i := 0; i < 50; i++ {
+		packID := restic.NewRandomID()
+
+		pos := uint(0)
+		for j := 0; j < 20; j++ {
+			id := restic.NewRandomID()
+			length := uint(i*100 + j)
+			idx.Store(restic.PackedBlob{
+				Blob: restic.Blob{
+					Type:   restic.DataBlob,
+					ID:     id,
+					Offset: pos,
+					Length: length,
+				},
+				PackID: packID,
+			})
+
+			tests = append(tests, testEntry{
+				id:     id,
+				pack:   packID,
+				tpe:    restic.DataBlob,
+				offset: pos,
+				length: length,
+			})
+
+			pos += length
+		}
+	}
+
+	for _, testBlob := range tests {
+		rtest.Assert(t, idx.Has(testBlob.id, testBlob.tpe), "Index reports not having data blob added to it")
+	}
+
+	rtest.Assert(t, !idx.Has(restic.NewRandomID(), restic.DataBlob), "Index reports having a data blob not added to it")
+	rtest.Assert(t, !idx.Has(tests[0].id, restic.TreeBlob), "Index reports having a tree blob added to it with the same id as a data blob")
+}