lib/scanner, vendor: Fix previous commit

The approach in the previous commit doesn't work, as the rolling function
is not the same as the non-rolling one. Instead this uses an improved
version of the rolling adler32 to accomplish the same thing. (PR filed on
upstream, so should be able to use that directly in the future.)
This commit is contained in:
Jakob Borg 2017-01-18 11:57:01 +01:00
parent 9b1c592fb7
commit bd1c29ee32
4 changed files with 81 additions and 10 deletions

View File

@ -9,9 +9,9 @@ package scanner
import (
"bytes"
"fmt"
"hash/adler32"
"io"
"github.com/chmduquesne/rollinghash/adler32"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syncthing/syncthing/lib/sha256"
)

View File

@ -8,9 +8,13 @@ package scanner
import (
"bytes"
"crypto/rand"
"fmt"
origAdler32 "hash/adler32"
"testing"
"testing/quick"
rollingAdler32 "github.com/chmduquesne/rollinghash/adler32"
"github.com/syncthing/syncthing/lib/protocol"
)
@ -160,3 +164,66 @@ func TestDiffEmpty(t *testing.T) {
}
}
}
// TestAdler32Variants checks that the standard library adler32 and the
// rolling adler32 produce the same sum for identical input, and that
// rolling a fixed-size window forward gives the same sum as hashing that
// window directly with the rolling implementation.
func TestAdler32Variants(t *testing.T) {
	// Verify that the two adler32 functions give matching results for a few
	// different blocks of data.

	hf1 := origAdler32.New()
	hf2 := rollingAdler32.New()

	// checkFn feeds data to both hashes and reports whether the sums agree.
	// Both hashes are reset afterwards so every call starts from a clean
	// state.
	checkFn := func(data []byte) bool {
		hf1.Write(data)
		sum1 := hf1.Sum32()

		hf2.Write(data)
		sum2 := hf2.Sum32()

		hf1.Reset()
		hf2.Reset()

		return sum1 == sum2
	}

	// protocol block sized data
	data := make([]byte, protocol.BlockSize)
	for i := 0; i < 5; i++ {
		// Without fresh random data the comparison below is meaningless, so
		// a failed read aborts the test instead of being ignored.
		if _, err := rand.Read(data); err != nil {
			t.Fatalf("Reading random data: %v", err)
		}
		if !checkFn(data) {
			t.Errorf("Hash mismatch on block sized data")
		}
	}

	// random small blocks
	if err := quick.Check(checkFn, nil); err != nil {
		t.Error(err)
	}

	// rolling should have the same result as the individual blocks
	// themselves. Which is not the same as the original non-rolling adler32
	// blocks.

	windowSize := 128

	hf2.Reset()

	// hf3 is the hash that gets rolled one byte at a time; it starts out
	// covering the first window of the data.
	hf3 := rollingAdler32.New()
	hf3.Write(data[:windowSize])

	for i := windowSize; i < len(data); i++ {
		if i%windowSize == 0 {
			// let the reference function catch up
			hf2.Write(data[i-windowSize : i])

			// verify that they are in sync with the rolling function
			sum2 := hf2.Sum32()
			sum3 := hf3.Sum32()
			t.Logf("At i=%d, sum2=%08x, sum3=%08x", i, sum2, sum3)
			if sum2 != sum3 {
				t.Errorf("Mismatch after roll; i=%d, sum2=%08x, sum3=%08x", i, sum2, sum3)
				break
			}
		}
		hf3.Roll(data[i])
	}
}

View File

@ -3,6 +3,7 @@
package adler32
import (
"hash"
vanilla "hash/adler32"
"github.com/chmduquesne/rollinghash"
@ -22,6 +23,8 @@ type digest struct {
window []byte
oldest int
n uint32
vanilla hash.Hash32
}
// Reset resets the Hash to its initial state.
@ -37,7 +40,7 @@ func (d *digest) Reset() {
// only used to determine which is the oldest element (leaving the
// window). The calls to Roll() do not recompute the whole checksum.
func New() rollinghash.Hash32 {
return &digest{a: 1, b: 0, window: nil, oldest: 0}
return &digest{a: 1, b: 0, window: nil, oldest: 0, vanilla: vanilla.New()}
}
// Size returns the number of bytes Sum will return.
@ -53,13 +56,15 @@ func (d *digest) BlockSize() int { return 1 }
// running hash. It never returns an error.
func (d *digest) Write(p []byte) (int, error) {
// Copy the window
d.window = make([]byte, len(p))
if len(d.window) != len(p) {
d.window = make([]byte, len(p))
}
copy(d.window, p)
// Piggy-back on the core implementation
h := vanilla.New()
h.Write(p)
s := h.Sum32()
d.vanilla.Reset()
d.vanilla.Write(p)
s := d.vanilla.Sum32()
d.a, d.b = s&0xffff, s>>16
d.n = uint32(len(p)) % mod
return len(d.window), nil

7
vendor/manifest vendored
View File

@ -46,10 +46,9 @@
},
{
"importpath": "github.com/chmduquesne/rollinghash",
"repository": "https://github.com/chmduquesne/rollinghash",
"repository": "https://github.com/kastelo/rollinghash",
"vcs": "git",
"revision": "88b86a92826991b14d01fb43456909fcb8a76b8b",
"branch": "master",
"branch": "reducealloc",
"notests": true
},
{
@ -422,4 +421,4 @@
"notests": true
}
]
}
}