lib/model, lib/weakhash: Hash using adler32, add heuristic in puller

Adler32 is much faster, and the heuristic avoids the obvious cases where it will not help.

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/3872

This commit is contained in:
parent 920274bce4
commit 29d010ec0e
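The weak hash produced after this change is a plain Adler-32 sum, so a block can be located inside an existing file cheaply before the strong SHA-256 comparison runs. The following standalone sketch is not part of the commit; it assumes the lib/weakhash import path and uses only the Find signature shown in the diff below, plus the standard library hash/adler32:

package main

import (
	"bytes"
	"fmt"
	"hash/adler32" // standard library Adler-32; the vendored rolling variant produces the same sums

	"github.com/syncthing/syncthing/lib/weakhash"
)

func main() {
	const blockSize = 4
	oldFile := []byte("abcdefghij")

	// Weak hash of the block we are looking for ("bcde"), exactly as the
	// scanner now computes it.
	want := adler32.Checksum([]byte("bcde")) // 65143183, cf. TestFinder below

	// Find scans oldFile with a rolling window of blockSize bytes and
	// returns, per requested hash, every offset where the window matches.
	offsets, err := weakhash.Find(bytes.NewReader(oldFile), []uint32{want}, blockSize)
	if err != nil {
		panic(err)
	}
	fmt.Println(offsets[want]) // expected: [1], since "bcde" starts at offset 1
}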
@@ -108,6 +108,7 @@ func TestDeviceConfig(t *testing.T) {
			Versioning: VersioningConfiguration{
				Params: map[string]string{},
			},
			WeakHashThresholdPct: 25,
		},
	}
@@ -40,8 +40,8 @@ type FolderConfiguration struct {
	DisableSparseFiles   bool   `xml:"disableSparseFiles" json:"disableSparseFiles"`
	DisableTempIndexes   bool   `xml:"disableTempIndexes" json:"disableTempIndexes"`
	Fsync                bool   `xml:"fsync" json:"fsync"`
	DisableWeakHash      bool   `xml:"disableWeakHash" json:"disableWeakHash"`
	Paused               bool   `xml:"paused" json:"paused"`
	WeakHashThresholdPct int    `xml:"weakHashThresholdPct" json:"weakHashThresholdPct"` // Use weak hash if more than X percent of the file has changed. Set to -1 to always use weak hash.

	cachedPath string
@@ -146,6 +146,10 @@ func (f *FolderConfiguration) prepare() {
	if f.Versioning.Params == nil {
		f.Versioning.Params = make(map[string]string)
	}

	if f.WeakHashThresholdPct == 0 {
		f.WeakHashThresholdPct = 25
	}
}

func (f *FolderConfiguration) cleanedPath() string {
@@ -47,6 +47,7 @@ type pullBlockState struct {
type copyBlocksState struct {
	*sharedPullerState
	blocks []protocol.BlockInfo
	have   int
}

// Which filemode bits to preserve
@@ -1003,7 +1004,9 @@ func (f *sendReceiveFolder) renameFile(source, target protocol.FileInfo) {
func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, finisherChan chan<- *sharedPullerState) {
	curFile, hasCurFile := f.model.CurrentFolderFile(f.folderID, file.Name)

	if hasCurFile && len(curFile.Blocks) == len(file.Blocks) && scanner.BlocksEqual(curFile.Blocks, file.Blocks) {
	have, need := scanner.BlockDiff(curFile.Blocks, file.Blocks)

	if hasCurFile && len(need) == 0 {
		// We are supposed to copy the entire file, and then fetch nothing. We
		// are only updating metadata, so we don't actually *need* to make the
		// copy.
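For reference, the have/need split that scanner.BlockDiff supplies to handleFile above can be pictured roughly as follows. This is an illustrative sketch only, assuming a positional comparison of the two block lists; it is not the lib/scanner implementation, but it agrees with the TestDiffEmpty cases added later in this diff:

package main

import (
	"bytes"
	"fmt"

	"github.com/syncthing/syncthing/lib/protocol"
)

// blockDiffSketch illustrates (hypothetically) the have/need split: "have"
// are target blocks already present at the same position in the source,
// "need" are the blocks that still have to be copied or pulled.
func blockDiffSketch(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) {
	if len(tgt) == 0 {
		return nil, nil
	}
	for i, b := range tgt {
		if i < len(src) && bytes.Equal(src[i].Hash, b.Hash) {
			have = append(have, b) // block can be reused as-is
		} else {
			need = append(need, b) // block must be fetched or copied
		}
	}
	return have, need
}

func main() {
	blk := []protocol.BlockInfo{{Offset: 3, Size: 1}}
	h, n := blockDiffSketch(nil, blk)
	fmt.Println(len(h), len(n)) // 0 1, matching the TestDiffEmpty case below
}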
@@ -1158,6 +1161,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c
	cs := copyBlocksState{
		sharedPullerState: &s,
		blocks:            blocks,
		have:              len(have),
	}
	copyChan <- cs
}
@@ -1216,7 +1220,12 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch
	f.model.fmut.RUnlock()

	var weakHashFinder *weakhash.Finder
	if !f.DisableWeakHash {
		blocksPercentChanged := 0
		if tot := len(state.file.Blocks); tot > 0 {
			blocksPercentChanged = (tot - state.have) * 100 / tot
		}

		if blocksPercentChanged >= f.WeakHashThresholdPct {
			hashesToFind := make([]uint32, 0, len(state.blocks))
			for _, block := range state.blocks {
				if block.WeakHash != 0 {
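The heuristic introduced in this hunk boils down to a single percentage check; the sketch below (not part of the commit) isolates it. DisableWeakHash still short-circuits the whole branch, a WeakHashThresholdPct of -1 always enables weak hashing, and 101 effectively disables it, which is how the tests below exercise both paths:

package main

import "fmt"

// useWeakHash mirrors the heuristic added to copierRoutine above: weak
// hashing only pays off when a large enough share of the file changed, so
// compare the changed-block percentage against WeakHashThresholdPct
// (a value of 0 is normalised to 25 in prepare(); -1 means "always").
func useWeakHash(totalBlocks, haveBlocks, thresholdPct int) bool {
	blocksPercentChanged := 0
	if totalBlocks > 0 {
		blocksPercentChanged = (totalBlocks - haveBlocks) * 100 / totalBlocks
	}
	return blocksPercentChanged >= thresholdPct
}

func main() {
	fmt.Println(useWeakHash(100, 90, 25)) // 10% changed  -> false, skip weak hashing
	fmt.Println(useWeakHash(100, 40, 25)) // 60% changed  -> true, build the Finder
	fmt.Println(useWeakHash(100, 99, -1)) // threshold -1 -> always true
}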
@@ -322,7 +322,7 @@ func TestWeakHash(t *testing.T) {
	go fo.copierRoutine(copyChan, pullChan, finisherChan)

	// Test 1 - no weak hashing, file gets fully repulled (`expectBlocks` pulls).
	fo.DisableWeakHash = true
	fo.WeakHashThresholdPct = 101
	fo.handleFile(desiredFile, copyChan, finisherChan)

	var pulls []pullBlockState
@@ -350,7 +350,7 @@ func TestWeakHash(t *testing.T) {
	}

	// Test 2 - using weak hash, expectPulls blocks pulled.
	fo.DisableWeakHash = false
	fo.WeakHashThresholdPct = -1
	fo.handleFile(desiredFile, copyChan, finisherChan)

	pulls = pulls[:0]
@@ -11,9 +11,9 @@ import (
	"fmt"
	"io"

	"github.com/chmduquesne/rollinghash/adler32"
	"github.com/syncthing/syncthing/lib/protocol"
	"github.com/syncthing/syncthing/lib/sha256"
	"github.com/syncthing/syncthing/lib/weakhash"
)

var SHA256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55}
@@ -26,7 +26,8 @@ type Counter interface {
func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]protocol.BlockInfo, error) {
	hf := sha256.New()
	hashLength := hf.Size()
	whf := weakhash.NewHash(blocksize)
	whf := adler32.New()
	mhf := io.MultiWriter(hf, whf)

	var blocks []protocol.BlockInfo
	var hashes, thisHash []byte
@@ -46,7 +47,7 @@ func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]prot
	var offset int64
	for {
		lr := io.LimitReader(r, int64(blocksize))
		n, err := io.CopyBuffer(hf, io.TeeReader(lr, whf), buf)
		n, err := io.CopyBuffer(mhf, lr, buf)
		if err != nil {
			return nil, err
		}
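Blocks() now feeds every block through the strong and the weak hash in a single pass via io.MultiWriter. A simplified standalone version using only the standard library (crypto/sha256 and hash/adler32 stand in for syncthing's lib/sha256 wrapper and the vendored rolling adler32, and io.CopyN replaces the LimitReader/CopyBuffer pair) looks like this:

package main

import (
	"bytes"
	"crypto/sha256" // stand-in for syncthing's lib/sha256 wrapper
	"fmt"
	"hash/adler32" // stand-in for the vendored rolling adler32; Sum32 values match
	"io"
)

func main() {
	const blockSize = 4
	data := bytes.NewReader([]byte("abcdefgh"))

	for {
		strong := sha256.New()
		weak := adler32.New()
		// One read per block feeds both hashes, as Blocks() does via io.MultiWriter.
		n, err := io.CopyN(io.MultiWriter(strong, weak), data, blockSize)
		if n > 0 {
			fmt.Printf("block: strong=%x weak=%d\n", strong.Sum(nil), weak.Sum32())
		}
		if err == io.EOF {
			break
		} else if err != nil {
			panic(err)
		}
	}
}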
@@ -122,3 +122,25 @@ func TestDiff(t *testing.T) {
		}
	}
}

func TestDiffEmpty(t *testing.T) {
	emptyCases := []struct {
		a    []protocol.BlockInfo
		b    []protocol.BlockInfo
		need int
		have int
	}{
		{nil, nil, 0, 0},
		{[]protocol.BlockInfo{{Offset: 3, Size: 1}}, nil, 0, 0},
		{nil, []protocol.BlockInfo{{Offset: 3, Size: 1}}, 1, 0},
	}
	for _, emptyCase := range emptyCases {
		h, n := BlockDiff(emptyCase.a, emptyCase.b)
		if len(h) != emptyCase.have {
			t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
		}
		if len(n) != emptyCase.need {
			t.Errorf("incorrect need: %d != %d", len(n), emptyCase.need)
		}
	}
}
@@ -9,9 +9,12 @@ package weakhash
import (
	"os"
	"testing"

	"github.com/chmduquesne/rollinghash/adler32"
)

const testFile = "../model/testdata/~syncthing~file.tmp"
const size = 128 << 10

func BenchmarkFind1MFile(b *testing.B) {
	b.ReportAllocs()
@@ -21,10 +24,38 @@ func BenchmarkFind1MFile(b *testing.B) {
		if err != nil {
			b.Fatal(err)
		}
		_, err = Find(fd, []uint32{0, 1, 2}, 128<<10)
		_, err = Find(fd, []uint32{0, 1, 2}, size)
		if err != nil {
			b.Fatal(err)
		}
		fd.Close()
	}
}

func BenchmarkWeakHashAdler32(b *testing.B) {
	data := make([]byte, size)
	hf := adler32.New()

	for i := 0; i < b.N; i++ {
		hf.Write(data)
	}

	_ = hf.Sum32()
	b.SetBytes(size)
}

func BenchmarkWeakHashAdler32Roll(b *testing.B) {
	data := make([]byte, size)
	hf := adler32.New()
	hf.Write(data)

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		for i := 0; i <= size; i++ {
			hf.Roll('a')
		}
	}

	b.SetBytes(size)
}
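The Write-vs-Roll comparison made by the two benchmarks above can also be expressed as a standalone program via testing.Benchmark, which may be handy when profiling outside go test. A sketch, assuming the vendored import path from the manifest entry at the end of this diff:

package main

import (
	"fmt"
	"testing"

	rolling "github.com/chmduquesne/rollinghash/adler32"
)

func main() {
	const size = 128 << 10
	data := make([]byte, size)

	// Rehash the whole window on every iteration.
	write := testing.Benchmark(func(b *testing.B) {
		h := rolling.New()
		for i := 0; i < b.N; i++ {
			h.Write(data)
		}
		b.SetBytes(size)
	})

	// Slide the window one byte at a time, size bytes per iteration.
	roll := testing.Benchmark(func(b *testing.B) {
		h := rolling.New()
		h.Write(data)
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			for j := 0; j < size; j++ {
				h.Roll('a')
			}
		}
		b.SetBytes(size)
	})

	fmt.Println("Write:", write.String())
	fmt.Println("Roll: ", roll.String())
}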
@@ -8,22 +8,16 @@ package weakhash

import (
	"bufio"
	"hash"
	"io"
	"os"

	"github.com/chmduquesne/rollinghash/adler32"
)

const (
	Size = 4
)

func NewHash(size int) hash.Hash32 {
	return &digest{
		buf:  make([]byte, size),
		size: size,
	}
}

// Find finds all the blocks of the given size within the io.Reader that match
// the hashes provided, and returns a map from each hash to the offsets within
// the reader that produce the same weak hash.
@@ -33,7 +27,7 @@ func Find(ir io.Reader, hashesToFind []uint32, size int) (map[uint32][]int64, er
	}

	r := bufio.NewReader(ir)
	hf := NewHash(size)
	hf := adler32.New()

	n, err := io.CopyN(hf, r, int64(size))
	if err == io.EOF {
@@ -66,56 +60,11 @@ func Find(ir io.Reader, hashesToFind []uint32, size int) (map[uint32][]int64, er
		} else if err != nil {
			return offsets, err
		}
		hf.Write([]byte{bt})
		hf.Roll(bt)
	}
	return offsets, nil
}
// Using this: http://tutorials.jenkov.com/rsync/checksums.html
// Example implementations: https://gist.github.com/csabahenk/1096262/revisions
// Alternative that could be used is adler32 http://blog.liw.fi/posts/rsync-in-python/#comment-fee8d5e07794fdba3fe2d76aa2706a13
type digest struct {
	buf  []byte
	size int
	a    uint16
	b    uint16
	j    int
}

func (d *digest) Write(data []byte) (int, error) {
	for _, c := range data {
		// TODO: Use this in Go 1.6
		// d.a = d.a - uint16(d.buf[d.j]) + uint16(c)
		// d.b = d.b - uint16(d.size)*uint16(d.buf[d.j]) + d.a
		d.a -= uint16(d.buf[d.j])
		d.a += uint16(c)
		d.b -= uint16(d.size) * uint16(d.buf[d.j])
		d.b += d.a

		d.buf[d.j] = c
		d.j = (d.j + 1) % d.size
	}
	return len(data), nil
}

func (d *digest) Reset() {
	for i := range d.buf {
		d.buf[i] = 0x0
	}
	d.a = 0
	d.b = 0
	d.j = 0
}

func (d *digest) Sum(b []byte) []byte {
	r := d.Sum32()
	return append(b, byte(r>>24), byte(r>>16), byte(r>>8), byte(r))
}

func (d *digest) Sum32() uint32 { return uint32(d.a) | (uint32(d.b) << 16) }
func (digest) Size() int        { return Size }
func (digest) BlockSize() int   { return 1 }

func NewFinder(path string, size int, hashesToFind []uint32) (*Finder, error) {
	file, err := os.Open(path)
	if err != nil {
@@ -18,129 +18,6 @@ import (
)

var payload = []byte("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz")
var hashes = []uint32{
	64225674, 64881038, 65536402, 66191766, 66847130, 67502494, 68157858, 68813222,
	69468586, 70123950, 70779314, 71434678, 72090042, 72745406, 73400770, 74056134,
	74711498, 75366862, 76022226, 76677590, 77332954, 77988318, 78643682, 77595084,
	74842550, 70386080, 64225674, 64881038, 65536402, 66191766, 66847130, 67502494,
	68157858, 68813222, 69468586, 70123950, 70779314, 71434678, 72090042, 72745406,
	73400770, 74056134, 74711498, 75366862, 76022226, 76677590, 77332954, 77988318,
	78643682, 77595084, 74842550, 70386080, 64225674, 64881038, 65536402, 66191766,
	66847130, 67502494, 68157858, 68813222, 69468586, 70123950, 70779314, 71434678,
	72090042, 72745406, 73400770, 74056134, 74711498, 75366862, 76022226, 76677590,
	77332954, 77988318, 78643682, 77595084, 74842550, 70386080, 64225674, 64881038,
	65536402, 66191766, 66847130, 67502494, 68157858, 68813222, 69468586, 70123950,
	70779314, 71434678, 72090042, 72745406, 73400770, 74056134, 74711498, 75366862,
	76022226, 76677590, 77332954, 77988318, 78643682, 71893365, 71893365,
}

// Tested using an alternative C implementation at https://gist.github.com/csabahenk/1096262
func TestHashCorrect(t *testing.T) {
	h := NewHash(Size)
	pos := 0
	for pos < Size {
		h.Write([]byte{payload[pos]})
		pos++
	}

	for i := 0; pos < len(payload); i++ {
		if h.Sum32() != hashes[i] {
			t.Errorf("mismatch at %d", i)
		}
		h.Write([]byte{payload[pos]})
		pos++
	}
}

func TestFinder(t *testing.T) {
	f, err := ioutil.TempFile("", "")
@@ -154,7 +31,7 @@ func TestFinder(t *testing.T) {
		t.Error(err)
	}

	hashes := []uint32{64881038, 65536402}
	hashes := []uint32{65143183, 65798547}
	finder, err := NewFinder(f.Name(), 4, hashes)
	if err != nil {
		t.Error(err)
@@ -162,8 +39,8 @@ func TestFinder(t *testing.T) {
	defer finder.Close()

	expected := map[uint32][]int64{
		64881038: []int64{1, 27, 53, 79},
		65536402: []int64{2, 28, 54, 80},
		65143183: []int64{1, 27, 53, 79},
		65798547: []int64{2, 28, 54, 80},
	}
	actual := make(map[uint32][]int64)
vendor/github.com/chmduquesne/rollinghash/adler32/adler32.go (new file, 97 lines, generated, vendored)
@@ -0,0 +1,97 @@
// Package rollinghash/adler32 implements a rolling version of hash/adler32

package adler32

import (
	vanilla "hash/adler32"

	"github.com/chmduquesne/rollinghash"
)

const (
	mod = 65521
)

const Size = 4

type digest struct {
	a, b uint32

	// window is treated like a circular buffer, where the oldest element
	// is indicated by d.oldest
	window []byte
	oldest int
	n      uint32
}

// Reset resets the Hash to its initial state.
func (d *digest) Reset() {
	d.a = 1
	d.b = 0
	d.window = nil
	d.oldest = 0
}

// New returns a new rollinghash.Hash32 computing the rolling Adler-32
// checksum. The window is copied from the last Write(). This window is
// only used to determine which is the oldest element (leaving the
// window). The calls to Roll() do not recompute the whole checksum.
func New() rollinghash.Hash32 {
	return &digest{a: 1, b: 0, window: nil, oldest: 0}
}

// Size returns the number of bytes Sum will return.
func (d *digest) Size() int { return Size }

// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all
// writes are a multiple of the block size.
func (d *digest) BlockSize() int { return 1 }

// Write (via the embedded io.Writer interface) adds more data to the
// running hash. It never returns an error.
func (d *digest) Write(p []byte) (int, error) {
	// Copy the window
	d.window = make([]byte, len(p))
	copy(d.window, p)

	// Piggy-back on the core implementation
	h := vanilla.New()
	h.Write(p)
	s := h.Sum32()
	d.a, d.b = s&0xffff, s>>16
	d.n = uint32(len(p)) % mod
	return len(d.window), nil
}

func (d *digest) Sum32() uint32 {
	return d.b<<16 | d.a
}

func (d *digest) Sum(b []byte) []byte {
	v := d.Sum32()
	return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// Roll updates the checksum of the window from the leaving byte and the
// entering byte. See
// http://stackoverflow.com/questions/40985080/why-does-my-rolling-adler32-checksum-not-work-in-go-modulo-arithmetic
func (d *digest) Roll(b byte) {
	if len(d.window) == 0 {
		d.window = make([]byte, 1)
		d.window[0] = b
	}
	// extract the entering/leaving bytes and update the circular buffer.
	enter := uint32(b)
	leave := uint32(d.window[d.oldest])
	d.window[d.oldest] = b
	d.oldest += 1
	if d.oldest >= len(d.window) {
		d.oldest = 0
	}

	// compute
	d.a = (d.a + mod + enter - leave) % mod
	d.b = (d.b + (d.n*leave/mod+1)*mod + d.a - (d.n * leave) - 1) % mod
}
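The vendored package documents that Roll() yields the same values as recomputing hash/adler32 over the shifted window, which is what makes it usable as the scanner's weak hash. A small standalone check of that property, assuming the vendored import path from the manifest entry further down:

package main

import (
	"fmt"
	vanilla "hash/adler32"

	rolling "github.com/chmduquesne/rollinghash/adler32"
)

func main() {
	data := []byte("abcdefghij")
	const window = 4

	// Prime the rolling hash with the first window, then roll one byte at a
	// time; every intermediate sum should equal a from-scratch Adler-32 of
	// the corresponding window.
	h := rolling.New()
	h.Write(data[:window])

	for i := window; i < len(data); i++ {
		h.Roll(data[i])
		fromScratch := vanilla.Checksum(data[i-window+1 : i+1])
		fmt.Println(h.Sum32() == fromScratch) // expected: true every time
	}
}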
vendor/github.com/chmduquesne/rollinghash/buzhash/buzhash.go (new file, 143 lines, generated, vendored)
@@ -0,0 +1,143 @@
// Package rollinghash/buzhash implements buzhash as described by
// https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial

package buzhash

import rollinghash "github.com/chmduquesne/rollinghash"

// 256 random integers generated with a dummy python script
var bytehash = [256]uint32{
	0xa5659a00, 0x2dbfda02, 0xac29a407, 0xce942c08, 0x48513609,
	0x325f158, 0xb54e5e13, 0xa9063618, 0xa5793419, 0x554b081a,
	0xe5643dac, 0xfb50e41c, 0x2b31661d, 0x335da61f, 0xe702f7b0,
	0xe31c1424, 0x6dfed825, 0xd30cf628, 0xba626a2a, 0x74b9c22b,
	0xa5d1942d, 0xf364ae2f, 0x70d2e84c, 0x190ad208, 0x92e3b740,
	0xd7e9f435, 0x15763836, 0x930ecab4, 0x641ea65e, 0xc0b2eb0a,
	0x2675e03e, 0x1a24c63f, 0xeddbcbb7, 0x3ea42bb2, 0x815f5849,
	0xa55c284b, 0xbb30964c, 0x6f7acc4e, 0x74538a50, 0x66df9652,
	0x2bae8454, 0xfe9d8055, 0x8c866fd4, 0x82f0a63d, 0x8f26365e,
	0xe66c3460, 0x6423266, 0x60696abc, 0xf75de6d, 0xd20c86e,
	0x69f8c6f, 0x8ac0f470, 0x273aab68, 0x4e044c74, 0xb2ec7875,
	0xf642d676, 0xd719e877, 0xee557e78, 0xdd20be7a, 0xd252707e,
	0xfa507a7f, 0xee537683, 0x6aac7684, 0x340e3485, 0x1c291288,
	0xab89c8c, 0xbe6e6c8d, 0xf99cf2f7, 0x69c65890, 0xd3757491,
	0xfeb63895, 0x67067a96, 0xa0089b19, 0x6c449898, 0x4eca749a,
	0x1101229b, 0x6b86d29d, 0x9c21be9e, 0xc5904933, 0xe1e820a3,
	0x6bd524a6, 0xd4695ea7, 0xc3d007e0, 0xbed8e4a9, 0x1c49d8af,
	0xedbae4b1, 0x1d2af6b4, 0x79526b9, 0xbc1d5abb, 0x6a2eb8bc,
	0x611b3695, 0x745c3cc4, 0x81005276, 0x5f442c8, 0x42dc30ca,
	0x55e460cb, 0x47648cc, 0x20da7122, 0xc4eedccd, 0xc21c14d0,
	0x27b5dfa9, 0x7e961fce, 0x8d0296d6, 0xce3684d7, 0x28e96da,
	0xedf7dcdc, 0x6817a0df, 0x51caae0, 0x8f226e1, 0xa1a00ce3,
	0xf811c6e5, 0x13e96ee6, 0xd4d4e4d1, 0xab160ee9, 0xb2cf06ea,
	0xf4ab6eb, 0x998f56f1, 0x16974cf2, 0xd42438f5, 0xe00ba6f7,
	0xbf01b8f8, 0x7a8a00f9, 0xdded6a7f, 0xb0ce58fd, 0xe5d81901,
	0xcc823b03, 0xc962e704, 0x2b4aff05, 0x5bcb7181, 0xe7207108,
	0xf3c93109, 0x1ffb650a, 0x37a31ad7, 0xfe27322d, 0x15b16d11,
	0x51a70512, 0xb579d92e, 0x53658284, 0x91fedb1b, 0x2ef0b122,
	0x93966523, 0xfa66af26, 0xa7fac32b, 0x7a81692c, 0x4f8d7f2e,
	0xf9875730, 0xa5ab2331, 0x79db8333, 0x8be32937, 0xf900af39,
	0xd09d4f3a, 0x9b22053d, 0xd2053e1c, 0xd0deaa35, 0x4a975740,
	0xcb3706e0, 0x40aea6cd, 0x769fdd44, 0x7e3e4947, 0xc20ac949,
	0x3788c34b, 0x9b23f74c, 0xb33e441d, 0x705d8a8d, 0x6a5e3a84,
	0xb4f955e3, 0xf681a155, 0x7dec1b56, 0x7bf5df58, 0xd3fa255a,
	0x3797c15c, 0xbf511562, 0xb048d65, 0xcd04f367, 0xae3a8368,
	0x769c856d, 0xc7bb9d6f, 0xe43e1f71, 0xa24de03e, 0x7f8cb376,
	0x618b778, 0x19e02f33, 0x2f810eea, 0x2b1ce595, 0x4f2f7180,
	0x72903140, 0x26a44584, 0x6af97e96, 0xb08acb86, 0x4d25cd41,
	0x1d74fd89, 0xe0f5b277, 0xbad158c, 0x5fed3b8d, 0x68b26794,
	0xcbe58795, 0xc1180797, 0xa1352399, 0x71dacd9c, 0x42b5549a,
	0xbf5371a0, 0x7ed41fa1, 0x6fe29a3, 0xa779fba5, 0x48a095a7,
	0xc2cad5a8, 0x7d7f15a9, 0xccd195aa, 0x2a9047ac, 0x3ec66ef2,
	0x252743ae, 0xdd8827af, 0x85fc5055, 0xb9d5c7b2, 0x5a224fb4,
	0xec26e7b6, 0xe4d8f7b7, 0x6e5aa58d, 0xeff753b9, 0x6c391fbb,
	0x989f65bc, 0x2fe4a7c1, 0x9d1d9bc3, 0xa09aadc6, 0x2df33fc8,
	0x5ec27933, 0x5e7f41cb, 0xb920f7cd, 0xc1a603ce, 0xf0888fcf,
	0xdc4ad1d1, 0x34b3dbd4, 0x170981d5, 0x22e5b5d6, 0x13049bd7,
	0xf12a8b95, 0xff7e87d9, 0xabb74b84, 0x215cff4f, 0xaf24f7dc,
	0xc87461d, 0x41a55e0, 0xfde9b9e1, 0x1d1956fb, 0x13d60de4,
	0x435f93e5, 0xe0ab5de6, 0x5c1d3fe7, 0x411a1fe8, 0x55e102a9,
	0x3d9b07eb, 0xdd6b8dee, 0x741293f3, 0xa5b10ca9, 0x5abad5fd,
	0x22372f55,
}

// The size of the checksum.
const Size = 4

// digest represents the partial evaluation of a checksum.
type digest struct {
	sum               uint32
	nRotate           uint
	nRotateComplement uint // redundant, but pre-computed to spare an operation

	// window is treated like a circular buffer, where the oldest element
	// is indicated by d.oldest
	window []byte
	oldest int
}

// Reset resets the Hash to its initial state.
func (d *digest) Reset() {
	d.window = nil
	d.oldest = 0
	d.sum = 0
}

func New() rollinghash.Hash32 {
	return &digest{sum: 0, window: nil, oldest: 0}
}

// Size returns the number of bytes Sum will return.
func (d *digest) Size() int { return Size }

// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all
// writes are a multiple of the block size.
func (d *digest) BlockSize() int { return 1 }

// Write (via the embedded io.Writer interface) adds more data to the
// running hash. It never returns an error.
func (d *digest) Write(data []byte) (int, error) {
	// Copy the window
	d.window = make([]byte, len(data))
	copy(d.window, data)

	for _, c := range d.window {
		d.sum = d.sum<<1 | d.sum>>31
		d.sum ^= bytehash[int(c)]
	}
	d.nRotate = uint(len(d.window)) % 32
	d.nRotateComplement = 32 - d.nRotate
	return len(d.window), nil
}

func (d *digest) Sum32() uint32 {
	return d.sum
}

func (d *digest) Sum(b []byte) []byte {
	v := d.Sum32()
	return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// Roll updates the checksum of the window from the leaving byte and the
// entering byte.
func (d *digest) Roll(c byte) {
	if len(d.window) == 0 {
		d.window = make([]byte, 1)
		d.window[0] = c
	}
	// extract the entering/leaving bytes and update the circular buffer.
	hn := bytehash[int(c)]
	h0 := bytehash[int(d.window[d.oldest])]

	d.window[d.oldest] = c
	l := len(d.window)
	d.oldest += 1
	if d.oldest >= l {
		d.oldest = 0
	}

	d.sum = (d.sum<<1 | d.sum>>31) ^ (h0<<d.nRotate | h0>>d.nRotateComplement) ^ hn
}
vendor/github.com/chmduquesne/rollinghash/rabinkarp32/rabinkarp32.go (new file, 89 lines, generated, vendored)
@@ -0,0 +1,89 @@
// Package rollinghash/rabinkarp32 implements a particular case of
// rabin-karp where the modulus is 0xffffffff (32 bits of '1')

package rabinkarp32

import rollinghash "github.com/chmduquesne/rollinghash"

// The size of a rabinkarp32 checksum.
const Size = 4

// digest represents the partial evaluation of a checksum.
type digest struct {
	a       uint32
	h       uint32
	aPowerN uint32

	// window is treated like a circular buffer, where the oldest element
	// is indicated by d.oldest
	window []byte
	oldest int
}

// Reset resets the Hash to its initial state.
func (d *digest) Reset() {
	d.h = 0
	d.aPowerN = 1
	d.window = nil
	d.oldest = 0
}

func NewFromInt(a uint32) rollinghash.Hash32 {
	return &digest{a: a, h: 0, aPowerN: 1, window: nil, oldest: 0}
}

func New() rollinghash.Hash32 {
	return NewFromInt(65521) // largest prime fitting in 16 bits
}

// Size returns the number of bytes Sum will return.
func (d *digest) Size() int { return Size }

// BlockSize returns the hash's underlying block size.
// The Write method must be able to accept any amount
// of data, but it may operate more efficiently if all
// writes are a multiple of the block size.
func (d *digest) BlockSize() int { return 1 }

// Write (via the embedded io.Writer interface) adds more data to the
// running hash. It never returns an error.
func (d *digest) Write(data []byte) (int, error) {
	// Copy the window
	d.window = make([]byte, len(data))
	copy(d.window, data)
	for _, c := range d.window {
		d.h *= d.a
		d.h += uint32(c)
		d.aPowerN *= d.a
	}
	return len(d.window), nil
}

func (d *digest) Sum32() uint32 {
	return d.h
}

func (d *digest) Sum(b []byte) []byte {
	v := d.Sum32()
	return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v))
}

// Roll updates the checksum of the window from the leaving byte and the
// entering byte.
func (d *digest) Roll(c byte) {
	if len(d.window) == 0 {
		d.window = make([]byte, 1)
		d.window[0] = c
	}
	// extract the entering/leaving bytes and update the circular buffer.
	enter := uint32(c)
	leave := uint32(d.window[d.oldest])
	d.window[d.oldest] = c
	l := len(d.window)
	d.oldest += 1
	if d.oldest >= l {
		d.oldest = 0
	}

	d.h = d.h*d.a + enter - leave*d.aPowerN
}
vendor/github.com/chmduquesne/rollinghash/rollinghash.go (new file, 40 lines, generated, vendored)
@@ -0,0 +1,40 @@
/*

Package rollinghash implements rolling versions of some hashes

*/
package rollinghash

import "hash"

type Roller interface {
	// Roll updates the hash of a rolling window from the entering byte.
	// A copy of the window is internally kept from the last Write().
	// This copy is updated along with the internal state of the checksum
	// in order to determine the new hash very quickly.
	Roll(b byte)
}

// rollinghash.Hash extends hash.Hash by adding the method Roll. A
// rollinghash.Hash can be updated byte by byte, by specifying which byte
// enters the window.
type Hash interface {
	hash.Hash
	Roller
}

// rollinghash.Hash32 extends hash.Hash by adding the method Roll. A
// rollinghash.Hash32 can be updated byte by byte, by specifying which
// byte enters the window.
type Hash32 interface {
	hash.Hash32
	Roller
}

// rollinghash.Hash64 extends hash.Hash by adding the method Roll. A
// rollinghash.Hash64 can be updated byte by byte, by specifying which
// byte enters the window.
type Hash64 interface {
	hash.Hash64
	Roller
}
vendor/manifest (vendored, 8 changed lines)
@@ -44,6 +44,14 @@
		"revision": "f9b9f8f7aa27725f5cabb699bd9099ca7ce09143",
		"branch": "master"
	},
	{
		"importpath": "github.com/chmduquesne/rollinghash",
		"repository": "https://github.com/chmduquesne/rollinghash",
		"vcs": "git",
		"revision": "88b86a92826991b14d01fb43456909fcb8a76b8b",
		"branch": "master",
		"notests": true
	},
	{
		"importpath": "github.com/cznic/b",
		"repository": "https://github.com/cznic/b",