scanner: Allow disabling weak hash in scanning (fixes #3891)

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/3905
This commit is contained in:
Audrius Butkevicius 2017-01-23 13:50:32 +00:00 committed by Jakob Borg
parent bd55ec79d2
commit dd78177ae0
10 changed files with 45 additions and 22 deletions

View File

@ -70,7 +70,7 @@ func main() {
if *standardBlocks || blockSize < protocol.BlockSize { if *standardBlocks || blockSize < protocol.BlockSize {
blockSize = protocol.BlockSize blockSize = protocol.BlockSize
} }
bs, err := scanner.Blocks(fd, blockSize, fi.Size(), nil) bs, err := scanner.Blocks(fd, blockSize, fi.Size(), nil, true)
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }

View File

@ -1813,6 +1813,7 @@ func (m *Model) internalScanFolderSubdirs(folder string, subDirs []string) error
ShortID: m.shortID, ShortID: m.shortID,
ProgressTickIntervalS: folderCfg.ScanProgressIntervalS, ProgressTickIntervalS: folderCfg.ScanProgressIntervalS,
Cancel: cancel, Cancel: cancel,
UseWeakHashes: folderCfg.WeakHashThresholdPct < 100,
}) })
if err != nil { if err != nil {

View File

@ -317,7 +317,7 @@ func (f *fakeConnection) addFile(name string, flags uint32, ftype protocol.FileI
f.mut.Lock() f.mut.Lock()
defer f.mut.Unlock() defer f.mut.Unlock()
blocks, _ := scanner.Blocks(bytes.NewReader(data), protocol.BlockSize, int64(len(data)), nil) blocks, _ := scanner.Blocks(bytes.NewReader(data), protocol.BlockSize, int64(len(data)), nil, true)
var version protocol.Vector var version protocol.Vector
version = version.Update(f.id.Short()) version = version.Update(f.id.Short())

View File

@ -1082,7 +1082,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c
// Check for an old temporary file which might have some blocks we could // Check for an old temporary file which might have some blocks we could
// reuse. // reuse.
tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil) tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil, false)
if err == nil { if err == nil {
// Check for any reusable blocks in the temp file // Check for any reusable blocks in the temp file
tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks)

View File

@ -238,7 +238,7 @@ func TestCopierFinder(t *testing.T) {
} }
// Verify that the fetched blocks have actually been written to the temp file // Verify that the fetched blocks have actually been written to the temp file
blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil) blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil, false)
if err != nil { if err != nil {
t.Log(err) t.Log(err)
} }
@ -291,7 +291,7 @@ func TestWeakHash(t *testing.T) {
// File 1: abcdefgh // File 1: abcdefgh
// File 2: xyabcdef // File 2: xyabcdef
f.Seek(0, os.SEEK_SET) f.Seek(0, os.SEEK_SET)
existing, err := scanner.Blocks(f, protocol.BlockSize, size, nil) existing, err := scanner.Blocks(f, protocol.BlockSize, size, nil, true)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }
@ -300,7 +300,7 @@ func TestWeakHash(t *testing.T) {
remainder := io.LimitReader(f, size-shift) remainder := io.LimitReader(f, size-shift)
prefix := io.LimitReader(rand.Reader, shift) prefix := io.LimitReader(rand.Reader, shift)
nf := io.MultiReader(prefix, remainder) nf := io.MultiReader(prefix, remainder)
desired, err := scanner.Blocks(nf, protocol.BlockSize, size, nil) desired, err := scanner.Blocks(nf, protocol.BlockSize, size, nil, true)
if err != nil { if err != nil {
t.Error(err) t.Error(err)
} }

View File

@ -20,13 +20,13 @@ import (
// workers are used in parallel. The outbox will become closed when the inbox // workers are used in parallel. The outbox will become closed when the inbox
// is closed and all items handled. // is closed and all items handled.
func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo, counter Counter, done, cancel chan struct{}) { func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo, counter Counter, done, cancel chan struct{}, useWeakHashes bool) {
wg := sync.NewWaitGroup() wg := sync.NewWaitGroup()
wg.Add(workers) wg.Add(workers)
for i := 0; i < workers; i++ { for i := 0; i < workers; i++ {
go func() { go func() {
hashFiles(dir, blockSize, outbox, inbox, counter, cancel) hashFiles(dir, blockSize, outbox, inbox, counter, cancel, useWeakHashes)
wg.Done() wg.Done()
}() }()
} }
@ -40,7 +40,8 @@ func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan pr
}() }()
} }
func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo, error) { // HashFile hashes the file and returns a list of blocks representing the file.
func HashFile(path string, blockSize int, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
fd, err := os.Open(path) fd, err := os.Open(path)
if err != nil { if err != nil {
l.Debugln("open:", err) l.Debugln("open:", err)
@ -60,7 +61,7 @@ func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo
// Hash the file. This may take a while for large files. // Hash the file. This may take a while for large files.
blocks, err := Blocks(fd, blockSize, size, counter) blocks, err := Blocks(fd, blockSize, size, counter, useWeakHashes)
if err != nil { if err != nil {
l.Debugln("blocks:", err) l.Debugln("blocks:", err)
return nil, err return nil, err
@ -81,7 +82,7 @@ func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo
return blocks, nil return blocks, nil
} }
func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}) { func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}, useWeakHashes bool) {
for { for {
select { select {
case f, ok := <-inbox: case f, ok := <-inbox:
@ -93,7 +94,7 @@ func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo,
panic("Bug. Asked to hash a directory or a deleted file.") panic("Bug. Asked to hash a directory or a deleted file.")
} }
blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter) blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter, useWeakHashes)
if err != nil { if err != nil {
l.Debugln("hash error:", f.Name, err) l.Debugln("hash error:", f.Name, err)
continue continue

View File

@ -9,6 +9,7 @@ package scanner
import ( import (
"bytes" "bytes"
"fmt" "fmt"
"hash"
"io" "io"
"github.com/chmduquesne/rollinghash/adler32" "github.com/chmduquesne/rollinghash/adler32"
@ -23,11 +24,20 @@ type Counter interface {
} }
// Blocks returns the blockwise hash of the reader. // Blocks returns the blockwise hash of the reader.
func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]protocol.BlockInfo, error) { func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
hf := sha256.New() hf := sha256.New()
hashLength := hf.Size() hashLength := hf.Size()
whf := adler32.New()
mhf := io.MultiWriter(hf, whf) var mhf io.Writer
var whf hash.Hash32
if useWeakHashes {
whf = adler32.New()
mhf = io.MultiWriter(hf, whf)
} else {
whf = noopHash{}
mhf = hf
}
var blocks []protocol.BlockInfo var blocks []protocol.BlockInfo
var hashes, thisHash []byte var hashes, thisHash []byte
@ -189,3 +199,12 @@ func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
} }
return true return true
} }
// noopHash is a zero-cost stand-in used when weak hashing is disabled.
// It satisfies the hash.Hash32 interface (it is assigned to the
// `var whf hash.Hash32` in Blocks when useWeakHashes is false) but
// performs no work: writes are discarded and the reported hash is
// always zero.
type noopHash struct{}

// Sum32 always reports 0, the placeholder weak-hash value.
func (noopHash) Sum32() uint32 { return 0 }
// BlockSize reports 0; there is no underlying block structure.
func (noopHash) BlockSize() int { return 0 }
// Size reports 0 bytes of checksum output.
func (noopHash) Size() int { return 0 }
// Reset is a no-op; there is no state to clear.
func (noopHash) Reset() {}
// Sum returns nil rather than appending any checksum bytes.
func (noopHash) Sum([]byte) []byte { return nil }
// Write discards its input, reporting 0 bytes consumed and no error.
func (noopHash) Write([]byte) (int, error) { return 0, nil }

View File

@ -68,7 +68,7 @@ var blocksTestData = []struct {
func TestBlocks(t *testing.T) { func TestBlocks(t *testing.T) {
for testNo, test := range blocksTestData { for testNo, test := range blocksTestData {
buf := bytes.NewBuffer(test.data) buf := bytes.NewBuffer(test.data)
blocks, err := Blocks(buf, test.blocksize, -1, nil) blocks, err := Blocks(buf, test.blocksize, -1, nil, true)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -125,8 +125,8 @@ var diffTestData = []struct {
func TestDiff(t *testing.T) { func TestDiff(t *testing.T) {
for i, test := range diffTestData { for i, test := range diffTestData {
a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil) a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil, false)
b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil) b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil, false)
_, d := BlockDiff(a, b) _, d := BlockDiff(a, b)
if len(d) != len(test.d) { if len(d) != len(test.d) {
t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d)) t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))

View File

@ -72,6 +72,8 @@ type Config struct {
ProgressTickIntervalS int ProgressTickIntervalS int
// Signals cancel from the outside - when closed, we should stop walking. // Signals cancel from the outside - when closed, we should stop walking.
Cancel chan struct{} Cancel chan struct{}
// Whether or not we should also compute weak hashes // Whether or not we should also compute weak hashes
UseWeakHashes bool
} }
type CurrentFiler interface { type CurrentFiler interface {
@ -129,7 +131,7 @@ func (w *walker) walk() (chan protocol.FileInfo, error) {
// We're not required to emit scan progress events, just kick off hashers, // We're not required to emit scan progress events, just kick off hashers,
// and feed inputs directly from the walker. // and feed inputs directly from the walker.
if w.ProgressTickIntervalS < 0 { if w.ProgressTickIntervalS < 0 {
newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.Cancel) newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.Cancel, w.UseWeakHashes)
return finishedChan, nil return finishedChan, nil
} }
@ -160,7 +162,7 @@ func (w *walker) walk() (chan protocol.FileInfo, error) {
done := make(chan struct{}) done := make(chan struct{})
progress := newByteCounter() progress := newByteCounter()
newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.Cancel) newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.Cancel, w.UseWeakHashes)
// A routine which actually emits the FolderScanProgress events // A routine which actually emits the FolderScanProgress events
// every w.ProgressTicker ticks, until the hasher routines terminate. // every w.ProgressTicker ticks, until the hasher routines terminate.

View File

@ -148,7 +148,7 @@ func TestVerify(t *testing.T) {
progress := newByteCounter() progress := newByteCounter()
defer progress.Close() defer progress.Close()
blocks, err := Blocks(buf, blocksize, -1, progress) blocks, err := Blocks(buf, blocksize, -1, progress, false)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -423,7 +423,7 @@ func BenchmarkHashFile(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
if _, err := HashFile(testdataName, protocol.BlockSize, nil); err != nil { if _, err := HashFile(testdataName, protocol.BlockSize, nil, true); err != nil {
b.Fatal(err) b.Fatal(err)
} }
} }