mirror of
https://github.com/octoleo/syncthing.git
synced 2025-01-03 15:17:25 +00:00
scanner: Allow disabling weak hash in scanning (fixes #3891)
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/3905
This commit is contained in:
parent
bd55ec79d2
commit
dd78177ae0
@ -70,7 +70,7 @@ func main() {
|
|||||||
if *standardBlocks || blockSize < protocol.BlockSize {
|
if *standardBlocks || blockSize < protocol.BlockSize {
|
||||||
blockSize = protocol.BlockSize
|
blockSize = protocol.BlockSize
|
||||||
}
|
}
|
||||||
bs, err := scanner.Blocks(fd, blockSize, fi.Size(), nil)
|
bs, err := scanner.Blocks(fd, blockSize, fi.Size(), nil, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -1813,6 +1813,7 @@ func (m *Model) internalScanFolderSubdirs(folder string, subDirs []string) error
|
|||||||
ShortID: m.shortID,
|
ShortID: m.shortID,
|
||||||
ProgressTickIntervalS: folderCfg.ScanProgressIntervalS,
|
ProgressTickIntervalS: folderCfg.ScanProgressIntervalS,
|
||||||
Cancel: cancel,
|
Cancel: cancel,
|
||||||
|
UseWeakHashes: folderCfg.WeakHashThresholdPct < 100,
|
||||||
})
|
})
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -317,7 +317,7 @@ func (f *fakeConnection) addFile(name string, flags uint32, ftype protocol.FileI
|
|||||||
f.mut.Lock()
|
f.mut.Lock()
|
||||||
defer f.mut.Unlock()
|
defer f.mut.Unlock()
|
||||||
|
|
||||||
blocks, _ := scanner.Blocks(bytes.NewReader(data), protocol.BlockSize, int64(len(data)), nil)
|
blocks, _ := scanner.Blocks(bytes.NewReader(data), protocol.BlockSize, int64(len(data)), nil, true)
|
||||||
var version protocol.Vector
|
var version protocol.Vector
|
||||||
version = version.Update(f.id.Short())
|
version = version.Update(f.id.Short())
|
||||||
|
|
||||||
|
@ -1082,7 +1082,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c
|
|||||||
|
|
||||||
// Check for an old temporary file which might have some blocks we could
|
// Check for an old temporary file which might have some blocks we could
|
||||||
// reuse.
|
// reuse.
|
||||||
tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil)
|
tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil, false)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
// Check for any reusable blocks in the temp file
|
// Check for any reusable blocks in the temp file
|
||||||
tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks)
|
tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks)
|
||||||
|
@ -238,7 +238,7 @@ func TestCopierFinder(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Verify that the fetched blocks have actually been written to the temp file
|
// Verify that the fetched blocks have actually been written to the temp file
|
||||||
blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil)
|
blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Log(err)
|
t.Log(err)
|
||||||
}
|
}
|
||||||
@ -291,7 +291,7 @@ func TestWeakHash(t *testing.T) {
|
|||||||
// File 1: abcdefgh
|
// File 1: abcdefgh
|
||||||
// File 2: xyabcdef
|
// File 2: xyabcdef
|
||||||
f.Seek(0, os.SEEK_SET)
|
f.Seek(0, os.SEEK_SET)
|
||||||
existing, err := scanner.Blocks(f, protocol.BlockSize, size, nil)
|
existing, err := scanner.Blocks(f, protocol.BlockSize, size, nil, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
@ -300,7 +300,7 @@ func TestWeakHash(t *testing.T) {
|
|||||||
remainder := io.LimitReader(f, size-shift)
|
remainder := io.LimitReader(f, size-shift)
|
||||||
prefix := io.LimitReader(rand.Reader, shift)
|
prefix := io.LimitReader(rand.Reader, shift)
|
||||||
nf := io.MultiReader(prefix, remainder)
|
nf := io.MultiReader(prefix, remainder)
|
||||||
desired, err := scanner.Blocks(nf, protocol.BlockSize, size, nil)
|
desired, err := scanner.Blocks(nf, protocol.BlockSize, size, nil, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
|
@ -20,13 +20,13 @@ import (
|
|||||||
// workers are used in parallel. The outbox will become closed when the inbox
|
// workers are used in parallel. The outbox will become closed when the inbox
|
||||||
// is closed and all items handled.
|
// is closed and all items handled.
|
||||||
|
|
||||||
func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo, counter Counter, done, cancel chan struct{}) {
|
func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo, counter Counter, done, cancel chan struct{}, useWeakHashes bool) {
|
||||||
wg := sync.NewWaitGroup()
|
wg := sync.NewWaitGroup()
|
||||||
wg.Add(workers)
|
wg.Add(workers)
|
||||||
|
|
||||||
for i := 0; i < workers; i++ {
|
for i := 0; i < workers; i++ {
|
||||||
go func() {
|
go func() {
|
||||||
hashFiles(dir, blockSize, outbox, inbox, counter, cancel)
|
hashFiles(dir, blockSize, outbox, inbox, counter, cancel, useWeakHashes)
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
@ -40,7 +40,8 @@ func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan pr
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo, error) {
|
// HashFile hashes the file at path and returns a list of blocks representing it.
|
||||||
|
func HashFile(path string, blockSize int, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
|
||||||
fd, err := os.Open(path)
|
fd, err := os.Open(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
l.Debugln("open:", err)
|
l.Debugln("open:", err)
|
||||||
@ -60,7 +61,7 @@ func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo
|
|||||||
|
|
||||||
// Hash the file. This may take a while for large files.
|
// Hash the file. This may take a while for large files.
|
||||||
|
|
||||||
blocks, err := Blocks(fd, blockSize, size, counter)
|
blocks, err := Blocks(fd, blockSize, size, counter, useWeakHashes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
l.Debugln("blocks:", err)
|
l.Debugln("blocks:", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -81,7 +82,7 @@ func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo
|
|||||||
return blocks, nil
|
return blocks, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}) {
|
func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}, useWeakHashes bool) {
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case f, ok := <-inbox:
|
case f, ok := <-inbox:
|
||||||
@ -93,7 +94,7 @@ func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo,
|
|||||||
panic("Bug. Asked to hash a directory or a deleted file.")
|
panic("Bug. Asked to hash a directory or a deleted file.")
|
||||||
}
|
}
|
||||||
|
|
||||||
blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter)
|
blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter, useWeakHashes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
l.Debugln("hash error:", f.Name, err)
|
l.Debugln("hash error:", f.Name, err)
|
||||||
continue
|
continue
|
||||||
|
@ -9,6 +9,7 @@ package scanner
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"hash"
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
"github.com/chmduquesne/rollinghash/adler32"
|
"github.com/chmduquesne/rollinghash/adler32"
|
||||||
@ -23,11 +24,20 @@ type Counter interface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Blocks returns the blockwise hash of the reader.
|
// Blocks returns the blockwise hash of the reader.
|
||||||
func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]protocol.BlockInfo, error) {
|
func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
|
||||||
hf := sha256.New()
|
hf := sha256.New()
|
||||||
hashLength := hf.Size()
|
hashLength := hf.Size()
|
||||||
whf := adler32.New()
|
|
||||||
mhf := io.MultiWriter(hf, whf)
|
var mhf io.Writer
|
||||||
|
var whf hash.Hash32
|
||||||
|
|
||||||
|
if useWeakHashes {
|
||||||
|
whf = adler32.New()
|
||||||
|
mhf = io.MultiWriter(hf, whf)
|
||||||
|
} else {
|
||||||
|
whf = noopHash{}
|
||||||
|
mhf = hf
|
||||||
|
}
|
||||||
|
|
||||||
var blocks []protocol.BlockInfo
|
var blocks []protocol.BlockInfo
|
||||||
var hashes, thisHash []byte
|
var hashes, thisHash []byte
|
||||||
@ -189,3 +199,12 @@ func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
|
|||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type noopHash struct{}
|
||||||
|
|
||||||
|
func (noopHash) Sum32() uint32 { return 0 }
|
||||||
|
func (noopHash) BlockSize() int { return 0 }
|
||||||
|
func (noopHash) Size() int { return 0 }
|
||||||
|
func (noopHash) Reset() {}
|
||||||
|
func (noopHash) Sum([]byte) []byte { return nil }
|
||||||
|
// Write discards p and reports it as fully consumed. The io.Writer contract
// requires a non-nil error whenever n < len(p), so returning 0 with a nil
// error would make wrappers such as io.MultiWriter fail with a short write.
func (noopHash) Write(p []byte) (int, error) { return len(p), nil }
|
||||||
|
@ -68,7 +68,7 @@ var blocksTestData = []struct {
|
|||||||
func TestBlocks(t *testing.T) {
|
func TestBlocks(t *testing.T) {
|
||||||
for testNo, test := range blocksTestData {
|
for testNo, test := range blocksTestData {
|
||||||
buf := bytes.NewBuffer(test.data)
|
buf := bytes.NewBuffer(test.data)
|
||||||
blocks, err := Blocks(buf, test.blocksize, -1, nil)
|
blocks, err := Blocks(buf, test.blocksize, -1, nil, true)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@ -125,8 +125,8 @@ var diffTestData = []struct {
|
|||||||
|
|
||||||
func TestDiff(t *testing.T) {
|
func TestDiff(t *testing.T) {
|
||||||
for i, test := range diffTestData {
|
for i, test := range diffTestData {
|
||||||
a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil)
|
a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil, false)
|
||||||
b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil)
|
b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil, false)
|
||||||
_, d := BlockDiff(a, b)
|
_, d := BlockDiff(a, b)
|
||||||
if len(d) != len(test.d) {
|
if len(d) != len(test.d) {
|
||||||
t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))
|
t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))
|
||||||
|
@ -72,6 +72,8 @@ type Config struct {
|
|||||||
ProgressTickIntervalS int
|
ProgressTickIntervalS int
|
||||||
// Signals cancel from the outside - when closed, we should stop walking.
|
// Signals cancel from the outside - when closed, we should stop walking.
|
||||||
Cancel chan struct{}
|
Cancel chan struct{}
|
||||||
|
// Whether or not we should also compute weak hashes
|
||||||
|
UseWeakHashes bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type CurrentFiler interface {
|
type CurrentFiler interface {
|
||||||
@ -129,7 +131,7 @@ func (w *walker) walk() (chan protocol.FileInfo, error) {
|
|||||||
// We're not required to emit scan progress events, just kick off hashers,
|
// We're not required to emit scan progress events, just kick off hashers,
|
||||||
// and feed inputs directly from the walker.
|
// and feed inputs directly from the walker.
|
||||||
if w.ProgressTickIntervalS < 0 {
|
if w.ProgressTickIntervalS < 0 {
|
||||||
newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.Cancel)
|
newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.Cancel, w.UseWeakHashes)
|
||||||
return finishedChan, nil
|
return finishedChan, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -160,7 +162,7 @@ func (w *walker) walk() (chan protocol.FileInfo, error) {
|
|||||||
done := make(chan struct{})
|
done := make(chan struct{})
|
||||||
progress := newByteCounter()
|
progress := newByteCounter()
|
||||||
|
|
||||||
newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.Cancel)
|
newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.Cancel, w.UseWeakHashes)
|
||||||
|
|
||||||
// A routine which actually emits the FolderScanProgress events
|
// A routine which actually emits the FolderScanProgress events
|
||||||
// every w.ProgressTicker ticks, until the hasher routines terminate.
|
// every w.ProgressTicker ticks, until the hasher routines terminate.
|
||||||
|
@ -148,7 +148,7 @@ func TestVerify(t *testing.T) {
|
|||||||
progress := newByteCounter()
|
progress := newByteCounter()
|
||||||
defer progress.Close()
|
defer progress.Close()
|
||||||
|
|
||||||
blocks, err := Blocks(buf, blocksize, -1, progress)
|
blocks, err := Blocks(buf, blocksize, -1, progress, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@ -423,7 +423,7 @@ func BenchmarkHashFile(b *testing.B) {
|
|||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
|
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
if _, err := HashFile(testdataName, protocol.BlockSize, nil); err != nil {
|
if _, err := HashFile(testdataName, protocol.BlockSize, nil, true); err != nil {
|
||||||
b.Fatal(err)
|
b.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user