2014-11-16 20:13:20 +00:00
|
|
|
// Copyright (C) 2014 The Syncthing Authors.
|
2014-09-29 19:43:32 +00:00
|
|
|
//
|
2015-03-07 20:36:35 +00:00
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
2017-02-09 06:52:18 +00:00
|
|
|
// You can obtain one at https://mozilla.org/MPL/2.0/.
|
2014-07-30 18:10:46 +00:00
|
|
|
|
|
|
|
package scanner
|
|
|
|
|
|
|
|
import (
|
2017-04-26 00:15:23 +00:00
|
|
|
"context"
|
2016-07-26 08:51:39 +00:00
|
|
|
"errors"
|
2014-07-30 18:10:46 +00:00
|
|
|
"path/filepath"
|
|
|
|
|
2017-04-01 09:04:11 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/fs"
|
2015-09-22 17:38:46 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/protocol"
|
2015-08-06 09:29:25 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/sync"
|
2014-07-30 18:10:46 +00:00
|
|
|
)
|
|
|
|
|
2017-01-23 13:50:32 +00:00
|
|
|
// HashFile hashes the files and returns a list of blocks representing the file.
|
2017-04-26 00:15:23 +00:00
|
|
|
func HashFile(ctx context.Context, fs fs.Filesystem, path string, blockSize int, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
|
2017-04-01 09:04:11 +00:00
|
|
|
fd, err := fs.Open(path)
|
2014-10-03 22:15:54 +00:00
|
|
|
if err != nil {
|
2015-10-03 15:25:21 +00:00
|
|
|
l.Debugln("open:", err)
|
2016-07-26 08:51:39 +00:00
|
|
|
return nil, err
|
2014-10-03 22:15:54 +00:00
|
|
|
}
|
2015-08-26 22:49:06 +00:00
|
|
|
defer fd.Close()
|
2014-07-30 18:10:46 +00:00
|
|
|
|
2016-07-26 08:51:39 +00:00
|
|
|
// Get the size and modtime of the file before we start hashing it.
|
|
|
|
|
|
|
|
fi, err := fd.Stat()
|
|
|
|
if err != nil {
|
|
|
|
l.Debugln("stat before:", err)
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
size := fi.Size()
|
|
|
|
modTime := fi.ModTime()
|
|
|
|
|
|
|
|
// Hash the file. This may take a while for large files.
|
|
|
|
|
2017-04-26 00:15:23 +00:00
|
|
|
blocks, err := Blocks(ctx, fd, blockSize, size, counter, useWeakHashes)
|
2016-07-26 08:51:39 +00:00
|
|
|
if err != nil {
|
|
|
|
l.Debugln("blocks:", err)
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Recheck the size and modtime again. If they differ, the file changed
|
|
|
|
// while we were reading it and our hash results are invalid.
|
|
|
|
|
|
|
|
fi, err = fd.Stat()
|
|
|
|
if err != nil {
|
|
|
|
l.Debugln("stat after:", err)
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if size != fi.Size() || !modTime.Equal(fi.ModTime()) {
|
|
|
|
return nil, errors.New("file changed during hashing")
|
2014-10-03 22:15:54 +00:00
|
|
|
}
|
2015-08-26 22:49:06 +00:00
|
|
|
|
2016-07-26 08:51:39 +00:00
|
|
|
return blocks, nil
|
2014-10-03 22:15:54 +00:00
|
|
|
}
|
2014-07-30 18:10:46 +00:00
|
|
|
|
2017-04-01 09:04:11 +00:00
|
|
|
// The parallel hasher reads FileInfo structures from the inbox, hashes the
|
|
|
|
// file to populate the Blocks element and sends it to the outbox. A number of
|
|
|
|
// workers are used in parallel. The outbox will become closed when the inbox
|
|
|
|
// is closed and all items handled.
|
|
|
|
type parallelHasher struct {
|
|
|
|
fs fs.Filesystem
|
|
|
|
dir string
|
|
|
|
blockSize int
|
|
|
|
workers int
|
|
|
|
outbox chan<- protocol.FileInfo
|
|
|
|
inbox <-chan protocol.FileInfo
|
|
|
|
counter Counter
|
|
|
|
done chan<- struct{}
|
|
|
|
useWeakHashes bool
|
|
|
|
wg sync.WaitGroup
|
|
|
|
}
|
|
|
|
|
2017-04-26 00:15:23 +00:00
|
|
|
func newParallelHasher(ctx context.Context, fs fs.Filesystem, dir string, blockSize, workers int, outbox chan<- protocol.FileInfo, inbox <-chan protocol.FileInfo, counter Counter, done chan<- struct{}, useWeakHashes bool) {
|
2017-04-01 09:04:11 +00:00
|
|
|
ph := ¶llelHasher{
|
|
|
|
fs: fs,
|
|
|
|
dir: dir,
|
|
|
|
blockSize: blockSize,
|
|
|
|
workers: workers,
|
|
|
|
outbox: outbox,
|
|
|
|
inbox: inbox,
|
|
|
|
counter: counter,
|
|
|
|
done: done,
|
|
|
|
useWeakHashes: useWeakHashes,
|
|
|
|
wg: sync.NewWaitGroup(),
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := 0; i < workers; i++ {
|
|
|
|
ph.wg.Add(1)
|
2017-04-26 00:15:23 +00:00
|
|
|
go ph.hashFiles(ctx)
|
2017-04-01 09:04:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
go ph.closeWhenDone()
|
|
|
|
}
|
|
|
|
|
2017-04-26 00:15:23 +00:00
|
|
|
func (ph *parallelHasher) hashFiles(ctx context.Context) {
|
2017-04-01 09:04:11 +00:00
|
|
|
defer ph.wg.Done()
|
|
|
|
|
2015-11-13 14:00:32 +00:00
|
|
|
for {
|
|
|
|
select {
|
2017-04-01 09:04:11 +00:00
|
|
|
case f, ok := <-ph.inbox:
|
2015-11-13 14:00:32 +00:00
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
2014-07-30 18:10:46 +00:00
|
|
|
|
2015-11-13 14:00:32 +00:00
|
|
|
if f.IsDirectory() || f.IsDeleted() {
|
|
|
|
panic("Bug. Asked to hash a directory or a deleted file.")
|
|
|
|
}
|
2014-07-30 18:10:46 +00:00
|
|
|
|
2017-04-26 00:15:23 +00:00
|
|
|
blocks, err := HashFile(ctx, ph.fs, filepath.Join(ph.dir, f.Name), ph.blockSize, ph.counter, ph.useWeakHashes)
|
2015-11-13 14:00:32 +00:00
|
|
|
if err != nil {
|
|
|
|
l.Debugln("hash error:", f.Name, err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
f.Blocks = blocks
|
2016-07-26 08:51:39 +00:00
|
|
|
|
|
|
|
// The size we saw when initially deciding to hash the file
|
|
|
|
// might not have been the size it actually had when we hashed
|
|
|
|
// it. Update the size from the block list.
|
|
|
|
|
|
|
|
f.Size = 0
|
|
|
|
for _, b := range blocks {
|
|
|
|
f.Size += int64(b.Size)
|
|
|
|
}
|
|
|
|
|
2015-11-13 14:00:32 +00:00
|
|
|
select {
|
2017-04-01 09:04:11 +00:00
|
|
|
case ph.outbox <- f:
|
2017-04-26 00:15:23 +00:00
|
|
|
case <-ctx.Done():
|
2015-11-13 14:00:32 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2017-04-26 00:15:23 +00:00
|
|
|
case <-ctx.Done():
|
2015-11-13 14:00:32 +00:00
|
|
|
return
|
|
|
|
}
|
2014-07-30 18:10:46 +00:00
|
|
|
}
|
|
|
|
}
|
2017-04-01 09:04:11 +00:00
|
|
|
|
|
|
|
func (ph *parallelHasher) closeWhenDone() {
|
|
|
|
ph.wg.Wait()
|
|
|
|
if ph.done != nil {
|
|
|
|
close(ph.done)
|
|
|
|
}
|
|
|
|
close(ph.outbox)
|
|
|
|
}
|