// Copyright (C) 2014 The Syncthing Authors. // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this file, // You can obtain one at https://mozilla.org/MPL/2.0/. package model import ( "context" "fmt" "math/rand" "path/filepath" "sort" "sync/atomic" "time" "github.com/syncthing/syncthing/lib/config" "github.com/syncthing/syncthing/lib/db" "github.com/syncthing/syncthing/lib/events" "github.com/syncthing/syncthing/lib/fs" "github.com/syncthing/syncthing/lib/ignore" "github.com/syncthing/syncthing/lib/locations" "github.com/syncthing/syncthing/lib/osutil" "github.com/syncthing/syncthing/lib/protocol" "github.com/syncthing/syncthing/lib/scanner" "github.com/syncthing/syncthing/lib/stats" "github.com/syncthing/syncthing/lib/sync" "github.com/syncthing/syncthing/lib/watchaggregator" "github.com/thejerf/suture" ) // scanLimiter limits the number of concurrent scans. A limit of zero means no limit. var scanLimiter = newByteSemaphore(0) type folder struct { suture.Service stateTracker config.FolderConfiguration *stats.FolderStatisticsReference localFlags uint32 model *model shortID protocol.ShortID fset *db.FileSet ignores *ignore.Matcher ctx context.Context cancel context.CancelFunc scanInterval time.Duration scanTimer *time.Timer scanNow chan rescanRequest scanDelay chan time.Duration initialScanFinished chan struct{} scanErrors []FileError scanErrorsMut sync.Mutex pullScheduled chan struct{} watchCancel context.CancelFunc watchChan chan []string restartWatchChan chan struct{} watchErr error watchMut sync.Mutex puller puller } type rescanRequest struct { subdirs []string err chan error } type puller interface { pull() bool // true when successfull and should not be retried } func newFolder(model *model, fset *db.FileSet, ignores *ignore.Matcher, cfg config.FolderConfiguration, evLogger events.Logger) folder { ctx, cancel := context.WithCancel(context.Background()) return folder{ stateTracker: newStateTracker(cfg.ID, evLogger), FolderConfiguration: cfg, FolderStatisticsReference: stats.NewFolderStatisticsReference(model.db, cfg.ID), model: model, shortID: model.shortID, fset: fset, ignores: ignores, ctx: ctx, cancel: cancel, scanInterval: time.Duration(cfg.RescanIntervalS) * time.Second, scanTimer: time.NewTimer(time.Millisecond), // The first scan should be done immediately. scanNow: make(chan rescanRequest), scanDelay: make(chan time.Duration), initialScanFinished: make(chan struct{}), scanErrorsMut: sync.NewMutex(), pullScheduled: make(chan struct{}, 1), // This needs to be 1-buffered so that we queue a pull if we're busy when it comes. watchCancel: func() {}, restartWatchChan: make(chan struct{}, 1), watchMut: sync.NewMutex(), } } func (f *folder) serve(_ chan struct{}) { atomic.AddInt32(&f.model.foldersRunning, 1) defer atomic.AddInt32(&f.model.foldersRunning, -1) l.Debugln(f, "starting") defer l.Debugln(f, "exiting") defer func() { f.scanTimer.Stop() f.setState(FolderIdle) }() pause := f.basePause() pullFailTimer := time.NewTimer(0) <-pullFailTimer.C if f.FSWatcherEnabled && f.CheckHealth() == nil { f.startWatch() } initialCompleted := f.initialScanFinished pull := func() { startTime := time.Now() if f.puller.pull() { // We're good. Don't schedule another pull and reset // the pause interval. pause = f.basePause() return } // Pulling failed, try again later. delay := pause + time.Since(startTime) l.Infof("Folder %v isn't making sync progress - retrying in %v.", f.Description(), delay) pullFailTimer.Reset(delay) if pause < 60*f.basePause() { pause *= 2 } } for { select { case <-f.ctx.Done(): return case <-f.pullScheduled: pullFailTimer.Stop() select { case <-pullFailTimer.C: default: } pull() case <-pullFailTimer.C: pull() case <-initialCompleted: // Initial scan has completed, we should do a pull initialCompleted = nil // never hit this case again if !f.puller.pull() { // Pulling failed, try again later. pullFailTimer.Reset(pause) } case <-f.scanTimer.C: l.Debugln(f, "Scanning subdirectories") f.scanTimerFired() case req := <-f.scanNow: req.err <- f.scanSubdirs(req.subdirs) case next := <-f.scanDelay: f.scanTimer.Reset(next) case fsEvents := <-f.watchChan: l.Debugln(f, "filesystem notification rescan") f.scanSubdirs(fsEvents) case <-f.restartWatchChan: f.restartWatch() } } } func (f *folder) BringToFront(string) {} func (f *folder) Override() {} func (f *folder) Revert() {} func (f *folder) DelayScan(next time.Duration) { f.Delay(next) } func (f *folder) ignoresUpdated() { if f.FSWatcherEnabled { f.scheduleWatchRestart() } } func (f *folder) SchedulePull() { select { case f.pullScheduled <- struct{}{}: default: // We might be busy doing a pull and thus not reading from this // channel. The channel is 1-buffered, so one notification will be // queued to ensure we recheck after the pull, but beyond that we must // make sure to not block index receiving. } } func (f *folder) Jobs(_, _ int) ([]string, []string, int) { return nil, nil, 0 } func (f *folder) Scan(subdirs []string) error { <-f.initialScanFinished req := rescanRequest{ subdirs: subdirs, err: make(chan error), } select { case f.scanNow <- req: return <-req.err case <-f.ctx.Done(): return f.ctx.Err() } } func (f *folder) Reschedule() { if f.scanInterval == 0 { return } // Sleep a random time between 3/4 and 5/4 of the configured interval. sleepNanos := (f.scanInterval.Nanoseconds()*3 + rand.Int63n(2*f.scanInterval.Nanoseconds())) / 4 interval := time.Duration(sleepNanos) * time.Nanosecond l.Debugln(f, "next rescan in", interval) f.scanTimer.Reset(interval) } func (f *folder) Delay(next time.Duration) { f.scanDelay <- next } func (f *folder) Stop() { f.cancel() f.Service.Stop() } // CheckHealth checks the folder for common errors, updates the folder state // and returns the current folder error, or nil if the folder is healthy. func (f *folder) CheckHealth() error { err := f.getHealthError() f.setError(err) return err } func (f *folder) getHealthError() error { // Check for folder errors, with the most serious and specific first and // generic ones like out of space on the home disk later. if err := f.CheckPath(); err != nil { return err } dbPath := locations.Get(locations.Database) if usage, err := fs.NewFilesystem(fs.FilesystemTypeBasic, dbPath).Usage("."); err == nil { if err = config.CheckFreeSpace(f.model.cfg.Options().MinHomeDiskFree, usage); err != nil { return fmt.Errorf("insufficient space on disk for database (%v): %v", dbPath, err) } } return nil } func (f *folder) scanSubdirs(subDirs []string) error { if err := f.CheckHealth(); err != nil { return err } mtimefs := f.fset.MtimeFS() f.setState(FolderScanWaiting) scanLimiter.take(1) defer scanLimiter.give(1) for i := range subDirs { sub := osutil.NativeFilename(subDirs[i]) if sub == "" { // A blank subdirs means to scan the entire folder. We can trim // the subDirs list and go on our way. subDirs = nil break } subDirs[i] = sub } // Check if the ignore patterns changed as part of scanning this folder. // If they did we should schedule a pull of the folder so that we // request things we might have suddenly become unignored and so on. oldHash := f.ignores.Hash() defer func() { if f.ignores.Hash() != oldHash { l.Debugln("Folder", f.Description(), "ignore patterns change detected while scanning; triggering puller") f.ignoresUpdated() f.SchedulePull() } }() if err := f.ignores.Load(".stignore"); err != nil && !fs.IsNotExist(err) { err = fmt.Errorf("loading ignores: %v", err) f.setError(err) return err } // Clean the list of subitems to ensure that we start at a known // directory, and don't scan subdirectories of things we've already // scanned. subDirs = unifySubs(subDirs, func(file string) bool { _, ok := f.fset.Get(protocol.LocalDeviceID, file) return ok }) f.setState(FolderScanning) fchan := scanner.Walk(f.ctx, scanner.Config{ Folder: f.ID, Subs: subDirs, Matcher: f.ignores, TempLifetime: time.Duration(f.model.cfg.Options().KeepTemporariesH) * time.Hour, CurrentFiler: cFiler{f.fset}, Filesystem: mtimefs, IgnorePerms: f.IgnorePerms, AutoNormalize: f.AutoNormalize, Hashers: f.model.numHashers(f.ID), ShortID: f.shortID, ProgressTickIntervalS: f.ScanProgressIntervalS, LocalFlags: f.localFlags, ModTimeWindow: f.ModTimeWindow(), EventLogger: f.evLogger, }) batchFn := func(fs []protocol.FileInfo) error { if err := f.CheckHealth(); err != nil { l.Debugf("Stopping scan of folder %s due to: %s", f.Description(), err) return err } f.updateLocalsFromScanning(fs) return nil } // Resolve items which are identical with the global state. if f.localFlags&protocol.FlagLocalReceiveOnly != 0 { oldBatchFn := batchFn // can't reference batchFn directly (recursion) batchFn = func(fs []protocol.FileInfo) error { for i := range fs { switch gf, ok := f.fset.GetGlobal(fs[i].Name); { case !ok: continue case gf.IsEquivalentOptional(fs[i], f.ModTimeWindow(), false, false, protocol.FlagLocalReceiveOnly): // What we have locally is equivalent to the global file. fs[i].Version = fs[i].Version.Merge(gf.Version) fallthrough case fs[i].IsDeleted() && gf.IsReceiveOnlyChanged(): // Our item is deleted and the global item is our own // receive only file. We can't delete file infos, so // we just pretend it is a normal deleted file (nobody // cares about that). fs[i].LocalFlags &^= protocol.FlagLocalReceiveOnly } } return oldBatchFn(fs) } } batch := newFileInfoBatch(batchFn) // Schedule a pull after scanning, but only if we actually detected any // changes. changes := 0 defer func() { if changes > 0 { f.SchedulePull() } }() f.clearScanErrors(subDirs) for res := range fchan { if res.Err != nil { f.newScanError(res.Path, res.Err) continue } if err := batch.flushIfFull(); err != nil { return err } batch.append(res.File) changes++ } if err := batch.flush(); err != nil { return err } if len(subDirs) == 0 { // If we have no specific subdirectories to traverse, set it to one // empty prefix so we traverse the entire folder contents once. subDirs = []string{""} } // Do a scan of the database for each prefix, to check for deleted and // ignored files. var toIgnore []db.FileInfoTruncated ignoredParent := "" for _, sub := range subDirs { var iterError error f.fset.WithPrefixedHaveTruncated(protocol.LocalDeviceID, sub, func(fi db.FileIntf) bool { select { case <-f.ctx.Done(): return false default: } file := fi.(db.FileInfoTruncated) if err := batch.flushIfFull(); err != nil { iterError = err return false } if ignoredParent != "" && !fs.IsParent(file.Name, ignoredParent) { for _, file := range toIgnore { l.Debugln("marking file as ignored", file) nf := file.ConvertToIgnoredFileInfo(f.shortID) batch.append(nf) changes++ if err := batch.flushIfFull(); err != nil { iterError = err return false } } toIgnore = toIgnore[:0] ignoredParent = "" } switch ignored := f.ignores.Match(file.Name).IsIgnored(); { case !file.IsIgnored() && ignored: // File was not ignored at last pass but has been ignored. if file.IsDirectory() { // Delay ignoring as a child might be unignored. toIgnore = append(toIgnore, file) if ignoredParent == "" { // If the parent wasn't ignored already, set // this path as the "highest" ignored parent ignoredParent = file.Name } return true } l.Debugln("marking file as ignored", f) nf := file.ConvertToIgnoredFileInfo(f.shortID) batch.append(nf) changes++ case file.IsIgnored() && !ignored: // Successfully scanned items are already un-ignored during // the scan, so check whether it is deleted. fallthrough case !file.IsIgnored() && !file.IsDeleted() && !file.IsUnsupported(): // The file is not ignored, deleted or unsupported. Lets check if // it's still here. Simply stat:ing it wont do as there are // tons of corner cases (e.g. parent dir->symlink, missing // permissions) if !osutil.IsDeleted(mtimefs, file.Name) { if ignoredParent != "" { // Don't ignore parents of this not ignored item toIgnore = toIgnore[:0] ignoredParent = "" } return true } nf := protocol.FileInfo{ Name: file.Name, Type: file.Type, Size: 0, ModifiedS: file.ModifiedS, ModifiedNs: file.ModifiedNs, ModifiedBy: f.shortID, Deleted: true, Version: file.Version.Update(f.shortID), LocalFlags: f.localFlags, } // We do not want to override the global version // with the deleted file. Keeping only our local // counter makes sure we are in conflict with any // other existing versions, which will be resolved // by the normal pulling mechanisms. if file.ShouldConflict() { nf.Version = nf.Version.DropOthers(f.shortID) } batch.append(nf) changes++ } return true }) select { case <-f.ctx.Done(): return f.ctx.Err() default: } if iterError == nil && len(toIgnore) > 0 { for _, file := range toIgnore { l.Debugln("marking file as ignored", f) nf := file.ConvertToIgnoredFileInfo(f.shortID) batch.append(nf) changes++ if iterError = batch.flushIfFull(); iterError != nil { break } } toIgnore = toIgnore[:0] } if iterError != nil { return iterError } } if err := batch.flush(); err != nil { return err } f.ScanCompleted() f.setState(FolderIdle) return nil } func (f *folder) scanTimerFired() { err := f.scanSubdirs(nil) select { case <-f.initialScanFinished: default: status := "Completed" if err != nil { status = "Failed" } l.Infoln(status, "initial scan of", f.Type.String(), "folder", f.Description()) close(f.initialScanFinished) } f.Reschedule() } func (f *folder) WatchError() error { f.watchMut.Lock() defer f.watchMut.Unlock() return f.watchErr } // stopWatch immediately aborts watching and may be called asynchronously func (f *folder) stopWatch() { f.watchMut.Lock() f.watchCancel() f.watchMut.Unlock() f.setWatchError(nil) } // scheduleWatchRestart makes sure watching is restarted from the main for loop // in a folder's Serve and thus may be called asynchronously (e.g. when ignores change). func (f *folder) scheduleWatchRestart() { select { case f.restartWatchChan <- struct{}{}: default: // We might be busy doing a pull and thus not reading from this // channel. The channel is 1-buffered, so one notification will be // queued to ensure we recheck after the pull. } } // restartWatch should only ever be called synchronously. If you want to use // this asynchronously, you should probably use scheduleWatchRestart instead. func (f *folder) restartWatch() { f.stopWatch() f.startWatch() f.scanSubdirs(nil) } // startWatch should only ever be called synchronously. If you want to use // this asynchronously, you should probably use scheduleWatchRestart instead. func (f *folder) startWatch() { ctx, cancel := context.WithCancel(f.ctx) f.watchMut.Lock() f.watchChan = make(chan []string) f.watchCancel = cancel f.watchMut.Unlock() go f.monitorWatch(ctx) } // monitorWatch starts the filesystem watching and retries every minute on failure. // It should not be used except in startWatch. func (f *folder) monitorWatch(ctx context.Context) { failTimer := time.NewTimer(0) aggrCtx, aggrCancel := context.WithCancel(ctx) var err error var eventChan <-chan fs.Event var errChan <-chan error warnedOutside := false for { select { case <-failTimer.C: eventChan, errChan, err = f.Filesystem().Watch(".", f.ignores, ctx, f.IgnorePerms) // We do this at most once per minute which is the // default rescan time without watcher. f.scanOnWatchErr() f.setWatchError(err) if err != nil { failTimer.Reset(time.Minute) continue } watchaggregator.Aggregate(eventChan, f.watchChan, f.FolderConfiguration, f.model.cfg, f.evLogger, aggrCtx) l.Debugln("Started filesystem watcher for folder", f.Description()) case err = <-errChan: f.setWatchError(err) // This error was previously a panic and should never occur, so generate // a warning, but don't do it repetitively. if !warnedOutside { if _, ok := err.(*fs.ErrWatchEventOutsideRoot); ok { l.Warnln(err) warnedOutside = true } } aggrCancel() errChan = nil aggrCtx, aggrCancel = context.WithCancel(ctx) failTimer.Reset(time.Minute) case <-ctx.Done(): return } } } // setWatchError sets the current error state of the watch and should be called // regardless of whether err is nil or not. func (f *folder) setWatchError(err error) { f.watchMut.Lock() prevErr := f.watchErr f.watchErr = err f.watchMut.Unlock() if err != prevErr { data := map[string]interface{}{ "folder": f.ID, } if prevErr != nil { data["from"] = prevErr.Error() } if err != nil { data["to"] = err.Error() } f.evLogger.Log(events.FolderWatchStateChanged, data) } if err == nil { return } msg := fmt.Sprintf("Error while trying to start filesystem watcher for folder %s, trying again in 1min: %v", f.Description(), err) if prevErr != err { l.Infof(msg) return } l.Debugf(msg) } // scanOnWatchErr schedules a full scan immediately if an error occurred while watching. func (f *folder) scanOnWatchErr() { f.watchMut.Lock() err := f.watchErr f.watchMut.Unlock() if err != nil { f.Delay(0) } } func (f *folder) setError(err error) { select { case <-f.ctx.Done(): return default: } _, _, oldErr := f.getState() if (err != nil && oldErr != nil && oldErr.Error() == err.Error()) || (err == nil && oldErr == nil) { return } if err != nil { if oldErr == nil { l.Warnf("Error on folder %s: %v", f.Description(), err) } else { l.Infof("Error on folder %s changed: %q -> %q", f.Description(), oldErr, err) } } else { l.Infoln("Cleared error on folder", f.Description()) } if f.FSWatcherEnabled { if err != nil { f.stopWatch() } else { f.scheduleWatchRestart() } } f.stateTracker.setError(err) } func (f *folder) basePause() time.Duration { if f.PullerPauseS == 0 { return defaultPullerPause } return time.Duration(f.PullerPauseS) * time.Second } func (f *folder) String() string { return fmt.Sprintf("%s/%s@%p", f.Type, f.folderID, f) } func (f *folder) newScanError(path string, err error) { f.scanErrorsMut.Lock() f.scanErrors = append(f.scanErrors, FileError{ Err: err.Error(), Path: path, }) f.scanErrorsMut.Unlock() } func (f *folder) clearScanErrors(subDirs []string) { f.scanErrorsMut.Lock() defer f.scanErrorsMut.Unlock() if len(subDirs) == 0 { f.scanErrors = nil return } filtered := f.scanErrors[:0] outer: for _, fe := range f.scanErrors { for _, sub := range subDirs { if fe.Path == sub || fs.IsParent(fe.Path, sub) { continue outer } } filtered = append(filtered, fe) } f.scanErrors = filtered } func (f *folder) Errors() []FileError { f.scanErrorsMut.Lock() defer f.scanErrorsMut.Unlock() return append([]FileError{}, f.scanErrors...) } // ForceRescan marks the file such that it gets rehashed on next scan and then // immediately executes that scan. func (f *folder) ForceRescan(file protocol.FileInfo) error { file.SetMustRescan(f.shortID) f.fset.Update(protocol.LocalDeviceID, []protocol.FileInfo{file}) return f.Scan([]string{file.Name}) } func (f *folder) updateLocalsFromScanning(fs []protocol.FileInfo) { f.updateLocals(fs) f.emitDiskChangeEvents(fs, events.LocalChangeDetected) } func (f *folder) updateLocalsFromPulling(fs []protocol.FileInfo) { f.updateLocals(fs) f.emitDiskChangeEvents(fs, events.RemoteChangeDetected) } func (f *folder) updateLocals(fs []protocol.FileInfo) { f.fset.Update(protocol.LocalDeviceID, fs) filenames := make([]string, len(fs)) for i, file := range fs { filenames[i] = file.Name } f.evLogger.Log(events.LocalIndexUpdated, map[string]interface{}{ "folder": f.ID, "items": len(fs), "filenames": filenames, "version": f.fset.Sequence(protocol.LocalDeviceID), }) } func (f *folder) emitDiskChangeEvents(fs []protocol.FileInfo, typeOfEvent events.EventType) { for _, file := range fs { if file.IsInvalid() { continue } objType := "file" action := "modified" switch { case file.IsDeleted(): action = "deleted" // If our local vector is version 1 AND it is the only version // vector so far seen for this file then it is a new file. Else if // it is > 1 it's not new, and if it is 1 but another shortId // version vector exists then it is new for us but created elsewhere // so the file is still not new but modified by us. Only if it is // truly new do we change this to 'added', else we leave it as // 'modified'. case len(file.Version.Counters) == 1 && file.Version.Counters[0].Value == 1: action = "added" } if file.IsSymlink() { objType = "symlink" } else if file.IsDirectory() { objType = "dir" } // Two different events can be fired here based on what EventType is passed into function f.evLogger.Log(typeOfEvent, map[string]string{ "folder": f.ID, "folderID": f.ID, // incorrect, deprecated, kept for historical compliance "label": f.Label, "action": action, "type": objType, "path": filepath.FromSlash(file.Name), "modifiedBy": file.ModifiedBy.String(), }) } } // The exists function is expected to return true for all known paths // (excluding "" and ".") func unifySubs(dirs []string, exists func(dir string) bool) []string { if len(dirs) == 0 { return nil } sort.Strings(dirs) if dirs[0] == "" || dirs[0] == "." || dirs[0] == string(fs.PathSeparator) { return nil } prev := "./" // Anything that can't be parent of a clean path for i := 0; i < len(dirs); { dir, err := fs.Canonicalize(dirs[i]) if err != nil { l.Debugf("Skipping %v for scan: %s", dirs[i], err) dirs = append(dirs[:i], dirs[i+1:]...) continue } if dir == prev || fs.IsParent(dir, prev) { dirs = append(dirs[:i], dirs[i+1:]...) continue } parent := filepath.Dir(dir) for parent != "." && parent != string(fs.PathSeparator) && !exists(parent) { dir = parent parent = filepath.Dir(dir) } dirs[i] = dir prev = dir i++ } return dirs } type cFiler struct { *db.FileSet } // Implements scanner.CurrentFiler func (cf cFiler) CurrentFile(file string) (protocol.FileInfo, bool) { return cf.Get(protocol.LocalDeviceID, file) }