From 10f071325726e8de1d44ec9ea1a7a9741f885c6d Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Tue, 2 Sep 2014 13:08:24 +0200 Subject: [PATCH] Use a monitor process to handle panics and restarts (fixes #586) --- cmd/syncthing/heapprof.go | 3 +- cmd/syncthing/main.go | 131 ++++++++++------------------- cmd/syncthing/monitor.go | 144 ++++++++++++++++++++++++++++++++ cmd/syncthing/perfstats_unix.go | 2 +- 4 files changed, 189 insertions(+), 91 deletions(-) create mode 100644 cmd/syncthing/monitor.go diff --git a/cmd/syncthing/heapprof.go b/cmd/syncthing/heapprof.go index 24e0e522c..a4631451f 100644 --- a/cmd/syncthing/heapprof.go +++ b/cmd/syncthing/heapprof.go @@ -14,7 +14,8 @@ import ( ) func init() { - if os.Getenv("STHEAPPROFILE") != "" { + if innerProcess && os.Getenv("STHEAPPROFILE") != "" { + l.Debugln("Starting heap profiling") go saveHeapProfiles() } } diff --git a/cmd/syncthing/main.go b/cmd/syncthing/main.go index 3e51d3750..b18e30768 100644 --- a/cmd/syncthing/main.go +++ b/cmd/syncthing/main.go @@ -16,7 +16,6 @@ import ( "net/http" _ "net/http/pprof" "os" - "os/exec" "path/filepath" "regexp" "runtime" @@ -52,7 +51,15 @@ var ( GoArchExtra string // "", "v5", "v6", "v7" ) +const ( + exitSuccess = 0 + exitError = 1 + exitNoUpgradeAvailable = 2 + exitRestarting = 3 +) + var l = logger.DefaultLogger +var innerProcess = os.Getenv("STNORESTART") != "" func init() { if Version != "unknown-dev" { @@ -80,10 +87,8 @@ var ( confDir string logFlags int = log.Ltime rateBucket *ratelimit.Bucket - stop = make(chan bool) + stop = make(chan int) discoverer *discover.Discoverer - lockConn *net.TCPListener - lockPort int externalPort int cert tls.Certificate ) @@ -152,16 +157,20 @@ func init() { rand.Seed(time.Now().UnixNano()) } +// Command line options +var ( + reset bool + showVersion bool + doUpgrade bool + doUpgradeCheck bool + noBrowser bool + generateDir string + guiAddress string + guiAuthentication string + guiAPIKey string +) + func main() { - var reset bool - var 
showVersion bool - var doUpgrade bool - var doUpgradeCheck bool - var noBrowser bool - var generateDir string - var guiAddress string - var guiAuthentication string - var guiAPIKey string flag.StringVar(&confDir, "home", getDefaultConfDir(), "Set configuration directory") flag.BoolVar(&reset, "reset", false, "Prepare to resync from cluster") flag.BoolVar(&showVersion, "version", false, "Show version") @@ -216,7 +225,7 @@ func main() { if upgrade.CompareVersions(rel.Tag, Version) <= 0 { l.Infof("No upgrade available (current %q >= latest %q).", Version, rel.Tag) - os.Exit(2) + os.Exit(exitNoUpgradeAvailable) } l.Infof("Upgrade available (current %q < latest %q)", Version, rel.Tag) @@ -233,12 +242,21 @@ func main() { } } - var err error - lockPort, err = getLockPort() - if err != nil { - l.Fatalln("Opening lock port:", err) + if reset { + resetRepositories() + return } + if os.Getenv("STNORESTART") != "" { + syncthingMain() + } else { + monitorMain() + } +} + +func syncthingMain() { + var err error + if len(os.Getenv("GOGC")) == 0 { debug.SetGCPercent(25) } @@ -251,7 +269,7 @@ func main() { events.Default.Log(events.Starting, map[string]string{"home": confDir}) - if _, err := os.Stat(confDir); err != nil && confDir == getDefaultConfDir() { + if _, err = os.Stat(confDir); err != nil && confDir == getDefaultConfDir() { // We are supposed to use the default configuration directory. It // doesn't exist. 
In the past our default has been ~/.syncthing, so if // that directory exists we move it to the new default location and @@ -346,15 +364,6 @@ func main() { l.Infof("Edit %s to taste or use the GUI\n", cfgFile) } - if reset { - resetRepositories() - return - } - - if len(os.Getenv("STRESTART")) > 0 { - waitForParentExit() - } - if profiler := os.Getenv("STPROFILER"); len(profiler) > 0 { go func() { l.Debugln("Starting profiler on", profiler) @@ -585,9 +594,10 @@ nextRepo: events.Default.Log(events.StartupComplete, nil) go generateEvents() - <-stop + code := <-stop l.Okln("Exiting") + os.Exit(code) } func generateEvents() { @@ -597,25 +607,6 @@ func generateEvents() { } } -func waitForParentExit() { - l.Infoln("Waiting for parent to exit...") - lockPortStr := os.Getenv("STRESTART") - lockPort, err := strconv.Atoi(lockPortStr) - if err != nil { - l.Warnln("Invalid lock port %q: %v", lockPortStr, err) - } - // Wait for the listen address to become free, indicating that the parent has exited. 
- for { - ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", lockPort)) - if err == nil { - ln.Close() - break - } - time.Sleep(250 * time.Millisecond) - } - l.Infoln("Continuing") -} - func setupUPnP() { if len(cfg.Options.ListenAddress) == 1 { _, portStr, err := net.SplitHostPort(cfg.Options.ListenAddress[0]) @@ -742,40 +733,12 @@ func archiveLegacyConfig() { func restart() { l.Infoln("Restarting") - if os.Getenv("SMF_FMRI") != "" || os.Getenv("STNORESTART") != "" { - // Solaris SMF - l.Infoln("Service manager detected; exit instead of restart") - stop <- true - return - } - - env := os.Environ() - newEnv := make([]string, 0, len(env)) - for _, s := range env { - if !strings.HasPrefix(s, "STRESTART=") { - newEnv = append(newEnv, s) - } - } - newEnv = append(newEnv, fmt.Sprintf("STRESTART=%d", lockPort)) - - pgm, err := exec.LookPath(os.Args[0]) - if err != nil { - l.Warnln("Cannot restart:", err) - return - } - proc, err := os.StartProcess(pgm, os.Args, &os.ProcAttr{ - Env: newEnv, - Files: []*os.File{os.Stdin, os.Stdout, os.Stderr}, - }) - if err != nil { - l.Fatalln(err) - } - proc.Release() - stop <- true + stop <- exitRestarting } func shutdown() { - stop <- true + l.Infoln("Shutting down") + stop <- exitSuccess } var saveConfigCh = make(chan struct{}) @@ -1129,16 +1092,6 @@ func getFreePort(host string, ports ...int) (int, error) { return addr.Port, nil } -func getLockPort() (int, error) { - var err error - lockConn, err = net.ListenTCP("tcp", &net.TCPAddr{IP: net.IP{127, 0, 0, 1}}) - if err != nil { - return 0, err - } - addr := lockConn.Addr().(*net.TCPAddr) - return addr.Port, nil -} - func overrideGUIConfig(originalCfg config.GUIConfiguration, address, authentication, apikey string) config.GUIConfiguration { // Make a copy of the config cfg := originalCfg diff --git a/cmd/syncthing/monitor.go b/cmd/syncthing/monitor.go new file mode 100644 index 000000000..9c5c11bd3 --- /dev/null +++ b/cmd/syncthing/monitor.go @@ -0,0 +1,144 @@ +package main + 
+import ( + "bufio" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" + "time" +) + +var ( + stdoutFirstLines []string // The first 10 lines of stdout + stdoutLastLines []string // The last 50 lines of stdout + stdoutMut sync.Mutex +) + +const ( + countRestarts = 5 + loopThreshold = 15 * time.Second +) + +func monitorMain() { + os.Setenv("STNORESTART", "yes") + l.SetPrefix("[monitor] ") + + args := os.Args + var restarts [countRestarts]time.Time + + for { + if t := time.Since(restarts[0]); t < loopThreshold { + l.Warnf("%d restarts in %v; not retrying further", countRestarts, t) + os.Exit(exitError) + } + + copy(restarts[0:], restarts[1:]) + restarts[len(restarts)-1] = time.Now() + + cmd := exec.Command(args[0], args[1:]...) + + stderr, err := cmd.StderrPipe() + if err != nil { + l.Fatalln(err) + } + + stdout, err := cmd.StdoutPipe() + if err != nil { + l.Fatalln(err) + } + + l.Infoln("Starting syncthing") + err = cmd.Start() + if err != nil { + l.Fatalln(err) + } + + stdoutMut.Lock() + stdoutFirstLines = make([]string, 0, 10) + stdoutLastLines = make([]string, 0, 50) + stdoutMut.Unlock() + + go copyStderr(stderr) + go copyStdout(stdout) + + err = cmd.Wait() + if err == nil { + // Successful exit indicates an intentional shutdown + return + } + + l.Infoln("Syncthing exited:", err) + time.Sleep(1 * time.Second) + } +} + +func copyStderr(stderr io.ReadCloser) { + br := bufio.NewReader(stderr) + + var panicFd *os.File + for { + line, err := br.ReadString('\n') + if err != nil { + if err != io.EOF { + l.Warnln("stderr:", err) + } + return + } + + if panicFd == nil { + os.Stderr.WriteString(line) + + if strings.HasPrefix(line, "panic:") || strings.HasPrefix(line, "fatal error:") { + panicFd, err = os.Create(filepath.Join(confDir, time.Now().Format("panic-20060102-150405.log"))) + if err != nil { + l.Warnln("Create panic log:", err) + continue + } + + l.Warnf("Panic detected, writing to \"%s\"", panicFd.Name()) + l.Warnln("Please create an issue at 
https://github.com/syncthing/syncthing/issues/ with the panic log attached") + + stdoutMut.Lock() + for _, line := range stdoutFirstLines { + panicFd.WriteString(line) + } + panicFd.WriteString("...\n") + for _, line := range stdoutLastLines { + panicFd.WriteString(line) + } + } + } + + if panicFd != nil { + panicFd.WriteString(line) + } + } +} + +func copyStdout(stderr io.ReadCloser) { + br := bufio.NewReader(stderr) + for { + line, err := br.ReadString('\n') + if err != nil { + if err != io.EOF { + l.Warnln("stdout:", err) + } + return + } + + stdoutMut.Lock() + if len(stdoutFirstLines) < cap(stdoutFirstLines) { + stdoutFirstLines = append(stdoutFirstLines, line) + } + if l := len(stdoutLastLines); l == cap(stdoutLastLines) { + stdoutLastLines = stdoutLastLines[:l-1] + } + stdoutLastLines = append(stdoutLastLines, line) + stdoutMut.Unlock() + + os.Stdout.WriteString(line) + } +} diff --git a/cmd/syncthing/perfstats_unix.go b/cmd/syncthing/perfstats_unix.go index 403de2693..48746a6a8 100644 --- a/cmd/syncthing/perfstats_unix.go +++ b/cmd/syncthing/perfstats_unix.go @@ -15,7 +15,7 @@ import ( ) func init() { - if os.Getenv("STPERFSTATS") != "" { + if innerProcess && os.Getenv("STPERFSTATS") != "" { go savePerfStats(fmt.Sprintf("perfstats-%d.csv", syscall.Getpid())) } }