Use a monitor process to handle panics and restarts (fixes #586)

This commit is contained in:
Jakob Borg 2014-09-02 13:08:24 +02:00
parent e9c7970ea4
commit 10f0713257
4 changed files with 189 additions and 91 deletions

View File

@ -14,7 +14,8 @@ import (
)
func init() {
if os.Getenv("STHEAPPROFILE") != "" {
if innerProcess && os.Getenv("STHEAPPROFILE") != "" {
l.Debugln("Starting heap profiling")
go saveHeapProfiles()
}
}

View File

@ -16,7 +16,6 @@ import (
"net/http"
_ "net/http/pprof"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
@ -52,7 +51,15 @@ var (
GoArchExtra string // "", "v5", "v6", "v7"
)
// Process exit codes. The monitor process treats a zero exit status from
// the child as an intentional shutdown and any non-zero status as a reason
// to start the child again.
const (
exitSuccess = 0 // sent on the stop channel by shutdown()
exitError = 1 // used by the monitor when it gives up restarting
exitNoUpgradeAvailable = 2 // upgrade check found nothing newer than Version
exitRestarting = 3 // sent on the stop channel by restart()
)
var l = logger.DefaultLogger
var innerProcess = os.Getenv("STNORESTART") != ""
func init() {
if Version != "unknown-dev" {
@ -80,10 +87,8 @@ var (
confDir string
logFlags int = log.Ltime
rateBucket *ratelimit.Bucket
stop = make(chan bool)
stop = make(chan int)
discoverer *discover.Discoverer
lockConn *net.TCPListener
lockPort int
externalPort int
cert tls.Certificate
)
@ -152,16 +157,20 @@ func init() {
rand.Seed(time.Now().UnixNano())
}
// Command line options
var (
reset bool
showVersion bool
doUpgrade bool
doUpgradeCheck bool
noBrowser bool
generateDir string
guiAddress string
guiAuthentication string
guiAPIKey string
)
func main() {
var reset bool
var showVersion bool
var doUpgrade bool
var doUpgradeCheck bool
var noBrowser bool
var generateDir string
var guiAddress string
var guiAuthentication string
var guiAPIKey string
flag.StringVar(&confDir, "home", getDefaultConfDir(), "Set configuration directory")
flag.BoolVar(&reset, "reset", false, "Prepare to resync from cluster")
flag.BoolVar(&showVersion, "version", false, "Show version")
@ -216,7 +225,7 @@ func main() {
if upgrade.CompareVersions(rel.Tag, Version) <= 0 {
l.Infof("No upgrade available (current %q >= latest %q).", Version, rel.Tag)
os.Exit(2)
os.Exit(exitNoUpgradeAvailable)
}
l.Infof("Upgrade available (current %q < latest %q)", Version, rel.Tag)
@ -233,12 +242,21 @@ func main() {
}
}
var err error
lockPort, err = getLockPort()
if err != nil {
l.Fatalln("Opening lock port:", err)
if reset {
resetRepositories()
return
}
if os.Getenv("STNORESTART") != "" {
syncthingMain()
} else {
monitorMain()
}
}
func syncthingMain() {
var err error
if len(os.Getenv("GOGC")) == 0 {
debug.SetGCPercent(25)
}
@ -251,7 +269,7 @@ func main() {
events.Default.Log(events.Starting, map[string]string{"home": confDir})
if _, err := os.Stat(confDir); err != nil && confDir == getDefaultConfDir() {
if _, err = os.Stat(confDir); err != nil && confDir == getDefaultConfDir() {
// We are supposed to use the default configuration directory. It
// doesn't exist. In the past our default has been ~/.syncthing, so if
// that directory exists we move it to the new default location and
@ -346,15 +364,6 @@ func main() {
l.Infof("Edit %s to taste or use the GUI\n", cfgFile)
}
if reset {
resetRepositories()
return
}
if len(os.Getenv("STRESTART")) > 0 {
waitForParentExit()
}
if profiler := os.Getenv("STPROFILER"); len(profiler) > 0 {
go func() {
l.Debugln("Starting profiler on", profiler)
@ -585,9 +594,10 @@ nextRepo:
events.Default.Log(events.StartupComplete, nil)
go generateEvents()
<-stop
code := <-stop
l.Okln("Exiting")
os.Exit(code)
}
func generateEvents() {
@ -597,25 +607,6 @@ func generateEvents() {
}
}
func waitForParentExit() {
l.Infoln("Waiting for parent to exit...")
lockPortStr := os.Getenv("STRESTART")
lockPort, err := strconv.Atoi(lockPortStr)
if err != nil {
l.Warnln("Invalid lock port %q: %v", lockPortStr, err)
}
// Wait for the listen address to become free, indicating that the parent has exited.
for {
ln, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", lockPort))
if err == nil {
ln.Close()
break
}
time.Sleep(250 * time.Millisecond)
}
l.Infoln("Continuing")
}
func setupUPnP() {
if len(cfg.Options.ListenAddress) == 1 {
_, portStr, err := net.SplitHostPort(cfg.Options.ListenAddress[0])
@ -742,40 +733,12 @@ func archiveLegacyConfig() {
func restart() {
l.Infoln("Restarting")
if os.Getenv("SMF_FMRI") != "" || os.Getenv("STNORESTART") != "" {
// Solaris SMF
l.Infoln("Service manager detected; exit instead of restart")
stop <- true
return
}
env := os.Environ()
newEnv := make([]string, 0, len(env))
for _, s := range env {
if !strings.HasPrefix(s, "STRESTART=") {
newEnv = append(newEnv, s)
}
}
newEnv = append(newEnv, fmt.Sprintf("STRESTART=%d", lockPort))
pgm, err := exec.LookPath(os.Args[0])
if err != nil {
l.Warnln("Cannot restart:", err)
return
}
proc, err := os.StartProcess(pgm, os.Args, &os.ProcAttr{
Env: newEnv,
Files: []*os.File{os.Stdin, os.Stdout, os.Stderr},
})
if err != nil {
l.Fatalln(err)
}
proc.Release()
stop <- true
stop <- exitRestarting
}
func shutdown() {
stop <- true
l.Infoln("Shutting down")
stop <- exitSuccess
}
var saveConfigCh = make(chan struct{})
@ -1129,16 +1092,6 @@ func getFreePort(host string, ports ...int) (int, error) {
return addr.Port, nil
}
func getLockPort() (int, error) {
var err error
lockConn, err = net.ListenTCP("tcp", &net.TCPAddr{IP: net.IP{127, 0, 0, 1}})
if err != nil {
return 0, err
}
addr := lockConn.Addr().(*net.TCPAddr)
return addr.Port, nil
}
func overrideGUIConfig(originalCfg config.GUIConfiguration, address, authentication, apikey string) config.GUIConfiguration {
// Make a copy of the config
cfg := originalCfg

144
cmd/syncthing/monitor.go Normal file
View File

@ -0,0 +1,144 @@
package main
import (
"bufio"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
)
// Captured child stdout, included as context in a panic log. The slices
// are re-created by monitorMain for every child process start; their
// capacities (10 and 50) are what bound the number of lines retained.
var (
stdoutFirstLines []string // The first 10 lines of stdout
stdoutLastLines []string // The last 50 lines of stdout
stdoutMut sync.Mutex // guards stdoutFirstLines and stdoutLastLines
)
// Restart loop detection: monitorMain remembers the start times of the
// last countRestarts child processes and gives up when the oldest of them
// is less than loopThreshold ago.
const (
countRestarts = 5
loopThreshold = 15 * time.Second
)
// monitorMain runs syncthing as a child process (same binary, same
// arguments, with STNORESTART set so the child runs the real main) and
// restarts it whenever it exits with a non-zero status. A zero exit status
// is taken as an intentional shutdown and ends the monitor. If
// countRestarts child starts happen within loopThreshold the monitor gives
// up and exits with exitError.
func monitorMain() {
	os.Setenv("STNORESTART", "yes")
	l.SetPrefix("[monitor] ")

	args := os.Args
	var restarts [countRestarts]time.Time

	for {
		// restarts[0] is the oldest remembered start time; the zero value
		// is far in the past so the first iterations always pass.
		if t := time.Since(restarts[0]); t < loopThreshold {
			l.Warnf("%d restarts in %v; not retrying further", countRestarts, t)
			os.Exit(exitError)
		}

		// Shift the window and record this start.
		copy(restarts[0:], restarts[1:])
		restarts[len(restarts)-1] = time.Now()

		cmd := exec.Command(args[0], args[1:]...)

		stderr, err := cmd.StderrPipe()
		if err != nil {
			l.Fatalln(err)
		}

		stdout, err := cmd.StdoutPipe()
		if err != nil {
			l.Fatalln(err)
		}

		l.Infoln("Starting syncthing")
		if err = cmd.Start(); err != nil {
			l.Fatalln(err)
		}

		// Fresh capture buffers for this child.
		stdoutMut.Lock()
		stdoutFirstLines = make([]string, 0, 10)
		stdoutLastLines = make([]string, 0, 50)
		stdoutMut.Unlock()

		var copiers sync.WaitGroup
		copiers.Add(2)
		go func() {
			defer copiers.Done()
			copyStderr(stderr)
		}()
		go func() {
			defer copiers.Done()
			copyStdout(stdout)
		}()

		// Wait closes the pipes, so it must not be called until both
		// copiers have read to EOF; otherwise the tail of the output
		// (typically the panic trace) can be lost.
		copiers.Wait()

		err = cmd.Wait()
		if err == nil {
			// Successful exit indicates an intentional shutdown
			return
		}

		l.Infoln("Syncthing exited:", err)
		time.Sleep(1 * time.Second)
	}
}
// copyStderr relays the child's stderr to our own stderr, one line at a
// time, until the pipe reports EOF or an error.
//
// When a line starting with "panic:" or "fatal error:" is seen, a panic log
// named panic-<timestamp>.log is created in confDir. The captured first and
// last stdout lines are written to it as context, followed by the
// triggering line and every later stderr line. From that point on stderr
// lines go to the panic log only. If the log cannot be created, relaying to
// stderr simply continues.
func copyStderr(stderr io.ReadCloser) {
	br := bufio.NewReader(stderr)

	var panicFd *os.File
	defer func() {
		// Make sure the panic log is flushed to disk and the descriptor
		// is not leaked across child restarts.
		if panicFd != nil {
			panicFd.Close()
		}
	}()

	for {
		line, err := br.ReadString('\n')
		if err != nil {
			if err != io.EOF {
				l.Warnln("stderr:", err)
			}
			return
		}

		if panicFd == nil {
			os.Stderr.WriteString(line)

			if strings.HasPrefix(line, "panic:") || strings.HasPrefix(line, "fatal error:") {
				panicFd, err = os.Create(filepath.Join(confDir, time.Now().Format("panic-20060102-150405.log")))
				if err != nil {
					// panicFd is nil again here, so later lines keep
					// going to stderr.
					l.Warnln("Create panic log:", err)
					continue
				}

				l.Warnf("Panic detected, writing to \"%s\"", panicFd.Name())
				l.Warnln("Please create an issue at https://github.com/syncthing/syncthing/issues/ with the panic log attached")

				// The lock must be released again: copyStdout and the
				// next monitorMain iteration both need it, and would
				// otherwise block forever after the first panic.
				stdoutMut.Lock()
				for _, s := range stdoutFirstLines {
					panicFd.WriteString(s)
				}
				panicFd.WriteString("...\n")
				for _, s := range stdoutLastLines {
					panicFd.WriteString(s)
				}
				stdoutMut.Unlock()
			}
		}

		if panicFd != nil {
			panicFd.WriteString(line)
		}
	}
}
// copyStdout relays the child's stdout to our own stdout, one line at a
// time, until the pipe reports EOF or an error. Along the way it records
// the first lines (up to cap(stdoutFirstLines)) and the most recent lines
// (up to cap(stdoutLastLines)) so that copyStderr can include them in a
// panic log.
func copyStdout(stdout io.ReadCloser) {
	br := bufio.NewReader(stdout)

	for {
		line, err := br.ReadString('\n')
		if err != nil {
			if err != io.EOF {
				l.Warnln("stdout:", err)
			}
			return
		}

		stdoutMut.Lock()
		if len(stdoutFirstLines) < cap(stdoutFirstLines) {
			stdoutFirstLines = append(stdoutFirstLines, line)
		}
		if n := len(stdoutLastLines); n > 0 && n == cap(stdoutLastLines) {
			// Buffer is full: discard the oldest line by shifting
			// everything down one slot, keeping the backing array (and
			// thus the capacity that bounds the buffer).
			copy(stdoutLastLines, stdoutLastLines[1:])
			stdoutLastLines = stdoutLastLines[:n-1]
		}
		stdoutLastLines = append(stdoutLastLines, line)
		stdoutMut.Unlock()

		os.Stdout.WriteString(line)
	}
}

View File

@ -15,7 +15,7 @@ import (
)
func init() {
if os.Getenv("STPERFSTATS") != "" {
if innerProcess && os.Getenv("STPERFSTATS") != "" {
go savePerfStats(fmt.Sprintf("perfstats-%d.csv", syscall.Getpid()))
}
}