diff --git a/lib/relay/client/dynamic.go b/lib/relay/client/dynamic.go index be9fcc897..d3781e432 100644 --- a/lib/relay/client/dynamic.go +++ b/lib/relay/client/dynamic.go @@ -93,7 +93,8 @@ func (c *dynamicClient) serve(ctx context.Context) error { c.client = client c.mut.Unlock() - c.client.Serve(ctx) + err = c.client.Serve(ctx) + l.Debugf("Disconnected from %s://%s: %v", c.client.URI().Scheme, c.client.URI().Host, err) c.mut.Lock() c.client = nil diff --git a/lib/relay/client/static.go b/lib/relay/client/static.go index 4e6ec94a8..f35f6fc5d 100644 --- a/lib/relay/client/static.go +++ b/lib/relay/client/static.go @@ -48,7 +48,7 @@ func newStaticClient(uri *url.URL, certs []tls.Certificate, invitations chan pro func (c *staticClient) serve(ctx context.Context) error { if err := c.connect(ctx); err != nil { - l.Infof("Could not connect to relay %s: %s", c.uri, err) + l.Debugf("Could not connect to relay %s: %s", c.uri, err) return err } @@ -56,17 +56,16 @@ func (c *staticClient) serve(ctx context.Context) error { defer c.disconnect() if err := c.join(); err != nil { - l.Infof("Could not join relay %s: %s", c.uri, err) + l.Debugf("Could not join relay %s: %s", c.uri, err) return err } if err := c.conn.SetDeadline(time.Time{}); err != nil { - l.Infoln("Relay set deadline:", err) + l.Debugln("Relay set deadline:", err) return err } l.Infof("Joined relay %s://%s", c.uri.Scheme, c.uri.Host) - defer l.Infof("Disconnected from relay %s://%s", c.uri.Scheme, c.uri.Host) c.mut.Lock() c.connected = true @@ -88,7 +87,7 @@ func (c *staticClient) serve(ctx context.Context) error { switch msg := message.(type) { case protocol.Ping: if err := protocol.WriteMessage(c.conn, protocol.Pong{}); err != nil { - l.Infoln("Relay write:", err) + l.Debugln("Relay write:", err) return err } l.Debugln(c, "sent pong") @@ -106,20 +105,20 @@ func (c *staticClient) serve(ctx context.Context) error { } case protocol.RelayFull: - l.Infof("Disconnected from relay %s due to it becoming full.", c.uri) + l.Debugf("Disconnected from relay %s due to it becoming full.", c.uri) return errors.New("relay full") default: - l.Infoln("Relay: protocol error: unexpected message %v", msg) + l.Debugln("Relay: protocol error: unexpected message %v", msg) return fmt.Errorf("protocol error: unexpected message %v", msg) } case <-ctx.Done(): l.Debugln(c, "stopping") - return nil + return ctx.Err() case err := <-errorsc: - l.Infof("Disconnecting from relay %s due to error: %s", c.uri, err) + l.Debugf("Disconnecting from relay %s due to error: %s", c.uri, err) return err case <-timeout.C: diff --git a/lib/svcutil/svcutil.go b/lib/svcutil/svcutil.go index 4a373909f..546bea4b8 100644 --- a/lib/svcutil/svcutil.go +++ b/lib/svcutil/svcutil.go @@ -160,7 +160,7 @@ func SpecWithDebugLogger(l logger.Logger) suture.Spec { } func SpecWithInfoLogger(l logger.Logger) suture.Spec { - return spec(func(e suture.Event) { l.Infoln(e) }) + return spec(infoEventHook(l)) } func spec(eventHook suture.EventHook) suture.Spec { @@ -171,3 +171,35 @@ func spec(eventHook suture.EventHook) suture.Spec { DontPropagateTermination: false, } } + +// infoEventHook prints service failures and failures to stop services at level +// info. All other events and identical, consecutive failures are logged at +// debug only. +func infoEventHook(l logger.Logger) suture.EventHook { + var prevTerminate suture.EventServiceTerminate + return func(ei suture.Event) { + switch e := ei.(type) { + case suture.EventStopTimeout: + l.Infof("%s: Service %s failed to terminate in a timely manner", e.SupervisorName, e.ServiceName) + case suture.EventServicePanic: + l.Warnln("Caught a service panic, which shouldn't happen") + l.Infoln(e) + case suture.EventServiceTerminate: + msg := fmt.Sprintf("%s: service %s failed: %s", e.SupervisorName, e.ServiceName, e.Err) + if e.ServiceName == prevTerminate.ServiceName && e.Err == prevTerminate.Err { + l.Debugln(msg) + } else { + l.Infoln(msg) + } + prevTerminate = e + l.Debugln(e) // Contains some backoff statistics + case suture.EventBackoff: + l.Debugf("%s: exiting the backoff state.", e.SupervisorName) + case suture.EventResume: + l.Debugf("%s: too many service failures - entering the backoff state.", e.SupervisorName) + default: + l.Warnln("Unknown suture supervisor event type", e.Type()) + l.Infoln(e) + } + } +}