2020-10-07 10:05:13 +02:00
|
|
|
// Copyright (C) 2020 The Syncthing Authors.
|
|
|
|
//
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
|
|
// You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
|
|
|
|
|
|
package ur
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"context"
|
|
|
|
"encoding/json"
|
|
|
|
"net/http"
|
2021-07-27 21:27:52 +02:00
|
|
|
"runtime/pprof"
|
|
|
|
"strings"
|
2020-10-07 10:05:13 +02:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/syncthing/syncthing/lib/build"
|
|
|
|
"github.com/syncthing/syncthing/lib/config"
|
|
|
|
"github.com/syncthing/syncthing/lib/dialer"
|
|
|
|
"github.com/syncthing/syncthing/lib/events"
|
2020-12-22 20:17:14 +01:00
|
|
|
"github.com/syncthing/syncthing/lib/svcutil"
|
2020-10-07 10:05:13 +02:00
|
|
|
|
2020-11-17 13:19:04 +01:00
|
|
|
"github.com/thejerf/suture/v4"
|
2020-10-07 10:05:13 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
var (
|
|
|
|
// When a specific failure first occurs, it is delayed by minDelay. If
|
|
|
|
// more of the same failures occurs those are further delayed and
|
|
|
|
// aggregated for maxDelay.
|
2020-11-03 12:29:33 +01:00
|
|
|
minDelay = 10 * time.Second
|
|
|
|
maxDelay = time.Minute
|
|
|
|
sendTimeout = time.Minute
|
2020-12-22 20:17:14 +01:00
|
|
|
finalSendTimeout = svcutil.ServiceTimeout / 2
|
2020-11-03 12:29:33 +01:00
|
|
|
evChanClosed = "failure event channel closed"
|
|
|
|
invalidEventDataType = "failure event data is not a string"
|
2020-10-07 10:05:13 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
type FailureReport struct {
|
2021-07-27 21:27:52 +02:00
|
|
|
FailureData
|
|
|
|
Count int
|
|
|
|
Version string
|
|
|
|
}
|
|
|
|
|
|
|
|
// FailureData is the payload of a failure event. Failures are aggregated by
// Description; Goroutines and Extra carry optional additional context.
type FailureData struct {
	Description string
	Goroutines  string
	Extra       map[string]string
}
|
|
|
|
|
|
|
|
func FailureDataWithGoroutines(description string) FailureData {
|
2021-08-25 13:16:55 +08:00
|
|
|
var buf strings.Builder
|
|
|
|
pprof.Lookup("goroutine").WriteTo(&buf, 1)
|
2021-07-27 21:27:52 +02:00
|
|
|
return FailureData{
|
|
|
|
Description: description,
|
|
|
|
Goroutines: buf.String(),
|
2022-08-03 10:41:26 +02:00
|
|
|
Extra: make(map[string]string),
|
2021-07-27 21:27:52 +02:00
|
|
|
}
|
2020-10-07 10:05:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
type FailureHandler interface {
|
|
|
|
suture.Service
|
|
|
|
config.Committer
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewFailureHandler(cfg config.Wrapper, evLogger events.Logger) FailureHandler {
|
2020-11-17 13:19:04 +01:00
|
|
|
return &failureHandler{
|
2020-10-07 10:05:13 +02:00
|
|
|
cfg: cfg,
|
|
|
|
evLogger: evLogger,
|
|
|
|
optsChan: make(chan config.OptionsConfiguration),
|
2020-11-03 12:29:33 +01:00
|
|
|
buf: make(map[string]*failureStat),
|
2020-10-07 10:05:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type failureHandler struct {
|
|
|
|
cfg config.Wrapper
|
|
|
|
evLogger events.Logger
|
|
|
|
optsChan chan config.OptionsConfiguration
|
|
|
|
buf map[string]*failureStat
|
|
|
|
}
|
|
|
|
|
|
|
|
type failureStat struct {
|
|
|
|
first, last time.Time
|
|
|
|
count int
|
2021-07-27 21:27:52 +02:00
|
|
|
data FailureData
|
2020-10-07 10:05:13 +02:00
|
|
|
}
|
|
|
|
|
2020-11-17 13:19:04 +01:00
|
|
|
func (h *failureHandler) Serve(ctx context.Context) error {
|
2021-01-15 15:43:34 +01:00
|
|
|
cfg := h.cfg.Subscribe(h)
|
2020-10-07 10:05:13 +02:00
|
|
|
defer h.cfg.Unsubscribe(h)
|
2021-01-15 15:43:34 +01:00
|
|
|
url, sub, evChan := h.applyOpts(cfg.Options, nil)
|
2020-10-07 10:05:13 +02:00
|
|
|
|
|
|
|
var err error
|
|
|
|
timer := time.NewTimer(minDelay)
|
|
|
|
resetTimer := make(chan struct{})
|
2021-01-15 15:43:34 +01:00
|
|
|
for err == nil {
|
2020-10-07 10:05:13 +02:00
|
|
|
select {
|
|
|
|
case opts := <-h.optsChan:
|
2021-01-15 15:43:34 +01:00
|
|
|
url, sub, evChan = h.applyOpts(opts, sub)
|
2020-11-03 12:29:33 +01:00
|
|
|
case e, ok := <-evChan:
|
|
|
|
if !ok {
|
|
|
|
// Just to be safe - shouldn't ever happen, as
|
|
|
|
// evChan is set to nil when unsubscribing.
|
2021-07-27 21:27:52 +02:00
|
|
|
h.addReport(FailureData{Description: evChanClosed}, time.Now())
|
2020-11-03 12:29:33 +01:00
|
|
|
evChan = nil
|
|
|
|
continue
|
|
|
|
}
|
2021-07-27 21:27:52 +02:00
|
|
|
var data FailureData
|
|
|
|
switch d := e.Data.(type) {
|
|
|
|
case string:
|
|
|
|
data.Description = d
|
|
|
|
case FailureData:
|
|
|
|
data = d
|
|
|
|
default:
|
2020-11-03 12:29:33 +01:00
|
|
|
// Same here, shouldn't ever happen.
|
2021-07-27 21:27:52 +02:00
|
|
|
h.addReport(FailureData{Description: invalidEventDataType}, time.Now())
|
2020-11-03 12:29:33 +01:00
|
|
|
continue
|
2020-10-07 10:05:13 +02:00
|
|
|
}
|
2021-07-27 21:27:52 +02:00
|
|
|
h.addReport(data, e.Time)
|
2020-10-07 10:05:13 +02:00
|
|
|
case <-timer.C:
|
|
|
|
reports := make([]FailureReport, 0, len(h.buf))
|
|
|
|
now := time.Now()
|
|
|
|
for descr, stat := range h.buf {
|
|
|
|
if now.Sub(stat.last) > minDelay || now.Sub(stat.first) > maxDelay {
|
2021-07-27 21:27:52 +02:00
|
|
|
reports = append(reports, newFailureReport(stat))
|
2020-10-07 10:05:13 +02:00
|
|
|
delete(h.buf, descr)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(reports) > 0 {
|
|
|
|
// Lets keep process events/configs while it might be timing out for a while
|
|
|
|
go func() {
|
|
|
|
sendFailureReports(ctx, reports, url)
|
|
|
|
select {
|
|
|
|
case resetTimer <- struct{}{}:
|
|
|
|
case <-ctx.Done():
|
|
|
|
}
|
|
|
|
}()
|
2020-11-28 20:09:22 +01:00
|
|
|
} else {
|
|
|
|
timer.Reset(minDelay)
|
2020-10-07 10:05:13 +02:00
|
|
|
}
|
|
|
|
case <-resetTimer:
|
|
|
|
timer.Reset(minDelay)
|
|
|
|
case <-ctx.Done():
|
2021-01-15 15:43:34 +01:00
|
|
|
err = ctx.Err()
|
2020-10-07 10:05:13 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if sub != nil {
|
|
|
|
sub.Unsubscribe()
|
2021-02-05 11:21:14 +01:00
|
|
|
if len(h.buf) > 0 {
|
|
|
|
reports := make([]FailureReport, 0, len(h.buf))
|
2021-07-27 21:27:52 +02:00
|
|
|
for _, stat := range h.buf {
|
|
|
|
reports = append(reports, newFailureReport(stat))
|
2021-02-05 11:21:14 +01:00
|
|
|
}
|
|
|
|
timeout, cancel := context.WithTimeout(context.Background(), finalSendTimeout)
|
|
|
|
defer cancel()
|
|
|
|
sendFailureReports(timeout, reports, url)
|
2020-12-22 20:17:14 +01:00
|
|
|
}
|
2020-10-07 10:05:13 +02:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-01-15 15:43:34 +01:00
|
|
|
func (h *failureHandler) applyOpts(opts config.OptionsConfiguration, sub events.Subscription) (string, events.Subscription, <-chan events.Event) {
|
|
|
|
// Sub nil checks just for safety - config updates can be racy.
|
|
|
|
url := opts.CRURL + "/failure"
|
|
|
|
if opts.URAccepted > 0 {
|
|
|
|
if sub == nil {
|
|
|
|
sub = h.evLogger.Subscribe(events.Failure)
|
|
|
|
}
|
|
|
|
return url, sub, sub.C()
|
|
|
|
}
|
|
|
|
if sub != nil {
|
|
|
|
sub.Unsubscribe()
|
|
|
|
}
|
|
|
|
return url, nil, nil
|
|
|
|
}
|
|
|
|
|
2021-07-27 21:27:52 +02:00
|
|
|
func (h *failureHandler) addReport(data FailureData, evTime time.Time) {
|
|
|
|
if stat, ok := h.buf[data.Description]; ok {
|
2020-11-03 12:29:33 +01:00
|
|
|
stat.last = evTime
|
|
|
|
stat.count++
|
|
|
|
return
|
|
|
|
}
|
2021-07-27 21:27:52 +02:00
|
|
|
h.buf[data.Description] = &failureStat{
|
2020-11-03 12:29:33 +01:00
|
|
|
first: evTime,
|
|
|
|
last: evTime,
|
|
|
|
count: 1,
|
2021-07-27 21:27:52 +02:00
|
|
|
data: data,
|
2020-11-03 12:29:33 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-07 10:05:13 +02:00
|
|
|
func (h *failureHandler) CommitConfiguration(from, to config.Configuration) bool {
|
|
|
|
if from.Options.CREnabled != to.Options.CREnabled || from.Options.CRURL != to.Options.CRURL {
|
|
|
|
h.optsChan <- to.Options
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2022-07-28 17:32:45 +02:00
|
|
|
func (*failureHandler) String() string {
|
2020-10-07 10:05:13 +02:00
|
|
|
return "FailureHandler"
|
|
|
|
}
|
|
|
|
|
|
|
|
func sendFailureReports(ctx context.Context, reports []FailureReport, url string) {
|
|
|
|
var b bytes.Buffer
|
|
|
|
if err := json.NewEncoder(&b).Encode(reports); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
client := &http.Client{
|
|
|
|
Transport: &http.Transport{
|
|
|
|
DialContext: dialer.DialContext,
|
|
|
|
Proxy: http.ProxyFromEnvironment,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
reqCtx, reqCancel := context.WithTimeout(ctx, sendTimeout)
|
|
|
|
defer reqCancel()
|
2021-02-05 11:21:14 +01:00
|
|
|
req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, url, &b)
|
2020-10-07 10:05:13 +02:00
|
|
|
if err != nil {
|
|
|
|
l.Infoln("Failed to send failure report:", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
req.Header.Set("Content-Type", "application/json")
|
|
|
|
|
|
|
|
resp, err := client.Do(req)
|
|
|
|
if err != nil {
|
|
|
|
l.Infoln("Failed to send failure report:", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
resp.Body.Close()
|
|
|
|
}
|
2020-12-22 20:17:14 +01:00
|
|
|
|
2021-07-27 21:27:52 +02:00
|
|
|
func newFailureReport(stat *failureStat) FailureReport {
|
2020-12-22 20:17:14 +01:00
|
|
|
return FailureReport{
|
2021-07-27 21:27:52 +02:00
|
|
|
FailureData: stat.data,
|
|
|
|
Count: stat.count,
|
2020-12-22 20:17:14 +01:00
|
|
|
Version: build.LongVersion,
|
|
|
|
}
|
|
|
|
}
|