mirror of
https://github.com/octoleo/syncthing.git
synced 2025-01-10 10:16:24 +00:00
617 lines
13 KiB
Go
617 lines
13 KiB
Go
|
package suture
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"reflect"
|
||
|
"sync"
|
||
|
"testing"
|
||
|
"time"
|
||
|
)
|
||
|
|
||
|
const (
|
||
|
Happy = iota
|
||
|
Fail
|
||
|
Panic
|
||
|
Hang
|
||
|
UseStopChan
|
||
|
)
|
||
|
|
||
|
var everMultistarted = false
|
||
|
|
||
|
// Test that supervisors work perfectly when everything is hunky dory.
|
||
|
func TestTheHappyCase(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("A")
|
||
|
if s.String() != "A" {
|
||
|
t.Fatal("Can't get name from a supervisor")
|
||
|
}
|
||
|
service := NewService("B")
|
||
|
|
||
|
s.Add(service)
|
||
|
|
||
|
go s.Serve()
|
||
|
|
||
|
<-service.started
|
||
|
|
||
|
// If we stop the service, it just gets restarted
|
||
|
service.Stop()
|
||
|
<-service.started
|
||
|
|
||
|
// And it is shut down when we stop the supervisor
|
||
|
service.take <- UseStopChan
|
||
|
s.Stop()
|
||
|
<-service.stop
|
||
|
}
|
||
|
|
||
|
// Test that adding to a running supervisor does indeed start the service.
|
||
|
func TestAddingToRunningSupervisor(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("A1")
|
||
|
|
||
|
s.ServeBackground()
|
||
|
defer s.Stop()
|
||
|
|
||
|
service := NewService("B1")
|
||
|
s.Add(service)
|
||
|
|
||
|
<-service.started
|
||
|
|
||
|
services := s.Services()
|
||
|
if !reflect.DeepEqual([]Service{service}, services) {
|
||
|
t.Fatal("Can't get list of services as expected.")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Test what happens when services fail.
|
||
|
func TestFailures(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("A2")
|
||
|
s.failureThreshold = 3.5
|
||
|
|
||
|
go s.Serve()
|
||
|
defer func() {
|
||
|
// to avoid deadlocks during shutdown, we have to not try to send
|
||
|
// things out on channels while we're shutting down (this undoes the
|
||
|
// logFailure overide about 25 lines down)
|
||
|
s.logFailure = func(*Supervisor, Service, float64, float64, bool, interface{}, []byte) {}
|
||
|
s.Stop()
|
||
|
}()
|
||
|
s.sync()
|
||
|
|
||
|
service1 := NewService("B2")
|
||
|
service2 := NewService("C2")
|
||
|
|
||
|
s.Add(service1)
|
||
|
<-service1.started
|
||
|
s.Add(service2)
|
||
|
<-service2.started
|
||
|
|
||
|
nowFeeder := NewNowFeeder()
|
||
|
pastVal := time.Unix(1000000, 0)
|
||
|
nowFeeder.appendTimes(pastVal)
|
||
|
s.getNow = nowFeeder.getter
|
||
|
|
||
|
resumeChan := make(chan time.Time)
|
||
|
s.getAfterChan = func(d time.Duration) <-chan time.Time {
|
||
|
return resumeChan
|
||
|
}
|
||
|
|
||
|
failNotify := make(chan bool)
|
||
|
// use this to synchronize on here
|
||
|
s.logFailure = func(supervisor *Supervisor, s Service, cf float64, ft float64, r bool, error interface{}, stacktrace []byte) {
|
||
|
failNotify <- r
|
||
|
}
|
||
|
|
||
|
// All that setup was for this: Service1, please return now.
|
||
|
service1.take <- Fail
|
||
|
restarted := <-failNotify
|
||
|
<-service1.started
|
||
|
|
||
|
if !restarted || s.failures != 1 || s.lastFail != pastVal {
|
||
|
t.Fatal("Did not fail in the expected manner")
|
||
|
}
|
||
|
// Getting past this means the service was restarted.
|
||
|
service1.take <- Happy
|
||
|
|
||
|
// Service2, your turn.
|
||
|
service2.take <- Fail
|
||
|
nowFeeder.appendTimes(pastVal)
|
||
|
restarted = <-failNotify
|
||
|
<-service2.started
|
||
|
if !restarted || s.failures != 2 || s.lastFail != pastVal {
|
||
|
t.Fatal("Did not fail in the expected manner")
|
||
|
}
|
||
|
// And you're back. (That is, the correct service was restarted.)
|
||
|
service2.take <- Happy
|
||
|
|
||
|
// Now, one failureDecay later, is everything working correctly?
|
||
|
oneDecayLater := time.Unix(1000030, 0)
|
||
|
nowFeeder.appendTimes(oneDecayLater)
|
||
|
service2.take <- Fail
|
||
|
restarted = <-failNotify
|
||
|
<-service2.started
|
||
|
// playing a bit fast and loose here with floating point, but...
|
||
|
// we get 2 by taking the current failure value of 2, decaying it
|
||
|
// by one interval, which cuts it in half to 1, then adding 1 again,
|
||
|
// all of which "should" be precise
|
||
|
if !restarted || s.failures != 2 || s.lastFail != oneDecayLater {
|
||
|
t.Fatal("Did not decay properly", s.lastFail, oneDecayLater)
|
||
|
}
|
||
|
|
||
|
// For a change of pace, service1 would you be so kind as to panic?
|
||
|
nowFeeder.appendTimes(oneDecayLater)
|
||
|
service1.take <- Panic
|
||
|
restarted = <-failNotify
|
||
|
<-service1.started
|
||
|
if !restarted || s.failures != 3 || s.lastFail != oneDecayLater {
|
||
|
t.Fatal("Did not correctly recover from a panic")
|
||
|
}
|
||
|
|
||
|
nowFeeder.appendTimes(oneDecayLater)
|
||
|
backingoff := make(chan bool)
|
||
|
s.logBackoff = func(s *Supervisor, backingOff bool) {
|
||
|
backingoff <- backingOff
|
||
|
}
|
||
|
|
||
|
// And with this failure, we trigger the backoff code.
|
||
|
service1.take <- Fail
|
||
|
backoff := <-backingoff
|
||
|
restarted = <-failNotify
|
||
|
|
||
|
if !backoff || restarted || s.failures != 4 {
|
||
|
t.Fatal("Broke past the threshold but did not log correctly", s.failures)
|
||
|
}
|
||
|
if service1.existing != 0 {
|
||
|
t.Fatal("service1 still exists according to itself?")
|
||
|
}
|
||
|
|
||
|
// service2 is still running, because we don't shut anything down in a
|
||
|
// backoff, we just stop restarting.
|
||
|
service2.take <- Happy
|
||
|
|
||
|
var correct bool
|
||
|
timer := time.NewTimer(time.Millisecond * 10)
|
||
|
// verify the service has not been restarted
|
||
|
// hard to get around race conditions here without simply using a timer...
|
||
|
select {
|
||
|
case service1.take <- Happy:
|
||
|
correct = false
|
||
|
case <-timer.C:
|
||
|
correct = true
|
||
|
}
|
||
|
if !correct {
|
||
|
t.Fatal("Restarted the service during the backoff interval")
|
||
|
}
|
||
|
|
||
|
// tell the supervisor the restart interval has passed
|
||
|
resumeChan <- time.Time{}
|
||
|
backoff = <-backingoff
|
||
|
<-service1.started
|
||
|
s.sync()
|
||
|
if s.failures != 0 {
|
||
|
t.Fatal("Did not reset failure count after coming back from timeout.")
|
||
|
}
|
||
|
|
||
|
nowFeeder.appendTimes(oneDecayLater)
|
||
|
service1.take <- Fail
|
||
|
restarted = <-failNotify
|
||
|
<-service1.started
|
||
|
if !restarted || backoff {
|
||
|
t.Fatal("For some reason, got that we were backing off again.", restarted, backoff)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestRunningAlreadyRunning(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("A3")
|
||
|
go s.Serve()
|
||
|
defer s.Stop()
|
||
|
|
||
|
// ensure the supervisor has made it to its main loop
|
||
|
s.sync()
|
||
|
var errored bool
|
||
|
func() {
|
||
|
defer func() {
|
||
|
if r := recover(); r != nil {
|
||
|
errored = true
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
s.Serve()
|
||
|
}()
|
||
|
if !errored {
|
||
|
t.Fatal("Supervisor failed to prevent itself from double-running.")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestFullConstruction(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := New("Moo", Spec{
|
||
|
Log: func(string) {},
|
||
|
FailureDecay: 1,
|
||
|
FailureThreshold: 2,
|
||
|
FailureBackoff: 3,
|
||
|
Timeout: time.Second * 29,
|
||
|
})
|
||
|
if s.String() != "Moo" || s.failureDecay != 1 || s.failureThreshold != 2 || s.failureBackoff != 3 || s.timeout != time.Second*29 {
|
||
|
t.Fatal("Full construction failed somehow")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This is mostly for coverage testing.
|
||
|
func TestDefaultLogging(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("A4")
|
||
|
|
||
|
service := NewService("B4")
|
||
|
s.Add(service)
|
||
|
|
||
|
s.failureThreshold = .5
|
||
|
s.failureBackoff = time.Millisecond * 25
|
||
|
go s.Serve()
|
||
|
s.sync()
|
||
|
|
||
|
<-service.started
|
||
|
|
||
|
resumeChan := make(chan time.Time)
|
||
|
s.getAfterChan = func(d time.Duration) <-chan time.Time {
|
||
|
return resumeChan
|
||
|
}
|
||
|
|
||
|
service.take <- UseStopChan
|
||
|
service.take <- Fail
|
||
|
<-service.stop
|
||
|
resumeChan <- time.Time{}
|
||
|
|
||
|
<-service.started
|
||
|
|
||
|
service.take <- Happy
|
||
|
|
||
|
serviceName(&BarelyService{})
|
||
|
|
||
|
s.logBadStop(s, service)
|
||
|
s.logFailure(s, service, 1, 1, true, errors.New("test error"), []byte{})
|
||
|
|
||
|
s.Stop()
|
||
|
}
|
||
|
|
||
|
func TestNestedSupervisors(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
super1 := NewSimple("Top5")
|
||
|
super2 := NewSimple("Nested5")
|
||
|
service := NewService("Service5")
|
||
|
|
||
|
super2.logBadStop = func(*Supervisor, Service) {
|
||
|
panic("Failed to copy logBadStop")
|
||
|
}
|
||
|
|
||
|
super1.Add(super2)
|
||
|
super2.Add(service)
|
||
|
|
||
|
// test the functions got copied from super1; if this panics, it didn't
|
||
|
// get copied
|
||
|
super2.logBadStop(super2, service)
|
||
|
|
||
|
go super1.Serve()
|
||
|
super1.sync()
|
||
|
|
||
|
<-service.started
|
||
|
service.take <- Happy
|
||
|
|
||
|
super1.Stop()
|
||
|
}
|
||
|
|
||
|
func TestStoppingSupervisorStopsServices(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("Top6")
|
||
|
service := NewService("Service 6")
|
||
|
|
||
|
s.Add(service)
|
||
|
|
||
|
go s.Serve()
|
||
|
s.sync()
|
||
|
|
||
|
<-service.started
|
||
|
|
||
|
service.take <- UseStopChan
|
||
|
|
||
|
s.Stop()
|
||
|
<-service.stop
|
||
|
}
|
||
|
|
||
|
func TestStoppingStillWorksWithHungServices(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("Top7")
|
||
|
service := NewService("Service WillHang7")
|
||
|
|
||
|
s.Add(service)
|
||
|
|
||
|
go s.Serve()
|
||
|
|
||
|
<-service.started
|
||
|
|
||
|
service.take <- UseStopChan
|
||
|
service.take <- Hang
|
||
|
|
||
|
resumeChan := make(chan time.Time)
|
||
|
s.getAfterChan = func(d time.Duration) <-chan time.Time {
|
||
|
return resumeChan
|
||
|
}
|
||
|
failNotify := make(chan struct{})
|
||
|
s.logBadStop = func(supervisor *Supervisor, s Service) {
|
||
|
failNotify <- struct{}{}
|
||
|
}
|
||
|
|
||
|
s.Stop()
|
||
|
|
||
|
resumeChan <- time.Time{}
|
||
|
<-failNotify
|
||
|
service.release <- true
|
||
|
<-service.stop
|
||
|
}
|
||
|
|
||
|
func TestRemoveService(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("Top")
|
||
|
service := NewService("ServiceToRemove8")
|
||
|
|
||
|
id := s.Add(service)
|
||
|
|
||
|
go s.Serve()
|
||
|
|
||
|
<-service.started
|
||
|
service.take <- UseStopChan
|
||
|
|
||
|
err := s.Remove(id)
|
||
|
if err != nil {
|
||
|
t.Fatal("Removing service somehow failed")
|
||
|
}
|
||
|
<-service.stop
|
||
|
|
||
|
err = s.Remove(ServiceToken{1<<36 + 1})
|
||
|
if err != ErrWrongSupervisor {
|
||
|
t.Fatal("Did not detect that the ServiceToken was wrong")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestFailureToConstruct(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
var s *Supervisor
|
||
|
|
||
|
panics(func() {
|
||
|
s.Serve()
|
||
|
})
|
||
|
|
||
|
s = new(Supervisor)
|
||
|
panics(func() {
|
||
|
s.Serve()
|
||
|
})
|
||
|
}
|
||
|
|
||
|
func TestFailingSupervisors(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
// This is a bit of a complicated test, so let me explain what
|
||
|
// all this is doing:
|
||
|
// 1. Set up a top-level supervisor with a hair-trigger backoff.
|
||
|
// 2. Add a supervisor to that.
|
||
|
// 3. To that supervisor, add a service.
|
||
|
// 4. Panic the supervisor in the middle, sending the top-level into
|
||
|
// backoff.
|
||
|
// 5. Kill the lower level service too.
|
||
|
// 6. Verify that when the top-level service comes out of backoff,
|
||
|
// the service ends up restarted as expected.
|
||
|
|
||
|
// Ultimately, we can't have more than a best-effort recovery here.
|
||
|
// A panic'ed supervisor can't really be trusted to have consistent state,
|
||
|
// and without *that*, we can't trust it to do anything sensible with
|
||
|
// the children it may have been running. So unlike Erlang, we can't
|
||
|
// can't really expect to be able to safely restart them or anything.
|
||
|
// Really, the "correct" answer is that the Supervisor must never panic,
|
||
|
// but in the event that it does, this verifies that it at least tries
|
||
|
// to get on with life.
|
||
|
|
||
|
// This also tests that if a Supervisor itself panics, and one of its
|
||
|
// monitored services goes down in the meantime, that the monitored
|
||
|
// service also gets correctly restarted when the supervisor does.
|
||
|
|
||
|
s1 := NewSimple("Top9")
|
||
|
s2 := NewSimple("Nested9")
|
||
|
service := NewService("Service9")
|
||
|
|
||
|
s1.Add(s2)
|
||
|
s2.Add(service)
|
||
|
|
||
|
go s1.Serve()
|
||
|
<-service.started
|
||
|
|
||
|
s1.failureThreshold = .5
|
||
|
|
||
|
// let us control precisely when s1 comes back
|
||
|
resumeChan := make(chan time.Time)
|
||
|
s1.getAfterChan = func(d time.Duration) <-chan time.Time {
|
||
|
return resumeChan
|
||
|
}
|
||
|
failNotify := make(chan string)
|
||
|
// use this to synchronize on here
|
||
|
s1.logFailure = func(supervisor *Supervisor, s Service, cf float64, ft float64, r bool, error interface{}, stacktrace []byte) {
|
||
|
failNotify <- fmt.Sprintf("%s", s)
|
||
|
}
|
||
|
|
||
|
s2.panic()
|
||
|
|
||
|
failing := <-failNotify
|
||
|
// that's enough sync to guarantee this:
|
||
|
if failing != "Nested9" || s1.state != paused {
|
||
|
t.Fatal("Top-level supervisor did not go into backoff as expected")
|
||
|
}
|
||
|
|
||
|
service.take <- Fail
|
||
|
|
||
|
resumeChan <- time.Time{}
|
||
|
<-service.started
|
||
|
}
|
||
|
|
||
|
func TestNilSupervisorAdd(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
var s *Supervisor
|
||
|
|
||
|
defer func() {
|
||
|
if r := recover(); r == nil {
|
||
|
t.Fatal("did not panic as expected on nil add")
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
s.Add(s)
|
||
|
}
|
||
|
|
||
|
// https://github.com/thejerf/suture/issues/11
|
||
|
//
|
||
|
// The purpose of this test is to verify that it does not cause data races,
|
||
|
// so there are no obvious assertions.
|
||
|
func TestIssue11(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
s := NewSimple("main")
|
||
|
s.ServeBackground()
|
||
|
|
||
|
subsuper := NewSimple("sub")
|
||
|
s.Add(subsuper)
|
||
|
|
||
|
subsuper.Add(NewService("may cause data race"))
|
||
|
}
|
||
|
|
||
|
// http://golangtutorials.blogspot.com/2011/10/gotest-unit-testing-and-benchmarking-go.html
|
||
|
// claims test function are run in the same order as the source file...
|
||
|
// I'm not sure if this is part of the contract, though. Especially in the
|
||
|
// face of "t.Parallel()"...
|
||
|
func TestEverMultistarted(t *testing.T) {
|
||
|
if everMultistarted {
|
||
|
t.Fatal("Seem to have multistarted a service at some point, bummer.")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// A test service that can be induced to fail, panic, or hang on demand.
|
||
|
func NewService(name string) *FailableService {
|
||
|
return &FailableService{name, make(chan bool), make(chan int),
|
||
|
make(chan bool, 1), make(chan bool), make(chan bool), 0}
|
||
|
}
|
||
|
|
||
|
type FailableService struct {
|
||
|
name string
|
||
|
started chan bool
|
||
|
take chan int
|
||
|
shutdown chan bool
|
||
|
release chan bool
|
||
|
stop chan bool
|
||
|
existing int
|
||
|
}
|
||
|
|
||
|
func (s *FailableService) Serve() {
|
||
|
if s.existing != 0 {
|
||
|
everMultistarted = true
|
||
|
panic("Multi-started the same service! " + s.name)
|
||
|
}
|
||
|
s.existing++
|
||
|
|
||
|
s.started <- true
|
||
|
|
||
|
useStopChan := false
|
||
|
|
||
|
for {
|
||
|
select {
|
||
|
case val := <-s.take:
|
||
|
switch val {
|
||
|
case Happy:
|
||
|
// Do nothing on purpose. Life is good!
|
||
|
case Fail:
|
||
|
s.existing--
|
||
|
if useStopChan {
|
||
|
s.stop <- true
|
||
|
}
|
||
|
return
|
||
|
case Panic:
|
||
|
s.existing--
|
||
|
panic("Panic!")
|
||
|
case Hang:
|
||
|
// or more specifically, "hang until I release you"
|
||
|
<-s.release
|
||
|
case UseStopChan:
|
||
|
useStopChan = true
|
||
|
}
|
||
|
case <-s.shutdown:
|
||
|
s.existing--
|
||
|
if useStopChan {
|
||
|
s.stop <- true
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (s *FailableService) String() string {
|
||
|
return s.name
|
||
|
}
|
||
|
|
||
|
func (s *FailableService) Stop() {
|
||
|
s.shutdown <- true
|
||
|
}
|
||
|
|
||
|
type NowFeeder struct {
|
||
|
values []time.Time
|
||
|
getter func() time.Time
|
||
|
m sync.Mutex
|
||
|
}
|
||
|
|
||
|
// This is used to test serviceName; it's a service without a Stringer.
|
||
|
type BarelyService struct{}
|
||
|
|
||
|
func (bs *BarelyService) Serve() {}
|
||
|
func (bs *BarelyService) Stop() {}
|
||
|
|
||
|
func NewNowFeeder() (nf *NowFeeder) {
|
||
|
nf = new(NowFeeder)
|
||
|
nf.getter = func() time.Time {
|
||
|
nf.m.Lock()
|
||
|
defer nf.m.Unlock()
|
||
|
if len(nf.values) > 0 {
|
||
|
ret := nf.values[0]
|
||
|
nf.values = nf.values[1:]
|
||
|
return ret
|
||
|
}
|
||
|
panic("Ran out of values for NowFeeder")
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (nf *NowFeeder) appendTimes(t ...time.Time) {
|
||
|
nf.m.Lock()
|
||
|
defer nf.m.Unlock()
|
||
|
nf.values = append(nf.values, t...)
|
||
|
}
|
||
|
|
||
|
func panics(doesItPanic func()) (panics bool) {
|
||
|
defer func() {
|
||
|
if r := recover(); r != nil {
|
||
|
panics = true
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
doesItPanic()
|
||
|
|
||
|
return
|
||
|
}
|