From debc4a3a99417df9503ef89864fc868f5c7eee02 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Wed, 30 Dec 2020 17:31:22 +0100 Subject: [PATCH] archiver: fix race condition during worker startup When the tomb is created with a canceled context, then the workers started via `t.Go` exist nearly immediately. Once for the first time all started goroutines have been stopped, it is not allowed to issue further calls to `t.Go`. This is a problem when the started goroutines exit immediately, as for example the first goroutine might already have stopped before starting the second one, which is not allowed as once the first goroutines has stopped no goroutines were running. To fix this race condition the startup and main task of the archiver now also run within a `t.Go` function. This also allows unifying the error handling as it is no longer necessary to distinguish between errors returned by the workers or the saveTree processing. The tomb now just returns the first error encountered, which should also be the most descriptive one. --- internal/archiver/archiver.go | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 4992d439b..2fd170a3a 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -784,33 +784,31 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps var t tomb.Tomb wctx := t.Context(ctx) - - arch.runWorkers(wctx, &t) - start := time.Now() - debug.Log("starting snapshot") - rootTreeID, stats, err := func() (restic.ID, ItemStats, error) { + var rootTreeID restic.ID + var stats ItemStats + t.Go(func() error { + arch.runWorkers(wctx, &t) + + debug.Log("starting snapshot") tree, err := arch.SaveTree(wctx, "/", atree, arch.loadParentTree(wctx, opts.ParentSnapshot)) if err != nil { - return restic.ID{}, ItemStats{}, err + return err } if len(tree.Nodes) == 0 { - return restic.ID{}, ItemStats{}, errors.New("snapshot is empty") + return errors.New("snapshot is empty") } - return arch.saveTree(wctx, tree) - }() - debug.Log("saved tree, error: %v", err) + rootTreeID, stats, err = arch.saveTree(wctx, tree) + // trigger shutdown but don't set an error + t.Kill(nil) + return err + }) - t.Kill(nil) - werr := t.Wait() - debug.Log("err is %v, werr is %v", err, werr) - // Use werr when it might contain a more specific error than "context canceled" - if err == nil || (errors.Cause(err) == context.Canceled && werr != nil) { - err = werr - } + err = t.Wait() + debug.Log("err is %v", err) if err != nil { debug.Log("error while saving tree: %v", err)