2024-02-24 16:26:29 +01:00
package repository
import (
"context"
"fmt"
"sync"
"time"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
)
type lockContext struct {
lock * restic . Lock
cancel context . CancelFunc
refreshWG sync . WaitGroup
}
2024-02-24 17:07:14 +01:00
type locker struct {
retrySleepStart time . Duration
retrySleepMax time . Duration
refreshInterval time . Duration
refreshabilityTimeout time . Duration
}
2024-02-24 16:26:29 +01:00
2024-02-24 17:07:14 +01:00
const defaultRefreshInterval = 5 * time . Minute
var lockerInst = & locker {
retrySleepStart : 5 * time . Second ,
retrySleepMax : 60 * time . Second ,
refreshInterval : defaultRefreshInterval ,
// consider a lock refresh failed a bit before the lock actually becomes stale
// the difference allows to compensate for a small time drift between clients.
refreshabilityTimeout : restic . StaleLockTimeout - defaultRefreshInterval * 3 / 2 ,
}
func Lock ( ctx context . Context , repo restic . Repository , exclusive bool , retryLock time . Duration , printRetry func ( msg string ) , logger func ( format string , args ... interface { } ) ) ( * Unlocker , context . Context , error ) {
return lockerInst . Lock ( ctx , repo , exclusive , retryLock , printRetry , logger )
2024-02-24 16:26:29 +01:00
}
// Lock wraps the ctx such that it is cancelled when the repository is unlocked
// cancelling the original context also stops the lock refresh
2024-02-24 17:07:14 +01:00
func ( l * locker ) Lock ( ctx context . Context , repo restic . Repository , exclusive bool , retryLock time . Duration , printRetry func ( msg string ) , logger func ( format string , args ... interface { } ) ) ( * Unlocker , context . Context , error ) {
2024-02-24 16:26:29 +01:00
lockFn := restic . NewLock
if exclusive {
lockFn = restic . NewExclusiveLock
}
var lock * restic . Lock
var err error
2024-02-24 17:07:14 +01:00
retrySleep := minDuration ( l . retrySleepStart , retryLock )
2024-02-24 16:26:29 +01:00
retryMessagePrinted := false
retryTimeout := time . After ( retryLock )
retryLoop :
for {
lock , err = lockFn ( ctx , repo )
if err != nil && restic . IsAlreadyLocked ( err ) {
if ! retryMessagePrinted {
printRetry ( fmt . Sprintf ( "repo already locked, waiting up to %s for the lock\n" , retryLock ) )
retryMessagePrinted = true
}
debug . Log ( "repo already locked, retrying in %v" , retrySleep )
retrySleepCh := time . After ( retrySleep )
select {
case <- ctx . Done ( ) :
return nil , ctx , ctx . Err ( )
case <- retryTimeout :
debug . Log ( "repo already locked, timeout expired" )
// Last lock attempt
lock , err = lockFn ( ctx , repo )
break retryLoop
case <- retrySleepCh :
2024-02-24 17:07:14 +01:00
retrySleep = minDuration ( retrySleep * 2 , l . retrySleepMax )
2024-02-24 16:26:29 +01:00
}
} else {
// anything else, either a successful lock or another error
break retryLoop
}
}
if restic . IsInvalidLock ( err ) {
return nil , ctx , errors . Fatalf ( "%v\n\nthe `unlock --remove-all` command can be used to remove invalid locks. Make sure that no other restic process is accessing the repository when running the command" , err )
}
if err != nil {
return nil , ctx , fmt . Errorf ( "unable to create lock in backend: %w" , err )
}
debug . Log ( "create lock %p (exclusive %v)" , lock , exclusive )
ctx , cancel := context . WithCancel ( ctx )
lockInfo := & lockContext {
lock : lock ,
cancel : cancel ,
}
lockInfo . refreshWG . Add ( 2 )
refreshChan := make ( chan struct { } )
forceRefreshChan := make ( chan refreshLockRequest )
2024-02-24 17:07:14 +01:00
go l . refreshLocks ( ctx , repo . Backend ( ) , lockInfo , refreshChan , forceRefreshChan , logger )
go l . monitorLockRefresh ( ctx , lockInfo , refreshChan , forceRefreshChan , logger )
2024-02-24 16:26:29 +01:00
2024-02-24 16:45:57 +01:00
return & Unlocker { lockInfo } , ctx , nil
2024-02-24 16:26:29 +01:00
}
2024-02-24 17:07:14 +01:00
func minDuration ( a , b time . Duration ) time . Duration {
if a <= b {
return a
}
return b
}
2024-02-24 16:26:29 +01:00
type refreshLockRequest struct {
result chan bool
}
2024-02-24 17:07:14 +01:00
func ( l * locker ) refreshLocks ( ctx context . Context , backend backend . Backend , lockInfo * lockContext , refreshed chan <- struct { } , forceRefresh <- chan refreshLockRequest , logger func ( format string , args ... interface { } ) ) {
2024-02-24 16:26:29 +01:00
debug . Log ( "start" )
lock := lockInfo . lock
2024-02-24 17:07:14 +01:00
ticker := time . NewTicker ( l . refreshInterval )
2024-02-24 16:26:29 +01:00
lastRefresh := lock . Time
defer func ( ) {
ticker . Stop ( )
// ensure that the context was cancelled before removing the lock
lockInfo . cancel ( )
// remove the lock from the repo
debug . Log ( "unlocking repository with lock %v" , lock )
if err := lock . Unlock ( ) ; err != nil {
debug . Log ( "error while unlocking: %v" , err )
logger ( "error while unlocking: %v" , err )
}
lockInfo . refreshWG . Done ( )
} ( )
for {
select {
case <- ctx . Done ( ) :
debug . Log ( "terminate" )
return
case req := <- forceRefresh :
debug . Log ( "trying to refresh stale lock" )
// keep on going if our current lock still exists
success := tryRefreshStaleLock ( ctx , backend , lock , lockInfo . cancel , logger )
// inform refresh goroutine about forced refresh
select {
case <- ctx . Done ( ) :
case req . result <- success :
}
if success {
// update lock refresh time
lastRefresh = lock . Time
}
case <- ticker . C :
2024-02-24 17:07:14 +01:00
if time . Since ( lastRefresh ) > l . refreshabilityTimeout {
2024-02-24 16:26:29 +01:00
// the lock is too old, wait until the expiry monitor cancels the context
continue
}
debug . Log ( "refreshing locks" )
err := lock . Refresh ( context . TODO ( ) )
if err != nil {
logger ( "unable to refresh lock: %v\n" , err )
} else {
lastRefresh = lock . Time
// inform monitor goroutine about successful refresh
select {
case <- ctx . Done ( ) :
case refreshed <- struct { } { } :
}
}
}
}
}
2024-02-24 17:07:14 +01:00
func ( l * locker ) monitorLockRefresh ( ctx context . Context , lockInfo * lockContext , refreshed <- chan struct { } , forceRefresh chan <- refreshLockRequest , logger func ( format string , args ... interface { } ) ) {
2024-02-24 16:26:29 +01:00
// time.Now() might use a monotonic timer which is paused during standby
// convert to unix time to ensure we compare real time values
lastRefresh := time . Now ( ) . UnixNano ( )
pollDuration := 1 * time . Second
2024-02-24 17:07:14 +01:00
if l . refreshInterval < pollDuration {
2024-02-24 16:26:29 +01:00
// required for TestLockFailedRefresh
2024-02-24 17:07:14 +01:00
pollDuration = l . refreshInterval / 5
2024-02-24 16:26:29 +01:00
}
// timers are paused during standby, which is a problem as the refresh timeout
// _must_ expire if the host was too long in standby. Thus fall back to periodic checks
// https://github.com/golang/go/issues/35012
ticker := time . NewTicker ( pollDuration )
defer func ( ) {
ticker . Stop ( )
lockInfo . cancel ( )
lockInfo . refreshWG . Done ( )
} ( )
var refreshStaleLockResult chan bool
for {
select {
case <- ctx . Done ( ) :
debug . Log ( "terminate expiry monitoring" )
return
case <- refreshed :
if refreshStaleLockResult != nil {
// ignore delayed refresh notifications while the stale lock is refreshed
continue
}
lastRefresh = time . Now ( ) . UnixNano ( )
case <- ticker . C :
2024-02-24 17:07:14 +01:00
if time . Now ( ) . UnixNano ( ) - lastRefresh < l . refreshabilityTimeout . Nanoseconds ( ) || refreshStaleLockResult != nil {
2024-02-24 16:26:29 +01:00
continue
}
debug . Log ( "trying to refreshStaleLock" )
// keep on going if our current lock still exists
refreshReq := refreshLockRequest {
result : make ( chan bool ) ,
}
refreshStaleLockResult = refreshReq . result
// inform refresh goroutine about forced refresh
select {
case <- ctx . Done ( ) :
case forceRefresh <- refreshReq :
}
case success := <- refreshStaleLockResult :
if success {
lastRefresh = time . Now ( ) . UnixNano ( )
refreshStaleLockResult = nil
continue
}
logger ( "Fatal: failed to refresh lock in time\n" )
return
}
}
}
func tryRefreshStaleLock ( ctx context . Context , be backend . Backend , lock * restic . Lock , cancel context . CancelFunc , logger func ( format string , args ... interface { } ) ) bool {
freeze := backend . AsBackend [ backend . FreezeBackend ] ( be )
if freeze != nil {
debug . Log ( "freezing backend" )
freeze . Freeze ( )
defer freeze . Unfreeze ( )
}
err := lock . RefreshStaleLock ( ctx )
if err != nil {
logger ( "failed to refresh stale lock: %v\n" , err )
// cancel context while the backend is still frozen to prevent accidental modifications
cancel ( )
return false
}
return true
}
2024-02-24 16:45:57 +01:00
type Unlocker struct {
info * lockContext
2024-02-24 16:26:29 +01:00
}
2024-02-24 16:45:57 +01:00
func ( l * Unlocker ) Unlock ( ) {
l . info . cancel ( )
l . info . refreshWG . Wait ( )
2024-02-24 16:26:29 +01:00
}