From 578020e55363f7ada44c9e8b66b7e6454ac19165 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Thu, 2 Dec 2021 12:12:23 +0100 Subject: [PATCH] Add exponential backoff when re-connecting to upstream The first reconnection attempt waits for 1min, the second for 2min, and so on up to 10min. There's a 1min jitter so that multiple failed connections don't try to reconnect at the exact same time. Closes: https://todo.sr.ht/~emersion/soju/161 --- rate.go | 40 ++++++++++++++++++++++++++++++++++++++++ server.go | 4 +++- user.go | 6 ++++-- 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 rate.go diff --git a/rate.go b/rate.go new file mode 100644 index 0000000..74225a3 --- /dev/null +++ b/rate.go @@ -0,0 +1,40 @@ +package soju + +import ( + "math/rand" + "time" +) + +// backoffer implements a simple exponential backoff. +type backoffer struct { + min, max, jitter time.Duration + n int64 +} + +func newBackoffer(min, max, jitter time.Duration) *backoffer { + return &backoffer{min: min, max: max, jitter: jitter} +} + +func (b *backoffer) Reset() { + b.n = 0 +} + +func (b *backoffer) Next() time.Duration { + if b.n == 0 { + b.n = 1 + return 0 + } + + d := time.Duration(b.n) * b.min + if d > b.max { + d = b.max + } else { + b.n *= 2 + } + + if b.jitter != 0 { + d += time.Duration(rand.Int63n(int64(b.jitter))) + } + + return d +} diff --git a/server.go b/server.go index bf47f6d..c786ec0 100644 --- a/server.go +++ b/server.go @@ -23,7 +23,9 @@ import ( ) // TODO: make configurable -var retryConnectDelay = time.Minute +var retryConnectMinDelay = time.Minute +var retryConnectMaxDelay = 10 * time.Minute +var retryConnectJitter = time.Minute var connectTimeout = 15 * time.Second var writeTimeout = 10 * time.Second var upstreamMessageDelay = 2 * time.Second diff --git a/user.go b/user.go index 9839bcf..fbd4d32 100644 --- a/user.go +++ b/user.go @@ -190,13 +190,14 @@ func (net *network) run() { } var lastTry time.Time + backoff := newBackoffer(retryConnectMinDelay, 
retryConnectMaxDelay, retryConnectJitter) for { if net.isStopped() { return } - if dur := time.Now().Sub(lastTry); dur < retryConnectDelay { - delay := retryConnectDelay - dur + delay := backoff.Next() - time.Now().Sub(lastTry) + if delay > 0 { net.logger.Printf("waiting %v before trying to reconnect to %q", delay.Truncate(time.Second), net.Addr) time.Sleep(delay) } @@ -247,6 +248,7 @@ func (net *network) run() { } net.user.srv.metrics.upstreams.Add(-1) + backoff.Reset() } }