Skip to content

Commit 8ea2fb0

Browse files
authored
Merge pull request #428 from polvalente/feat/improve-retry-backoff
feat: improve retry backoff jitter
2 parents 89d1004 + 615976a commit 8ea2fb0

File tree

2 files changed

+61
-10
lines changed

2 files changed

+61
-10
lines changed

lib/tesla/middleware/retry.ex

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ defmodule Tesla.Middleware.Retry do
1414
or the maximum delay specified. This creates an upper bound on the maximum delay
1515
we can see.
1616
17-
In order to find the actual delay value we take a random number between 0 and
18-
the maximum delay based on a uniform distribution. This randomness ensures that
19-
our retried requests don't "harmonize" making it harder for the downstream
20-
service to heal.
17+
In order to find the actual delay value we apply additive noise which is proportional to the
18+
current desired delay. This ensures that the actual delay is kept within the expected order
19+
of magnitude, while still having some randomness, which ensures that our retried requests
20+
don't "harmonize", which would make it harder for the downstream service to heal.
2121
2222
## Example
2323
@@ -43,6 +43,7 @@ defmodule Tesla.Middleware.Retry do
4343
- `:max_retries` - maximum number of retries (non-negative integer, defaults to 5)
4444
- `:max_delay` - maximum delay in milliseconds (positive integer, defaults to 5000)
4545
- `:should_retry` - function to determine if request should be retried
46+
- `:jitter_factor` - additive noise proportionality constant (float between 0 and 1, defaults to 0.2)
4647
"""
4748

4849
# Not necessary in Elixir 1.10+
@@ -53,7 +54,8 @@ defmodule Tesla.Middleware.Retry do
5354
@defaults [
5455
delay: 50,
5556
max_retries: 5,
56-
max_delay: 5_000
57+
max_delay: 5_000,
58+
jitter_factor: 0.2
5759
]
5860

5961
@impl Tesla.Middleware
@@ -65,7 +67,8 @@ defmodule Tesla.Middleware.Retry do
6567
delay: integer_opt!(opts, :delay, 1),
6668
max_retries: integer_opt!(opts, :max_retries, 0),
6769
max_delay: integer_opt!(opts, :max_delay, 1),
68-
should_retry: Keyword.get(opts, :should_retry, &match?({:error, _}, &1))
70+
should_retry: Keyword.get(opts, :should_retry, &match?({:error, _}, &1)),
71+
jitter_factor: float_opt!(opts, :jitter_factor, 0, 1)
6972
}
7073

7174
retry(env, next, context)
@@ -84,7 +87,7 @@ defmodule Tesla.Middleware.Retry do
8487
res = Tesla.run(env, next)
8588

8689
if context.should_retry.(res) do
87-
backoff(context.max_delay, context.delay, context.retries)
90+
backoff(context.max_delay, context.delay, context.retries, context.jitter_factor)
8891
context = update_in(context, [:retries], &(&1 + 1))
8992
retry(env, next, context)
9093
else
@@ -93,10 +96,16 @@ defmodule Tesla.Middleware.Retry do
9396
end
9497

9598
# Exponential backoff with jitter
96-
defp backoff(cap, base, attempt) do
99+
# Sleeps for an exponentially growing delay with proportional jitter.
#
#   * `cap`           - upper bound, in milliseconds, for the un-jittered delay
#   * `base`          - base delay in milliseconds; it is doubled `attempt` times
#   * `attempt`       - retry counter used as the exponent (2^attempt)
#   * `jitter_factor` - fraction of the delay used as the noise amplitude
#
# Returns the result of `:timer.sleep/1` (`:ok`).
defp backoff(cap, base, attempt, jitter_factor) do
  factor = Bitwise.bsl(1, attempt)
  max_sleep = min(cap, base * factor)

  # Multiplier x with 1 - jitter_factor <= x <= 1 + jitter_factor.
  jitter = 1 + 2 * jitter_factor * :rand.uniform() - jitter_factor

  # The multiplier has to scale the delay. Adding it would shift the delay by
  # roughly one millisecond no matter how large `max_sleep` is, so concurrent
  # retries would still fire in lockstep. The actual delay lies in the range
  # max_sleep * (1 - jitter_factor) .. max_sleep * (1 + jitter_factor).
  delay = trunc(max_sleep * jitter)

  :timer.sleep(delay)
end
@@ -109,7 +118,22 @@ defmodule Tesla.Middleware.Retry do
109118
end
110119
end
111120

121+
# Reads the option `key` from `opts` and validates it as a float within the
# inclusive range `min`..`max`.
#
# Returns the supplied value when it is valid, falls back to the entry for
# `key` in `@defaults` when the option is absent, and raises `ArgumentError`
# (through `invalid_float/4`) for anything else.
defp float_opt!(opts, key, min, max) do
  with {:ok, candidate} <- Keyword.fetch(opts, key) do
    if is_float(candidate) and candidate >= min and candidate <= max do
      candidate
    else
      invalid_float(key, candidate, min, max)
    end
  else
    :error -> @defaults[key]
  end
end
128+
112129
# Raises `ArgumentError` reporting that option `key` holds `value` instead of
# an integer greater than or equal to `min`.
defp invalid_integer(key, value, min) do
  message = "expected :#{key} to be an integer >= #{min}, got #{inspect(value)}"
  raise ArgumentError, message
end
132+
133+
# Raises `ArgumentError` reporting that option `key` holds `value` instead of
# a float within the inclusive range `min`..`max`.
defp invalid_float(key, value, min, max) do
  message = "expected :#{key} to be a float >= #{min} and <= #{max}, got #{inspect(value)}"
  raise ArgumentError, message
end
115139
end

test/tesla/middleware/retry_test.exs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ defmodule Tesla.Middleware.RetryTest do
2626

2727
plug Tesla.Middleware.Retry,
2828
delay: 10,
29-
max_retries: 10
29+
max_retries: 10,
30+
jitter_factor: 0.25
3031

3132
adapter LaggyAdapter
3233
end
@@ -144,4 +145,30 @@ defmodule Tesla.Middleware.RetryTest do
144145
ClientWithNegativeMaxRetries.get("/ok")
145146
end
146147
end
148+
149+
test "ensures jitter_factor option is a float between 0 and 1" do
  # Below the lower bound: the option is validated when the request runs,
  # so the error surfaces on the call rather than on module definition.
  defmodule ClientWithJitterFactorLt0 do
    use Tesla
    plug Tesla.Middleware.Retry, jitter_factor: -0.1
    adapter LaggyAdapter
  end

  too_small_message = "expected :jitter_factor to be a float >= 0 and <= 1, got -0.1"

  assert_raise ArgumentError, too_small_message, fn ->
    ClientWithJitterFactorLt0.get("/ok")
  end

  # Above the upper bound.
  defmodule ClientWithJitterFactorGt1 do
    use Tesla
    plug Tesla.Middleware.Retry, jitter_factor: 1.1
    adapter LaggyAdapter
  end

  too_large_message = "expected :jitter_factor to be a float >= 0 and <= 1, got 1.1"

  assert_raise ArgumentError, too_large_message, fn ->
    ClientWithJitterFactorGt1.get("/ok")
  end
end
147174
end

0 commit comments

Comments
 (0)