Maunium / synapse · Commits

Commit dce6e9e0
Authored 5 years ago by Richard van der Hoff
Avoid rapidly backing-off a server if we ignore the retry interval
Parent: fec2dcb1
Showing 2 changed files with 38 additions and 23 deletions:

changelog.d/5335.bugfix      +1  −0
synapse/util/retryutils.py   +37 −23
changelog.d/5335.bugfix  (new file, 0 → 100644, +1 −0)

Fix a bug where we could rapidly mark a server as unreachable even though it was only down for a few minutes.
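To see why the old behaviour was a problem, here is a minimal sketch (not code from the commit) of how the backoff escalates when a caller ignores the existing retry interval but every failure still multiplies it. The constants mirror the defaults visible in the diff below (min_retry_interval=10*60*1000, multiplier_retry_interval=5, max_retry_interval=24*60*60*1000); the real code also applies a random jitter factor, which is omitted here.

    # Illustrative sketch only: backoff escalation with the pre-fix behaviour,
    # where every failure bumps retry_interval even when backoff was ignored.
    MIN_RETRY_INTERVAL = 10 * 60 * 1000        # 10 minutes, in ms
    MAX_RETRY_INTERVAL = 24 * 60 * 60 * 1000   # 24 hours, in ms
    MULTIPLIER = 5

    retry_interval = 0
    for attempt in range(1, 6):
        if retry_interval:
            retry_interval = min(retry_interval * MULTIPLIER, MAX_RETRY_INTERVAL)
        else:
            retry_interval = MIN_RETRY_INTERVAL
        print("failure %i: next retry in %i minutes" % (attempt, retry_interval // 60000))

    # Failures 1-5 give 10, 50, 250, 1250 and then 1440 minutes (the 24-hour cap),
    # so a handful of ignored-backoff requests in quick succession marks a server
    # that was only briefly down as unreachable for a day.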
synapse/util/retryutils.py  (+37 −23)
@@ -46,8 +46,7 @@ class NotRetryingDestination(Exception):
 
 
 @defer.inlineCallbacks
-def get_retry_limiter(destination, clock, store, ignore_backoff=False,
-                      **kwargs):
+def get_retry_limiter(destination, clock, store, ignore_backoff=False, **kwargs):
     """For a given destination check if we have previously failed to
     send a request there and are waiting before retrying the destination.
     If we are not ready to retry the destination, this will raise a
@@ -60,8 +59,7 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False,
         clock (synapse.util.clock): timing source
         store (synapse.storage.transactions.TransactionStore): datastore
         ignore_backoff (bool): true to ignore the historical backoff data and
-            try the request anyway. We will still update the next
-            retry_interval on success/failure.
+            try the request anyway. We will still reset the retry_interval on success.
 
     Example usage:
 
@@ -75,13 +73,12 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False,
     """
     retry_last_ts, retry_interval = (0, 0)
 
-    retry_timings = yield store.get_destination_retry_timings(
-        destination
-    )
+    retry_timings = yield store.get_destination_retry_timings(destination)
 
     if retry_timings:
         retry_last_ts, retry_interval = (
-            retry_timings["retry_last_ts"], retry_timings["retry_interval"]
+            retry_timings["retry_last_ts"],
+            retry_timings["retry_interval"],
         )
 
         now = int(clock.time_msec())
@@ -93,22 +90,31 @@ def get_retry_limiter(destination, clock, store, ignore_backoff=False,
             destination=destination,
         )
 
+    # if we are ignoring the backoff data, we should also not increment the backoff
+    # when we get another failure - otherwise a server can very quickly reach the
+    # maximum backoff even though it might only have been down briefly
+    backoff_on_failure = not ignore_backoff
+
     defer.returnValue(
         RetryDestinationLimiter(
-            destination,
-            clock,
-            store,
-            retry_interval,
-            **kwargs
+            destination, clock, store, retry_interval, backoff_on_failure, **kwargs
        )
    )
 
 
 class RetryDestinationLimiter(object):
-    def __init__(self, destination, clock, store, retry_interval,
-                 min_retry_interval=10 * 60 * 1000,
-                 max_retry_interval=24 * 60 * 60 * 1000,
-                 multiplier_retry_interval=5, backoff_on_404=False):
+    def __init__(
+        self,
+        destination,
+        clock,
+        store,
+        retry_interval,
+        min_retry_interval=10 * 60 * 1000,
+        max_retry_interval=24 * 60 * 60 * 1000,
+        multiplier_retry_interval=5,
+        backoff_on_404=False,
+        backoff_on_failure=True,
+    ):
         """Marks the destination as "down" if an exception is thrown in the
         context, except for CodeMessageException with code < 500.
 
@@ -128,6 +134,9 @@ class RetryDestinationLimiter(object):
             multiplier_retry_interval (int): The multiplier to use to increase
                 the retry interval after a failed request.
             backoff_on_404 (bool): Back off if we get a 404
+
+            backoff_on_failure (bool): set to False if we should not increase the
+                retry interval on a failure.
         """
         self.clock = clock
         self.store = store
@@ -138,6 +147,7 @@ class RetryDestinationLimiter(object):
         self.max_retry_interval = max_retry_interval
         self.multiplier_retry_interval = multiplier_retry_interval
         self.backoff_on_404 = backoff_on_404
+        self.backoff_on_failure = backoff_on_failure
 
     def __enter__(self):
         pass
@@ -173,10 +183,13 @@ class RetryDestinationLimiter(object):
             if not self.retry_interval:
                 return
 
-            logger.debug("Connection to %s was successful; clearing backoff",
-                         self.destination)
+            logger.debug(
+                "Connection to %s was successful; clearing backoff", self.destination
+            )
             retry_last_ts = 0
             self.retry_interval = 0
+        elif not self.backoff_on_failure:
+            return
         else:
             # We couldn't connect.
             if self.retry_interval:
@@ -190,7 +203,10 @@ class RetryDestinationLimiter(object):
 
             logger.info(
                 "Connection to %s was unsuccessful (%s(%s)); backoff now %i",
-                self.destination, exc_type, exc_val, self.retry_interval
+                self.destination,
+                exc_type,
+                exc_val,
+                self.retry_interval,
             )
             retry_last_ts = int(self.clock.time_msec())
 
@@ -201,9 +217,7 @@ class RetryDestinationLimiter(object):
                         self.destination, retry_last_ts, self.retry_interval
                     )
                 except Exception:
-                    logger.exception(
-                        "Failed to store destination_retry_timings",
-                    )
+                    logger.exception("Failed to store destination_retry_timings")
 
             # we deliberately do this in the background.
             synapse.util.logcontext.run_in_background(store_retry_timings)
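For context, get_retry_limiter is used as a context-manager factory around outbound federation requests. Below is a hedged sketch of that calling pattern under the API shown in the diff; send_with_backoff and do_request are hypothetical names, while get_retry_limiter, NotRetryingDestination and the ignore_backoff argument are taken from the file above.

    from twisted.internet import defer

    from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter


    @defer.inlineCallbacks
    def send_with_backoff(destination, clock, store, do_request):
        # do_request is a hypothetical callable standing in for the real
        # federation request; the rest follows the API in the diff above.
        try:
            limiter = yield get_retry_limiter(
                destination, clock, store, ignore_backoff=True
            )
        except NotRetryingDestination:
            # Only raised on the normal path (ignore_backoff=False) while the
            # destination is still inside its backoff window.
            return

        with limiter:
            # After this commit, a failure in here no longer grows the
            # destination's retry_interval, because get_retry_limiter passes
            # backoff_on_failure = not ignore_backoff to the limiter.
            response = yield do_request(destination)

        defer.returnValue(response)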