Skip to content
Snippets Groups Projects
Unverified commit d64653d0, authored by Eric Eastwood, committed by GitHub
Browse files

Track number of hosts affected by the rate limiter (#13541)

Track number of hosts affected by the rate limiter so we can differentiate one really noisy homeserver from a general ratelimit tuning problem across the federation.

Follow-up to https://github.com/matrix-org/synapse/pull/13534

Part of https://github.com/matrix-org/synapse/issues/13356
parent 22ea51fa
No related branches found
No related tags found
No related merge requests found
Add metrics to track how the rate limiter is affecting requests (sleep/reject).
...@@ -30,7 +30,7 @@ from synapse.logging.context import ( ...@@ -30,7 +30,7 @@ from synapse.logging.context import (
run_in_background, run_in_background,
) )
from synapse.logging.opentracing import start_active_span from synapse.logging.opentracing import start_active_span
from synapse.metrics import Histogram from synapse.metrics import Histogram, LaterGauge
from synapse.util import Clock from synapse.util import Clock
if typing.TYPE_CHECKING: if typing.TYPE_CHECKING:
...@@ -74,6 +74,27 @@ class FederationRateLimiter: ...@@ -74,6 +74,27 @@ class FederationRateLimiter:
str, "_PerHostRatelimiter" str, "_PerHostRatelimiter"
] = collections.defaultdict(new_limiter) ] = collections.defaultdict(new_limiter)
# We track the number of affected hosts per time-period so we can
# differentiate one really noisy homeserver from a general
# ratelimit tuning problem across the federation.
LaterGauge(
"synapse_rate_limit_sleep_affected_hosts",
"Number of hosts that had requests put to sleep",
[],
lambda: sum(
ratelimiter.should_sleep() for ratelimiter in self.ratelimiters.values()
),
)
LaterGauge(
"synapse_rate_limit_reject_affected_hosts",
"Number of hosts that had requests rejected",
[],
lambda: sum(
ratelimiter.should_reject()
for ratelimiter in self.ratelimiters.values()
),
)
def ratelimit(self, host: str) -> "_GeneratorContextManager[defer.Deferred[None]]": def ratelimit(self, host: str) -> "_GeneratorContextManager[defer.Deferred[None]]":
"""Used to ratelimit an incoming request from a given host """Used to ratelimit an incoming request from a given host
...@@ -139,6 +160,21 @@ class _PerHostRatelimiter: ...@@ -139,6 +160,21 @@ class _PerHostRatelimiter:
finally: finally:
self._on_exit(request_id) self._on_exit(request_id)
def should_reject(self) -> bool:
    """
    Report whether this host already has too many requests queued up, in
    which case a new request should be rejected.

    The backlog is the number of requests in the ready queue plus the number
    of requests currently sleeping; it must strictly exceed `reject_limit`.
    """
    backlog = len(self.sleeping_requests) + len(self.ready_request_queue)
    return self.reject_limit < backlog
def should_sleep(self) -> bool:
    """
    Report whether a request should be put to sleep because too many
    requests have already come through within the window.

    True when the number of entries in `request_times` strictly exceeds
    `sleep_limit`.
    """
    recent_request_count = len(self.request_times)
    return self.sleep_limit < recent_request_count
def _on_enter(self, request_id: object) -> "defer.Deferred[None]": def _on_enter(self, request_id: object) -> "defer.Deferred[None]":
time_now = self.clock.time_msec() time_now = self.clock.time_msec()
...@@ -149,8 +185,7 @@ class _PerHostRatelimiter: ...@@ -149,8 +185,7 @@ class _PerHostRatelimiter:
# reject the request if we already have too many queued up (either # reject the request if we already have too many queued up (either
# sleeping or in the ready queue). # sleeping or in the ready queue).
queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) if self.should_reject():
if queue_size > self.reject_limit:
logger.debug("Ratelimiter(%s): rejecting request", self.host) logger.debug("Ratelimiter(%s): rejecting request", self.host)
rate_limit_reject_counter.inc() rate_limit_reject_counter.inc()
raise LimitExceededError( raise LimitExceededError(
...@@ -180,7 +215,7 @@ class _PerHostRatelimiter: ...@@ -180,7 +215,7 @@ class _PerHostRatelimiter:
len(self.request_times), len(self.request_times),
) )
if len(self.request_times) > self.sleep_limit: if self.should_sleep():
logger.debug( logger.debug(
"Ratelimiter(%s) [%s]: sleeping request for %f sec", "Ratelimiter(%s) [%s]: sleeping request for %f sec",
self.host, self.host,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment