diff --git a/changelog.d/7864.bugfix b/changelog.d/7864.bugfix new file mode 100644 index 0000000000000000000000000000000000000000..8623355fe9219a1f0e0e88d1669a2721b7e24b25 --- /dev/null +++ b/changelog.d/7864.bugfix @@ -0,0 +1 @@ +Fix a memory leak by limiting the length of time that messages will be queued for a remote server that has been unreachable. diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index dd150f89a6f043f133a4b9a195b2985e1c4d5826..8cbc23d901af1699cf24496007bd15fbae973ac1 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -337,6 +337,28 @@ class PerDestinationQueue(object): (e.retry_last_ts + e.retry_interval) / 1000.0 ), ) + + if e.retry_interval > 60 * 60 * 1000: + # we won't retry for another hour! + # (this suggests a significant outage) + # We drop pending PDUs and EDUs because otherwise they will + # rack up indefinitely. + # Note that: + # - the EDUs that are being dropped here are those that we can + # afford to drop (specifically, only typing notifications, + # read receipts and presence updates are being dropped here) + # - Other EDUs such as to_device messages are queued with a + # different mechanism + # - this is all volatile state that would be lost if the + # federation sender restarted anyway + + # dropping read receipts is a bit sad but should be solved + # through another mechanism, because this is all volatile! + self._pending_pdus = [] + self._pending_edus = [] + self._pending_edus_keyed = {} + self._pending_presence = {} + self._pending_rrs = {} except FederationDeniedError as e: logger.info(e) except HttpResponseException as e: