From a0cd8ae8cbe14d2821cbe8fd6b011c4ddc729344 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Fri, 4 Jun 2021 10:47:58 +0100
Subject: [PATCH] Don't try to backfill the same room in parallel. (#10116)

If backfilling is slow then the client may time out and retry, causing
Synapse to start a new `/backfill` before the existing backfill has
finished, duplicating work.
---
 changelog.d/10116.bugfix       | 1 +
 synapse/handlers/federation.py | 8 ++++++++
 2 files changed, 9 insertions(+)
 create mode 100644 changelog.d/10116.bugfix

diff --git a/changelog.d/10116.bugfix b/changelog.d/10116.bugfix
new file mode 100644
index 0000000000..90ef707559
--- /dev/null
+++ b/changelog.d/10116.bugfix
@@ -0,0 +1 @@
+Fix a bug where the server would attempt to fetch the same room history from a remote server multiple times in parallel.
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index f3f97db2fa..b802822baa 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -178,6 +178,8 @@ class FederationHandler(BaseHandler):
         self.room_queues = {}  # type: Dict[str, List[Tuple[EventBase, str]]]
         self._room_pdu_linearizer = Linearizer("fed_room_pdu")
 
+        self._room_backfill = Linearizer("room_backfill")
+
         self.third_party_event_rules = hs.get_third_party_event_rules()
 
         self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages
@@ -1041,6 +1043,12 @@ class FederationHandler(BaseHandler):
                 return. This is used as part of the heuristic to decide if we
                 should back paginate.
         """
+        with (await self._room_backfill.queue(room_id)):
+            return await self._maybe_backfill_inner(room_id, current_depth, limit)
+
+    async def _maybe_backfill_inner(
+        self, room_id: str, current_depth: int, limit: int
+    ) -> bool:
         extremities = await self.store.get_oldest_events_with_depth_in_room(room_id)
 
         if not extremities:
-- 
GitLab