From 06ba71083eefbe1fd9a8eeed10e541dd7b52796f Mon Sep 17 00:00:00 2001
From: David Robertson <davidr@element.io>
Date: Tue, 14 Feb 2023 23:42:29 +0000
Subject: [PATCH] Fix order of partial state tables when purging (#15068)

* Fix order of partial state tables when purging

`partial_state_rooms` has an FK on `events` pointing to the join event we
get from `/send_join`, so we must delete from that table before deleting
from `events`.

**NB:** It would be nice to cancel any resync processes for the room
being purged. We do not do this at present. To do so reliably we'd need
an internal HTTP "replication" endpoint, because the worker doing the
resync process may be different to that handling the purge request.

The first time the resync process tries to write data after the deletion
it will fail because we have deleted necessary data e.g. auth
events. AFAICS it will not retry the resync, so the only downside to
not cancelling the resync is a scary-looking traceback.

(This is presumably extremely race-sensitive.)

* Changelog

* admist(?) -> between

* Warn about a race

* Fix typo, thanks Sean

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>

---------

Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com>
---
 changelog.d/15068.bugfix                       | 1 +
 synapse/handlers/federation.py                 | 5 +++++
 synapse/storage/databases/main/purge_events.py | 6 ++++--
 3 files changed, 10 insertions(+), 2 deletions(-)
 create mode 100644 changelog.d/15068.bugfix

diff --git a/changelog.d/15068.bugfix b/changelog.d/15068.bugfix
new file mode 100644
index 0000000000..f09ffa2877
--- /dev/null
+++ b/changelog.d/15068.bugfix
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.76.0 where partially-joined rooms could not be deleted using the [purge room API](https://matrix-org.github.io/synapse/latest/admin_api/rooms.html#delete-room-api).
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index 08727e4857..1d0f6bcd6f 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -1880,6 +1880,11 @@ class FederationHandler:
                 logger.info("Updating current state for %s", room_id)
                 # TODO(faster_joins): notify workers in notify_room_un_partial_stated
                 #   https://github.com/matrix-org/synapse/issues/12994
+                #
+                # NB: there's a potential race here. If room is purged just before we
+                # call this, we _might_ end up inserting rows into current_state_events.
+                # (The logic is hard to chase through.) We think this is fine, but if
+                # not the HS admin should purge the room again.
                 await self.state_handler.update_current_state(room_id)
 
                 logger.info("Handling any pending device list updates")
diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py
index 9213ce0b5a..9c41d01e13 100644
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -420,12 +420,14 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
             "event_push_actions",
             "event_search",
             "event_failed_pull_attempts",
+            # Note: the partial state tables have foreign keys between each other, and to
+            # `events` and `rooms`. We need to delete from them in the right order.
             "partial_state_events",
+            "partial_state_rooms_servers",
+            "partial_state_rooms",
             "events",
             "federation_inbound_events_staging",
             "local_current_membership",
-            "partial_state_rooms_servers",
-            "partial_state_rooms",
             "receipts_graph",
             "receipts_linearized",
             "room_aliases",
-- 
GitLab