From 5829872bec9b9986c741eafec36e47774e4d2b3e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Nov 2020 15:07:34 +0000
Subject: [PATCH] Fix port script to handle foreign key constraints (#8730)

---
 .buildkite/test_db.db   | Bin 19279872 -> 19296256 bytes
 changelog.d/8730.bugfix |   1 +
 scripts/synapse_port_db |  68 ++++++++++++++++++++++++++++++++++++----
 3 files changed, 63 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/8730.bugfix

diff --git a/.buildkite/test_db.db b/.buildkite/test_db.db
index 361369a581771bed36692a848aa396df96ad59d9..a0d9f16a7522c17f91ace4175a2234b3f6a93941 100644
GIT binary patch
delta 2680
zcmajf3v?7$9mnyV&0~{Yb|(pBlR{wc5|BhfD1uroN-1n<qy!p@6d$ESc9KlHnOSx>
z57Z_hv4dKmiB_jo(=@hJ@j<CnVzro2i;uRVt&a+=ZD_65M{7Y5wZ6VL3$mJ?<DB#P
zoz3jc|K9(-dv4#v?Ztf)cN7oKDt*!8DQoro=TWqwx~06VUva#EmnG^wWLe8la*m>o
zKmK%;GOCP;iK5P6N$INtMIMjR)}J9a_Fp$}SRFg8Dvg5$iu$rg9UJO<DBml`o(#?`
zQ3|T5qjziV$%J0tY_<FOQ&CTO#!CA9&#OZl3e`|vb#cxB+f{7WYrA>2n{T@Xwp(bs
zKHDv_-D2DI+it*ir`c}Mc0;zS+HQ&MmfCKa?M^@J&d9$$FRTnZK2iRJ*W+<6PA<C5
zxoj}Me%Elcd~7%xZfz_qS0+5lgj19Z*A#dcDwg6zODZaz^~vxHc_B@4w7%2(o#UeX
z=-9qgcs!sqIZFof8+RS*38Mm)xCk>b3ssnnIfy_Yin*x9Jj}=IumClv#p|&Uby$RY
zEXKuHf;V6(mf?+f6W)wV@D{ukmtr~IhPPt{F2hP(j#XHV7_Pt?T!{v}18Z>=)<O2T
z9vg5qny?YqU=!Yn&A1j@@GiU?@4<U<9k$|nXxN5k#G#`Ft!RUR8*n2MXom?4No+?7
zX=KoWPIRFgJMccdA0NO?=s_>~uoL|lzz1<NZ0y2^@L_xew_rE+U@vaPAcpW!d<?hY
z<M;#|+>Sf&Nqh>Q#xU;02=2ltK7+e)5BA|+d={U>etaHZz!&i)9Kd}zi2Lzn9Kr+m
z3JzloU&Vv?8orK)a0K7LI9z-a-@>=?FplCo_%6PO@8c0XipTH+9K#RsBRq~L@MHW0
zKgG}RBz}(LIDx0|G=70+@GK^963^jz{1PwVSNJtv#BcCh{0_gzOL!T7z$-X~)A%F)
zgfsXv&f+ilEB=O8@pt?K|HQv;In<-v3K?D*c{1{46v!x);geA$qgaMtMnJ|i89^B#
z8LEsD8KpAHWK5SaLq@rbu#5^Bl`<}pF;m7Y8C5c7%b25#b2})FQ8jqNGb3-FM?I=u
z6&eg}8Y#LfqKMg^BcbJ<hTz%YS<ewJ$O}*Kg!id2ZlEW<f&R!qk*^{a^A4=I^S+1@
z9@%|&L<x*<Aouv4rewH#Y}xj3Qt@5V-%(hxcD@ts30FE+$Xn<P_k_cH4@Q)|2L+;-
zi)zeM4ji1X^c4r0Qc)lEsLuUjU6s7O?^R2eD*4q{dvoq)=hU)2a|7zU>?MAT(a~47
zTq#H2q?RmUaPCF!oI1I`dKPv4zBL|oU+7ryrD@Mia{|u<ZYV1CJ?2{@`&#Jh-Kh+e
z`6^Z{_U?=rW?b)%>*<VPY8k^ax27zsJw2IQjrf!=%34=7#a3Pu6Ag{4W7mo)-=F_S
zzu;NlICX%iNm<2KJGE;sA1HN3JHu5@<ctzne{SL>Q%aWU_NIXnU&Z3Z-rbus+O~u~
zWyq8lWpQWrX{E9*TU6AroJEU7rcKXXkT6WKy+gKcne~3*42}C|t`kkMD`HKt##OP6
zxnt9!CTEh`x(fvxRtu|Tvb5Z}+?tdeeK;6iom+BpcjX>rw;-oZI+N11_N`VbuBQyM
z^?XsiQ#UhI3Oy2x_A={+riOJZo3@Cxu`U0#q&jiFW_D?tY1J|pWN4x}p`~=8i54rR
z8?C0eQQsjfQ~d9QZ91d2>CIY4TBjc^vmh?ob<NDy>(Up*dxfd%agnivZm^dr(XJV0
z20}AwoF9^?(YuqnnKnB0T4AMF0`pFunMgM?$ZE+}%h}6wN7U7(oAjCzUGj_)dWIF7
zVzPTtqb1UoNLsxa_QKE-i5=p?G@P6~+Xfa9j~ntV<pSmU>;8YwT~<dTPSYZ#$2-ip
zCbwEv)6!Hh((Gv$J?Lq<k*408Z9b*9uudk?L^`QA8!bk2cA=LFBl}CpF1uCFq|di2
z8)D~ZQW%`cxsqB-i{6}(uSKVpFjzh(lP&RDr;}IjuN`UN_R5SbKM+wK967@0wc}^_
X98ktfM<dE~?*r<ys&jhTp6I^;n<ZCq

delta 1527
zcmX}sc~q2D7=ZEb4#RxRz|1f#-wdM6i~^yUR6<#)8Eu$`mP;8LmSt)SqiK;D;Y><p
zv6T907Gr5yT1a6`R5Y|$*`BSM*;Lv>*<#tIY0v99b<g=d=iK++`|qt!thU!DR@s|#
zJ$p1QV}|zsQq?VkhcX;eG&`B0+r5!`4z`5y;+DleRjy)zM9MCwL(K};YMPo5u8wrN
zW1VhQ+1z0rf5^Xbl-1f`?NP8G39?Uy4LVX_hXbidgA*>ekq!?skh#IylQlibtL-rL
zK=yVsG1u7I&>GB+wFZrul~$iB)l5goDAHZEmSR<-%wW3LXO0UQd*hSZ{3@hk>*pCg
zspiDeiY$Ftc!AA3roar=89uYdWwAwjH~W>*&`~G6@F53%kc+<PhdlTZKoI>g0Qo3D
zA&x;224WD3F&M{U2ug4qj>icYic*x}M4W_SI2otlR1C)mjKpaeh0{@vGjJx(!r3?n
z6&Q_kF$QCC9x5>o<8eMNzywUhBuvJIxCm2lF{&^X({Kr@F&&rUGR(l`xB@eAC1znZ
z=HMz^jcaf%Y7oL)%)@n<j|I3MH=q_bVj*tABHWBR+=6;6Mi>#?iX~Wz2Hb|*aR-*+
zPTYk?G@%(SxEuFiIaXjLOsv9c+>1515BH-LQLM!Sco6Hb9uMJRY``OU6dSP#kKu8&
zVKdtC1fE0(w%{prB8I2Yg=g?Ao<ldDM;twP0Wabuyo{}Q1+U^YY{Tn#18?Fjyp4D8
zF5bg-ypIp?AwI&#_yjw!6Q5!iKErN&js(8Im-q^MuoqwB8+?oJ(2MW!1NPxZ{DhzJ
z3-;qz{DuSg9e?0Y{Dr@95Qp#&4&%tOjylx@iA9nmu}YF9Hi<4tk=P{;Nvb4G;*_`~
zZb`btBgv3tO0p!`5<}vZ_#`=!K9XEXUr9eno{IB3sLG(OQZ%nNT8pNy^{YV8vQ6z*
z9@hb_+cli2tnQ=zFVQ(wg=)TD5pmkQRb}!m<wRps3ste*xl^+ry%pa#tx#oJ;`!_R
zs>@-PH)U2wck`+%HLOR{bZ=RiB~lVC;navGakaOx-LD$k0|=r&1|VOxwHK)Re*u2p
B#UKCx

diff --git a/changelog.d/8730.bugfix b/changelog.d/8730.bugfix
new file mode 100644
index 0000000000..dcc42bc981
--- /dev/null
+++ b/changelog.d/8730.bugfix
@@ -0,0 +1 @@
+Fix port script to correctly handle foreign key constraints. Broke in v1.21.0.
diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db
index 13c0120bb4..7a638ea8e3 100755
--- a/scripts/synapse_port_db
+++ b/scripts/synapse_port_db
@@ -22,7 +22,7 @@ import logging
 import sys
 import time
 import traceback
-from typing import Optional
+from typing import Dict, Optional, Set
 
 import yaml
 
@@ -292,6 +292,34 @@ class Porter(object):
 
         return table, already_ported, total_to_port, forward_chunk, backward_chunk
 
+    async def get_table_constraints(self) -> Dict[str, Set[str]]:
+        """Map each table that has foreign keys to the set of tables it references.
+        """
+
+        def _get_constraints(txn):
+            # Join every FOREIGN KEY constraint (table_constraints) to the table it
+            # points at (constraint_column_usage), both from information_schema.
+            sql = """
+                SELECT DISTINCT
+                    tc.table_name,
+                    ccu.table_name AS foreign_table_name
+                FROM
+                    information_schema.table_constraints AS tc
+                    INNER JOIN information_schema.constraint_column_usage AS ccu
+                    USING (table_schema, constraint_name)
+                WHERE tc.constraint_type = 'FOREIGN KEY';
+            """
+            txn.execute(sql)
+
+            results = {}  # type: Dict[str, Set[str]]
+            for table, foreign_table in txn:
+                results.setdefault(table, set()).add(foreign_table)
+            return results
+
+        return await self.postgres_store.db_pool.runInteraction(
+            "get_table_constraints", _get_constraints
+        )
+
     async def handle_table(
         self, table, postgres_size, table_size, forward_chunk, backward_chunk
     ):
@@ -619,15 +647,43 @@ class Porter(object):
                     consumeErrors=True,
                 )
             )
+            # Map from table name to args passed to `handle_table`, i.e. a tuple
+            # of: `postgres_size`, `table_size`, `forward_chunk`, `backward_chunk`.
+            tables_to_port_info_map = {r[0]: r[1:] for r in setup_res}
 
             # Step 4. Do the copying.
+            #
+            # This is slightly convoluted as we need to ensure tables are ported
+            # in the correct order due to foreign key constraints.
             self.progress.set_state("Copying to postgres")
-            await make_deferred_yieldable(
-                defer.gatherResults(
-                    [run_in_background(self.handle_table, *res) for res in setup_res],
-                    consumeErrors=True,
+
+            constraints = await self.get_table_constraints()
+            tables_ported = set()  # type: Set[str]
+
+            while tables_to_port_info_map:
+                # Pick the unported tables whose foreign key targets are all ported.
+                # NOTE(review): if none qualify (e.g. an FK cycle) this loops forever.
+                tables_to_port = [
+                    table
+                    for table in tables_to_port_info_map
+                    if not constraints.get(table, set()) - tables_ported
+                ]
+
+                await make_deferred_yieldable(
+                    defer.gatherResults(
+                        [
+                            run_in_background(
+                                self.handle_table,
+                                table,
+                                *tables_to_port_info_map.pop(table),
+                            )
+                            for table in tables_to_port
+                        ],
+                        consumeErrors=True,
+                    )
                 )
-            )
+
+                tables_ported.update(tables_to_port)
 
             # Step 5. Set up sequences
             self.progress.set_state("Setting up sequence generators")
-- 
GitLab