From 5829872bec9b9986c741eafec36e47774e4d2b3e Mon Sep 17 00:00:00 2001
From: Erik Johnston <erik@matrix.org>
Date: Wed, 11 Nov 2020 15:07:34 +0000
Subject: [PATCH] Fix port script to handle foreign key constraints (#8730)

---
 .buildkite/test_db.db   | Bin 19279872 -> 19296256 bytes
 changelog.d/8730.bugfix |   1 +
 scripts/synapse_port_db |  68 ++++++++++++++++++++++++++++++++++++----
 3 files changed, 63 insertions(+), 6 deletions(-)
 create mode 100644 changelog.d/8730.bugfix

diff --git a/.buildkite/test_db.db b/.buildkite/test_db.db
index 361369a581771bed36692a848aa396df96ad59d9..a0d9f16a7522c17f91ace4175a2234b3f6a93941 100644
GIT binary patch
delta 2680
zcmZp8FoUz<!A#Bx(t<A-7=+Rp;D9l4qK>iP3kE&=o2(2B3<CU17#J9Y(k2$jG4k)f
zct?hDA>($12OMR*0)oGrIT#oiGnz^m7#M?_;+nVdZ{Nnx=r@6tk^db7|MrOudsvu2
zY9I1S2{5uMGcYiuRwU*Y<fQ5)=jU=UFfcIke`MG$(7^SWe_{X|KOeI)Cz67;1B`74
znA#37w;f<<JHXm@fUWHSd)onywga4P2e{e}aJL=cX*<B%c7U(#0Ds#7fwlvJZ3l$f
z4hU~MAi|QsEXFu@I=h1CB_;-j=>`S5Q>HsiV9{%zt0=mCuA*4FADbxS0|v$i(>V&n
zG+3Fm8S@#ZD+-89O!qGkd&<n0$T&T*;nT|LHx)#WZeLm?wx63ZXu9D97Qgn*bz<$C
z>%`kP*GaT*u9IxvTqo7OxlX!$bDd24<~rH-&2@6^o9pD;H`gh&Z?02p-(08EzPV1h
zeRG{k`{p{;_RV!_?VIb=+c(!~v~RA{Y~Ng`)xNn-yM1$=PW$FM-S*9OdhMI*^xHSr
z8MJS%Gi={nXVkvA&bWPZok{!VI@9*eb!P3G>&)9X*IBf0uCr|4TxZq3xz4(MbDd55
z<~rN<&2@I|o9pb`H`h6|Z?1D}-(2U^zPZl1eRG{l`{p{=_RV!}?VIb|+c(#Fv~RBS
zY~Nhx)xNpTyM1$=Py6ON-}cRQAV2!IZ>|ey-&_~izPT=_eRExK`{ufk_RV#n?VIbu
z+BesQw{NbCXy05H*}l0hs(o`^bo=JInD)(evF)4d;@UUY#kX&+OK9I*m)O3!E~$NU
zU2^;8x|H_Kb*b%}>(bgc*QK{_uFGiOT$kCtxh|`Hb6s}(=DM8r&2_o$o9pu0H`nF2
zZ>}q7-&|MNzPYZbeRExL`{ufm_RV#r?VIb$+Besgw{NbiXy05{*}l21s(o`^b^GSJ
zn)c0gwe6eh>e@Hg)wgf1YiQqG*Vw+fuBm-<U32^9x|a6Mb*=52>)P5k*R{8AuIp&u
zT-VvYxvr~yb6t1)=DMEt&2_!)o9p`8H`n#IZ?2orzPWB<`{uey?VIZ+w{NbS(!RNF
zYWwE8Y3-Zqrnhgdo6)|xZf5)Dx>@a;>t?rauA9@oxo&Rz=DK<9o9pJcZ?0R=zPWB;
z`{ueu?VIZsw{Nan(!RNFY5V57W$l~mmbY)NThYF`Ze{!Cx>fC)>sGgKu3OW-xo&Oy
z=DKz5o9ouMZ?4<WzPWB=`{ue$?VIa1w{Nc7(!RNFYy0N9ZS9-uwzqGt+tI$cZfE=E
zx?Sy?>vp$quG`bTxo&U!=DL0Do9p(sZ>~GgzPavT`{ues?VIZkw{NaH(!ROwX#3{6
zW9^&kj<;{FJJG(m?qvJsx>N0&>rS_Ct~=Adx$bQH=DKt3o9oWEZ?3!0zPavV`{ue!
z?VIZ^w{Nby(!ROwYWwE8YwersuD5TlyV1V6?q>Vux?Am=>u$GiuDjE|x$bWJ=DK_B
zo9phkZ?1dLzPavU`{uew?VIZ!w{Na{(!ROwY5V57XYHHop0{tVd(pnR?q&Ptx>xO+
z>t45Su6xtIx$bTI=DK(7o9o`UZ?5~$zPavW`{ue&?VIa9w{Ncd(!ROwYy0N9Z|$4w
zzPE3#`_aC+?q~bvx?k;^>wdRyuKUxzx$bZK=DL6Fo9q5}Z?0qP-(1HCBA7q~Gl*aT
z5v(AB4Mebm2o4az2_m>a1UHD_0TH|)f)7OSg9rf-AqXObK!h-e5CIXQAVLg8h=T|T
z5FrU7q(Fo;h>!shvLHf^asTEzc}71)e%@OQBFrib{73nH_$Kg$&gWPl&!`~Fu$Rw<
z!ISqp?{|j1@FE$+;Jw9kd%NKTmg`L1P4dkg?BcGjOwIQ5*2^=B&F@((&&a($V6i-7
zFW2;-0<ntiCWT@JjO-RorEKEfs?!zg#3ZKY^D(hapIav;*0({Pv2TL{W8Vfv#=Z?o
zjC~uF8T&S<Fs|F6%Gkik3oe#VGw@H}sNgFDD!rHS3mP-BC<ihjmCe&Xn)E7h^Q%A#
z@pBCP(-l8NdVo~d@(UQUC<h`J<kLSsT&XP0z`(%8?#{r!l<yqxTb@Te)4A_)XK@I!
zpJ8_g`AeI<zLBw6h+W)XpQ%wkGcP5zA|<uBBr`9uBr`uRz9>IGw-`*uXQp6C2&Fp*
zxjKfpDtP+2xJD>o$ip;pDJUo?`1@fhP|zsK=giNI&rH#DY8ITnuuM#5y8Ks0Zhx@O
z;&?+0BZX4iLYoEH#r5@>dcsQ*lX6lqlwb%5rA(Lo#3-QyF;qc=OF=<bSD_>$71==w
znRyC@r68~8=jm}NOrN-)OUhRv$koj?$kor;H5gfKv4RGYC7L=213g_7^3%XZgF+73
zOfH4#3y<)MxgZ+}Qd0~H14s}cnNwU+l$w|upI?-cT9lcW4l^{hEH$r$OJO?SDPF~T
zE(L{vAWvV%phyL8*GLp6=_r7V1_dj`Y2m2~DfxNoB?>92$vKHdsS1e-Y57H|ndx~7
z*{PKZ`FRTH7G|Utr79$)CMT8_rz(_WB$lYBDCDLl=0VIWPgPH;SIA3EO;ISxS4hn)
z$w)0y$W6@5D@n}EQ%KBH04F2`jns;Q)V$)%vQ$ll{33;ve1${>aGWU=Cnx49<flPQ
zLkdf16qRSB=B1XU7Acg2;wUGzBo$;R*!Kz=i8;mj3I+M~C8>EOnTa_$l?n)J^dKgB
z<|!nmq-27UFvx)*_v;Y$b9sJgPKrWOszOm}N@-q7A}FXqDJ8L3p(G=-SfMDtTp=Yt
zwHOqTd8zdf&ljbpr51rKNmM8<NKMX6%S=`PJJebs6Ot0ZJcabsl46MCQc^)4N&%&k
zJcY~>xRHrzX{pI2AYp~F#GK5O%#uo2ykm<UW<4&=`JM~p8B6BdtdnQlIe+g0c}DO3
bU)RYqax?B1Tqw^d%(R97KL7MjCcTOPn<ZCq

delta 1527
zcmZp8Fq5<4!3@p`(t=MI7=)4;;D9lHqK>iP69zqvOjZU427dlk3=9l{zb6*R?PqF~
zV02>KuJC~44lfsDMpFd?17mPgeDgN`?c4Yn{U&T?3HT#F&4Gobo#g;yJIev4c9sLo
z?JNgCeAafB18nUq2iV(L4sf)y9N=tcIl$G<a)7&?<p57R%K_eYmIHk4EC=}8Sq=!a
zvm6j?XE`9$&T>F_Im-c&1ZHuDo7349L@!N$P$+h^eV(G|_IZk8X?`pcj3x}zxeCNI
z*!e1$v>Edmrz;AGOHB7K5PP~mbe%k70ps>1MPdiIr-zt$h_G8Um9mL@t4>#}6O)*p
z&&R|z|J8bVMzQwIbs#L>zPV1KeRG{;`{p{S_RV$D?VIal+Bet9wr{SJYu{Wa-@ds{
zp?!0mV*BPgrS{Er%I%x$RN6P!skU#fQ)}N`r{2D~PNRKuoo4&yI<5B2b=vKl>vY;T
z*Xg!zuG4GZT&Lf@xz3<{bDd%P<~pPH&2`4@o9j&4H`kfAZ>}?I-&|+jzPZk#eRG{<
z`{p{U_RV$H?VIat+BetPwr{SpYu{XF-@dudp?!0mWBcYhr}oWt&h4A)T-rC+xwdbv
zb8FvR=ia`#&ZB*EooD;zI<NN4b>8ip>wMZb*ZH<@uJdc(T<71uxh|l6b6sHj=DMKv
z&2_=;o9jZ_H`j%>Z>|e#-&_~ozPT=<eREx8`{uf+_RV$C?VIaj+Bet5wr{SBYu{WK
z-@dsnp?z~*V*BR0r1s5q$?co#Qrb7yrM7RbOKaa;m)^d)E~9;OU1s~{x~%rib=mEk
z>vGyR*X6cvuFGrRT$kUzxvrpnb6sKk=DMQx&2`1?o9jy2H`kT6Z>}qA-&|MTzPYZV
zeREx9`{uf;_RV$G?VIar+BetLwr{ShYu{W~-@du7p?z~*WBca1ruNNs&F!1(TG}_)
zwYG1rYir+J*WSLluA_Z(U1$5|x~}%kb=~co>w4NZ*Y&n<uIp>xT-V>exo$%H=DLaP
zo9iaEZ?2o%zPWBn`{uf-?VIbSwQsJQ-oCkRM*HTvneChFX0>mwo87*-Zch8=y1DI}
z>*lp@uAAS!xo$!G=DLOLo9h;}Z?0S1zPWBm`{uf(?VIbCwQsIl-oCkRMf>KumF=7B
zR<&=gTiw37ZcY2<y0z__>(;ezu3O)}xo$)I=DLmTo9i~UZ?4<izPWBo`{uf>?VIbi
zwQsK5-oCkRNBicwo$Z_JcC~M=+ugpoZcqE>y1nh2>-M#8uG`<fx$Z#w=DLIJo9hm>
zZ>~GszPav5`{uf%?VIb4wQsIF-oCl+MEmBtlkJ=9PPK2YJKesy?o9jUy0h(@>&~@r
zt~=kpx$Z*y=DLgRo9iyMZ?3!CzPav7`{uf<?VIbawQsJw-oCl+M*HTvo9&zHZnbZ&
zyWPII?oRvWy1VV0>+ZF0uDjp9x$Z&x=DLUNo9iC6Z?1dXzPav6`{uf*?VIbKwQsI_
z-oCl+Mf>Kum+hPDUbSzod)>ad?oIpVy0`6{>)y3*u6y6Ux$Z;z=DLsVo9jNcZ?5~?
zzPav8`{uf@?VIbqwQsKb-oCl+NBicwpY5CLezk9|``x~|?oa#Xy1(t4>;AQGuKVA;
zxsI`aa~&gyU;+`$Ac6%%u!0CS5Wx;2I6wp^h~NSd+#rGnMDT(LJ`lkVA_PE$AcznG
z5yBus1Vo5}2r&>L4k9E#gd~WN0uj<6LIy<0f(SXr{hRCL8T}O5895ll89W*03oMXl
zR8V9($@qy;knbzQUOpQJPjCsl7fiy4`B~9wj3w+I&Ae>l(WancNpk-77&S(1PTpG#
zoM2V^-^Qvj3N!6jSuD@ElWV$Dzi|Bgd(mo)JGq<Kn|av9O--4a_2=s^mS+^3&m619
k_^NM%JY(Mm1;)M&ii~|5lo<OqC^PnLP+?rRL6xxq0Dj)ZAOHXW

diff --git a/changelog.d/8730.bugfix b/changelog.d/8730.bugfix
new file mode 100644
index 0000000000..dcc42bc981
--- /dev/null
+++ b/changelog.d/8730.bugfix
@@ -0,0 +1 @@
+Fix port script to correctly handle foreign key constraints. Broke in v1.21.0.
diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db
index 13c0120bb4..7a638ea8e3 100755
--- a/scripts/synapse_port_db
+++ b/scripts/synapse_port_db
@@ -22,7 +22,7 @@ import logging
 import sys
 import time
 import traceback
-from typing import Optional
+from typing import Dict, Optional, Set
 
 import yaml
 
@@ -292,6 +292,34 @@ class Porter(object):
 
         return table, already_ported, total_to_port, forward_chunk, backward_chunk
 
+    async def get_table_constraints(self) -> Dict[str, Set[str]]:
+        """Returns a map from tables with foreign keys to the *other* tables they depend on.
+        """
+
+        def _get_constraints(txn):
+            # Pull the foreign keys from the postgres schema tables. Self-references are
+            # excluded: a table can never be ported "before itself", so it would never run.
+            sql = """
+                SELECT DISTINCT
+                    tc.table_name,
+                    ccu.table_name AS foreign_table_name
+                FROM
+                    information_schema.table_constraints AS tc
+                    INNER JOIN information_schema.constraint_column_usage AS ccu
+                    USING (table_schema, constraint_name)
+                WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name != ccu.table_name;
+            """
+            txn.execute(sql)
+
+            results = {}  # type: Dict[str, Set[str]]
+            for table, foreign_table in txn:
+                results.setdefault(table, set()).add(foreign_table)
+            return results
+
+        return await self.postgres_store.db_pool.runInteraction(
+            "get_table_constraints", _get_constraints
+        )
+
     async def handle_table(
         self, table, postgres_size, table_size, forward_chunk, backward_chunk
     ):
@@ -619,15 +647,43 @@ class Porter(object):
                     consumeErrors=True,
                 )
             )
+            # Map from table name to args passed to `handle_table`, i.e. a tuple
+            # of: `postgres_size`, `table_size`, `forward_chunk`, `backward_chunk`.
+            tables_to_port_info_map = {r[0]: r[1:] for r in setup_res}
 
             # Step 4. Do the copying.
+            #
+            # This is slightly convoluted as we need to ensure tables are ported
+            # in the correct order due to foreign key constraints.
             self.progress.set_state("Copying to postgres")
-            await make_deferred_yieldable(
-                defer.gatherResults(
-                    [run_in_background(self.handle_table, *res) for res in setup_res],
-                    consumeErrors=True,
+
+            constraints = await self.get_table_constraints()
+            tables_ported = set()  # type: Set[str]
+
+            while tables_to_port_info_map:
+                # Pulls out all tables that are still to be ported and which
+                # only depend on tables that are already ported (if any).
+                tables_to_port = [
+                    table
+                    for table in tables_to_port_info_map
+                    if not constraints.get(table, set()) - tables_ported
+                ]
+
+                await make_deferred_yieldable(
+                    defer.gatherResults(
+                        [
+                            run_in_background(
+                                self.handle_table,
+                                table,
+                                *tables_to_port_info_map.pop(table),
+                            )
+                            for table in tables_to_port
+                        ],
+                        consumeErrors=True,
+                    )
                 )
-            )
+
+                tables_ported.update(tables_to_port)
 
             # Step 5. Set up sequences
             self.progress.set_state("Setting up sequence generators")
-- 
GitLab