From 0c31783b4fa49fdf3b0e5b70f937ea0718114648 Mon Sep 17 00:00:00 2001
From: Erik Johnston <erikj@element.io>
Date: Mon, 17 Feb 2025 16:39:26 +0100
Subject: [PATCH] Limit size of user directory search queries (#18172)

If a user search has many words, we can end up creating really large
queries that take a long time for the database to process. Generally,
such searches don't return any results anyway (due to limits on user ID
and display name length).

We "fix" this by cheating and only searching for the first ten words.
---
 changelog.d/18172.misc                           | 1 +
 synapse/storage/databases/main/user_directory.py | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/18172.misc

diff --git a/changelog.d/18172.misc b/changelog.d/18172.misc
new file mode 100644
index 0000000000..49b6be263b
--- /dev/null
+++ b/changelog.d/18172.misc
@@ -0,0 +1 @@
+Reduce database load of user search when using large search terms.
diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py
index a51182de55..d6cd0774a8 100644
--- a/synapse/storage/databases/main/user_directory.py
+++ b/synapse/storage/databases/main/user_directory.py
@@ -1237,7 +1237,13 @@ def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]:
     search_term = _filter_text_for_index(search_term)
 
     escaped_words = []
-    for word in _parse_words(search_term):
+    for index, word in enumerate(_parse_words(search_term)):
+        if index >= 10:
+            # We limit how many terms we include, as otherwise the query can
+            # use excessive database time if people accidentally search for
+            # large strings.
+            break
+
         # Postgres tsvector and tsquery quoting rules:
         # words potentially containing punctuation should be quoted
         # and then existing quotes and backslashes should be doubled
-- 
GitLab