Skip to content
Snippets Groups Projects
Unverified Commit 59710437, authored by Melvyn Laïly, committed by GitHub
Browse files

Return the search terms as search highlights for SQLite instead of nothing (#17000)

parent 9985aa68
No related branches found
No related tags found
No related merge requests found
Fixed the search feature of Element Android on homeservers using SQLite by returning search terms as search highlights.
\ No newline at end of file
...@@ -470,6 +470,8 @@ class SearchStore(SearchBackgroundUpdateStore): ...@@ -470,6 +470,8 @@ class SearchStore(SearchBackgroundUpdateStore):
count_args = args count_args = args
count_clauses = clauses count_clauses = clauses
sqlite_highlights: List[str] = []
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
search_query = search_term search_query = search_term
sql = """ sql = """
...@@ -486,7 +488,7 @@ class SearchStore(SearchBackgroundUpdateStore): ...@@ -486,7 +488,7 @@ class SearchStore(SearchBackgroundUpdateStore):
""" """
count_args = [search_query] + count_args count_args = [search_query] + count_args
elif isinstance(self.database_engine, Sqlite3Engine): elif isinstance(self.database_engine, Sqlite3Engine):
search_query = _parse_query_for_sqlite(search_term) search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
sql = """ sql = """
SELECT rank(matchinfo(event_search)) as rank, room_id, event_id SELECT rank(matchinfo(event_search)) as rank, room_id, event_id
...@@ -531,9 +533,11 @@ class SearchStore(SearchBackgroundUpdateStore): ...@@ -531,9 +533,11 @@ class SearchStore(SearchBackgroundUpdateStore):
event_map = {ev.event_id: ev for ev in events} event_map = {ev.event_id: ev for ev in events}
highlights = None highlights: Collection[str] = []
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
highlights = await self._find_highlights_in_postgres(search_query, events) highlights = await self._find_highlights_in_postgres(search_query, events)
else:
highlights = sqlite_highlights
count_sql += " GROUP BY room_id" count_sql += " GROUP BY room_id"
...@@ -597,6 +601,8 @@ class SearchStore(SearchBackgroundUpdateStore): ...@@ -597,6 +601,8 @@ class SearchStore(SearchBackgroundUpdateStore):
count_args = list(args) count_args = list(args)
count_clauses = list(clauses) count_clauses = list(clauses)
sqlite_highlights: List[str] = []
if pagination_token: if pagination_token:
try: try:
origin_server_ts_str, stream_str = pagination_token.split(",") origin_server_ts_str, stream_str = pagination_token.split(",")
...@@ -647,7 +653,7 @@ class SearchStore(SearchBackgroundUpdateStore): ...@@ -647,7 +653,7 @@ class SearchStore(SearchBackgroundUpdateStore):
CROSS JOIN events USING (event_id) CROSS JOIN events USING (event_id)
WHERE WHERE
""" """
search_query = _parse_query_for_sqlite(search_term) search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
args = [search_query] + args args = [search_query] + args
count_sql = """ count_sql = """
...@@ -694,9 +700,11 @@ class SearchStore(SearchBackgroundUpdateStore): ...@@ -694,9 +700,11 @@ class SearchStore(SearchBackgroundUpdateStore):
event_map = {ev.event_id: ev for ev in events} event_map = {ev.event_id: ev for ev in events}
highlights = None highlights: Collection[str] = []
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
highlights = await self._find_highlights_in_postgres(search_query, events) highlights = await self._find_highlights_in_postgres(search_query, events)
else:
highlights = sqlite_highlights
count_sql += " GROUP BY room_id" count_sql += " GROUP BY room_id"
...@@ -892,19 +900,25 @@ def _tokenize_query(query: str) -> TokenList: ...@@ -892,19 +900,25 @@ def _tokenize_query(query: str) -> TokenList:
return tokens return tokens
def _tokens_to_sqlite_match_query(tokens: TokenList) -> str: def _tokens_to_sqlite_match_query(tokens: TokenList) -> Tuple[str, List[str]]:
""" """
Convert the list of tokens to a string suitable for passing to sqlite's MATCH. Convert the list of tokens to a string suitable for passing to sqlite's MATCH.
Assume sqlite was compiled with enhanced query syntax. Assume sqlite was compiled with enhanced query syntax.
Returns the sqlite-formatted query string and the tokenized search terms
that can be used as highlights.
Ref: https://www.sqlite.org/fts3.html#full_text_index_queries Ref: https://www.sqlite.org/fts3.html#full_text_index_queries
""" """
match_query = [] match_query = []
highlights = []
for token in tokens: for token in tokens:
if isinstance(token, str): if isinstance(token, str):
match_query.append(token) match_query.append(token)
highlights.append(token)
elif isinstance(token, Phrase): elif isinstance(token, Phrase):
match_query.append('"' + " ".join(token.phrase) + '"') match_query.append('"' + " ".join(token.phrase) + '"')
highlights.append(" ".join(token.phrase))
elif token == SearchToken.Not: elif token == SearchToken.Not:
# TODO: SQLite treats NOT as a *binary* operator. Hopefully a search # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search
# term has already been added before this. # term has already been added before this.
...@@ -916,11 +930,14 @@ def _tokens_to_sqlite_match_query(tokens: TokenList) -> str: ...@@ -916,11 +930,14 @@ def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
else: else:
raise ValueError(f"unknown token {token}") raise ValueError(f"unknown token {token}")
return "".join(match_query) return "".join(match_query), highlights
def _parse_query_for_sqlite(search_term: str) -> str: def _parse_query_for_sqlite(search_term: str) -> Tuple[str, List[str]]:
"""Takes a plain unicode string from the user and converts it into a form """Takes a plain unicode string from the user and converts it into a form
that can be passed to sqllite's matchinfo(). that can be passed to sqllite's matchinfo().
Returns the converted query string and the tokenized search terms
that can be used as highlights.
""" """
return _tokens_to_sqlite_match_query(_tokenize_query(search_term)) return _tokens_to_sqlite_match_query(_tokenize_query(search_term))
...@@ -71,17 +71,16 @@ class EventSearchInsertionTest(HomeserverTestCase): ...@@ -71,17 +71,16 @@ class EventSearchInsertionTest(HomeserverTestCase):
store.search_msgs([room_id], "hi bob", ["content.body"]) store.search_msgs([room_id], "hi bob", ["content.body"])
) )
self.assertEqual(result.get("count"), 1) self.assertEqual(result.get("count"), 1)
if isinstance(store.database_engine, PostgresEngine): self.assertIn("hi", result.get("highlights"))
self.assertIn("hi", result.get("highlights")) self.assertIn("bob", result.get("highlights"))
self.assertIn("bob", result.get("highlights"))
# Check that search works for an unrelated message # Check that search works for an unrelated message
result = self.get_success( result = self.get_success(
store.search_msgs([room_id], "another", ["content.body"]) store.search_msgs([room_id], "another", ["content.body"])
) )
self.assertEqual(result.get("count"), 1) self.assertEqual(result.get("count"), 1)
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("another", result.get("highlights")) self.assertIn("another", result.get("highlights"))
# Check that search works for a search term that overlaps with the message # Check that search works for a search term that overlaps with the message
# containing a null byte and an unrelated message. # containing a null byte and an unrelated message.
...@@ -90,8 +89,8 @@ class EventSearchInsertionTest(HomeserverTestCase): ...@@ -90,8 +89,8 @@ class EventSearchInsertionTest(HomeserverTestCase):
result = self.get_success( result = self.get_success(
store.search_msgs([room_id], "hi alice", ["content.body"]) store.search_msgs([room_id], "hi alice", ["content.body"])
) )
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("alice", result.get("highlights")) self.assertIn("alice", result.get("highlights"))
def test_non_string(self) -> None: def test_non_string(self) -> None:
"""Test that non-string `value`s are not inserted into `event_search`. """Test that non-string `value`s are not inserted into `event_search`.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment