Skip to content
Snippets Groups Projects
Unverified Commit f78b68a9 authored by Hillery Shay's avatar Hillery Shay Committed by GitHub
Browse files

Treat "\u0000" as "\u0020" for the purposes of message search (message indexing) (#10820)


* add test to check if null code points are being inserted

* add logic to detect and replace null code points before insertion into db

* lints

* add license to test

* change approach to null substitution

* add type hint for SearchEntry

* Add changelog entry

Signed-off-by: default avatarH.Shay <shaysquared@gmail.com>

* updated changelog

* update chanelog message

* remove duplicate changelog

* Update synapse/storage/databases/main/events.py remove extra space

Co-authored-by: default avatarPatrick Cloke <clokep@users.noreply.github.com>

* rename and move test file, update tests, delete old test file

* fix typo in comments

* update _find_highlights_in_postgres to replace null byte with space

* replace null byte in sqlite search insertion

* beef up and reorganize test for this pr

* update changelog

* add type hints and update docstring

* check db engine directly vs using env variable

* refactor tests to be less repetetive

* move rplace logic into seperate function

* requested changes

* Fix typo.

* Update synapse/storage/databases/main/search.py

Co-authored-by: default avatarreivilibre <olivier@librepush.net>

* Update changelog.d/10820.misc

Co-authored-by: default avatarAaron Raimist <aaron@raim.ist>

Co-authored-by: default avatarPatrick Cloke <clokep@users.noreply.github.com>
Co-authored-by: default avatarreivilibre <olivier@librepush.net>
Co-authored-by: default avatarAaron Raimist <aaron@raim.ist>
parent 03db6701
No related branches found
No related tags found
No related merge requests found
Fix a long-standing bug where an `m.room.message` event containing a null byte would cause an internal server error.
\ No newline at end of file
...@@ -15,12 +15,12 @@ ...@@ -15,12 +15,12 @@
import logging import logging
import re import re
from collections import namedtuple from collections import namedtuple
from typing import Collection, List, Optional, Set from typing import Collection, Iterable, List, Optional, Set
from synapse.api.errors import SynapseError from synapse.api.errors import SynapseError
from synapse.events import EventBase from synapse.events import EventBase
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import DatabasePool from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.databases.main.events_worker import EventRedactBehaviour
from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.engines import PostgresEngine, Sqlite3Engine
...@@ -32,14 +32,24 @@ SearchEntry = namedtuple( ...@@ -32,14 +32,24 @@ SearchEntry = namedtuple(
) )
def _clean_value_for_search(value: str) -> str:
"""
Replaces any null code points in the string with spaces as
Postgres and SQLite do not like the insertion of strings with
null code points into the full-text search tables.
"""
return value.replace("\u0000", " ")
class SearchWorkerStore(SQLBaseStore): class SearchWorkerStore(SQLBaseStore):
def store_search_entries_txn(self, txn, entries): def store_search_entries_txn(
self, txn: LoggingTransaction, entries: Iterable[SearchEntry]
) -> None:
"""Add entries to the search table """Add entries to the search table
Args: Args:
txn (cursor): txn:
entries (iterable[SearchEntry]): entries: entries to be added to the table
entries to be added to the table
""" """
if not self.hs.config.enable_search: if not self.hs.config.enable_search:
return return
...@@ -55,7 +65,7 @@ class SearchWorkerStore(SQLBaseStore): ...@@ -55,7 +65,7 @@ class SearchWorkerStore(SQLBaseStore):
entry.event_id, entry.event_id,
entry.room_id, entry.room_id,
entry.key, entry.key,
entry.value, _clean_value_for_search(entry.value),
entry.stream_ordering, entry.stream_ordering,
entry.origin_server_ts, entry.origin_server_ts,
) )
...@@ -70,11 +80,16 @@ class SearchWorkerStore(SQLBaseStore): ...@@ -70,11 +80,16 @@ class SearchWorkerStore(SQLBaseStore):
" VALUES (?,?,?,?)" " VALUES (?,?,?,?)"
) )
args = ( args = (
(entry.event_id, entry.room_id, entry.key, entry.value) (
entry.event_id,
entry.room_id,
entry.key,
_clean_value_for_search(entry.value),
)
for entry in entries for entry in entries
) )
txn.execute_batch(sql, args) txn.execute_batch(sql, args)
else: else:
# This should be unreachable. # This should be unreachable.
raise Exception("Unrecognized database engine") raise Exception("Unrecognized database engine")
...@@ -646,6 +661,7 @@ class SearchStore(SearchBackgroundUpdateStore): ...@@ -646,6 +661,7 @@ class SearchStore(SearchBackgroundUpdateStore):
for key in ("body", "name", "topic"): for key in ("body", "name", "topic"):
v = event.content.get(key, None) v = event.content.get(key, None)
if v: if v:
v = _clean_value_for_search(v)
values.append(v) values.append(v)
if not values: if not values:
......
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import synapse.rest.admin
from synapse.rest.client import login, room
from synapse.storage.engines import PostgresEngine
from tests.unittest import HomeserverTestCase
class NullByteInsertionTest(HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
login.register_servlets,
room.register_servlets,
]
def test_null_byte(self):
"""
Postgres/SQLite don't like null bytes going into the search tables. Internally
we replace those with a space.
Ensure this doesn't break anything.
"""
# Register a user and create a room, create some messages
self.register_user("alice", "password")
access_token = self.login("alice", "password")
room_id = self.helper.create_room_as("alice", tok=access_token)
# Send messages and ensure they don't cause an internal server
# error
for body in ["hi\u0000bob", "another message", "hi alice"]:
response = self.helper.send(room_id, body, tok=access_token)
self.assertIn("event_id", response)
# Check that search works for the message where the null byte was replaced
store = self.hs.get_datastore()
result = self.get_success(
store.search_msgs([room_id], "hi bob", ["content.body"])
)
self.assertEquals(result.get("count"), 1)
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("hi", result.get("highlights"))
self.assertIn("bob", result.get("highlights"))
# Check that search works for an unrelated message
result = self.get_success(
store.search_msgs([room_id], "another", ["content.body"])
)
self.assertEquals(result.get("count"), 1)
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("another", result.get("highlights"))
# Check that search works for a search term that overlaps with the message
# containing a null byte and an unrelated message.
result = self.get_success(store.search_msgs([room_id], "hi", ["content.body"]))
self.assertEquals(result.get("count"), 2)
result = self.get_success(
store.search_msgs([room_id], "hi alice", ["content.body"])
)
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("alice", result.get("highlights"))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment