diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3ce4ffb0369ee03b11cb5fa3d57abc6379ec7982..bc1de2893c1c9afcd4ed38d08e1d76d1ec193281 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -204,7 +204,6 @@ jobs: POSTGRES: ${{ matrix.job.postgres && 1}} MULTI_POSTGRES: ${{ (matrix.job.postgres == 'multi-postgres') && 1}} WORKERS: ${{ matrix.job.workers && 1 }} - REDIS: 1 BLACKLIST: ${{ matrix.job.workers && 'synapse-blacklist-with-workers' }} TOP: ${{ github.workspace }} diff --git a/changelog.d/13647.removal b/changelog.d/13647.removal new file mode 100644 index 0000000000000000000000000000000000000000..0190a65dba1ecf2309af53961b8b87efd7ba5f1a --- /dev/null +++ b/changelog.d/13647.removal @@ -0,0 +1 @@ +Remove the ability to use direct TCP replication with workers. Direct TCP replication was deprecated in Synapse v1.18.0. Workers now require using Redis. diff --git a/docs/upgrade.md b/docs/upgrade.md index 422a3da664fb659dde4850a66aeeaf934ce3681d..c6219d06e8b6b56c899f1de12c0572cd3277f26e 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -91,6 +91,21 @@ process, for example: # Upgrading to v1.67.0 +## Direct TCP replication is no longer supported: migrate to Redis + +Redis support was added in v1.13.0 with it becoming the recommended method in +v1.18.0. It replaced the old direct TCP connections (which was deprecated as of +v1.18.0) to the main process. With Redis, rather than all the workers connecting +to the main process, all the workers and the main process connect to Redis, +which relays replication commands between processes. This can give a significant +CPU saving on the main process and is a prerequisite for upcoming +performance improvements. + +To migrate to Redis add the [`redis` config](./workers.md#shared-configuration), +and remove the TCP `replication` listener from config of the master and +`worker_replication_port` from worker config. Note that a HTTP listener with a +`replication` resource is still required. + ## Minimum version of Poetry is now v1.2.0 The minimum supported version of poetry is now 1.2. This should only affect diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 396c56082252c00c7c0db2a5b3858992a1c2d1cf..757957a1d58facd22b724ca7e88b50346473633c 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -431,8 +431,6 @@ Sub-options for each listener include: * `metrics`: (see the docs [here](../../metrics-howto.md)), - * `replication`: (deprecated as of Synapse 1.18, see the docs [here](../../workers.md)). - * `tls`: set to true to enable TLS for this listener. Will use the TLS key/cert specified in tls_private_key_path / tls_certificate_path. * `x_forwarded`: Only valid for an 'http' listener. Set to true to use the X-Forwarded-For header as the client IP. Useful when Synapse is diff --git a/docs/workers.md b/docs/workers.md index 176bb1475eecb9ca3ea1683af928aca58f885d8e..40b18523137c7ba1cc6ffcf3715c32d1f2de2d74 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -32,13 +32,8 @@ stream between all configured Synapse processes. Additionally, processes may make HTTP requests to each other, primarily for operations which need to wait for a reply ─ such as sending an event. -Redis support was added in v1.13.0 with it becoming the recommended method in -v1.18.0. It replaced the old direct TCP connections (which is deprecated as of -v1.18.0) to the main process. With Redis, rather than all the workers connecting -to the main process, all the workers and the main process connect to Redis, -which relays replication commands between processes. This can give a significant -cpu saving on the main process and will be a prerequisite for upcoming -performance improvements. +All the workers and the main process connect to Redis, which relays replication +commands between processes. If Redis support is enabled Synapse will use it as a shared cache, as well as a pub/sub mechanism. @@ -330,7 +325,6 @@ effects of bursts of events from that bridge on events sent by normal users. Additionally, the writing of specific streams (such as events) can be moved off of the main process to a particular worker. -(This is only supported with Redis-based replication.) To enable this, the worker must have a HTTP replication listener configured, have a `worker_name` and be listed in the `instance_map` config. The same worker @@ -600,15 +594,9 @@ equivalent to `synapse.app.generic_worker`: ## Migration from old config -There are two main independent changes that have been made: introducing Redis -support and merging apps into `synapse.app.generic_worker`. Both these changes -are backwards compatible and so no changes to the config are required, however -server admins are encouraged to plan to migrate to Redis as the old style direct -TCP replication config is deprecated. - -To migrate to Redis add the `redis` config as above, and optionally remove the -TCP `replication` listener from master and `worker_replication_port` from worker -config. +A main change that has occurred is the merging of worker apps into +`synapse.app.generic_worker`. This change is backwards compatible and so no +changes to the config are required. To migrate apps to use `synapse.app.generic_worker` simply update the `worker_app` option in the worker configs, and where worker are started (e.g. diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e57a9260326fc6fb193835bbec1860fba4a746f8..883f2fd2ecd8ba2ccc84856af569eb9661c342ef 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -57,7 +57,6 @@ from synapse.http.site import SynapseSite from synapse.logging.context import LoggingContext from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource -from synapse.replication.tcp.resource import ReplicationStreamProtocolFactory from synapse.rest import ClientRestResource from synapse.rest.admin import AdminRestResource from synapse.rest.health import HealthResource @@ -290,16 +289,6 @@ class SynapseHomeServer(HomeServer): manhole_settings=self.config.server.manhole_settings, manhole_globals={"hs": self}, ) - elif listener.type == "replication": - services = listen_tcp( - listener.bind_addresses, - listener.port, - ReplicationStreamProtocolFactory(self), - ) - for s in services: - self.get_reactor().addSystemEventTrigger( - "before", "shutdown", s.stopListening - ) elif listener.type == "metrics": if not self.config.metrics.enable_metrics: logger.warning( diff --git a/synapse/config/server.py b/synapse/config/server.py index 085fe22c511712b6ba7c4dc8ef5533645ad1cc75..c91df636d9a7501f35a963d9ab57179073a8e7b1 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -36,6 +36,12 @@ from ._util import validate_config logger = logging.Logger(__name__) +DIRECT_TCP_ERROR = """ +Using direct TCP replication for workers is no longer supported. + +Please see https://matrix-org.github.io/synapse/latest/upgrade.html#direct-tcp-replication-is-no-longer-supported-migrate-to-redis +""" + # by default, we attempt to listen on both '::' *and* '0.0.0.0' because some OSes # (Windows, macOS, other BSD/Linux where net.ipv6.bindv6only is set) will only listen # on IPv6 when '::' is set. @@ -165,7 +171,6 @@ KNOWN_LISTENER_TYPES = { "http", "metrics", "manhole", - "replication", } KNOWN_RESOURCES = { @@ -515,7 +520,9 @@ class ServerConfig(Config): ): raise ConfigError("allowed_avatar_mimetypes must be a list") - self.listeners = [parse_listener_def(x) for x in config.get("listeners", [])] + self.listeners = [ + parse_listener_def(i, x) for i, x in enumerate(config.get("listeners", [])) + ] # no_tls is not really supported any more, but let's grandfather it in # here. @@ -880,9 +887,12 @@ def read_gc_thresholds( ) -def parse_listener_def(listener: Any) -> ListenerConfig: +def parse_listener_def(num: int, listener: Any) -> ListenerConfig: """parse a listener config from the config file""" listener_type = listener["type"] + # Raise a helpful error if direct TCP replication is still configured. + if listener_type == "replication": + raise ConfigError(DIRECT_TCP_ERROR, ("listeners", str(num), "type")) port = listener.get("port") if not isinstance(port, int): diff --git a/synapse/config/workers.py b/synapse/config/workers.py index f2716422b52b7c59e076c70f20dd23b835957477..0fb725dd8fc69e079265f240365ecde9cdd6d4fe 100644 --- a/synapse/config/workers.py +++ b/synapse/config/workers.py @@ -27,7 +27,7 @@ from ._base import ( RoutableShardedWorkerHandlingConfig, ShardedWorkerHandlingConfig, ) -from .server import ListenerConfig, parse_listener_def +from .server import DIRECT_TCP_ERROR, ListenerConfig, parse_listener_def _FEDERATION_SENDER_WITH_SEND_FEDERATION_ENABLED_ERROR = """ The send_federation config option must be disabled in the main @@ -128,7 +128,8 @@ class WorkerConfig(Config): self.worker_app = None self.worker_listeners = [ - parse_listener_def(x) for x in config.get("worker_listeners", []) + parse_listener_def(i, x) + for i, x in enumerate(config.get("worker_listeners", [])) ] self.worker_daemonize = bool(config.get("worker_daemonize")) self.worker_pid_file = config.get("worker_pid_file") @@ -142,7 +143,8 @@ class WorkerConfig(Config): self.worker_replication_host = config.get("worker_replication_host", None) # The port on the main synapse for TCP replication - self.worker_replication_port = config.get("worker_replication_port", None) + if "worker_replication_port" in config: + raise ConfigError(DIRECT_TCP_ERROR, ("worker_replication_port",)) # The port on the main synapse for HTTP replication endpoint self.worker_replication_http_port = config.get("worker_replication_http_port") diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index e1cbfa50ebd240b6818a8e41aa0a34b821540deb..0f166d16aa5fce051bc477ed4ceb52551dd70d36 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -35,7 +35,6 @@ from twisted.internet.protocol import ReconnectingClientFactory from synapse.metrics import LaterGauge from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.replication.tcp.client import DirectTcpReplicationClientFactory from synapse.replication.tcp.commands import ( ClearUserSyncsCommand, Command, @@ -332,46 +331,31 @@ class ReplicationCommandHandler: def start_replication(self, hs: "HomeServer") -> None: """Helper method to start replication.""" - if hs.config.redis.redis_enabled: - from synapse.replication.tcp.redis import ( - RedisDirectTcpReplicationClientFactory, - ) + from synapse.replication.tcp.redis import RedisDirectTcpReplicationClientFactory - # First let's ensure that we have a ReplicationStreamer started. - hs.get_replication_streamer() + # First let's ensure that we have a ReplicationStreamer started. + hs.get_replication_streamer() - # We need two connections to redis, one for the subscription stream and - # one to send commands to (as you can't send further redis commands to a - # connection after SUBSCRIBE is called). + # We need two connections to redis, one for the subscription stream and + # one to send commands to (as you can't send further redis commands to a + # connection after SUBSCRIBE is called). - # First create the connection for sending commands. - outbound_redis_connection = hs.get_outbound_redis_connection() + # First create the connection for sending commands. + outbound_redis_connection = hs.get_outbound_redis_connection() - # Now create the factory/connection for the subscription stream. - self._factory = RedisDirectTcpReplicationClientFactory( - hs, - outbound_redis_connection, - channel_names=self._channels_to_subscribe_to, - ) - hs.get_reactor().connectTCP( - hs.config.redis.redis_host, - hs.config.redis.redis_port, - self._factory, - timeout=30, - bindAddress=None, - ) - else: - client_name = hs.get_instance_name() - self._factory = DirectTcpReplicationClientFactory(hs, client_name, self) - host = hs.config.worker.worker_replication_host - port = hs.config.worker.worker_replication_port - hs.get_reactor().connectTCP( - host, - port, - self._factory, - timeout=30, - bindAddress=None, - ) + # Now create the factory/connection for the subscription stream. + self._factory = RedisDirectTcpReplicationClientFactory( + hs, + outbound_redis_connection, + channel_names=self._channels_to_subscribe_to, + ) + hs.get_reactor().connectTCP( + hs.config.redis.redis_host, + hs.config.redis.redis_port, + self._factory, + timeout=30, + bindAddress=None, + ) def get_streams(self) -> Dict[str, Stream]: """Get a map from stream name to all streams.""" diff --git a/tests/app/test_openid_listener.py b/tests/app/test_openid_listener.py index 264e101082428a4f6a0ab38d9a9bf500ba707658..c7dae58eb54935ee261bd8db75174a054e467034 100644 --- a/tests/app/test_openid_listener.py +++ b/tests/app/test_openid_listener.py @@ -61,7 +61,7 @@ class FederationReaderOpenIDListenerTests(HomeserverTestCase): } # Listen with the config - self.hs._listen_http(parse_listener_def(config)) + self.hs._listen_http(parse_listener_def(0, config)) # Grab the resource from the site that was told to listen site = self.reactor.tcpServers[0][1] @@ -109,7 +109,7 @@ class SynapseHomeserverOpenIDListenerTests(HomeserverTestCase): } # Listen with the config - self.hs._listener_http(self.hs.config, parse_listener_def(config)) + self.hs._listener_http(self.hs.config, parse_listener_def(0, config)) # Grab the resource from the site that was told to listen site = self.reactor.tcpServers[0][1] diff --git a/tests/test_server.py b/tests/test_server.py index 23975d59c30de676e4ab257ffc1bb340ed7a30f5..7c66448245d0b4b70fed059e0e8663970f47d828 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -228,7 +228,7 @@ class OptionsResourceTests(unittest.TestCase): site = SynapseSite( "test", "site_tag", - parse_listener_def({"type": "http", "port": 0}), + parse_listener_def(0, {"type": "http", "port": 0}), self.resource, "1.0", max_request_body_size=4096, diff --git a/tests/utils.py b/tests/utils.py index d2c6d1e85242a80792335dae2543c3178db148fb..65db4376973c07c826ebe8eccfb2393d5fde2aca 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -135,7 +135,6 @@ def default_config( "enable_registration_captcha": False, "macaroon_secret_key": "not even a little secret", "password_providers": [], - "worker_replication_url": "", "worker_app": None, "block_non_admin_invites": False, "federation_domain_whitelist": None,