Compare revisions

bfdb1074 · bfdb1074 · bfdb1074 · bfdb1074 · bfdb1074 · bfdb1074
--- a/synapse/events/utils.py
+++ b/synapse/events/utils.py
-# Copyright 2014-2016 OpenMarket Ltd
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2021 The Matrix.org Foundation C.I.C.
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import collections.abc
 import re
 from typing import (
    TYPE_CHECKING,
    Any,
+    Awaitable,
    Callable,
    Dict,
    Iterable,
    List,
    Mapping,
+    Match,
    MutableMapping,
    Optional,
    Union,
 )

 import attr
-
-from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
+from canonicaljson import encode_canonical_json
+
+from synapse.api.constants import (
+    CANONICALJSON_MAX_INT,
+    CANONICALJSON_MIN_INT,
+    MAX_PDU_SIZE,
+    EventContentFields,
+    EventTypes,
+    RelationTypes,
+)
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import RoomVersion
-from synapse.types import JsonDict
-from synapse.util.frozenutils import unfreeze
+from synapse.types import JsonDict, Requester

-from . import EventBase
+from . import EventBase, StrippedStateEvent, make_event_from_dict

 if TYPE_CHECKING:
    from synapse.handlers.relations import BundledAggregations
+    from synapse.server import HomeServer


-# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
-# (?<!stuff) matches if the current position in the string is not preceded
-# by a match for 'stuff'.
-# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
-#       the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
-SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")
+# Split strings on "." but not "\." (or "\\\.").
+SPLIT_FIELD_REGEX = re.compile(r"\\*\.")
+# Find escaped characters, i.e. those with a \ in front of them.
+ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)")

-CANONICALJSON_MAX_INT = (2**53) - 1
-CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
+
+# Module API callback that allows adding fields to the unsigned section of
+# events that are sent to clients.
+ADD_EXTRA_FIELDS_TO_UNSIGNED_CLIENT_EVENT_CALLBACK = Callable[
+    [EventBase], Awaitable[JsonDict]
+]


 def prune_event(event: EventBase) -> EventBase:
@@ -62,17 +81,15 @@ def prune_event(event: EventBase) -> EventBase:
    """
    pruned_event_dict = prune_event_dict(event.room_version, event.get_dict())

-    from . import make_event_from_dict
-
    pruned_event = make_event_from_dict(
        pruned_event_dict, event.room_version, event.internal_metadata.get_dict()
    )

-    # copy the internal fields
+    # Copy the bits of `internal_metadata` that aren't returned by `get_dict`
    pruned_event.internal_metadata.stream_ordering = (
        event.internal_metadata.stream_ordering
    )
-
+    pruned_event.internal_metadata.instance_name = event.internal_metadata.instance_name
    pruned_event.internal_metadata.outlier = event.internal_metadata.outlier

    # Mark the event as redacted
@@ -81,6 +98,30 @@ def prune_event(event: EventBase) -> EventBase:
    return pruned_event


+def clone_event(event: EventBase) -> EventBase:
+    """Take a copy of the event.
+
+    This is mostly useful because it does a *shallow* copy of the `unsigned` data,
+    which means it can then be updated without corrupting the in-memory cache. Note that
+    other properties of the event, such as `content`, are *not* (currently) copied here.
+    """
+    # XXX: We rely on at least one of `event.get_dict()` and `make_event_from_dict()`
+    #   making a copy of `unsigned`. Currently, both do, though I don't really know why.
+    #   Still, as long as they do, there's not much point doing yet another copy here.
+    new_event = make_event_from_dict(
+        event.get_dict(), event.room_version, event.internal_metadata.get_dict()
+    )
+
+    # Copy the bits of `internal_metadata` that aren't returned by `get_dict`.
+    new_event.internal_metadata.stream_ordering = (
+        event.internal_metadata.stream_ordering
+    )
+    new_event.internal_metadata.instance_name = event.internal_metadata.instance_name
+    new_event.internal_metadata.outlier = event.internal_metadata.outlier
+
+    return new_event
+
+
 def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDict:
    """Redacts the event_dict in the same way as `prune_event`, except it
    operates on dicts rather than event objects
@@ -101,13 +142,12 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
        "depth",
        "prev_events",
        "auth_events",
-        "origin",
        "origin_server_ts",
    ]

-    # Room versions from before MSC2176 had additional allowed keys.
-    if not room_version.msc2176_redaction_rules:
-        allowed_keys.extend(["prev_state", "membership"])
+    # Earlier room versions had additional allowed keys.
+    if not room_version.updated_redaction_rules:
+        allowed_keys.extend(["prev_state", "membership", "origin"])

    event_type = event_dict["type"]

@@ -120,17 +160,29 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic

    if event_type == EventTypes.Member:
        add_fields("membership")
-        if room_version.msc3375_redaction_rules:
+        if room_version.restricted_join_rule_fix:
            add_fields(EventContentFields.AUTHORISING_USER)
+        if room_version.updated_redaction_rules:
+            # Preserve the signed field under third_party_invite.
+            third_party_invite = event_dict["content"].get("third_party_invite")
+            if isinstance(third_party_invite, collections.abc.Mapping):
+                new_content["third_party_invite"] = {}
+                if "signed" in third_party_invite:
+                    new_content["third_party_invite"]["signed"] = third_party_invite[
+                        "signed"
+                    ]
+
    elif event_type == EventTypes.Create:
-        # MSC2176 rules state that create events cannot be redacted.
-        if room_version.msc2176_redaction_rules:
-            return event_dict
+        if room_version.updated_redaction_rules:
+            # MSC2176 rules state that create events cannot have their `content` redacted.
+            new_content = event_dict["content"]
+        elif not room_version.implicit_room_creator:
+            # Some room versions give meaning to `creator`
+            add_fields("creator")

-        add_fields("creator")
    elif event_type == EventTypes.JoinRules:
        add_fields("join_rule")
-        if room_version.msc3083_join_rules:
+        if room_version.restricted_join_rule:
            add_fields("allow")
    elif event_type == EventTypes.PowerLevels:
        add_fields(
@@ -144,24 +196,27 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
            "redact",
        )

-        if room_version.msc2176_redaction_rules:
+        if room_version.updated_redaction_rules:
            add_fields("invite")

-        if room_version.msc2716_historical:
-            add_fields("historical")
-
    elif event_type == EventTypes.Aliases and room_version.special_case_aliases_auth:
        add_fields("aliases")
    elif event_type == EventTypes.RoomHistoryVisibility:
        add_fields("history_visibility")
-    elif event_type == EventTypes.Redaction and room_version.msc2176_redaction_rules:
+    elif event_type == EventTypes.Redaction and room_version.updated_redaction_rules:
        add_fields("redacts")
-    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_INSERTION:
-        add_fields(EventContentFields.MSC2716_NEXT_BATCH_ID)
-    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_BATCH:
-        add_fields(EventContentFields.MSC2716_BATCH_ID)
-    elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER:
-        add_fields(EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE)
+
+    # Protect the rel_type and event_id fields under the m.relates_to field.
+    if room_version.msc3389_relation_redactions:
+        relates_to = event_dict["content"].get("m.relates_to")
+        if isinstance(relates_to, collections.abc.Mapping):
+            new_relates_to = {}
+            for field in ("rel_type", "event_id"):
+                if field in relates_to:
+                    new_relates_to[field] = relates_to[field]
+            # Only include a non-empty relates_to field.
+            if new_relates_to:
+                new_content["m.relates_to"] = new_relates_to

    allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys}

@@ -223,6 +278,57 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
    sub_out_dict[key_to_move] = sub_dict[key_to_move]


+def _escape_slash(m: Match[str]) -> str:
+    """
+    Replacement function; replace a backslash-backslash or backslash-dot with the
+    second character. Leaves any other string alone.
+    """
+    if m.group(1) in ("\\", "."):
+        return m.group(1)
+    return m.group(0)
+
+
+def _split_field(field: str) -> List[str]:
+    """
+    Splits strings on unescaped dots and removes escaping.
+
+    Args:
+        field: A string representing a path to a field.
+
+    Returns:
+        A list of nested fields to traverse.
+    """
+
+    # Convert the field and remove escaping:
+    #
+    # 1. "content.body.thing\.with\.dots"
+    # 2. ["content", "body", "thing\.with\.dots"]
+    # 3. ["content", "body", "thing.with.dots"]
+
+    # Find all dots (and their preceding backslashes). If the dot is unescaped
+    # then emit a new field part.
+    result = []
+    prev_start = 0
+    for match in SPLIT_FIELD_REGEX.finditer(field):
+        # If the match is an *even* number of characters then the dot was escaped.
+        if len(match.group()) % 2 == 0:
+            continue
+
+        # Add a new part (up to the dot, exclusive) after removing escaping.
+        result.append(
+            ESCAPE_SEQUENCE_PATTERN.sub(
+                _escape_slash, field[prev_start : match.end() - 1]
+            )
+        )
+        prev_start = match.end()
+
+    # Add any part of the field after the last unescaped dot. (Note that if the
+    # last character is an unescaped dot this correctly adds a blank string.)
+    result.append(re.sub(r"\\(.)", _escape_slash, field[prev_start:]))
+
+    return result
+
+
 def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
    """Return a new dict with only the fields in 'dictionary' which are present
    in 'fields'.
@@ -230,7 +336,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
    If there are no event fields specified then all fields are included.
    The entries may include '.' characters to indicate sub-fields.
    So ['content.body'] will include the 'body' field of the 'content' object.
-    A literal '.' character in a field name may be escaped using a '\'.
+    A literal '.' or '\' character in a field name may be escaped using a '\'.

    Args:
        dictionary: The dictionary to read from.
@@ -245,13 +351,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:

    # for each field, convert it:
    # ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
-    split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]
-
-    # for each element of the output array of arrays:
-    # remove escaping so we can use the right key names.
-    split_fields[:] = [
-        [f.replace(r"\.", r".") for f in field_array] for field_array in split_fields
-    ]
+    split_fields = [_split_field(f) for f in fields]

    output: JsonDict = {}
    for field_array in split_fields:
@@ -311,8 +411,9 @@ class SerializeEventConfig:
    as_client_event: bool = True
    # Function to convert from federation format to client format
    event_format: Callable[[JsonDict], JsonDict] = format_event_for_client_v1
-    # ID of the user's auth token - used for namespacing of transaction IDs
-    token_id: Optional[int] = None
+    # The entity that requested the event. This is used to determine whether to include
+    # the transaction_id in the unsigned section of the event.
+    requester: Optional[Requester] = None
    # List of event fields to include. If empty, all fields will be returned.
    only_event_fields: Optional[List[str]] = None
    # Some events can have stripped room state stored in the `unsigned` field.
@@ -349,7 +450,7 @@ def serialize_event(
    time_now_ms = int(time_now_ms)

    # Should this strip out None's?
-    d = {k: v for k, v in e.get_dict().items()}
+    d = dict(e.get_dict().items())

    d["event_id"] = e.event_id

@@ -359,13 +460,50 @@ def serialize_event(

    if "redacted_because" in e.unsigned:
        d["unsigned"]["redacted_because"] = serialize_event(
-            e.unsigned["redacted_because"], time_now_ms, config=config
+            e.unsigned["redacted_because"],
+            time_now_ms,
+            config=config,
        )

-    if config.token_id is not None:
-        if config.token_id == getattr(e.internal_metadata, "token_id", None):
-            txn_id = getattr(e.internal_metadata, "txn_id", None)
-            if txn_id is not None:
+    # If we have a txn_id saved in the internal_metadata, we should include it in the
+    # unsigned section of the event if it was sent by the same session as the one
+    # requesting the event.
+    txn_id: Optional[str] = getattr(e.internal_metadata, "txn_id", None)
+    if (
+        txn_id is not None
+        and config.requester is not None
+        and config.requester.user.to_string() == e.sender
+    ):
+        # Some events do not have the device ID stored in the internal metadata,
+        # this includes old events as well as those created by appservice, guests,
+        # or with tokens minted with the admin API. For those events, fall back
+        # to using the access token instead.
+        event_device_id: Optional[str] = getattr(e.internal_metadata, "device_id", None)
+        if event_device_id is not None:
+            if event_device_id == config.requester.device_id:
+                d["unsigned"]["transaction_id"] = txn_id
+
+        else:
+            # Fallback behaviour: only include the transaction ID if the event
+            # was sent from the same access token.
+            #
+            # For regular users, the access token ID can be used to determine this.
+            # This includes access tokens minted with the admin API.
+            #
+            # For guests and appservice users, we can't check the access token ID
+            # so assume it is the same session.
+            event_token_id: Optional[int] = getattr(
+                e.internal_metadata, "token_id", None
+            )
+            if (
+                (
+                    event_token_id is not None
+                    and config.requester.access_token_id is not None
+                    and event_token_id == config.requester.access_token_id
+                )
+                or config.requester.is_guest
+                or config.requester.app_service
+            ):
                d["unsigned"]["transaction_id"] = txn_id

    # invite_room_state and knock_room_state are a list of stripped room state events
@@ -379,6 +517,17 @@ def serialize_event(
    if config.as_client_event:
        d = config.event_format(d)

+    # If the event is a redaction, the field with the redacted event ID appears
+    # in a different location depending on the room version. e.redacts handles
+    # fetching from the proper location; copy it to the other location for forwards-
+    # and backwards-compatibility with clients.
+    if e.type == EventTypes.Redaction and e.redacts is not None:
+        if e.room_version.updated_redaction_rules:
+            d["redacts"] = e.redacts
+        else:
+            d["content"] = dict(d["content"])
+            d["content"]["redacts"] = e.redacts
+
    only_event_fields = config.only_event_fields
    if only_event_fields:
        if not isinstance(only_event_fields, list) or not all(
@@ -397,14 +546,19 @@ class EventClientSerializer:
    clients.
    """

-    def serialize_event(
+    def __init__(self, hs: "HomeServer") -> None:
+        self._store = hs.get_datastores().main
+        self._add_extra_fields_to_unsigned_client_event_callbacks: List[
+            ADD_EXTRA_FIELDS_TO_UNSIGNED_CLIENT_EVENT_CALLBACK
+        ] = []
+
+    async def serialize_event(
        self,
        event: Union[JsonDict, EventBase],
        time_now: int,
        *,
        config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG,
        bundle_aggregations: Optional[Dict[str, "BundledAggregations"]] = None,
-        apply_edits: bool = True,
    ) -> JsonDict:
        """Serializes a single event.

@@ -414,8 +568,7 @@ class EventClientSerializer:
            config: Event serialization config
            bundle_aggregations: A map from event_id to the aggregations to be bundled
               into the event.
-            apply_edits: Whether the content of the event should be modified to reflect
-               any replacement in `bundle_aggregations[<event_id>].replace`.
+
        Returns:
            The serialized event
        """
@@ -425,55 +578,37 @@ class EventClientSerializer:

        serialized_event = serialize_event(event, time_now, config=config)

+        new_unsigned = {}
+        for callback in self._add_extra_fields_to_unsigned_client_event_callbacks:
+            u = await callback(event)
+            new_unsigned.update(u)
+
+        if new_unsigned:
+            # We do the `update` this way round so that modules can't clobber
+            # existing fields.
+            new_unsigned.update(serialized_event["unsigned"])
+            serialized_event["unsigned"] = new_unsigned
+
        # Check if there are any bundled aggregations to include with the event.
        if bundle_aggregations:
            if event.event_id in bundle_aggregations:
-                self._inject_bundled_aggregations(
+                await self._inject_bundled_aggregations(
                    event,
                    time_now,
                    config,
                    bundle_aggregations,
                    serialized_event,
-                    apply_edits=apply_edits,
                )

        return serialized_event

-    def _apply_edit(
-        self, orig_event: EventBase, serialized_event: JsonDict, edit: EventBase
-    ) -> None:
-        """Replace the content, preserving existing relations of the serialized event.
-
-        Args:
-            orig_event: The original event.
-            serialized_event: The original event, serialized. This is modified.
-            edit: The event which edits the above.
-        """
-
-        # Ensure we take copies of the edit content, otherwise we risk modifying
-        # the original event.
-        edit_content = edit.content.copy()
-
-        # Unfreeze the event content if necessary, so that we may modify it below
-        edit_content = unfreeze(edit_content)
-        serialized_event["content"] = edit_content.get("m.new_content", {})
-
-        # Check for existing relations
-        relates_to = orig_event.content.get("m.relates_to")
-        if relates_to:
-            # Keep the relations, ensuring we use a dict copy of the original
-            serialized_event["content"]["m.relates_to"] = relates_to.copy()
-        else:
-            serialized_event["content"].pop("m.relates_to", None)
-
-    def _inject_bundled_aggregations(
+    async def _inject_bundled_aggregations(
        self,
        event: EventBase,
        time_now: int,
        config: SerializeEventConfig,
        bundled_aggregations: Dict[str, "BundledAggregations"],
        serialized_event: JsonDict,
-        apply_edits: bool,
    ) -> None:
        """Potentially injects bundled aggregations into the unsigned portion of the serialized event.

@@ -488,8 +623,6 @@ class EventClientSerializer:
                While serializing the bundled aggregations this map may be searched
                again for additional events in a recursive manner.
            serialized_event: The serialized event which may be modified.
-            apply_edits: Whether the content of the event should be modified to reflect
-               any replacement in `aggregations.replace`.
        """

        # We have already checked that aggregations exist for this event.
@@ -499,34 +632,29 @@ class EventClientSerializer:
        # being serialized.
        serialized_aggregations = {}

-        if event_aggregations.annotations:
-            serialized_aggregations[
-                RelationTypes.ANNOTATION
-            ] = event_aggregations.annotations
-
        if event_aggregations.references:
-            serialized_aggregations[
-                RelationTypes.REFERENCE
-            ] = event_aggregations.references
+            serialized_aggregations[RelationTypes.REFERENCE] = (
+                event_aggregations.references
+            )

        if event_aggregations.replace:
-            # If there is an edit, optionally apply it to the event.
-            edit = event_aggregations.replace
-            if apply_edits:
-                self._apply_edit(event, serialized_event, edit)
-
            # Include information about it in the relations dict.
-            serialized_aggregations[RelationTypes.REPLACE] = {
-                "event_id": edit.event_id,
-                "origin_server_ts": edit.origin_server_ts,
-                "sender": edit.sender,
-            }
+            #
+            # Matrix spec v1.5 (https://spec.matrix.org/v1.5/client-server-api/#server-side-aggregation-of-mreplace-relationships)
+            # said that we should only include the `event_id`, `origin_server_ts` and
+            # `sender` of the edit; however MSC3925 proposes extending it to the whole
+            # of the edit, which is what we do here.
+            serialized_aggregations[RelationTypes.REPLACE] = await self.serialize_event(
+                event_aggregations.replace,
+                time_now,
+                config=config,
+            )

        # Include any threaded replies to this event.
        if event_aggregations.thread:
            thread = event_aggregations.thread

-            serialized_latest_event = self.serialize_event(
+            serialized_latest_event = await self.serialize_event(
                thread.latest_event,
                time_now,
                config=config,
@@ -549,7 +677,7 @@ class EventClientSerializer:
                "m.relations", {}
            ).update(serialized_aggregations)

-    def serialize_events(
+    async def serialize_events(
        self,
        events: Iterable[Union[JsonDict, EventBase]],
        time_now: int,
@@ -571,7 +699,7 @@ class EventClientSerializer:
            The list of serialized events
        """
        return [
-            self.serialize_event(
+            await self.serialize_event(
                event,
                time_now,
                config=config,
@@ -580,14 +708,23 @@ class EventClientSerializer:
            for event in events
        ]

+    def register_add_extra_fields_to_unsigned_client_event_callback(
+        self, callback: ADD_EXTRA_FIELDS_TO_UNSIGNED_CLIENT_EVENT_CALLBACK
+    ) -> None:
+        """Register a callback that returns additions to the unsigned section of
+        serialized events.
+        """
+        self._add_extra_fields_to_unsigned_client_event_callbacks.append(callback)
+

 _PowerLevel = Union[str, int]
+PowerLevelsContent = Mapping[str, Union[_PowerLevel, Mapping[str, _PowerLevel]]]


 def copy_and_fixup_power_levels_contents(
-    old_power_levels: Mapping[str, Union[_PowerLevel, Mapping[str, _PowerLevel]]]
+    old_power_levels: PowerLevelsContent,
 ) -> Dict[str, Union[int, Dict[str, int]]]:
-    """Copy the content of a power_levels event, unfreezing frozendicts along the way.
+    """Copy the content of a power_levels event, unfreezing immutabledicts along the way.

    We accept as input power level values which are strings, provided they represent an
    integer, e.g. `"`100"` instead of 100. Such strings are converted to integers
@@ -624,10 +761,10 @@ def _copy_power_level_value_as_integer(
 ) -> None:
    """Set `power_levels[key]` to the integer represented by `old_value`.

-    :raises TypeError: if `old_value` is not an integer, nor a base-10 string
+    :raises TypeError: if `old_value` is neither an integer nor a base-10 string
        representation of an integer.
    """
-    if isinstance(old_value, int):
+    if type(old_value) is int:  # noqa: E721
        power_levels[key] = old_value
        return

@@ -655,7 +792,7 @@ def validate_canonicaljson(value: Any) -> None:
    * Floats
    * NaN, Infinity, -Infinity
    """
-    if isinstance(value, int):
+    if type(value) is int:  # noqa: E721
        if value < CANONICALJSON_MIN_INT or CANONICALJSON_MAX_INT < value:
            raise SynapseError(400, "JSON integer out of range", Codes.BAD_JSON)

@@ -674,3 +811,72 @@ def validate_canonicaljson(value: Any) -> None:
    elif not isinstance(value, (bool, str)) and value is not None:
        # Other potential JSON values (bool, None, str) are safe.
        raise SynapseError(400, "Unknown JSON value", Codes.BAD_JSON)
+
+
+def maybe_upsert_event_field(
+    event: EventBase, container: JsonDict, key: str, value: object
+) -> bool:
+    """Upsert an event field, but only if this doesn't make the event too large.
+
+    Returns true iff the upsert took place.
+    """
+    if key in container:
+        old_value: object = container[key]
+        container[key] = value
+        # NB: here and below, we assume that passing a non-None `time_now` argument to
+        # get_pdu_json doesn't increase the size of the encoded result.
+        upsert_okay = len(encode_canonical_json(event.get_pdu_json())) <= MAX_PDU_SIZE
+        if not upsert_okay:
+            container[key] = old_value
+    else:
+        container[key] = value
+        upsert_okay = len(encode_canonical_json(event.get_pdu_json())) <= MAX_PDU_SIZE
+        if not upsert_okay:
+            del container[key]
+
+    return upsert_okay
+
+
+def strip_event(event: EventBase) -> JsonDict:
+    """
+    Used for "stripped state" events which provide a simplified view of the state of a
+    room intended to help a potential joiner identify the room (relevant when the user
+    is invited or knocked).
+
+    Stripped state events can only have the `sender`, `type`, `state_key` and `content`
+    properties present.
+    """
+
+    return {
+        "type": event.type,
+        "state_key": event.state_key,
+        "content": event.content,
+        "sender": event.sender,
+    }
+
+
+def parse_stripped_state_event(raw_stripped_event: Any) -> Optional[StrippedStateEvent]:
+    """
+    Given a raw value from an event's `unsigned` field, attempt to parse it into a
+    `StrippedStateEvent`.
+    """
+    if isinstance(raw_stripped_event, dict):
+        # All of these fields are required
+        type = raw_stripped_event.get("type")
+        state_key = raw_stripped_event.get("state_key")
+        sender = raw_stripped_event.get("sender")
+        content = raw_stripped_event.get("content")
+        if (
+            isinstance(type, str)
+            and isinstance(state_key, str)
+            and isinstance(sender, str)
+            and isinstance(content, dict)
+        ):
+            return StrippedStateEvent(
+                type=type,
+                state_key=state_key,
+                sender=sender,
+                content=content,
+            )
+
+    return None
--- a/synapse/events/validator.py
+++ b/synapse/events/validator.py
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import collections.abc
-from typing import Iterable, Type, Union, cast
+from typing import List, Type, Union, cast

 import jsonschema

-from synapse.api.constants import MAX_ALIAS_LENGTH, EventTypes, Membership
+from synapse._pydantic_compat import Field, StrictBool, StrictStr
+from synapse.api.constants import (
+    MAX_ALIAS_LENGTH,
+    EventContentFields,
+    EventTypes,
+    Membership,
+)
 from synapse.api.errors import Codes, SynapseError
 from synapse.api.room_versions import EventFormatVersions
 from synapse.config.homeserver import HomeServerConfig
@@ -27,8 +40,10 @@ from synapse.events.utils import (
    CANONICALJSON_MIN_INT,
    validate_canonicaljson,
 )
-from synapse.federation.federation_server import server_matches_acl_event
-from synapse.types import EventID, JsonDict, RoomID, UserID
+from synapse.http.servlet import validate_json_object
+from synapse.storage.controllers.state import server_acl_evaluator_from_event
+from synapse.types import EventID, JsonDict, RoomID, StrCollection, UserID
+from synapse.types.rest import RequestBodyModel


 class EventValidator:
@@ -43,7 +58,7 @@ class EventValidator:
            event: The event to validate.
            config: The homeserver's configuration.
        """
-        self.validate_builder(event)
+        self.validate_builder(event, config)

        if event.format_version == EventFormatVersions.ROOM_V1_V2:
            EventID.from_string(event.event_id)
@@ -71,9 +86,14 @@ class EventValidator:

        # Depending on the room version, ensure the data is spec compliant JSON.
        if event.room_version.strict_canonicaljson:
-            # Note that only the client controlled portion of the event is
-            # checked, since we trust the portions of the event we created.
-            validate_canonicaljson(event.content)
+            validate_canonicaljson(event.get_pdu_json())
+
+        if not 0 < event.origin_server_ts < 2**53:
+            raise SynapseError(400, "Event timestamp is out of range")
+
+        # meow: allow specific users to send potentially dangerous events.
+        if event.sender in config.meow.validation_override:
+            return

        if event.type == EventTypes.Aliases:
            if "aliases" in event.content:
@@ -88,27 +108,30 @@ class EventValidator:
                            Codes.INVALID_PARAM,
                        )

-        if event.type == EventTypes.Retention:
+        elif event.type == EventTypes.Retention:
            self._validate_retention(event)

-        if event.type == EventTypes.ServerACL:
-            if not server_matches_acl_event(config.server.server_name, event):
+        elif event.type == EventTypes.ServerACL:
+            server_acl_evaluator = server_acl_evaluator_from_event(event)
+            if not server_acl_evaluator.server_matches_acl_event(
+                config.server.server_name
+            ):
                raise SynapseError(
                    400, "Can't create an ACL event that denies the local server"
                )

-        if event.type == EventTypes.PowerLevels:
+        elif event.type == EventTypes.PowerLevels:
            try:
                jsonschema.validate(
                    instance=event.content,
                    schema=POWER_LEVELS_SCHEMA,
-                    cls=plValidator,
+                    cls=POWER_LEVELS_VALIDATOR,
                )
            except jsonschema.ValidationError as e:
                if e.path:
                    # example: "users_default": '0' is not of type 'integer'
                    # cast safety: path entries can be integers, if we fail to validate
-                    # items in an array. However the POWER_LEVELS_SCHEMA doesn't expect
+                    # items in an array. However, the POWER_LEVELS_SCHEMA doesn't expect
                    # to see any arrays.
                    message = (
                        '"' + cast(str, e.path[-1]) + '": ' + e.message  # noqa: B306
@@ -125,6 +148,10 @@ class EventValidator:
                    errcode=Codes.BAD_JSON,
                )

+        # If the event contains a mentions key, validate it.
+        if EventContentFields.MENTIONS in event.content:
+            validate_json_object(event.content[EventContentFields.MENTIONS], Mentions)
+
    def _validate_retention(self, event: EventBase) -> None:
        """Checks that an event that defines the retention policy for a room respects the
        format enforced by the spec.
@@ -139,7 +166,7 @@ class EventValidator:
        max_lifetime = event.content.get("max_lifetime")

        if min_lifetime is not None:
-            if not isinstance(min_lifetime, int):
+            if type(min_lifetime) is not int:  # noqa: E721
                raise SynapseError(
                    code=400,
                    msg="'min_lifetime' must be an integer",
@@ -147,7 +174,7 @@ class EventValidator:
                )

        if max_lifetime is not None:
-            if not isinstance(max_lifetime, int):
+            if type(max_lifetime) is not int:  # noqa: E721
                raise SynapseError(
                    code=400,
                    msg="'max_lifetime' must be an integer",
@@ -165,7 +192,9 @@ class EventValidator:
                errcode=Codes.BAD_JSON,
            )

-    def validate_builder(self, event: Union[EventBase, EventBuilder]) -> None:
+    def validate_builder(
+        self, event: Union[EventBase, EventBuilder], config: HomeServerConfig
+    ) -> None:
        """Validates that the builder/event has roughly the right format. Only
        checks values that we expect a proto event to have, rather than all the
        fields an event would have
@@ -183,6 +212,10 @@ class EventValidator:
        RoomID.from_string(event.room_id)
        UserID.from_string(event.sender)

+        # meow: allow specific users to send so-called invalid events
+        if event.sender in config.meow.validation_override:
+            return
+
        if event.type == EventTypes.Message:
            strings = ["body", "msgtype"]

@@ -213,7 +246,7 @@ class EventValidator:

            self._ensure_state_event(event)

-    def _ensure_strings(self, d: JsonDict, keys: Iterable[str]) -> None:
+    def _ensure_strings(self, d: JsonDict, keys: StrCollection) -> None:
        for s in keys:
            if s not in d:
                raise SynapseError(400, "'%s' not in content" % (s,))
@@ -253,12 +286,17 @@ POWER_LEVELS_SCHEMA = {
 }


+class Mentions(RequestBodyModel):
+    user_ids: List[StrictStr] = Field(default_factory=list)
+    room: StrictBool = False
+
+
 # This could return something newer than Draft 7, but that's the current "latest"
 # validator.
-def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]:
-    validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA)
+def _create_validator(schema: JsonDict) -> Type[jsonschema.Draft7Validator]:
+    validator = jsonschema.validators.validator_for(schema)

-    # by default jsonschema does not consider a frozendict to be an object so
+    # by default jsonschema does not consider an immutabledict to be an object so
    # we need to use a custom type checker
    # https://python-jsonschema.readthedocs.io/en/stable/validate/?highlight=object#validating-with-additional-types
    type_checker = validator.TYPE_CHECKER.redefine(
@@ -268,4 +306,4 @@ def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]:
    return jsonschema.validators.extend(validator, type_checker=type_checker)


-plValidator = _create_power_level_validator()
+POWER_LEVELS_VALIDATOR = _create_validator(POWER_LEVELS_SCHEMA)
--- a/synapse/federation/__init__.py
+++ b/synapse/federation/__init__.py
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.

-""" This package includes all the federation specific logic.
-"""
+"""This package includes all the federation specific logic."""
--- a/synapse/federation/federation_base.py
+++ b/synapse/federation/federation_base.py
-# Copyright 2015, 2016 OpenMarket Ltd
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2020 The Matrix.org Foundation C.I.C.
+# Copyright 2015, 2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import logging
-from typing import TYPE_CHECKING, Awaitable, Callable, Optional
+from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional, Sequence

 from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
 from synapse.api.errors import Codes, SynapseError
@@ -22,6 +29,7 @@ from synapse.crypto.event_signing import check_event_content_hash
 from synapse.crypto.keyring import Keyring
 from synapse.events import EventBase, make_event_from_dict
 from synapse.events.utils import prune_event, validate_canonicaljson
+from synapse.federation.units import filter_pdus_for_valid_depth
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.opentracing import log_kv, trace
 from synapse.types import JsonDict, get_domain_from_id
@@ -49,9 +57,9 @@ class FederationBase:
    def __init__(self, hs: "HomeServer"):
        self.hs = hs

-        self.server_name = hs.hostname
+        self._is_mine_server_name = hs.is_mine_server_name
        self.keyring = hs.get_keyring()
-        self.spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
        self.store = hs.get_datastores().main
        self._clock = hs.get_clock()
        self._storage_controllers = hs.get_storage_controllers()
@@ -137,9 +145,9 @@ class FederationBase:
                    )
            return redacted_event

-        spam_check = await self.spam_checker.check_event_for_spam(pdu)
+        spam_check = await self._spam_checker_module_callbacks.check_event_for_spam(pdu)

-        if spam_check != self.spam_checker.NOT_SPAM:
+        if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
            logger.warning("Event contains spam, soft-failing %s", pdu.event_id)
            log_kv(
                {
@@ -231,7 +239,7 @@ async def _check_sigs_on_pdu(
    # If this is a join event for a restricted room it may have been authorised
    # via a different server from the sending server. Check those signatures.
    if (
-        room_version.msc3083_join_rules
+        room_version.restricted_join_rule
        and pdu.type == EventTypes.Member
        and pdu.membership == Membership.JOIN
        and EventContentFields.AUTHORISING_USER in pdu.content
@@ -260,6 +268,15 @@ def _is_invite_via_3pid(event: EventBase) -> bool:
    )


+def parse_events_from_pdu_json(
+    pdus_json: Sequence[JsonDict], room_version: RoomVersion
+) -> List[EventBase]:
+    return [
+        event_from_pdu_json(pdu_json, room_version)
+        for pdu_json in filter_pdus_for_valid_depth(pdus_json)
+    ]
+
+
 def event_from_pdu_json(pdu_json: JsonDict, room_version: RoomVersion) -> EventBase:
    """Construct an EventBase from an event json received over federation

@@ -280,7 +297,7 @@ def event_from_pdu_json(pdu_json: JsonDict, room_version: RoomVersion) -> EventB
        _strip_unsigned_values(pdu_json)

    depth = pdu_json["depth"]
-    if not isinstance(depth, int):
+    if type(depth) is not int:  # noqa: E721
        raise SynapseError(400, "Depth %r not an intger" % (depth,), Codes.BAD_JSON)

    if depth < 0:

--- a/synapse/federation/federation_client.py
+++ b/synapse/federation/federation_client.py
-# Copyright 2015-2022 The Matrix.org Foundation C.I.C.
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2020 Sorunome
+# Copyright 2015-2022 The Matrix.org Foundation C.I.C.
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.


 import copy
@@ -19,7 +26,9 @@ import itertools
 import logging
 from typing import (
    TYPE_CHECKING,
+    AbstractSet,
    Awaitable,
+    BinaryIO,
    Callable,
    Collection,
    Container,
@@ -37,7 +46,7 @@ from typing import (
 import attr
 from prometheus_client import Counter

-from synapse.api.constants import EventContentFields, EventTypes, Membership
+from synapse.api.constants import Direction, EventContentFields, EventTypes, Membership
 from synapse.api.errors import (
    CodeMessageException,
    Codes,
@@ -47,6 +56,7 @@ from synapse.api.errors import (
    SynapseError,
    UnsupportedRoomVersionError,
 )
+from synapse.api.ratelimiting import Ratelimiter
 from synapse.api.room_versions import (
    KNOWN_ROOM_VERSIONS,
    EventFormatVersions,
@@ -58,11 +68,13 @@ from synapse.federation.federation_base import (
    FederationBase,
    InvalidEventSignatureError,
    event_from_pdu_json,
+    parse_events_from_pdu_json,
 )
 from synapse.federation.transport.client import SendJoinResponse
+from synapse.http.client import is_unknown_endpoint
 from synapse.http.types import QueryParams
 from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace
-from synapse.types import JsonDict, UserID, get_domain_from_id
+from synapse.types import JsonDict, StrCollection, UserID, get_domain_from_id
 from synapse.util.async_helpers import concurrently_execute
 from synapse.util.caches.expiringcache import ExpiringCache
 from synapse.util.retryutils import NotRetryingDestination
@@ -80,6 +92,18 @@ PDU_RETRY_TIME_MS = 1 * 60 * 1000
 T = TypeVar("T")


+@attr.s(frozen=True, slots=True, auto_attribs=True)
+class PulledPduInfo:
+    """
+    A result object that stores the PDU and info about it like which homeserver we
+    pulled it from (`pull_origin`)
+    """
+
+    pdu: EventBase
+    # Which homeserver we pulled the PDU from
+    pull_origin: str
+
+
 class InvalidResponseError(RuntimeError):
    """Helper for _try_destination_list: indicates that the server returned a response
    we couldn't parse
@@ -98,8 +122,9 @@ class SendJoinResult:
    # True if 'state' elides non-critical membership events
    partial_state: bool

-    # if 'partial_state' is set, a list of the servers in the room (otherwise empty)
-    servers_in_room: List[str]
+    # If 'partial_state' is set, a set of the servers in the room (otherwise empty).
+    # Always contains the server we joined off.
+    servers_in_room: AbstractSet[str]


 class FederationClient(FederationBase):
@@ -114,7 +139,9 @@ class FederationClient(FederationBase):
        self.hostname = hs.hostname
        self.signing_key = hs.signing_key

-        self._get_pdu_cache: ExpiringCache[str, EventBase] = ExpiringCache(
+        # Cache mapping `event_id` to a tuple of the event itself and the `pull_origin`
+        # (which server we pulled the event from)
+        self._get_pdu_cache: ExpiringCache[str, Tuple[EventBase, str]] = ExpiringCache(
            cache_name="get_pdu_cache",
            clock=self._clock,
            max_len=1000,
@@ -218,11 +245,16 @@ class FederationClient(FederationBase):
        )

    async def claim_client_keys(
-        self, destination: str, content: JsonDict, timeout: Optional[int]
+        self,
+        user: UserID,
+        destination: str,
+        query: Dict[str, Dict[str, Dict[str, int]]],
+        timeout: Optional[int],
    ) -> JsonDict:
        """Claims one-time keys for a device hosted on a remote server.

        Args:
+            user: The user id of the requesting user
            destination: Domain name of the remote homeserver
            content: The query content.

@@ -230,8 +262,55 @@ class FederationClient(FederationBase):
            The JSON object from the response
        """
        sent_queries_counter.labels("client_one_time_keys").inc()
+
+        # Convert the query with counts into a stable and unstable query and check
+        # if attempting to claim more than 1 OTK.
+        content: Dict[str, Dict[str, str]] = {}
+        unstable_content: Dict[str, Dict[str, List[str]]] = {}
+        use_unstable = False
+        for user_id, one_time_keys in query.items():
+            for device_id, algorithms in one_time_keys.items():
+                # If more than one key is requested for this device (counts summed
+                # across all algorithms), attempt to use the unstable endpoint.
+                if sum(algorithms.values()) > 1:
+                    use_unstable = True
+                if algorithms:
+                    # For the stable query, choose only the first algorithm.
+                    content.setdefault(user_id, {})[device_id] = next(iter(algorithms))
+                    # For the unstable query, repeat each algorithm by count, then
+                    # splat those into chain to get a flattened list of all algorithms.
+                    #
+                    # Converts from {"algo1": 2, "algo2": 2} to ["algo1", "algo1", "algo2", "algo2"].
+                    unstable_content.setdefault(user_id, {})[device_id] = list(
+                        itertools.chain(
+                            *(
+                                itertools.repeat(algorithm, count)
+                                for algorithm, count in algorithms.items()
+                            )
+                        )
+                    )
+
+        if use_unstable:
+            try:
+                return await self.transport_layer.claim_client_keys_unstable(
+                    user, destination, unstable_content, timeout
+                )
+            except HttpResponseException as e:
+                # If an error is received that is due to an unrecognised endpoint,
+                # fall back to the v1 endpoint. Otherwise, consider it a legitimate error
+                # and raise.
+                if not is_unknown_endpoint(e):
+                    raise
+
+            logger.debug(
+                "Couldn't claim client keys with the unstable API, falling back to the v1 API"
+            )
+        else:
+            logger.debug("Skipping unstable claim client keys API")
+
+        # TODO Potentially attempt multiple queries and combine the results?
        return await self.transport_layer.claim_client_keys(
-            destination, content, timeout
+            user, destination, content, timeout
        )

    @trace
@@ -263,19 +342,15 @@ class FederationClient(FederationBase):
        logger.debug("backfill transaction_data=%r", transaction_data)

        if not isinstance(transaction_data, dict):
-            # TODO we probably want an exception type specific to federation
-            # client validation.
-            raise TypeError("Backfill transaction_data is not a dict.")
+            raise InvalidResponseError("Backfill transaction_data is not a dict.")

        transaction_data_pdus = transaction_data.get("pdus")
        if not isinstance(transaction_data_pdus, list):
-            # TODO we probably want an exception type specific to federation
-            # client validation.
-            raise TypeError("transaction_data.pdus is not a list.")
+            raise InvalidResponseError("transaction_data.pdus is not a list.")

        room_version = await self.store.get_room_version(room_id)

-        pdus = [event_from_pdu_json(p, room_version) for p in transaction_data_pdus]
+        pdus = parse_events_from_pdu_json(transaction_data_pdus, room_version)

        # Check signatures and hash of pdus, removing any from the list that fail checks
        pdus[:] = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
@@ -319,9 +394,7 @@ class FederationClient(FederationBase):
            transaction_data,
        )

-        pdu_list: List[EventBase] = [
-            event_from_pdu_json(p, room_version) for p in transaction_data["pdus"]
-        ]
+        pdu_list = parse_events_from_pdu_json(transaction_data["pdus"], room_version)

        if pdu_list and pdu_list[0]:
            pdu = pdu_list[0]
@@ -352,11 +425,11 @@ class FederationClient(FederationBase):
    @tag_args
    async def get_pdu(
        self,
-        destinations: Iterable[str],
+        destinations: Collection[str],
        event_id: str,
        room_version: RoomVersion,
        timeout: Optional[int] = None,
-    ) -> Optional[EventBase]:
+    ) -> Optional[PulledPduInfo]:
        """Requests the PDU with given origin and ID from the remote home
        servers.

@@ -371,11 +444,11 @@ class FederationClient(FederationBase):
                moving to the next destination. None indicates no timeout.

        Returns:
-            The requested PDU, or None if we were unable to find it.
+            The requested PDU wrapped in `PulledPduInfo`, or None if we were unable to find it.
        """

        logger.debug(
-            "get_pdu: event_id=%s from destinations=%s", event_id, destinations
+            "get_pdu(event_id=%s): from destinations=%s", event_id, destinations
        )

        # TODO: Rate limit the number of times we try and get the same event.
@@ -384,19 +457,25 @@ class FederationClient(FederationBase):
        # it gets persisted to the database), so we cache the results of the lookup.
        # Note that this is separate to the regular get_event cache which caches
        # events once they have been persisted.
-        event = self._get_pdu_cache.get(event_id)
+        get_pdu_cache_entry = self._get_pdu_cache.get(event_id)

+        event = None
+        pull_origin = None
+        if get_pdu_cache_entry:
+            event, pull_origin = get_pdu_cache_entry
        # If we don't see the event in the cache, go try to fetch it from the
        # provided remote federated destinations
-        if not event:
+        else:
            pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {})

+            # TODO: We can probably refactor this to use `_try_destination_list`
            for destination in destinations:
                now = self._clock.time_msec()
                last_attempt = pdu_attempts.get(destination, 0)
                if last_attempt + PDU_RETRY_TIME_MS > now:
                    logger.debug(
-                        "get_pdu: skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)",
+                        "get_pdu(event_id=%s): skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)",
+                        event_id,
                        destination,
                        last_attempt,
                        PDU_RETRY_TIME_MS,
@@ -411,43 +490,48 @@ class FederationClient(FederationBase):
                        room_version=room_version,
                        timeout=timeout,
                    )
+                    pull_origin = destination

                    pdu_attempts[destination] = now

                    if event:
                        # Prime the cache
-                        self._get_pdu_cache[event.event_id] = event
+                        self._get_pdu_cache[event.event_id] = (event, pull_origin)

                        # Now that we have an event, we can break out of this
                        # loop and stop asking other destinations.
                        break

+                except NotRetryingDestination as e:
+                    logger.info("get_pdu(event_id=%s): %s", event_id, e)
+                    continue
+                except FederationDeniedError:
+                    logger.info(
+                        "get_pdu(event_id=%s): Not attempting to fetch PDU from %s because the homeserver is not on our federation whitelist",
+                        event_id,
+                        destination,
+                    )
+                    continue
                except SynapseError as e:
                    logger.info(
-                        "Failed to get PDU %s from %s because %s",
+                        "get_pdu(event_id=%s): Failed to get PDU from %s because %s",
                        event_id,
                        destination,
                        e,
                    )
                    continue
-                except NotRetryingDestination as e:
-                    logger.info(str(e))
-                    continue
-                except FederationDeniedError as e:
-                    logger.info(str(e))
-                    continue
                except Exception as e:
                    pdu_attempts[destination] = now

                    logger.info(
-                        "Failed to get PDU %s from %s because %s",
+                        "get_pdu(event_id=%s): Failed to get PDU from %s because %s",
                        event_id,
                        destination,
                        e,
                    )
                    continue

-        if not event:
+        if not event or not pull_origin:
            return None

        # `event` now refers to an object stored in `get_pdu_cache`. Our
@@ -459,7 +543,7 @@ class FederationClient(FederationBase):
            event.room_version,
        )

-        return event_copy
+        return PulledPduInfo(event_copy, pull_origin)

    @trace
    @tag_args
@@ -699,12 +783,14 @@ class FederationClient(FederationBase):
        pdu_origin = get_domain_from_id(pdu.sender)
        if not res and pdu_origin != origin:
            try:
-                res = await self.get_pdu(
+                pulled_pdu_info = await self.get_pdu(
                    destinations=[pdu_origin],
                    event_id=pdu.event_id,
                    room_version=room_version,
                    timeout=10000,
                )
+                if pulled_pdu_info is not None:
+                    res = pulled_pdu_info.pdu
            except SynapseError:
                pass

@@ -722,7 +808,7 @@ class FederationClient(FederationBase):

        room_version = await self.store.get_room_version(room_id)

-        auth_chain = [event_from_pdu_json(p, room_version) for p in res["auth_chain"]]
+        auth_chain = parse_events_from_pdu_json(res["auth_chain"], room_version)

        signed_auth = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
            destination, auth_chain, room_version=room_version
@@ -730,32 +816,6 @@ class FederationClient(FederationBase):

        return signed_auth

-    def _is_unknown_endpoint(
-        self, e: HttpResponseException, synapse_error: Optional[SynapseError] = None
-    ) -> bool:
-        """
-        Returns true if the response was due to an endpoint being unimplemented.
-
-        Args:
-            e: The error response received from the remote server.
-            synapse_error: The above error converted to a SynapseError. This is
-                automatically generated if not provided.
-
-        """
-        if synapse_error is None:
-            synapse_error = e.to_synapse_error()
-        # There is no good way to detect an "unknown" endpoint.
-        #
-        # Dendrite returns a 404 (with a body of "404 page not found");
-        # Conduit returns a 404 (with no body); and Synapse returns a 400
-        # with M_UNRECOGNIZED.
-        #
-        # This needs to be rather specific as some endpoints truly do return 404
-        # errors.
-        return (
-            e.code == 404 and (not e.response or e.response == b"404 page not found")
-        ) or (e.code == 400 and synapse_error.errcode == Codes.UNRECOGNIZED)
-
    async def _try_destination_list(
        self,
        description: str,
@@ -806,7 +866,8 @@ class FederationClient(FederationBase):
            )

        for destination in destinations:
-            if destination == self.server_name:
+            # We don't want to ask our own server for information we don't have
+            if self._is_mine_server_name(destination):
                continue

            try:
@@ -814,9 +875,21 @@ class FederationClient(FederationBase):
            except (
                RequestSendFailed,
                InvalidResponseError,
-                NotRetryingDestination,
            ) as e:
                logger.warning("Failed to %s via %s: %s", description, destination, e)
+                # Skip to the next homeserver in the list to try.
+                continue
+            except NotRetryingDestination as e:
+                logger.info("%s: %s", description, e)
+                continue
+            except FederationDeniedError:
+                logger.info(
+                    "%s: Not attempting to %s from %s because the homeserver is not on our federation whitelist",
+                    description,
+                    description,
+                    destination,
+                )
+                continue
            except UnsupportedRoomVersionError:
                raise
            except HttpResponseException as e:
@@ -831,10 +904,10 @@ class FederationClient(FederationBase):
                if 500 <= e.code < 600:
                    failover = True

-                elif e.code == 400 and synapse_error.errcode in failover_errcodes:
+                elif 400 <= e.code < 500 and synapse_error.errcode in failover_errcodes:
                    failover = True

-                elif failover_on_unknown_endpoint and self._is_unknown_endpoint(
+                elif failover_on_unknown_endpoint and is_unknown_endpoint(
                    e, synapse_error
                ):
                    failover = True
@@ -918,7 +991,7 @@ class FederationClient(FederationBase):
            if not room_version:
                raise UnsupportedRoomVersionError()

-            if not room_version.msc2403_knocking and membership == Membership.KNOCK:
+            if not room_version.knock_join_rule and membership == Membership.KNOCK:
                raise SynapseError(
                    400,
                    "This room version does not support knocking",
@@ -946,14 +1019,13 @@ class FederationClient(FederationBase):

            return destination, ev, room_version

+        failover_errcodes = {Codes.NOT_FOUND}
        # MSC3083 defines additional error codes for room joins. Unfortunately
        # we do not yet know the room version, assume these will only be returned
        # by valid room versions.
-        failover_errcodes = (
-            (Codes.UNABLE_AUTHORISE_JOIN, Codes.UNABLE_TO_GRANT_JOIN)
-            if membership == Membership.JOIN
-            else None
-        )
+        if membership == Membership.JOIN:
+            failover_errcodes.add(Codes.UNABLE_AUTHORISE_JOIN)
+            failover_errcodes.add(Codes.UNABLE_TO_GRANT_JOIN)

        return await self._try_destination_list(
            "make_" + membership,
@@ -963,7 +1035,11 @@ class FederationClient(FederationBase):
        )

    async def send_join(
-        self, destinations: Iterable[str], pdu: EventBase, room_version: RoomVersion
+        self,
+        destinations: Iterable[str],
+        pdu: EventBase,
+        room_version: RoomVersion,
+        partial_state: bool = True,
    ) -> SendJoinResult:
        """Sends a join event to one of a list of homeservers.

@@ -976,6 +1052,10 @@ class FederationClient(FederationBase):
            pdu: event to be sent
            room_version: the version of the room (according to the server that
                did the make_join)
+            partial_state: whether to ask the remote server to omit membership state
+                events from the response. If the remote server complies,
+                `partial_state` in the send join result will be set. Defaults to
+                `True`.

        Returns:
            The result of the send join request.
@@ -986,7 +1066,9 @@ class FederationClient(FederationBase):
        """

        async def send_request(destination: str) -> SendJoinResult:
-            response = await self._do_send_join(room_version, destination, pdu)
+            response = await self._do_send_join(
+                room_version, destination, pdu, omit_members=partial_state
+            )

            # If an event was returned (and expected to be returned):
            #
@@ -995,7 +1077,7 @@ class FederationClient(FederationBase):
            # * Ensure the signatures are good.
            #
            # Otherwise, fallback to the provided event.
-            if room_version.msc3083_join_rules and response.event:
+            if room_version.restricted_join_rule and response.event:
                event = response.event

                valid_pdu = await self._check_sigs_and_hash_and_fetch_one(
@@ -1075,7 +1157,7 @@ class FederationClient(FederationBase):
            # NB: We *need* to copy to ensure that we don't have multiple
            # references being passed on, as that causes... issues.
            for s in signed_state:
-                s.internal_metadata = copy.deepcopy(s.internal_metadata)
+                s.internal_metadata = s.internal_metadata.copy()

            # double-check that the auth chain doesn't include a different create event
            auth_chain_create_events = [
@@ -1091,23 +1173,37 @@ class FederationClient(FederationBase):
                    % (auth_chain_create_events,)
                )

-            if response.partial_state and not response.servers_in_room:
-                raise InvalidResponseError(
-                    "partial_state was set, but no servers were listed in the room"
-                )
+            servers_in_room = None
+            if response.servers_in_room is not None:
+                servers_in_room = set(response.servers_in_room)
+
+            if response.members_omitted:
+                if not servers_in_room:
+                    raise InvalidResponseError(
+                        "members_omitted was set, but no servers were listed in the room"
+                    )
+
+                if not partial_state:
+                    raise InvalidResponseError(
+                        "members_omitted was set, but we asked for full state"
+                    )
+
+                # `servers_in_room` is supposed to be a complete list.
+                # Fix things up in case the remote homeserver is badly behaved.
+                servers_in_room.add(destination)

            return SendJoinResult(
                event=event,
                state=signed_state,
                auth_chain=signed_auth,
                origin=destination,
-                partial_state=response.partial_state,
-                servers_in_room=response.servers_in_room or [],
+                partial_state=response.members_omitted,
+                servers_in_room=servers_in_room or frozenset(),
            )

        # MSC3083 defines additional error codes for room joins.
        failover_errcodes = None
-        if room_version.msc3083_join_rules:
+        if room_version.restricted_join_rule:
            failover_errcodes = (
                Codes.UNABLE_AUTHORISE_JOIN,
                Codes.UNABLE_TO_GRANT_JOIN,
@@ -1126,7 +1222,11 @@ class FederationClient(FederationBase):
        )

    async def _do_send_join(
-        self, room_version: RoomVersion, destination: str, pdu: EventBase
+        self,
+        room_version: RoomVersion,
+        destination: str,
+        pdu: EventBase,
+        omit_members: bool,
    ) -> SendJoinResponse:
        time_now = self._clock.time_msec()

@@ -1137,12 +1237,13 @@ class FederationClient(FederationBase):
                room_id=pdu.room_id,
                event_id=pdu.event_id,
                content=pdu.get_pdu_json(time_now),
+                omit_members=omit_members,
            )
        except HttpResponseException as e:
            # If an error is received that is due to an unrecognised endpoint,
            # fallback to the v1 endpoint. Otherwise, consider it a legitimate error
            # and raise.
-            if not self._is_unknown_endpoint(e):
+            if not is_unknown_endpoint(e):
                raise

        logger.debug("Couldn't send_join with the v2 API, falling back to the v1 API")
@@ -1216,7 +1317,7 @@ class FederationClient(FederationBase):
            # fallback to the v1 endpoint if the room uses old-style event IDs.
            # Otherwise, consider it a legitimate error and raise.
            err = e.to_synapse_error()
-            if self._is_unknown_endpoint(e, err):
+            if is_unknown_endpoint(e, err):
                if room_version.event_format != EventFormatVersions.ROOM_V1_V2:
                    raise SynapseError(
                        400,
@@ -1277,7 +1378,7 @@ class FederationClient(FederationBase):
            # If an error is received that is due to an unrecognised endpoint,
            # fallback to the v1 endpoint. Otherwise, consider it a legitimate error
            # and raise.
-            if not self._is_unknown_endpoint(e):
+            if not is_unknown_endpoint(e):
                raise

        logger.debug("Couldn't send_leave with the v2 API, falling back to the v1 API")
@@ -1309,7 +1410,7 @@ class FederationClient(FederationBase):
            The remote homeserver return some state from the room. The response
            dictionary is in the form:

-            {"knock_state_events": [<state event dict>, ...]}
+            {"knock_room_state": [<state event dict>, ...]}

            The list of state events may be empty.

@@ -1336,7 +1437,7 @@ class FederationClient(FederationBase):
            The remote homeserver can optionally return some state from the room. The response
            dictionary is in the form:

-            {"knock_state_events": [<state event dict>, ...]}
+            {"knock_room_state": [<state event dict>, ...]}

            The list of state events may be empty.
        """
@@ -1427,9 +1528,7 @@ class FederationClient(FederationBase):

            room_version = await self.store.get_room_version(room_id)

-            events = [
-                event_from_pdu_json(e, room_version) for e in content.get("events", [])
-            ]
+            events = parse_events_from_pdu_json(content.get("events", []), room_version)

            signed_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
                destination, events, room_version=room_version
@@ -1448,7 +1547,7 @@ class FederationClient(FederationBase):
        self, destinations: Iterable[str], room_id: str, event_dict: JsonDict
    ) -> None:
        for destination in destinations:
-            if destination == self.server_name:
+            if self._is_mine_server_name(destination):
                continue

            try:
@@ -1548,7 +1647,7 @@ class FederationClient(FederationBase):
                # If an error is received that is due to an unrecognised endpoint,
                # fallback to the unstable endpoint. Otherwise, consider it a
                # legitimate error and raise.
-                if not self._is_unknown_endpoint(e):
+                if not is_unknown_endpoint(e):
                    raise

                logger.debug(
@@ -1609,7 +1708,70 @@ class FederationClient(FederationBase):
        return result

    async def timestamp_to_event(
-        self, destination: str, room_id: str, timestamp: int, direction: str
+        self,
+        *,
+        destinations: StrCollection,
+        room_id: str,
+        timestamp: int,
+        direction: Direction,
+    ) -> Optional["TimestampToEventResponse"]:
+        """
+        Calls each remote federating server from `destinations` asking for their closest
+        event to the given timestamp in the given direction until we get a response.
+        Also validates the response to always return the expected keys or raises an
+        error.
+
+        Args:
+            destinations: The domains of homeservers to try fetching from
+            room_id: Room to fetch the event from
+            timestamp: The point in time (inclusive) we should navigate from in
+                the given direction to find the closest event.
+            direction: indicates whether we should navigate forward
+                or backward from the given timestamp to find the closest event.
+
+        Returns:
+            A parsed TimestampToEventResponse including the closest event_id
+            and origin_server_ts or None if no destination has a response.
+        """
+
+        async def _timestamp_to_event_from_destination(
+            destination: str,
+        ) -> TimestampToEventResponse:
+            return await self._timestamp_to_event_from_destination(
+                destination, room_id, timestamp, direction
+            )
+
+        try:
+            # Loop through each homeserver candidate until we get a successful response
+            timestamp_to_event_response = await self._try_destination_list(
+                "timestamp_to_event",
+                destinations,
+                # TODO: The requested timestamp may lie in a part of the
+                #   event graph that the remote server *also* didn't have,
+                #   in which case they will have returned another event
+                #   which may be nowhere near the requested timestamp. In
+                #   the future, we may need to reconcile that gap and ask
+                #   other homeservers, and/or extend `/timestamp_to_event`
+                #   to return events on *both* sides of the timestamp to
+                #   help reconcile the gap faster.
+                _timestamp_to_event_from_destination,
+                # Since this endpoint is new, we should try other servers before giving up.
+                # We can safely remove this in a year (remove after 2023-11-16).
+                failover_on_unknown_endpoint=True,
+            )
+            return timestamp_to_event_response
+        except SynapseError as e:
+            logger.warning(
+                "timestamp_to_event(room_id=%s, timestamp=%s, direction=%s): encountered error when trying to fetch from destinations: %s",
+                room_id,
+                timestamp,
+                direction,
+                e,
+            )
+            return None
+
+    async def _timestamp_to_event_from_destination(
+        self, destination: str, room_id: str, timestamp: int, direction: Direction
    ) -> "TimestampToEventResponse":
        """
        Calls a remote federating server at `destination` asking for their
@@ -1622,7 +1784,7 @@ class FederationClient(FederationBase):
            room_id: Room to fetch the event from
            timestamp: The point in time (inclusive) we should navigate from in
                the given direction to find the closest event.
-            direction: ["f"|"b"] to indicate whether we should navigate forward
+            direction: indicates whether we should navigate forward
                or backward from the given timestamp to find the closest event.

        Returns:
@@ -1706,6 +1868,95 @@ class FederationClient(FederationBase):

        return filtered_statuses, filtered_failures

+    async def federation_download_media(
+        self,
+        destination: str,
+        media_id: str,
+        output_stream: BinaryIO,
+        max_size: int,
+        max_timeout_ms: int,
+        download_ratelimiter: Ratelimiter,
+        ip_address: str,
+    ) -> Union[
+        Tuple[int, Dict[bytes, List[bytes]], bytes],
+        Tuple[int, Dict[bytes, List[bytes]]],
+    ]:
+        try:
+            return await self.transport_layer.federation_download_media(
+                destination,
+                media_id,
+                output_stream=output_stream,
+                max_size=max_size,
+                max_timeout_ms=max_timeout_ms,
+                download_ratelimiter=download_ratelimiter,
+                ip_address=ip_address,
+            )
+        except HttpResponseException as e:
+            # If an error is received that is due to an unrecognised endpoint,
+            # fallback to the _matrix/media/v3/download endpoint. Otherwise, consider it a legitimate error
+            # and raise.
+            if not is_unknown_endpoint(e):
+                raise
+
+        logger.debug(
+            "Couldn't download media %s/%s over _matrix/federation/v1/media/download, falling back to _matrix/media/v3/download path",
+            destination,
+            media_id,
+        )
+
+        return await self.transport_layer.download_media_v3(
+            destination,
+            media_id,
+            output_stream=output_stream,
+            max_size=max_size,
+            max_timeout_ms=max_timeout_ms,
+            download_ratelimiter=download_ratelimiter,
+            ip_address=ip_address,
+        )
+
+    async def download_media(
+        self,
+        destination: str,
+        media_id: str,
+        output_stream: BinaryIO,
+        max_size: int,
+        max_timeout_ms: int,
+        download_ratelimiter: Ratelimiter,
+        ip_address: str,
+    ) -> Tuple[int, Dict[bytes, List[bytes]]]:
+        try:
+            return await self.transport_layer.download_media_v3(
+                destination,
+                media_id,
+                output_stream=output_stream,
+                max_size=max_size,
+                max_timeout_ms=max_timeout_ms,
+                download_ratelimiter=download_ratelimiter,
+                ip_address=ip_address,
+            )
+        except HttpResponseException as e:
+            # If an error is received that is due to an unrecognised endpoint,
+            # fallback to the r0 endpoint. Otherwise, consider it a legitimate error
+            # and raise.
+            if not is_unknown_endpoint(e):
+                raise
+
+        logger.debug(
+            "Couldn't download media %s/%s with the v3 API, falling back to the r0 API",
+            destination,
+            media_id,
+        )
+
+        return await self.transport_layer.download_media_r0(
+            destination,
+            media_id,
+            output_stream=output_stream,
+            max_size=max_size,
+            max_timeout_ms=max_timeout_ms,
+            download_ratelimiter=download_ratelimiter,
+            ip_address=ip_address,
+        )
+

 @attr.s(frozen=True, slots=True, auto_attribs=True)
 class TimestampToEventResponse:
@@ -1735,7 +1986,7 @@ class TimestampToEventResponse:
            )

        origin_server_ts = d.get("origin_server_ts")
-        if not isinstance(origin_server_ts, int):
+        if type(origin_server_ts) is not int:  # noqa: E721
            raise ValueError(
                "Invalid response: 'origin_server_ts' must be a int but received %r"
                % origin_server_ts

--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
-# Copyright 2015, 2016 OpenMarket Ltd
-# Copyright 2018 New Vector Ltd
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2019-2021 Matrix.org Federation C.I.C
+# Copyright 2015, 2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import logging
 import random
 from typing import (
@@ -23,24 +29,30 @@ from typing import (
    Collection,
    Dict,
    List,
+    Mapping,
    Optional,
    Tuple,
    Union,
 )

-from matrix_common.regex import glob_to_regex
 from prometheus_client import Counter, Gauge, Histogram

-from twisted.internet.abstract import isIPAddress
 from twisted.python import failure

-from synapse.api.constants import EduTypes, EventContentFields, EventTypes, Membership
+from synapse.api.constants import (
+    Direction,
+    EduTypes,
+    EventContentFields,
+    EventTypes,
+    Membership,
+)
 from synapse.api.errors import (
    AuthError,
    Codes,
    FederationError,
    IncompatibleRoomVersionError,
    NotFoundError,
+    PartialStateConflictError,
    SynapseError,
    UnsupportedRoomVersionError,
 )
@@ -54,7 +66,8 @@ from synapse.federation.federation_base import (
    event_from_pdu_json,
 )
 from synapse.federation.persistence import TransactionActions
-from synapse.federation.units import Edu, Transaction
+from synapse.federation.units import Edu, Transaction, serialize_and_filter_pdus
+from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
 from synapse.http.servlet import assert_params_in_dict
 from synapse.logging.context import (
    make_deferred_yieldable,
@@ -62,7 +75,9 @@ from synapse.logging.context import (
    run_in_background,
 )
 from synapse.logging.opentracing import (
+    SynapseTags,
    log_kv,
+    set_tag,
    start_active_span_from_edu,
    tag_args,
    trace,
@@ -72,10 +87,11 @@ from synapse.replication.http.federation import (
    ReplicationFederationSendEduRestServlet,
    ReplicationGetQueryRestServlet,
 )
-from synapse.storage.databases.main.events import PartialStateConflictError
 from synapse.storage.databases.main.lock import Lock
-from synapse.types import JsonDict, StateMap, get_domain_from_id
-from synapse.util import json_decoder, unwrapFirstError
+from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
+from synapse.storage.roommember import MemberSummary
+from synapse.types import JsonDict, StateMap, UserID, get_domain_from_id
+from synapse.util import unwrapFirstError
 from synapse.util.async_helpers import Linearizer, concurrently_execute, gather_results
 from synapse.util.caches.response_cache import ResponseCache
 from synapse.util.stringutils import parse_server_name
@@ -118,12 +134,15 @@ class FederationServer(FederationBase):
    def __init__(self, hs: "HomeServer"):
        super().__init__(hs)

+        self.server_name = hs.hostname
        self.handler = hs.get_federation_handler()
-        self._spam_checker = hs.get_spam_checker()
+        self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
        self._federation_event_handler = hs.get_federation_event_handler()
        self.state = hs.get_state_handler()
        self._event_auth_handler = hs.get_event_auth_handler()
        self._room_member_handler = hs.get_room_member_handler()
+        self._e2e_keys_handler = hs.get_e2e_keys_handler()
+        self._worker_lock_handler = hs.get_worker_locks_handler()

        self._state_storage_controller = hs.get_storage_controllers().state

@@ -150,9 +169,9 @@ class FederationServer(FederationBase):

        # We cache responses to state queries, as they take a while and often
        # come in waves.
-        self._state_resp_cache: ResponseCache[
-            Tuple[str, Optional[str]]
-        ] = ResponseCache(hs.get_clock(), "state_resp", timeout_ms=30000)
+        self._state_resp_cache: ResponseCache[Tuple[str, Optional[str]]] = (
+            ResponseCache(hs.get_clock(), "state_resp", timeout_ms=30000)
+        )
        self._state_ids_resp_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
            hs.get_clock(), "state_ids_resp", timeout_ms=30000
        )
@@ -214,7 +233,7 @@ class FederationServer(FederationBase):
        return 200, res

    async def on_timestamp_to_event_request(
-        self, origin: str, room_id: str, timestamp: int, direction: str
+        self, origin: str, room_id: str, timestamp: int, direction: Direction
    ) -> Tuple[int, Dict[str, Any]]:
        """When we receive a federated `/timestamp_to_event` request,
        handle all of the logic for validating and fetching the event.
@@ -224,7 +243,7 @@ class FederationServer(FederationBase):
            room_id: Room to fetch the event from
            timestamp: The point in time (inclusive) we should navigate from in
                the given direction to find the closest event.
-            direction: ["f"|"b"] to indicate whether we should navigate forward
+            direction: indicates whether we should navigate forward
                or backward from the given timestamp to find the closest event.

        Returns:
@@ -450,7 +469,12 @@ class FederationServer(FederationBase):
                logger.info("Ignoring PDU: %s", e)
                continue

-            event = event_from_pdu_json(p, room_version)
+            try:
+                event = event_from_pdu_json(p, room_version)
+            except SynapseError as e:
+                logger.info("Ignoring PDU for failing to deserialize: %s", e)
+                continue
+
            pdus_by_room.setdefault(room_id, []).append(event)

            if event.origin_server_ts > newest_pdu_ts:
@@ -481,6 +505,14 @@ class FederationServer(FederationBase):
                    pdu_results[pdu.event_id] = await process_pdu(pdu)

        async def process_pdu(pdu: EventBase) -> JsonDict:
+            """
+            Processes a pushed PDU sent to us via a `/send` transaction
+
+            Returns:
+                JsonDict representing a "PDU Processing Result" that will be bundled up
+                with the other processed PDUs in the `/send` transaction and sent back
+                to the remote homeserver.
+            """
            event_id = pdu.event_id
            with nested_logging_context(event_id):
                try:
@@ -494,7 +526,7 @@ class FederationServer(FederationBase):
                    logger.error(
                        "Failed to handle PDU %s",
                        event_id,
-                        exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
+                        exc_info=(f.type, f.value, f.getTracebackObject()),
                    )
                    return {"error": str(e)}

@@ -519,7 +551,25 @@ class FederationServer(FederationBase):
                edu_type=edu_dict["edu_type"],
                content=edu_dict["content"],
            )
-            await self.registry.on_edu(edu.edu_type, origin, edu.content)
+            try:
+                await self.registry.on_edu(edu.edu_type, origin, edu.content)
+            except Exception:
+                # If there was an error handling the EDU, we must reject the
+                # transaction.
+                #
+                # Some EDU types (notably, to-device messages) are, despite their name,
+                # expected to be reliable; if we weren't able to do something with it,
+                # we have to tell the sender that, and the only way the protocol gives
+                # us to do so is by sending an HTTP error back on the transaction.
+                #
+                # We log the exception now, and then raise a new SynapseError to cause
+                # the transaction to be failed.
+                logger.exception("Error handling EDU of type %s", edu.edu_type)
+                raise SynapseError(500, f"Error handing EDU of type {edu.edu_type}")
+
+                # TODO: if the first EDU fails, we should probably abort the whole
+                #   thing rather than carrying on with the rest of them. That would
+                #   probably be best done inside `concurrently_execute`.

        await concurrently_execute(
            _process_edu,
@@ -591,8 +641,8 @@ class FederationServer(FederationBase):
        )

        return {
-            "pdus": [pdu.get_pdu_json() for pdu in pdus],
-            "auth_chain": [pdu.get_pdu_json() for pdu in auth_chain],
+            "pdus": serialize_and_filter_pdus(pdus),
+            "auth_chain": serialize_and_filter_pdus(auth_chain),
        }

    async def on_pdu_request(
@@ -629,7 +679,7 @@ class FederationServer(FederationBase):
        # This is in addition to the HS-level rate limiting applied by
        # BaseFederationServlet.
        # type-ignore: mypy doesn't seem able to deduce the type of the limiter(!?)
-        await self._room_member_handler._join_rate_per_room_limiter.ratelimit(  # type: ignore[has-type]
+        await self._room_member_handler._join_rate_per_room_limiter.ratelimit(
            requester=None,
            key=room_id,
            update=False,
@@ -668,7 +718,11 @@ class FederationServer(FederationBase):
        room_id: str,
        caller_supports_partial_state: bool = False,
    ) -> Dict[str, Any]:
-        await self._room_member_handler._join_rate_per_room_limiter.ratelimit(  # type: ignore[has-type]
+        set_tag(
+            SynapseTags.SEND_JOIN_RESPONSE_IS_PARTIAL_STATE,
+            caller_supports_partial_state,
+        )
+        await self._room_member_handler._join_rate_per_room_limiter.ratelimit(
            requester=None,
            key=room_id,
            update=False,
@@ -683,8 +737,9 @@ class FederationServer(FederationBase):
        state_event_ids: Collection[str]
        servers_in_room: Optional[Collection[str]]
        if caller_supports_partial_state:
+            summary = await self.store.get_room_summary(room_id)
            state_event_ids = _get_event_ids_for_partial_state_join(
-                event, prev_state_ids
+                event, prev_state_ids, summary
            )
            servers_in_room = await self.state.get_hosts_in_room_at_events(
                room_id, event_ids=event.prev_event_ids()
@@ -711,13 +766,13 @@ class FederationServer(FederationBase):
        event_json = event.get_pdu_json(time_now)
        resp = {
            "event": event_json,
-            "state": [p.get_pdu_json(time_now) for p in state_events],
-            "auth_chain": [p.get_pdu_json(time_now) for p in auth_chain_events],
-            "org.matrix.msc3706.partial_state": caller_supports_partial_state,
+            "state": serialize_and_filter_pdus(state_events, time_now),
+            "auth_chain": serialize_and_filter_pdus(auth_chain_events, time_now),
+            "members_omitted": caller_supports_partial_state,
        }

        if servers_in_room is not None:
-            resp["org.matrix.msc3706.servers_in_room"] = list(servers_in_room)
+            resp["servers_in_room"] = list(servers_in_room)

        return resp

@@ -780,7 +835,7 @@ class FederationServer(FederationBase):
            raise IncompatibleRoomVersionError(room_version=room_version.identifier)

        # Check that this room supports knocking as defined by its room version
-        if not room_version.msc2403_knocking:
+        if not room_version.knock_join_rule:
            raise SynapseError(
                403,
                "This room version does not support knocking",
@@ -824,14 +879,7 @@ class FederationServer(FederationBase):
                context, self._room_prejoin_state_types
            )
        )
-        return {
-            "knock_room_state": stripped_room_state,
-            # Since v1.37, Synapse incorrectly used "knock_state_events" for this field.
-            # Thus, we also populate a 'knock_state_events' with the same content to
-            # support old instances.
-            # See https://github.com/matrix-org/synapse/issues/14088.
-            "knock_state_events": stripped_room_state,
-        }
+        return {"knock_room_state": stripped_room_state}

    async def _on_send_membership_event(
        self, origin: str, content: JsonDict, membership_type: str, room_id: str
@@ -883,7 +931,7 @@ class FederationServer(FederationBase):
                errcode=Codes.NOT_FOUND,
            )

-        if membership_type == Membership.KNOCK and not room_version.msc2403_knocking:
+        if membership_type == Membership.KNOCK and not room_version.knock_join_rule:
            raise SynapseError(
                403,
                "This room version does not support knocking",
@@ -907,7 +955,7 @@ class FederationServer(FederationBase):
        # the event is valid to be sent into the room. Currently this is only done
        # if the user is being joined via restricted join rules.
        if (
-            room_version.msc3083_join_rules
+            room_version.restricted_join_rule
            and event.membership == Membership.JOIN
            and EventContentFields.AUTHORISING_USER in event.content
        ):
@@ -915,10 +963,10 @@ class FederationServer(FederationBase):
            authorising_server = get_domain_from_id(
                event.content[EventContentFields.AUTHORISING_USER]
            )
-            if authorising_server != self.server_name:
+            if not self._is_mine_server_name(authorising_server):
                raise SynapseError(
                    400,
-                    f"Cannot authorise request from resident server: {authorising_server}",
+                    f"Cannot authorise membership event for {authorising_server}. We can only authorise requests from our own homeserver",
                )

            event.signatures.update(
@@ -962,7 +1010,7 @@ class FederationServer(FederationBase):

            time_now = self._clock.time_msec()
            auth_pdus = await self.handler.on_event_auth(event_id)
-            res = {"auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus]}
+            res = {"auth_chain": serialize_and_filter_pdus(auth_pdus, time_now)}
        return 200, res

    async def on_query_client_keys(
@@ -978,23 +1026,27 @@ class FederationServer(FederationBase):

    @trace
    async def on_claim_client_keys(
-        self, origin: str, content: JsonDict
+        self, query: List[Tuple[str, str, str, int]], always_include_fallback_keys: bool
    ) -> Dict[str, Any]:
-        query = []
-        for user_id, device_keys in content.get("one_time_keys", {}).items():
-            for device_id, algorithm in device_keys.items():
-                query.append((user_id, device_id, algorithm))
+        if any(
+            not self.hs.is_mine(UserID.from_string(user_id))
+            for user_id, _, _, _ in query
+        ):
+            raise SynapseError(400, "User is not hosted on this homeserver")

        log_kv({"message": "Claiming one time keys.", "user, device pairs": query})
-        results = await self.store.claim_e2e_one_time_keys(query)
+        results = await self._e2e_keys_handler.claim_local_one_time_keys(
+            query, always_include_fallback_keys=always_include_fallback_keys
+        )

-        json_result: Dict[str, Dict[str, dict]] = {}
-        for user_id, device_keys in results.items():
-            for device_id, keys in device_keys.items():
-                for key_id, json_str in keys.items():
-                    json_result.setdefault(user_id, {})[device_id] = {
-                        key_id: json_decoder.decode(json_str)
-                    }
+        json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
+        for result in results:
+            for user_id, device_keys in result.items():
+                for device_id, keys in device_keys.items():
+                    for key_id, key in keys.items():
+                        json_result.setdefault(user_id, {}).setdefault(device_id, {})[
+                            key_id
+                        ] = key

        logger.info(
            "Claimed one-time-keys: %s",
@@ -1043,7 +1095,7 @@ class FederationServer(FederationBase):

            time_now = self._clock.time_msec()

-        return {"events": [ev.get_pdu_json(time_now) for ev in missing_events]}
+        return {"events": serialize_and_filter_pdus(missing_events, time_now)}

    async def on_openid_userinfo(self, token: str) -> Optional[str]:
        ts_now_ms = self._clock.time_msec()
@@ -1103,7 +1155,7 @@ class FederationServer(FederationBase):
            logger.warning("event id %s: %s", pdu.event_id, e)
            raise FederationError("ERROR", 403, str(e), affected=pdu.event_id)

-        if await self._spam_checker.should_drop_federated_event(pdu):
+        if await self._spam_checker_module_callbacks.should_drop_federated_event(pdu):
            logger.warning(
                "Unstaged federated event contains spam, dropping %s", pdu.event_id
            )
@@ -1148,7 +1200,9 @@ class FederationServer(FederationBase):

            origin, event = next

-            if await self._spam_checker.should_drop_federated_event(event):
+            if await self._spam_checker_module_callbacks.should_drop_federated_event(
+                event
+            ):
                logger.warning(
                    "Staged federated event contains spam, dropping %s",
                    event.event_id,
@@ -1210,9 +1264,18 @@ class FederationServer(FederationBase):
                logger.info("handling received PDU in room %s: %s", room_id, event)
                try:
                    with nested_logging_context(event.event_id):
-                        await self._federation_event_handler.on_receive_pdu(
-                            origin, event
-                        )
+                        # We're taking out a lock within a lock, which could
+                        # lead to deadlocks if we're not careful. However, it is
+                        # safe on this occasion as we only ever take a write
+                        # lock when deleting a room, which we would never do
+                        # while holding the `_INBOUND_EVENT_HANDLING_LOCK_NAME`
+                        # lock.
+                        async with self._worker_lock_handler.acquire_read_write_lock(
+                            NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False
+                        ):
+                            await self._federation_event_handler.on_receive_pdu(
+                                origin, event
+                            )
                except FederationError as e:
                    # XXX: Ideally we'd inform the remote we failed to process
                    # the event, but we can't return an error in the transaction
@@ -1223,7 +1286,7 @@ class FederationServer(FederationBase):
                    logger.error(
                        "Failed to handle PDU %s",
                        event.event_id,
-                        exc_info=(f.type, f.value, f.getTracebackObject()),  # type: ignore
+                        exc_info=(f.type, f.value, f.getTracebackObject()),
                    )

                received_ts = await self.store.remove_received_event_from_staging(
@@ -1267,9 +1330,6 @@ class FederationServer(FederationBase):
                return
            lock = new_lock

-    def __str__(self) -> str:
-        return "<ReplicationLayer(%s)>" % self.server_name
-
    async def exchange_third_party_invite(
        self, sender_user_id: str, target_user_id: str, room_id: str, signed: Dict
    ) -> None:
@@ -1290,75 +1350,13 @@ class FederationServer(FederationBase):
        Raises:
            AuthError if the server does not match the ACL
        """
-        acl_event = await self._storage_controllers.state.get_current_state_event(
-            room_id, EventTypes.ServerACL, ""
-        )
-        if not acl_event or server_matches_acl_event(server_name, acl_event):
-            return
-
-        raise AuthError(code=403, msg="Server is banned from room")
-
-
-def server_matches_acl_event(server_name: str, acl_event: EventBase) -> bool:
-    """Check if the given server is allowed by the ACL event
-
-    Args:
-        server_name: name of server, without any port part
-        acl_event: m.room.server_acl event
-
-    Returns:
-        True if this server is allowed by the ACLs
-    """
-    logger.debug("Checking %s against acl %s", server_name, acl_event.content)
-
-    # first of all, check if literal IPs are blocked, and if so, whether the
-    # server name is a literal IP
-    allow_ip_literals = acl_event.content.get("allow_ip_literals", True)
-    if not isinstance(allow_ip_literals, bool):
-        logger.warning("Ignoring non-bool allow_ip_literals flag")
-        allow_ip_literals = True
-    if not allow_ip_literals:
-        # check for ipv6 literals. These start with '['.
-        if server_name[0] == "[":
-            return False
-
-        # check for ipv4 literals. We can just lift the routine from twisted.
-        if isIPAddress(server_name):
-            return False
-
-    # next,  check the deny list
-    deny = acl_event.content.get("deny", [])
-    if not isinstance(deny, (list, tuple)):
-        logger.warning("Ignoring non-list deny ACL %s", deny)
-        deny = []
-    for e in deny:
-        if _acl_entry_matches(server_name, e):
-            # logger.info("%s matched deny rule %s", server_name, e)
-            return False
-
-    # then the allow list.
-    allow = acl_event.content.get("allow", [])
-    if not isinstance(allow, (list, tuple)):
-        logger.warning("Ignoring non-list allow ACL %s", allow)
-        allow = []
-    for e in allow:
-        if _acl_entry_matches(server_name, e):
-            # logger.info("%s matched allow rule %s", server_name, e)
-            return True
-
-    # everything else should be rejected.
-    # logger.info("%s fell through", server_name)
-    return False
-
-
-def _acl_entry_matches(server_name: str, acl_entry: Any) -> bool:
-    if not isinstance(acl_entry, str):
-        logger.warning(
-            "Ignoring non-str ACL entry '%s' (is %s)", acl_entry, type(acl_entry)
+        server_acl_evaluator = (
+            await self._storage_controllers.state.get_server_acl_for_room(room_id)
        )
-        return False
-    regex = glob_to_regex(acl_entry)
-    return bool(regex.match(server_name))
+        if server_acl_evaluator and not server_acl_evaluator.server_matches_acl_event(
+            server_name
+        ):
+            raise AuthError(code=403, msg="Server is banned from room")


 class FederationHandlerRegistry:
@@ -1432,19 +1430,14 @@ class FederationHandlerRegistry:
        self._edu_type_to_instance[edu_type] = instance_names

    async def on_edu(self, edu_type: str, origin: str, content: dict) -> None:
-        if not self.config.server.use_presence and edu_type == EduTypes.PRESENCE:
+        if not self.config.server.track_presence and edu_type == EduTypes.PRESENCE:
            return

        # Check if we have a handler on this instance
        handler = self.edu_handlers.get(edu_type)
        if handler:
            with start_active_span_from_edu(content, "handle_edu"):
-                try:
-                    await handler(origin, content)
-                except SynapseError as e:
-                    logger.info("Failed to handle edu %r: %r", edu_type, e)
-                except Exception:
-                    logger.exception("Failed to handle edu %r", edu_type)
+                await handler(origin, content)
            return

        # Check if we can route it somewhere else that isn't us
@@ -1453,17 +1446,12 @@ class FederationHandlerRegistry:
            # Pick an instance randomly so that we don't overload one.
            route_to = random.choice(instances)

-            try:
-                await self._send_edu(
-                    instance_name=route_to,
-                    edu_type=edu_type,
-                    origin=origin,
-                    content=content,
-                )
-            except SynapseError as e:
-                logger.info("Failed to handle edu %r: %r", edu_type, e)
-            except Exception:
-                logger.exception("Failed to handle edu %r", edu_type)
+            await self._send_edu(
+                instance_name=route_to,
+                edu_type=edu_type,
+                origin=origin,
+                content=content,
+            )
            return

        # Oh well, let's just log and move on.
@@ -1487,8 +1475,9 @@ class FederationHandlerRegistry:
 def _get_event_ids_for_partial_state_join(
    join_event: EventBase,
    prev_state_ids: StateMap[str],
+    summary: Mapping[str, MemberSummary],
 ) -> Collection[str]:
-    """Calculate state to be retuned in a partial_state send_join
+    """Calculate state to be returned in a partial_state send_join

    Args:
        join_event: the join event being send_joined
@@ -1513,8 +1502,19 @@ def _get_event_ids_for_partial_state_join(
    if current_membership_event_id is not None:
        state_event_ids.add(current_membership_event_id)

-    # TODO: return a few more members:
-    #   - those with invites
-    #   - those that are kicked? / banned
+    name_id = prev_state_ids.get((EventTypes.Name, ""))
+    canonical_alias_id = prev_state_ids.get((EventTypes.CanonicalAlias, ""))
+    if not name_id and not canonical_alias_id:
+        # Also include the hero members of the room (for DM rooms without a title).
+        # To do this properly, we should select the correct subset of membership events
+        # from `prev_state_ids`. Instead, we are lazier and use the (cached)
+        # `get_room_summary` function, which is based on the current state of the room.
+        # This introduces races; we choose to ignore them because a) they should be rare
+        # and b) even if it's wrong, joining servers will get the full state eventually.
+        heroes = extract_heroes_from_room_summary(summary, join_event.state_key)
+        for hero in heroes:
+            membership_event_id = prev_state_ids.get((EventTypes.Member, hero))
+            if membership_event_id:
+                state_event_ids.add(membership_event_id)

    return state_event_ids
--- a/synapse/federation/persistence.py
+++ b/synapse/federation/persistence.py
-# Copyright 2014-2016 OpenMarket Ltd
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2021 The Matrix.org Foundation C.I.C.
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.

-""" This module contains all the persistence actions done by the federation
+"""This module contains all the persistence actions done by the federation
 package.

 These actions are mostly only used by the :py:mod:`.replication` module.

--- a/synapse/federation/send_queue.py
+++ b/synapse/federation/send_queue.py
-# Copyright 2014-2016 OpenMarket Ltd
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2021 The Matrix.org Foundation C.I.C.
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.

 """A federation sender that forwards things to be sent across replication to
 a worker process.
@@ -49,7 +56,7 @@ from synapse.api.presence import UserPresenceState
 from synapse.federation.sender import AbstractFederationSender, FederationSender
 from synapse.metrics import LaterGauge
 from synapse.replication.tcp.streams.federation import FederationStream
-from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
+from synapse.types import JsonDict, ReadReceipt, RoomStreamToken, StrCollection
 from synapse.util.metrics import Measure

 from .units import Edu
@@ -68,6 +75,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id
+        self.is_mine_server_name = hs.is_mine_server_name

        # We may have multiple federation sender instances, so we need to track
        # their positions separately.
@@ -80,9 +88,9 @@ class FederationRemoteSendQueue(AbstractFederationSender):
        # Stores the destinations we need to explicitly send presence to about a
        # given user.
        # Stream position -> (user_id, destinations)
-        self.presence_destinations: SortedDict[
-            int, Tuple[str, Iterable[str]]
-        ] = SortedDict()
+        self.presence_destinations: SortedDict[int, Tuple[str, Iterable[str]]] = (
+            SortedDict()
+        )

        # (destination, key) -> EDU
        self.keyed_edu: Dict[Tuple[str, tuple], Edu] = {}
@@ -198,7 +206,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
        key: Optional[Hashable] = None,
    ) -> None:
        """As per FederationSender"""
-        if destination == self.server_name:
+        if self.is_mine_server_name(destination):
            logger.info("Not sending EDU to ourselves")
            return

@@ -228,7 +236,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
        """
        # nothing to do here: the replication listener will handle it.

-    def send_presence_to_destinations(
+    async def send_presence_to_destinations(
        self, states: Iterable[UserPresenceState], destinations: Iterable[str]
    ) -> None:
        """As per FederationSender
@@ -244,7 +252,9 @@ class FederationRemoteSendQueue(AbstractFederationSender):

        self.notifier.on_new_replication_data()

-    def send_device_messages(self, destination: str, immediate: bool = False) -> None:
+    async def send_device_messages(
+        self, destinations: StrCollection, immediate: bool = True
+    ) -> None:
        """As per FederationSender"""
        # We don't need to replicate this as it gets sent down a different
        # stream.
@@ -314,7 +324,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
        # stream position.
        keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}

-        for ((destination, edu_key), pos) in keyed_edus.items():
+        for (destination, edu_key), pos in keyed_edus.items():
            rows.append(
                (
                    pos,
@@ -329,7 +339,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
        j = self.edus.bisect_right(to_token) + 1
        edus = self.edus.items()[i:j]

-        for (pos, edu) in edus:
+        for pos, edu in edus:
            rows.append((pos, EduRow(edu)))

        # Sort rows based on pos
@@ -392,7 +402,7 @@ class PresenceDestinationsRow(BaseFederationRow):
    @staticmethod
    def from_data(data: JsonDict) -> "PresenceDestinationsRow":
        return PresenceDestinationsRow(
-            state=UserPresenceState.from_dict(data["state"]), destinations=data["dests"]
+            state=UserPresenceState(**data["state"]), destinations=data["dests"]
        )

    def to_data(self) -> JsonDict:
@@ -462,7 +472,7 @@ class ParsedFederationStreamData:
    edus: Dict[str, List[Edu]]


-def process_rows_for_federation(
+async def process_rows_for_federation(
    transaction_queue: FederationSender,
    rows: List[FederationStream.FederationStreamRow],
 ) -> None:
@@ -495,7 +505,7 @@ def process_rows_for_federation(
        parsed_row.add_to_buffer(buff)

    for state, destinations in buff.presence_destinations:
-        transaction_queue.send_presence_to_destinations(
+        await transaction_queue.send_presence_to_destinations(
            states=[state], destinations=destinations
        )


--- a/synapse/federation/sender/__init__.py
+++ b/synapse/federation/sender/__init__.py
-# Copyright 2019 New Vector Ltd
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# This file is licensed under the Affero General Public License (AGPL) version 3.
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+# Copyright (C) 2023 New Vector, Ltd
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
+#
+#
+"""
+The Federation Sender is responsible for sending Persistent Data Units (PDUs)
+and Ephemeral Data Units (EDUs) to other homeservers using
+the `/send` Federation API.
+
+
+## How do PDUs get sent?
+
+The Federation Sender is made aware of new PDUs due to `FederationSender.notify_new_events`.
+When the sender is notified about a newly-persisted PDU that originates from this homeserver
+and is not an out-of-band event, we pass the PDU to the `PerDestinationQueue` for each
+remote homeserver that is in the room at that point in the DAG.
+
+
+### Per-Destination Queues
+
+There is one `PerDestinationQueue` per 'destination' homeserver.
+The `PerDestinationQueue` maintains the following information about the destination:
+
+- whether the destination is currently in [catch-up mode (see below)](#catch-up-mode);
+- a queue of PDUs to be sent to the destination; and
+- a queue of EDUs to be sent to the destination (not considered in this section).
+
+Upon a new PDU being enqueued, `attempt_new_transaction` is called to start a new
+transaction if there is not already one in progress.
+
+
+### Transactions and the Transaction Transmission Loop
+
+Each federation HTTP request to the `/send` endpoint is referred to as a 'transaction'.
+The body of the HTTP request contains a list of PDUs and EDUs to send to the destination.
+
+The *Transaction Transmission Loop* (`_transaction_transmission_loop`) is responsible
+for emptying the queued PDUs (and EDUs) from a `PerDestinationQueue` by sending
+them to the destination.
+
+There can only be one transaction in flight for a given destination at any time.
+(Other than preventing us from overloading the destination, this also makes it easier to
+reason about because we process events sequentially for each destination.
+This is useful for *Catch-Up Mode*, described later.)
+
+The loop continues so long as there is anything to send. At each iteration of the loop, we:
+
+- dequeue up to 50 PDUs (and up to 100 EDUs).
+- make the `/send` request to the destination homeserver with the dequeued PDUs and EDUs.
+- if successful, make note of the fact that we succeeded in transmitting PDUs up to
+  the given `stream_ordering` of the latest PDU by advancing `last_successful_stream_ordering`.
+- if unsuccessful, back off from the remote homeserver for some time.
+  If we have been unsuccessful for too long (when the backoff interval grows to exceed 1 hour),
+  the in-memory queues are emptied and we enter [*Catch-Up Mode*, described below](#catch-up-mode).
+
+
+### Catch-Up Mode
+
+When the `PerDestinationQueue` has the catch-up flag set, the *Catch-Up Transmission Loop*
+(`_catch_up_transmission_loop`) is used in lieu of the regular `_transaction_transmission_loop`.
+(Only once the catch-up mode has been exited can the regular transaction transmission behaviour
+be resumed.)
+
+*Catch-Up Mode*, entered upon Synapse startup or once a homeserver has fallen behind due to
+connection problems, is responsible for sending PDUs that have been missed by the destination
+homeserver. (PDUs can be missed because the `PerDestinationQueue` is volatile — i.e. resets
+on startup — and it does not hold PDUs forever if `/send` requests to the destination fail.)
+
+The catch-up mechanism makes use of the `last_successful_stream_ordering` column in the
+`destinations` table (which gives the `stream_ordering` of the most recent successfully
+sent PDU) and the `stream_ordering` column in the `destination_rooms` table (which gives,
+for each room, the `stream_ordering` of the most recent PDU that needs to be sent to this
+destination).
+
+Each iteration of the loop pulls out 50 `destination_rooms` entries with the oldest
+`stream_ordering`s that are greater than the `last_successful_stream_ordering`.
+In other words, from the set of latest PDUs in each room to be sent to the destination,
+the 50 oldest such PDUs are pulled out.
+
+These PDUs could, in principle, now be directly sent to the destination. However, as an
+optimisation intended to prevent overloading destination homeservers, we instead attempt
+to send the latest forward extremities so long as the destination homeserver is still
+eligible to receive those.
+This reduces load on the destination **in aggregate** because all Synapse homeservers
+will behave according to this principle and therefore avoid sending lots of different PDUs
+at different points in the DAG to a recovering homeserver.
+*This optimisation is not currently valid in rooms which are partial-state on this homeserver,
+since we are unable to determine whether the destination homeserver is eligible to receive
+the latest forward extremities unless this homeserver sent those PDUs — in this case, we
+just send the latest PDUs originating from this server and skip this optimisation.*
+
+Whilst PDUs are sent through this mechanism, the position of `last_successful_stream_ordering`
+is advanced as normal.
+Once there are no longer any rooms containing outstanding PDUs to be sent to the destination
+*that are not already in the `PerDestinationQueue` because they arrived since Catch-Up Mode
+was enabled*, Catch-Up Mode is exited and we return to `_transaction_transmission_loop`.
+
+
+#### A note on failures and back-offs
+
+If a remote server is unreachable over federation, we back off from that server,
+with an exponentially-increasing retry interval.
+We automatically retry after the retry interval expires (roughly, the logic to do so
+being triggered every minute).
+
+If the backoff grows too large (> 1 hour), the in-memory queue is emptied (to prevent
+unbounded growth) and Catch-Up Mode is entered.
+
+It is worth noting that the back-off for a remote server is cleared once an inbound
+request from that remote server is received (see `notify_remote_server_up`).
+At this point, the transaction transmission loop is also started up, to proactively
+send missed PDUs and EDUs to the destination (i.e. you don't need to wait for a new PDU
+or EDU, destined for that destination, to be created in order to send out missed PDUs and
+EDUs).
+"""

 import abc
 import logging
@@ -22,22 +139,23 @@ from typing import (
    Hashable,
    Iterable,
    List,
+    Literal,
    Optional,
-    Set,
    Tuple,
 )

 import attr
 from prometheus_client import Counter
-from typing_extensions import Literal

 from twisted.internet import defer
-from twisted.internet.interfaces import IDelayedCall

 import synapse.metrics
 from synapse.api.presence import UserPresenceState
 from synapse.events import EventBase
-from synapse.federation.sender.per_destination_queue import PerDestinationQueue
+from synapse.federation.sender.per_destination_queue import (
+    CATCHUP_RETRY_INTERVAL,
+    PerDestinationQueue,
+)
 from synapse.federation.sender.transaction_manager import TransactionManager
 from synapse.federation.units import Edu
 from synapse.logging.context import make_deferred_yieldable, run_in_background
@@ -51,9 +169,16 @@ from synapse.metrics.background_process_metrics import (
    run_as_background_process,
    wrap_as_background_process,
 )
-from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
+from synapse.types import (
+    JsonDict,
+    ReadReceipt,
+    RoomStreamToken,
+    StrCollection,
+    get_domain_from_id,
+)
 from synapse.util import Clock
 from synapse.util.metrics import Measure
+from synapse.util.retryutils import filter_destinations_by_retry_limiter

 if TYPE_CHECKING:
    from synapse.events.presence_router import PresenceRouter
@@ -71,14 +196,17 @@ sent_pdus_destination_dist_total = Counter(
    "Total number of PDUs queued for sending across all destinations",
 )

-# Time (in s) after Synapse's startup that we will begin to wake up destinations
-# that have catch-up outstanding.
-CATCH_UP_STARTUP_DELAY_SEC = 15
+# Time (in s) to wait before trying to wake up destinations that have
+# catch-up outstanding.
+# Please note that rate limiting still applies, so while the loop is
+# executed every X seconds the destinations may not be woken up because
+# they are being rate limited following previous attempt failures.
+WAKEUP_RETRY_PERIOD_SEC = 60

 # Time (in s) to wait in between waking up each destination, i.e. one destination
-# will be woken up every <x> seconds after Synapse's startup until we have woken
-# every destination has outstanding catch-up.
-CATCH_UP_STARTUP_INTERVAL_SEC = 5
+# will be woken up every <x> seconds until we have woken every destination
+# that has outstanding catch-up.
+WAKEUP_INTERVAL_BETWEEN_DESTINATIONS_SEC = 5


 class AbstractFederationSender(metaclass=abc.ABCMeta):
@@ -99,7 +227,7 @@ class AbstractFederationSender(metaclass=abc.ABCMeta):
        raise NotImplementedError()

    @abc.abstractmethod
-    def send_presence_to_destinations(
+    async def send_presence_to_destinations(
        self, states: Iterable[UserPresenceState], destinations: Iterable[str]
    ) -> None:
        """Send the given presence states to the given destinations.
@@ -128,9 +256,11 @@ class AbstractFederationSender(metaclass=abc.ABCMeta):
        raise NotImplementedError()

    @abc.abstractmethod
-    def send_device_messages(self, destination: str, immediate: bool = True) -> None:
+    async def send_device_messages(
+        self, destinations: StrCollection, immediate: bool = True
+    ) -> None:
        """Tells the sender that a new device message is ready to be sent to the
-        destination. The `immediate` flag specifies whether the messages should
+        destinations. The `immediate` flag specifies whether the messages should
        be tried to be sent immediately, or whether it can be delayed for a
        short while (to aid performance).
        """
@@ -172,12 +302,10 @@ class _DestinationWakeupQueue:
    # being woken up.
    _MAX_TIME_IN_QUEUE = 30.0

-    # The maximum duration in seconds between waking up consecutive destination
-    # queues.
-    _MAX_DELAY = 0.1
-
    sender: "FederationSender" = attr.ib()
    clock: Clock = attr.ib()
+    max_delay_s: int = attr.ib()
+
    queue: "OrderedDict[str, Literal[None]]" = attr.ib(factory=OrderedDict)
    processing: bool = attr.ib(default=False)

@@ -207,7 +335,7 @@ class _DestinationWakeupQueue:
            # We also add an upper bound to the delay, to gracefully handle the
            # case where the queue only has a few entries in it.
            current_sleep_seconds = min(
-                self._MAX_DELAY, self._MAX_TIME_IN_QUEUE / len(self.queue)
+                self.max_delay_s, self._MAX_TIME_IN_QUEUE / len(self.queue)
            )

            while self.queue:
@@ -249,6 +377,7 @@ class FederationSender(AbstractFederationSender):

        self.clock = hs.get_clock()
        self.is_mine_id = hs.is_mine_id
+        self.is_mine_server_name = hs.is_mine_server_name

        self._presence_router: Optional["PresenceRouter"] = None
        self._transaction_manager = TransactionManager(hs)
@@ -290,31 +419,23 @@ class FederationSender(AbstractFederationSender):
        self._is_processing = False
        self._last_poked_id = -1

-        # map from room_id to a set of PerDestinationQueues which we believe are
-        # awaiting a call to flush_read_receipts_for_room. The presence of an entry
-        # here for a given room means that we are rate-limiting RR flushes to that room,
-        # and that there is a pending call to _flush_rrs_for_room in the system.
-        self._queues_awaiting_rr_flush_by_room: Dict[str, Set[PerDestinationQueue]] = {}
+        self._external_cache = hs.get_external_cache()

-        self._rr_txn_interval_per_room_ms = (
-            1000.0
-            / hs.config.ratelimiting.federation_rr_transactions_per_room_per_second
+        rr_txn_interval_per_room_s = (
+            1.0 / hs.config.ratelimiting.federation_rr_transactions_per_room_per_second
+        )
+        self._destination_wakeup_queue = _DestinationWakeupQueue(
+            self, self.clock, max_delay_s=rr_txn_interval_per_room_s
        )

-        # wake up destinations that have outstanding PDUs to be caught up
-        self._catchup_after_startup_timer: Optional[
-            IDelayedCall
-        ] = self.clock.call_later(
-            CATCH_UP_STARTUP_DELAY_SEC,
+        # Regularly wake up destinations that have outstanding PDUs to be caught up
+        self.clock.looping_call_now(
            run_as_background_process,
+            WAKEUP_RETRY_PERIOD_SEC * 1000.0,
            "wake_destinations_needing_catchup",
            self._wake_destinations_needing_catchup,
        )

-        self._external_cache = hs.get_external_cache()
-
-        self._destination_wakeup_queue = _DestinationWakeupQueue(self, self.clock)
-
    def _get_per_destination_queue(self, destination: str) -> PerDestinationQueue:
        """Get or create a PerDestinationQueue for the given destination

@@ -434,7 +555,23 @@ class FederationSender(AbstractFederationSender):
                        # If there are no prev event IDs then the state is empty
                        # and so no remote servers in the room
                        destinations = set()
-                    else:
+
+                    if destinations is None:
+                        # During partial join we use the set of servers that we got
+                        # when beginning the join. It's still possible that we send
+                        # events to servers that left the room in the meantime, but
+                        # we consider that an acceptable risk since it is only our own
+                        # events that we leak and not other servers' ones.
+                        partial_state_destinations = (
+                            await self.store.get_partial_state_servers_at_join(
+                                event.room_id
+                            )
+                        )
+
+                        if partial_state_destinations is not None:
+                            destinations = partial_state_destinations
+
+                    if destinations is None:
                        # We check the external cache for the destinations, which is
                        # stored per state group.

@@ -446,14 +583,14 @@ class FederationSender(AbstractFederationSender):
                                "get_joined_hosts", str(sg)
                            )
                            if destinations is None:
-                                # Add logging to help track down #13444
+                                # Add logging to help track down https://github.com/matrix-org/synapse/issues/13444
                                logger.info(
                                    "Unexpectedly did not have cached destinations for %s / %s",
                                    sg,
                                    event.event_id,
                                )
                        else:
-                            # Add logging to help track down #13444
+                            # Add logging to help track down https://github.com/matrix-org/synapse/issues/13444
                            logger.info(
                                "Unexpectedly did not have cached prev group for %s",
                                event.event_id,
@@ -587,6 +724,13 @@ class FederationSender(AbstractFederationSender):
            pdu.internal_metadata.stream_ordering,
        )

+        destinations = await filter_destinations_by_retry_limiter(
+            destinations,
+            clock=self.clock,
+            store=self.store,
+            retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
+        )
+
        for destination in destinations:
            self._get_per_destination_queue(destination).send_pdu(pdu)

@@ -599,102 +743,123 @@ class FederationSender(AbstractFederationSender):

        # Some background on the rate-limiting going on here.
        #
-        # It turns out that if we attempt to send out RRs as soon as we get them from
-        # a client, then we end up trying to do several hundred Hz of federation
-        # transactions. (The number of transactions scales as O(N^2) on the size of a
-        # room, since in a large room we have both more RRs coming in, and more servers
-        # to send them to.)
+        # It turns out that if we attempt to send out RRs as soon as we get them
+        # from a client, then we end up trying to do several hundred Hz of
+        # federation transactions. (The number of transactions scales as O(N^2)
+        # on the size of a room, since in a large room we have both more RRs
+        # coming in, and more servers to send them to.)
        #
-        # This leads to a lot of CPU load, and we end up getting behind. The solution
-        # currently adopted is as follows:
+        # This leads to a lot of CPU load, and we end up getting behind. The
+        # solution currently adopted is to differentiate between receipts and
+        # destinations we should immediately send to, and those we can trickle
+        # the receipts to.
        #
-        # The first receipt in a given room is sent out immediately, at time T0. Any
-        # further receipts are, in theory, batched up for N seconds, where N is calculated
-        # based on the number of servers in the room to achieve a transaction frequency
-        # of around 50Hz. So, for example, if there were 100 servers in the room, then
-        # N would be 100 / 50Hz = 2 seconds.
+        # The current logic is to send receipts out immediately if:
+        #   - the room is "small", i.e. there's only N servers to send receipts
+        #     to, and so sending out the receipts immediately doesn't cause too
+        #     much load; or
+        #   - the receipt is for an event that happened recently, as users
+        #     notice if receipts are delayed when they know other users are
+        #     currently reading the room; or
+        #   - the receipt is being sent to the server that sent the event, so
+        #     that users see receipts for their own events quickly.
        #
-        # Then, after T+N, we flush out any receipts that have accumulated, and restart
-        # the timer to flush out more receipts at T+2N, etc. If no receipts accumulate,
-        # we stop the cycle and go back to the start.
+        # For destinations that we should delay sending the receipt to, we queue
+        # the receipts up to be sent in the next transaction, but don't trigger
+        # a new transaction to be sent. We then add the destination to the
+        # `DestinationWakeupQueue`, which will slowly iterate over each
+        # destination and trigger a new transaction to be sent.
        #
-        # However, in practice, it is often possible to flush out receipts earlier: in
-        # particular, if we are sending a transaction to a given server anyway (for
-        # example, because we have a PDU or a RR in another room to send), then we may
-        # as well send out all of the pending RRs for that server. So it may be that
-        # by the time we get to T+N, we don't actually have any RRs left to send out.
-        # Nevertheless we continue to buffer up RRs for the room in question until we
-        # reach the point that no RRs arrive between timer ticks.
+        # However, in practice, it is often possible to send out delayed
+        # receipts earlier: in particular, if we are sending a transaction to a
+        # given server anyway (for example, because we have a PDU or a RR in
+        # another room to send), then we may as well send out all of the pending
+        # RRs for that server. So it may be that by the time we get to waking up
+        # the destination, we don't actually have any RRs left to send out.
        #
-        # For even more background, see https://github.com/matrix-org/synapse/issues/4730.
+        # For even more background, see
+        # https://github.com/matrix-org/synapse/issues/4730.

        room_id = receipt.room_id

+        # Local read receipts always have 1 event ID.
+        event_id = receipt.event_ids[0]
+
        # Work out which remote servers should be poked and poke them.
-        domains_set = await self._storage_controllers.state.get_current_hosts_in_room(
+        domains_set = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
            room_id
        )
-        domains = [
+        domains: StrCollection = [
            d
            for d in domains_set
-            if d != self.server_name
+            if not self.is_mine_server_name(d)
            and self._federation_shard_config.should_handle(self._instance_name, d)
        ]
+
+        domains = await filter_destinations_by_retry_limiter(
+            domains,
+            clock=self.clock,
+            store=self.store,
+            retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
+        )
+
        if not domains:
            return

-        queues_pending_flush = self._queues_awaiting_rr_flush_by_room.get(room_id)
+        # We now split which domains we want to wake up immediately vs which we
+        # want to delay waking up.
+        immediate_domains: StrCollection
+        delay_domains: StrCollection

-        # if there is no flush yet scheduled, we will send out these receipts with
-        # immediate flushes, and schedule the next flush for this room.
-        if queues_pending_flush is not None:
-            logger.debug("Queuing receipt for: %r", domains)
+        if len(domains) < 10:
+            # For "small" rooms send to all domains immediately
+            immediate_domains = domains
+            delay_domains = ()
        else:
-            logger.debug("Sending receipt to: %r", domains)
-            self._schedule_rr_flush_for_room(room_id, len(domains))
+            metadata = await self.store.get_metadata_for_event(
+                receipt.room_id, event_id
+            )
+            assert metadata is not None

-        for domain in domains:
-            queue = self._get_per_destination_queue(domain)
-            queue.queue_read_receipt(receipt)
+            sender_domain = get_domain_from_id(metadata.sender)

-            # if there is already a RR flush pending for this room, then make sure this
-            # destination is registered for the flush
-            if queues_pending_flush is not None:
-                queues_pending_flush.add(queue)
+            if self.clock.time_msec() - metadata.received_ts < 60_000:
+                # We always send receipts for recent messages immediately
+                immediate_domains = domains
+                delay_domains = ()
            else:
-                queue.flush_read_receipts_for_room(room_id)
-
-    def _schedule_rr_flush_for_room(self, room_id: str, n_domains: int) -> None:
-        # that is going to cause approximately len(domains) transactions, so now back
-        # off for that multiplied by RR_TXN_INTERVAL_PER_ROOM
-        backoff_ms = self._rr_txn_interval_per_room_ms * n_domains
-
-        logger.debug("Scheduling RR flush in %s in %d ms", room_id, backoff_ms)
-        self.clock.call_later(backoff_ms, self._flush_rrs_for_room, room_id)
-        self._queues_awaiting_rr_flush_by_room[room_id] = set()
-
-    def _flush_rrs_for_room(self, room_id: str) -> None:
-        queues = self._queues_awaiting_rr_flush_by_room.pop(room_id)
-        logger.debug("Flushing RRs in %s to %s", room_id, queues)
+                # Otherwise, we delay waking up all destinations except for the
+                # sender's domain.
+                immediate_domains = []
+                delay_domains = []
+                for domain in domains:
+                    if domain == sender_domain:
+                        immediate_domains.append(domain)
+                    else:
+                        delay_domains.append(domain)

-        if not queues:
-            # no more RRs arrived for this room; we are done.
-            return
+        for domain in immediate_domains:
+            # Add to destination queue and wake the destination up
+            queue = self._get_per_destination_queue(domain)
+            queue.queue_read_receipt(receipt)
+            queue.attempt_new_transaction()

-        # schedule the next flush
-        self._schedule_rr_flush_for_room(room_id, len(queues))
+        for domain in delay_domains:
+            # Add to destination queue...
+            queue = self._get_per_destination_queue(domain)
+            queue.queue_read_receipt(receipt)

-        for queue in queues:
-            queue.flush_read_receipts_for_room(room_id)
+            # ... and schedule the destination to be woken up.
+            self._destination_wakeup_queue.add_to_queue(domain)

-    def send_presence_to_destinations(
+    async def send_presence_to_destinations(
        self, states: Iterable[UserPresenceState], destinations: Iterable[str]
    ) -> None:
        """Send the given presence states to the given destinations.
        destinations (list[str])
        """

-        if not states or not self.hs.config.server.use_presence:
+        if not states or not self.hs.config.server.track_presence:
            # No-op if presence is disabled.
            return

@@ -702,12 +867,19 @@ class FederationSender(AbstractFederationSender):
        for state in states:
            assert self.is_mine_id(state.user_id)

+        destinations = await filter_destinations_by_retry_limiter(
+            [
+                d
+                for d in destinations
+                if self._federation_shard_config.should_handle(self._instance_name, d)
+            ],
+            clock=self.clock,
+            store=self.store,
+            retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
+        )
+
        for destination in destinations:
-            if destination == self.server_name:
-                continue
-            if not self._federation_shard_config.should_handle(
-                self._instance_name, destination
-            ):
+            if self.is_mine_server_name(destination):
                continue

            self._get_per_destination_queue(destination).send_presence(
@@ -731,7 +903,7 @@ class FederationSender(AbstractFederationSender):
            content: content of EDU
            key: clobbering key for this edu
        """
-        if destination == self.server_name:
+        if self.is_mine_server_name(destination):
            logger.info("Not sending EDU to ourselves")
            return

@@ -767,21 +939,29 @@ class FederationSender(AbstractFederationSender):
        else:
            queue.send_edu(edu)

-    def send_device_messages(self, destination: str, immediate: bool = False) -> None:
-        if destination == self.server_name:
-            logger.warning("Not sending device update to ourselves")
-            return
-
-        if not self._federation_shard_config.should_handle(
-            self._instance_name, destination
-        ):
-            return
+    async def send_device_messages(
+        self, destinations: StrCollection, immediate: bool = True
+    ) -> None:
+        destinations = await filter_destinations_by_retry_limiter(
+            [
+                destination
+                for destination in destinations
+                if self._federation_shard_config.should_handle(
+                    self._instance_name, destination
+                )
+                and not self.is_mine_server_name(destination)
+            ],
+            clock=self.clock,
+            store=self.store,
+            retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
+        )

-        if immediate:
-            self._get_per_destination_queue(destination).attempt_new_transaction()
-        else:
-            self._get_per_destination_queue(destination).mark_new_data()
-            self._destination_wakeup_queue.add_to_queue(destination)
+        for destination in destinations:
+            if immediate:
+                self._get_per_destination_queue(destination).attempt_new_transaction()
+            else:
+                self._get_per_destination_queue(destination).mark_new_data()
+                self._destination_wakeup_queue.add_to_queue(destination)

    def wake_destination(self, destination: str) -> None:
        """Called when we want to retry sending transactions to a remote.
@@ -790,7 +970,7 @@ class FederationSender(AbstractFederationSender):
        might have come back.
        """

-        if destination == self.server_name:
+        if self.is_mine_server_name(destination):
            logger.warning("Not waking up ourselves")
            return

@@ -836,7 +1016,6 @@ class FederationSender(AbstractFederationSender):

            if not destinations_to_wake:
                # finished waking all destinations!
-                self._catchup_after_startup_timer = None
                break

            last_processed = destinations_to_wake[-1]
@@ -853,4 +1032,4 @@ class FederationSender(AbstractFederationSender):
                    last_processed,
                )
                self.wake_destination(destination)
-                await self.clock.sleep(CATCH_UP_STARTUP_INTERVAL_SEC)
+                await self.clock.sleep(WAKEUP_INTERVAL_BETWEEN_DESTINATIONS_SEC)
--- a/synapse/federation/sender/per_destination_queue.py
+++ b/synapse/federation/sender/per_destination_queue.py
-# Copyright 2014-2016 OpenMarket Ltd
-# Copyright 2019 New Vector Ltd
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2021 The Matrix.org Foundation C.I.C.
+# Copyright 2014-2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import datetime
 import logging
+from collections import OrderedDict
 from types import TracebackType
 from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Tuple, Type

@@ -35,7 +42,7 @@ from synapse.logging import issue9533_logger
 from synapse.logging.opentracing import SynapseTags, set_tag
 from synapse.metrics import sent_transactions_counter
 from synapse.metrics.background_process_metrics import run_as_background_process
-from synapse.types import ReadReceipt
+from synapse.types import JsonDict, ReadReceipt
 from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
 from synapse.visibility import filter_events_for_server

@@ -59,6 +66,14 @@ sent_edus_by_type = Counter(
 )


+# If the retry interval is larger than this then we enter "catchup" mode
+CATCHUP_RETRY_INTERVAL = 60 * 60 * 1000
+
+# Limit how many presence states we add to each presence EDU, to ensure that
+# they are bounded in size.
+MAX_PRESENCE_STATES_PER_EDU = 50
+
+
 class PerDestinationQueue:
    """
    Manages the per-destination transmission queues.
@@ -134,11 +149,13 @@ class PerDestinationQueue:

        # Map of user_id -> UserPresenceState of pending presence to be sent to this
        # destination
-        self._pending_presence: Dict[str, UserPresenceState] = {}
+        self._pending_presence: OrderedDict[str, UserPresenceState] = OrderedDict()

-        # room_id -> receipt_type -> user_id -> receipt_dict
-        self._pending_rrs: Dict[str, Dict[str, Dict[str, dict]]] = {}
-        self._rrs_pending_flush = False
+        # List of room_id -> receipt_type -> user_id -> receipt_dict,
+        #
+        # Each EDU can only have a single receipt per
+        # (room ID, receipt type, user ID, thread ID) tuple.
+        self._pending_receipt_edus: List[Dict[str, Dict[str, Dict[str, dict]]]] = []

        # stream_id of last successfully sent to-device message.
        # NB: may be a long or an int.
@@ -202,17 +219,45 @@ class PerDestinationQueue:
        Args:
            receipt: receipt to be queued
        """
-        self._pending_rrs.setdefault(receipt.room_id, {}).setdefault(
-            receipt.receipt_type, {}
-        )[receipt.user_id] = {"event_ids": receipt.event_ids, "data": receipt.data}
-
-    def flush_read_receipts_for_room(self, room_id: str) -> None:
-        # if we don't have any read-receipts for this room, it may be that we've already
-        # sent them out, so we don't need to flush.
-        if room_id not in self._pending_rrs:
-            return
-        self._rrs_pending_flush = True
-        self.attempt_new_transaction()
+        serialized_receipt: JsonDict = {
+            "event_ids": receipt.event_ids,
+            "data": receipt.data,
+        }
+        if receipt.thread_id is not None:
+            serialized_receipt["data"]["thread_id"] = receipt.thread_id
+
+        # Find which EDU to add this receipt to. There are three situations
+        # depending on the (room ID, receipt type, user, thread ID) tuple:
+        #
+        # 1. If it fully matches, clobber the information.
+        # 2. If it is missing, add the information.
+        # 3. If the subset tuple of (room ID, receipt type, user) matches, check
+        #    the next EDU (or add a new EDU).
+        for edu in self._pending_receipt_edus:
+            receipt_content = edu.setdefault(receipt.room_id, {}).setdefault(
+                receipt.receipt_type, {}
+            )
+            # If this room ID, receipt type, user ID is not in this EDU, OR if
+            # the full tuple matches, use the current EDU.
+            if (
+                receipt.user_id not in receipt_content
+                or receipt_content[receipt.user_id].get("thread_id")
+                == receipt.thread_id
+            ):
+                receipt_content[receipt.user_id] = serialized_receipt
+                break
+
+        # If no matching EDU was found, create a new one.
+        else:
+            self._pending_receipt_edus.append(
+                {
+                    receipt.room_id: {
+                        receipt.receipt_type: {receipt.user_id: serialized_receipt}
+                    }
+                }
+            )
+
+        self.mark_new_data()

    def send_keyed_edu(self, edu: Edu, key: Hashable) -> None:
        self._pending_edus_keyed[(edu.edu_type, key)] = edu
@@ -284,12 +329,11 @@ class PerDestinationQueue:
                    # not caught up yet
                    return

-            pending_pdus = []
            while True:
                self._new_data_to_send = False

                async with _TransactionQueueManager(self) as (
-                    pending_pdus,
+                    pending_pdus,  # noqa: F811
                    pending_edus,
                ):
                    if not pending_pdus and not pending_edus:
@@ -331,7 +375,7 @@ class PerDestinationQueue:
                ),
            )

-            if e.retry_interval > 60 * 60 * 1000:
+            if e.retry_interval > CATCHUP_RETRY_INTERVAL:
                # we won't retry for another hour!
                # (this suggests a significant outage)
                # We drop pending EDUs because otherwise they will
@@ -350,8 +394,8 @@ class PerDestinationQueue:
                # through another mechanism, because this is all volatile!
                self._pending_edus = []
                self._pending_edus_keyed = {}
-                self._pending_presence = {}
-                self._pending_rrs = {}
+                self._pending_presence.clear()
+                self._pending_receipt_edus = []

                self._start_catching_up()
        except FederationDeniedError as e:
@@ -458,8 +502,8 @@ class PerDestinationQueue:
            #
            # Note: `catchup_pdus` will have exactly one PDU per room.
            for pdu in catchup_pdus:
-                # The PDU from the DB will be the last PDU in the room from
-                # *this server* that wasn't sent to the remote. However, other
+                # The PDU from the DB will be the newest PDU in the room from *this
+                # server* that we tried---but were unable---to send to the remote. Other
                # servers may have sent lots of events since then, and we want
                # to try and tell the remote only about the *latest* events in
                # the room. This is so that it doesn't get inundated by events
@@ -477,6 +521,11 @@ class PerDestinationQueue:
                    # If the event is in the extremities, then great! We can just
                    # use that without having to do further checks.
                    room_catchup_pdus = [pdu]
+                elif await self._store.is_partial_state_room(pdu.room_id):
+                    # We can't be sure which events the destination should
+                    # see using only partial state. Avoid doing so, and just retry
+                    # sending out the newest PDU the remote is missing from us.
+                    room_catchup_pdus = [pdu]
                else:
                    # If not, fetch the extremities and figure out which we can
                    # send.
@@ -505,8 +554,11 @@ class PerDestinationQueue:
                    new_pdus = await filter_events_for_server(
                        self._storage_controllers,
                        self._destination,
+                        self._server_name,
                        new_pdus,
                        redact=False,
+                        filter_out_erased_senders=True,
+                        filter_out_remote_partial_state_events=True,
                    )

                    # If we've filtered out all the extremities, fall back to
@@ -542,22 +594,24 @@ class PerDestinationQueue:
                    self._destination, last_successful_stream_ordering
                )

-    def _get_rr_edus(self, force_flush: bool) -> Iterable[Edu]:
-        if not self._pending_rrs:
-            return
-        if not force_flush and not self._rrs_pending_flush:
-            # not yet time for this lot
+    def _get_receipt_edus(self, limit: int) -> Iterable[Edu]:
+        if not self._pending_receipt_edus:
            return

-        edu = Edu(
-            origin=self._server_name,
-            destination=self._destination,
-            edu_type=EduTypes.RECEIPT,
-            content=self._pending_rrs,
-        )
-        self._pending_rrs = {}
-        self._rrs_pending_flush = False
-        yield edu
+        # Send at most limit EDUs for receipts.
+        for content in self._pending_receipt_edus[:limit]:
+            yield Edu(
+                origin=self._server_name,
+                destination=self._destination,
+                edu_type=EduTypes.RECEIPT,
+                content=content,
+            )
+        self._pending_receipt_edus = self._pending_receipt_edus[limit:]
+
+        # If there are still pending read-receipts, don't reset the pending flush
+        # flag.
+        if not self._pending_receipt_edus:
+            self._rrs_pending_flush = False

    def _pop_pending_edus(self, limit: int) -> List[Edu]:
        pending_edus = self._pending_edus
@@ -596,7 +650,7 @@ class PerDestinationQueue:
            if not message_id:
                continue

-            set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
+            set_tag(SynapseTags.TO_DEVICE_EDU_ID, message_id)

        edus = [
            Edu(
@@ -644,27 +698,65 @@ class _TransactionQueueManager:
    async def __aenter__(self) -> Tuple[List[EventBase], List[Edu]]:
        # First we calculate the EDUs we want to send, if any.

-        # We start by fetching device related EDUs, i.e device updates and to
-        # device messages. We have to keep 2 free slots for presence and rr_edus.
-        device_edu_limit = MAX_EDUS_PER_TRANSACTION - 2
+        # There's a maximum number of EDUs that can be sent with a transaction.
+        # Generally device updates and to-device messages get priority, but we
+        # want to ensure that there's room for some other EDUs as well.
+        #
+        # This is done by:
+        #
+        # * Add a presence EDU, if one exists.
+        # * Add up to a small limit of read receipt EDUs.
+        # * Add to-device EDUs, but leave some space for device list updates.
+        # * Add device list updates EDUs.
+        # * If there's any remaining room, add other EDUs.
+        pending_edus = []
+
+        # Add presence EDU.
+        if self.queue._pending_presence:
+            # Only send max 50 presence entries in the EDU, to bound the amount
+            # of data we're sending.
+            presence_to_add: List[JsonDict] = []
+            while (
+                self.queue._pending_presence
+                and len(presence_to_add) < MAX_PRESENCE_STATES_PER_EDU
+            ):
+                _, presence = self.queue._pending_presence.popitem(last=False)
+                presence_to_add.append(
+                    format_user_presence_state(presence, self.queue._clock.time_msec())
+                )

-        # We prioritize to-device messages so that existing encryption channels
+            pending_edus.append(
+                Edu(
+                    origin=self.queue._server_name,
+                    destination=self.queue._destination,
+                    edu_type=EduTypes.PRESENCE,
+                    content={"push": presence_to_add},
+                )
+            )
+
+        # Add read receipt EDUs.
+        pending_edus.extend(self.queue._get_receipt_edus(limit=5))
+        edu_limit = MAX_EDUS_PER_TRANSACTION - len(pending_edus)
+
+        # Next, prioritize to-device messages so that existing encryption channels
        # work. We also keep a few slots spare (by reducing the limit) so that
        # we can still trickle out some device list updates.
        (
            to_device_edus,
            device_stream_id,
-        ) = await self.queue._get_to_device_message_edus(device_edu_limit - 10)
+        ) = await self.queue._get_to_device_message_edus(edu_limit - 10)

        if to_device_edus:
            self._device_stream_id = device_stream_id
        else:
            self.queue._last_device_stream_id = device_stream_id

-        device_edu_limit -= len(to_device_edus)
+        pending_edus.extend(to_device_edus)
+        edu_limit -= len(to_device_edus)

+        # Add device list update EDUs.
        device_update_edus, dev_list_id = await self.queue._get_device_update_edus(
-            device_edu_limit
+            edu_limit
        )

        if device_update_edus:
@@ -672,40 +764,17 @@ class _TransactionQueueManager:
        else:
            self.queue._last_device_list_stream_id = dev_list_id

-        pending_edus = device_update_edus + to_device_edus
-
-        # Now add the read receipt EDU.
-        pending_edus.extend(self.queue._get_rr_edus(force_flush=False))
-
-        # And presence EDU.
-        if self.queue._pending_presence:
-            pending_edus.append(
-                Edu(
-                    origin=self.queue._server_name,
-                    destination=self.queue._destination,
-                    edu_type=EduTypes.PRESENCE,
-                    content={
-                        "push": [
-                            format_user_presence_state(
-                                presence, self.queue._clock.time_msec()
-                            )
-                            for presence in self.queue._pending_presence.values()
-                        ]
-                    },
-                )
-            )
-            self.queue._pending_presence = {}
+        pending_edus.extend(device_update_edus)
+        edu_limit -= len(device_update_edus)

        # Finally add any other types of EDUs if there is room.
-        pending_edus.extend(
-            self.queue._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))
-        )
-        while (
-            len(pending_edus) < MAX_EDUS_PER_TRANSACTION
-            and self.queue._pending_edus_keyed
-        ):
+        other_edus = self.queue._pop_pending_edus(edu_limit)
+        pending_edus.extend(other_edus)
+        edu_limit -= len(other_edus)
+        while edu_limit > 0 and self.queue._pending_edus_keyed:
            _, val = self.queue._pending_edus_keyed.popitem()
            pending_edus.append(val)
+            edu_limit -= 1

        # Now we look for any PDUs to send, by getting up to 50 PDUs from the
        # queue
@@ -714,11 +783,6 @@ class _TransactionQueueManager:
        if not self._pdus and not pending_edus:
            return [], []

-        # if we've decided to send a transaction anyway, and we have room, we
-        # may as well send any pending RRs
-        if len(pending_edus) < MAX_EDUS_PER_TRANSACTION:
-            pending_edus.extend(self.queue._get_rr_edus(force_flush=True))
-
        if self._pdus:
            self._last_stream_ordering = self._pdus[
                -1

--- a/synapse/federation/sender/transaction_manager.py
+++ b/synapse/federation/sender/transaction_manager.py
-# Copyright 2019 New Vector Ltd
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
+#
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import logging
 from typing import TYPE_CHECKING, List


--- a/synapse/federation/transport/__init__.py
+++ b/synapse/federation/transport/__init__.py
+#
+# This file is licensed under the Affero General Public License (AGPL) version 3.
+#
 # Copyright 2014-2016 OpenMarket Ltd
+# Copyright (C) 2023 New Vector, Ltd
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# See the GNU Affero General Public License for more details:
+# <https://www.gnu.org/licenses/agpl-3.0.html>.
+#
+# Originally licensed under the Apache License, Version 2.0:
+# <http://www.apache.org/licenses/LICENSE-2.0>.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# [This file includes modifications made by New Vector Limited]
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.

 """The transport layer is responsible for both sending transactions to remote
 homeservers and receiving a variety of requests from other homeservers.
No results found