Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • maunium/synapse
  • leytilera/synapse
2 results
Show changes
# Copyright 2014-2016 OpenMarket Ltd
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2021 The Matrix.org Foundation C.I.C.
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import re
from typing import (
TYPE_CHECKING,
Any,
Awaitable,
Callable,
Dict,
Iterable,
List,
Mapping,
Match,
MutableMapping,
Optional,
Union,
)
import attr
from synapse.api.constants import EventContentFields, EventTypes, RelationTypes
from canonicaljson import encode_canonical_json
from synapse.api.constants import (
CANONICALJSON_MAX_INT,
CANONICALJSON_MIN_INT,
MAX_PDU_SIZE,
EventContentFields,
EventTypes,
RelationTypes,
)
from synapse.api.errors import Codes, SynapseError
from synapse.api.room_versions import RoomVersion
from synapse.types import JsonDict
from synapse.util.frozenutils import unfreeze
from synapse.types import JsonDict, Requester
from . import EventBase
from . import EventBase, StrippedStateEvent, make_event_from_dict
if TYPE_CHECKING:
from synapse.handlers.relations import BundledAggregations
from synapse.server import HomeServer
# Split strings on "." but not "\." This uses a negative lookbehind assertion for '\'
# (?<!stuff) matches if the current position in the string is not preceded
# by a match for 'stuff'.
# TODO: This is fast, but fails to handle "foo\\.bar" which should be treated as
# the literal fields "foo\" and "bar" but will instead be treated as "foo\\.bar"
SPLIT_FIELD_REGEX = re.compile(r"(?<!\\)\.")
# Split strings on "." but not "\." (or "\\\.").
SPLIT_FIELD_REGEX = re.compile(r"\\*\.")
# Find escaped characters, e.g. those with a \ in front of them.
ESCAPE_SEQUENCE_PATTERN = re.compile(r"\\(.)")
CANONICALJSON_MAX_INT = (2**53) - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
# Module API callback that allows adding fields to the unsigned section of
# events that are sent to clients.
ADD_EXTRA_FIELDS_TO_UNSIGNED_CLIENT_EVENT_CALLBACK = Callable[
[EventBase], Awaitable[JsonDict]
]
def prune_event(event: EventBase) -> EventBase:
......@@ -62,17 +81,15 @@ def prune_event(event: EventBase) -> EventBase:
"""
pruned_event_dict = prune_event_dict(event.room_version, event.get_dict())
from . import make_event_from_dict
pruned_event = make_event_from_dict(
pruned_event_dict, event.room_version, event.internal_metadata.get_dict()
)
# copy the internal fields
# Copy the bits of `internal_metadata` that aren't returned by `get_dict`
pruned_event.internal_metadata.stream_ordering = (
event.internal_metadata.stream_ordering
)
pruned_event.internal_metadata.instance_name = event.internal_metadata.instance_name
pruned_event.internal_metadata.outlier = event.internal_metadata.outlier
# Mark the event as redacted
......@@ -81,6 +98,30 @@ def prune_event(event: EventBase) -> EventBase:
return pruned_event
def clone_event(event: EventBase) -> EventBase:
"""Take a copy of the event.
This is mostly useful because it does a *shallow* copy of the `unsigned` data,
which means it can then be updated without corrupting the in-memory cache. Note that
other properties of the event, such as `content`, are *not* (currently) copied here.
"""
# XXX: We rely on at least one of `event.get_dict()` and `make_event_from_dict()`
# making a copy of `unsigned`. Currently, both do, though I don't really know why.
# Still, as long as they do, there's not much point doing yet another copy here.
new_event = make_event_from_dict(
event.get_dict(), event.room_version, event.internal_metadata.get_dict()
)
# Copy the bits of `internal_metadata` that aren't returned by `get_dict`.
new_event.internal_metadata.stream_ordering = (
event.internal_metadata.stream_ordering
)
new_event.internal_metadata.instance_name = event.internal_metadata.instance_name
new_event.internal_metadata.outlier = event.internal_metadata.outlier
return new_event
def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDict:
"""Redacts the event_dict in the same way as `prune_event`, except it
operates on dicts rather than event objects
......@@ -101,13 +142,12 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
"depth",
"prev_events",
"auth_events",
"origin",
"origin_server_ts",
]
# Room versions from before MSC2176 had additional allowed keys.
if not room_version.msc2176_redaction_rules:
allowed_keys.extend(["prev_state", "membership"])
# Earlier room versions from had additional allowed keys.
if not room_version.updated_redaction_rules:
allowed_keys.extend(["prev_state", "membership", "origin"])
event_type = event_dict["type"]
......@@ -120,17 +160,29 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
if event_type == EventTypes.Member:
add_fields("membership")
if room_version.msc3375_redaction_rules:
if room_version.restricted_join_rule_fix:
add_fields(EventContentFields.AUTHORISING_USER)
if room_version.updated_redaction_rules:
# Preserve the signed field under third_party_invite.
third_party_invite = event_dict["content"].get("third_party_invite")
if isinstance(third_party_invite, collections.abc.Mapping):
new_content["third_party_invite"] = {}
if "signed" in third_party_invite:
new_content["third_party_invite"]["signed"] = third_party_invite[
"signed"
]
elif event_type == EventTypes.Create:
# MSC2176 rules state that create events cannot be redacted.
if room_version.msc2176_redaction_rules:
return event_dict
if room_version.updated_redaction_rules:
# MSC2176 rules state that create events cannot have their `content` redacted.
new_content = event_dict["content"]
elif not room_version.implicit_room_creator:
# Some room versions give meaning to `creator`
add_fields("creator")
add_fields("creator")
elif event_type == EventTypes.JoinRules:
add_fields("join_rule")
if room_version.msc3083_join_rules:
if room_version.restricted_join_rule:
add_fields("allow")
elif event_type == EventTypes.PowerLevels:
add_fields(
......@@ -144,24 +196,27 @@ def prune_event_dict(room_version: RoomVersion, event_dict: JsonDict) -> JsonDic
"redact",
)
if room_version.msc2176_redaction_rules:
if room_version.updated_redaction_rules:
add_fields("invite")
if room_version.msc2716_historical:
add_fields("historical")
elif event_type == EventTypes.Aliases and room_version.special_case_aliases_auth:
add_fields("aliases")
elif event_type == EventTypes.RoomHistoryVisibility:
add_fields("history_visibility")
elif event_type == EventTypes.Redaction and room_version.msc2176_redaction_rules:
elif event_type == EventTypes.Redaction and room_version.updated_redaction_rules:
add_fields("redacts")
elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_INSERTION:
add_fields(EventContentFields.MSC2716_NEXT_BATCH_ID)
elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_BATCH:
add_fields(EventContentFields.MSC2716_BATCH_ID)
elif room_version.msc2716_redactions and event_type == EventTypes.MSC2716_MARKER:
add_fields(EventContentFields.MSC2716_INSERTION_EVENT_REFERENCE)
# Protect the rel_type and event_id fields under the m.relates_to field.
if room_version.msc3389_relation_redactions:
relates_to = event_dict["content"].get("m.relates_to")
if isinstance(relates_to, collections.abc.Mapping):
new_relates_to = {}
for field in ("rel_type", "event_id"):
if field in relates_to:
new_relates_to[field] = relates_to[field]
# Only include a non-empty relates_to field.
if new_relates_to:
new_content["m.relates_to"] = new_relates_to
allowed_fields = {k: v for k, v in event_dict.items() if k in allowed_keys}
......@@ -223,6 +278,57 @@ def _copy_field(src: JsonDict, dst: JsonDict, field: List[str]) -> None:
sub_out_dict[key_to_move] = sub_dict[key_to_move]
def _escape_slash(m: Match[str]) -> str:
"""
Replacement function; replace a backslash-backslash or backslash-dot with the
second character. Leaves any other string alone.
"""
if m.group(1) in ("\\", "."):
return m.group(1)
return m.group(0)
def _split_field(field: str) -> List[str]:
"""
Splits strings on unescaped dots and removes escaping.
Args:
field: A string representing a path to a field.
Returns:
A list of nested fields to traverse.
"""
# Convert the field and remove escaping:
#
# 1. "content.body.thing\.with\.dots"
# 2. ["content", "body", "thing\.with\.dots"]
# 3. ["content", "body", "thing.with.dots"]
# Find all dots (and their preceding backslashes). If the dot is unescaped
# then emit a new field part.
result = []
prev_start = 0
for match in SPLIT_FIELD_REGEX.finditer(field):
# If the match is an *even* number of characters than the dot was escaped.
if len(match.group()) % 2 == 0:
continue
# Add a new part (up to the dot, exclusive) after escaping.
result.append(
ESCAPE_SEQUENCE_PATTERN.sub(
_escape_slash, field[prev_start : match.end() - 1]
)
)
prev_start = match.end()
# Add any part of the field after the last unescaped dot. (Note that if the
# character is a dot this correctly adds a blank string.)
result.append(re.sub(r"\\(.)", _escape_slash, field[prev_start:]))
return result
def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
"""Return a new dict with only the fields in 'dictionary' which are present
in 'fields'.
......@@ -230,7 +336,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
If there are no event fields specified then all fields are included.
The entries may include '.' characters to indicate sub-fields.
So ['content.body'] will include the 'body' field of the 'content' object.
A literal '.' character in a field name may be escaped using a '\'.
A literal '.' or '\' character in a field name may be escaped using a '\'.
Args:
dictionary: The dictionary to read from.
......@@ -245,13 +351,7 @@ def only_fields(dictionary: JsonDict, fields: List[str]) -> JsonDict:
# for each field, convert it:
# ["content.body.thing\.with\.dots"] => [["content", "body", "thing\.with\.dots"]]
split_fields = [SPLIT_FIELD_REGEX.split(f) for f in fields]
# for each element of the output array of arrays:
# remove escaping so we can use the right key names.
split_fields[:] = [
[f.replace(r"\.", r".") for f in field_array] for field_array in split_fields
]
split_fields = [_split_field(f) for f in fields]
output: JsonDict = {}
for field_array in split_fields:
......@@ -311,8 +411,9 @@ class SerializeEventConfig:
as_client_event: bool = True
# Function to convert from federation format to client format
event_format: Callable[[JsonDict], JsonDict] = format_event_for_client_v1
# ID of the user's auth token - used for namespacing of transaction IDs
token_id: Optional[int] = None
# The entity that requested the event. This is used to determine whether to include
# the transaction_id in the unsigned section of the event.
requester: Optional[Requester] = None
# List of event fields to include. If empty, all fields will be returned.
only_event_fields: Optional[List[str]] = None
# Some events can have stripped room state stored in the `unsigned` field.
......@@ -349,7 +450,7 @@ def serialize_event(
time_now_ms = int(time_now_ms)
# Should this strip out None's?
d = {k: v for k, v in e.get_dict().items()}
d = dict(e.get_dict().items())
d["event_id"] = e.event_id
......@@ -359,13 +460,50 @@ def serialize_event(
if "redacted_because" in e.unsigned:
d["unsigned"]["redacted_because"] = serialize_event(
e.unsigned["redacted_because"], time_now_ms, config=config
e.unsigned["redacted_because"],
time_now_ms,
config=config,
)
if config.token_id is not None:
if config.token_id == getattr(e.internal_metadata, "token_id", None):
txn_id = getattr(e.internal_metadata, "txn_id", None)
if txn_id is not None:
# If we have a txn_id saved in the internal_metadata, we should include it in the
# unsigned section of the event if it was sent by the same session as the one
# requesting the event.
txn_id: Optional[str] = getattr(e.internal_metadata, "txn_id", None)
if (
txn_id is not None
and config.requester is not None
and config.requester.user.to_string() == e.sender
):
# Some events do not have the device ID stored in the internal metadata,
# this includes old events as well as those created by appservice, guests,
# or with tokens minted with the admin API. For those events, fallback
# to using the access token instead.
event_device_id: Optional[str] = getattr(e.internal_metadata, "device_id", None)
if event_device_id is not None:
if event_device_id == config.requester.device_id:
d["unsigned"]["transaction_id"] = txn_id
else:
# Fallback behaviour: only include the transaction ID if the event
# was sent from the same access token.
#
# For regular users, the access token ID can be used to determine this.
# This includes access tokens minted with the admin API.
#
# For guests and appservice users, we can't check the access token ID
# so assume it is the same session.
event_token_id: Optional[int] = getattr(
e.internal_metadata, "token_id", None
)
if (
(
event_token_id is not None
and config.requester.access_token_id is not None
and event_token_id == config.requester.access_token_id
)
or config.requester.is_guest
or config.requester.app_service
):
d["unsigned"]["transaction_id"] = txn_id
# invite_room_state and knock_room_state are a list of stripped room state events
......@@ -379,6 +517,17 @@ def serialize_event(
if config.as_client_event:
d = config.event_format(d)
# If the event is a redaction, the field with the redacted event ID appears
# in a different location depending on the room version. e.redacts handles
# fetching from the proper location; copy it to the other location for forwards-
# and backwards-compatibility with clients.
if e.type == EventTypes.Redaction and e.redacts is not None:
if e.room_version.updated_redaction_rules:
d["redacts"] = e.redacts
else:
d["content"] = dict(d["content"])
d["content"]["redacts"] = e.redacts
only_event_fields = config.only_event_fields
if only_event_fields:
if not isinstance(only_event_fields, list) or not all(
......@@ -397,14 +546,19 @@ class EventClientSerializer:
clients.
"""
def serialize_event(
def __init__(self, hs: "HomeServer") -> None:
self._store = hs.get_datastores().main
self._add_extra_fields_to_unsigned_client_event_callbacks: List[
ADD_EXTRA_FIELDS_TO_UNSIGNED_CLIENT_EVENT_CALLBACK
] = []
async def serialize_event(
self,
event: Union[JsonDict, EventBase],
time_now: int,
*,
config: SerializeEventConfig = _DEFAULT_SERIALIZE_EVENT_CONFIG,
bundle_aggregations: Optional[Dict[str, "BundledAggregations"]] = None,
apply_edits: bool = True,
) -> JsonDict:
"""Serializes a single event.
......@@ -414,8 +568,7 @@ class EventClientSerializer:
config: Event serialization config
bundle_aggregations: A map from event_id to the aggregations to be bundled
into the event.
apply_edits: Whether the content of the event should be modified to reflect
any replacement in `bundle_aggregations[<event_id>].replace`.
Returns:
The serialized event
"""
......@@ -425,55 +578,37 @@ class EventClientSerializer:
serialized_event = serialize_event(event, time_now, config=config)
new_unsigned = {}
for callback in self._add_extra_fields_to_unsigned_client_event_callbacks:
u = await callback(event)
new_unsigned.update(u)
if new_unsigned:
# We do the `update` this way round so that modules can't clobber
# existing fields.
new_unsigned.update(serialized_event["unsigned"])
serialized_event["unsigned"] = new_unsigned
# Check if there are any bundled aggregations to include with the event.
if bundle_aggregations:
if event.event_id in bundle_aggregations:
self._inject_bundled_aggregations(
await self._inject_bundled_aggregations(
event,
time_now,
config,
bundle_aggregations,
serialized_event,
apply_edits=apply_edits,
)
return serialized_event
def _apply_edit(
self, orig_event: EventBase, serialized_event: JsonDict, edit: EventBase
) -> None:
"""Replace the content, preserving existing relations of the serialized event.
Args:
orig_event: The original event.
serialized_event: The original event, serialized. This is modified.
edit: The event which edits the above.
"""
# Ensure we take copies of the edit content, otherwise we risk modifying
# the original event.
edit_content = edit.content.copy()
# Unfreeze the event content if necessary, so that we may modify it below
edit_content = unfreeze(edit_content)
serialized_event["content"] = edit_content.get("m.new_content", {})
# Check for existing relations
relates_to = orig_event.content.get("m.relates_to")
if relates_to:
# Keep the relations, ensuring we use a dict copy of the original
serialized_event["content"]["m.relates_to"] = relates_to.copy()
else:
serialized_event["content"].pop("m.relates_to", None)
def _inject_bundled_aggregations(
async def _inject_bundled_aggregations(
self,
event: EventBase,
time_now: int,
config: SerializeEventConfig,
bundled_aggregations: Dict[str, "BundledAggregations"],
serialized_event: JsonDict,
apply_edits: bool,
) -> None:
"""Potentially injects bundled aggregations into the unsigned portion of the serialized event.
......@@ -488,8 +623,6 @@ class EventClientSerializer:
While serializing the bundled aggregations this map may be searched
again for additional events in a recursive manner.
serialized_event: The serialized event which may be modified.
apply_edits: Whether the content of the event should be modified to reflect
any replacement in `aggregations.replace`.
"""
# We have already checked that aggregations exist for this event.
......@@ -499,34 +632,29 @@ class EventClientSerializer:
# being serialized.
serialized_aggregations = {}
if event_aggregations.annotations:
serialized_aggregations[
RelationTypes.ANNOTATION
] = event_aggregations.annotations
if event_aggregations.references:
serialized_aggregations[
RelationTypes.REFERENCE
] = event_aggregations.references
serialized_aggregations[RelationTypes.REFERENCE] = (
event_aggregations.references
)
if event_aggregations.replace:
# If there is an edit, optionally apply it to the event.
edit = event_aggregations.replace
if apply_edits:
self._apply_edit(event, serialized_event, edit)
# Include information about it in the relations dict.
serialized_aggregations[RelationTypes.REPLACE] = {
"event_id": edit.event_id,
"origin_server_ts": edit.origin_server_ts,
"sender": edit.sender,
}
#
# Matrix spec v1.5 (https://spec.matrix.org/v1.5/client-server-api/#server-side-aggregation-of-mreplace-relationships)
# said that we should only include the `event_id`, `origin_server_ts` and
# `sender` of the edit; however MSC3925 proposes extending it to the whole
# of the edit, which is what we do here.
serialized_aggregations[RelationTypes.REPLACE] = await self.serialize_event(
event_aggregations.replace,
time_now,
config=config,
)
# Include any threaded replies to this event.
if event_aggregations.thread:
thread = event_aggregations.thread
serialized_latest_event = self.serialize_event(
serialized_latest_event = await self.serialize_event(
thread.latest_event,
time_now,
config=config,
......@@ -549,7 +677,7 @@ class EventClientSerializer:
"m.relations", {}
).update(serialized_aggregations)
def serialize_events(
async def serialize_events(
self,
events: Iterable[Union[JsonDict, EventBase]],
time_now: int,
......@@ -571,7 +699,7 @@ class EventClientSerializer:
The list of serialized events
"""
return [
self.serialize_event(
await self.serialize_event(
event,
time_now,
config=config,
......@@ -580,14 +708,23 @@ class EventClientSerializer:
for event in events
]
def register_add_extra_fields_to_unsigned_client_event_callback(
self, callback: ADD_EXTRA_FIELDS_TO_UNSIGNED_CLIENT_EVENT_CALLBACK
) -> None:
"""Register a callback that returns additions to the unsigned section of
serialized events.
"""
self._add_extra_fields_to_unsigned_client_event_callbacks.append(callback)
_PowerLevel = Union[str, int]
PowerLevelsContent = Mapping[str, Union[_PowerLevel, Mapping[str, _PowerLevel]]]
def copy_and_fixup_power_levels_contents(
old_power_levels: Mapping[str, Union[_PowerLevel, Mapping[str, _PowerLevel]]]
old_power_levels: PowerLevelsContent,
) -> Dict[str, Union[int, Dict[str, int]]]:
"""Copy the content of a power_levels event, unfreezing frozendicts along the way.
"""Copy the content of a power_levels event, unfreezing immutabledicts along the way.
We accept as input power level values which are strings, provided they represent an
integer, e.g. `"`100"` instead of 100. Such strings are converted to integers
......@@ -624,10 +761,10 @@ def _copy_power_level_value_as_integer(
) -> None:
"""Set `power_levels[key]` to the integer represented by `old_value`.
:raises TypeError: if `old_value` is not an integer, nor a base-10 string
:raises TypeError: if `old_value` is neither an integer nor a base-10 string
representation of an integer.
"""
if isinstance(old_value, int):
if type(old_value) is int: # noqa: E721
power_levels[key] = old_value
return
......@@ -655,7 +792,7 @@ def validate_canonicaljson(value: Any) -> None:
* Floats
* NaN, Infinity, -Infinity
"""
if isinstance(value, int):
if type(value) is int: # noqa: E721
if value < CANONICALJSON_MIN_INT or CANONICALJSON_MAX_INT < value:
raise SynapseError(400, "JSON integer out of range", Codes.BAD_JSON)
......@@ -674,3 +811,72 @@ def validate_canonicaljson(value: Any) -> None:
elif not isinstance(value, (bool, str)) and value is not None:
# Other potential JSON values (bool, None, str) are safe.
raise SynapseError(400, "Unknown JSON value", Codes.BAD_JSON)
def maybe_upsert_event_field(
event: EventBase, container: JsonDict, key: str, value: object
) -> bool:
"""Upsert an event field, but only if this doesn't make the event too large.
Returns true iff the upsert took place.
"""
if key in container:
old_value: object = container[key]
container[key] = value
# NB: here and below, we assume that passing a non-None `time_now` argument to
# get_pdu_json doesn't increase the size of the encoded result.
upsert_okay = len(encode_canonical_json(event.get_pdu_json())) <= MAX_PDU_SIZE
if not upsert_okay:
container[key] = old_value
else:
container[key] = value
upsert_okay = len(encode_canonical_json(event.get_pdu_json())) <= MAX_PDU_SIZE
if not upsert_okay:
del container[key]
return upsert_okay
def strip_event(event: EventBase) -> JsonDict:
"""
Used for "stripped state" events which provide a simplified view of the state of a
room intended to help a potential joiner identify the room (relevant when the user
is invited or knocked).
Stripped state events can only have the `sender`, `type`, `state_key` and `content`
properties present.
"""
return {
"type": event.type,
"state_key": event.state_key,
"content": event.content,
"sender": event.sender,
}
def parse_stripped_state_event(raw_stripped_event: Any) -> Optional[StrippedStateEvent]:
"""
Given a raw value from an event's `unsigned` field, attempt to parse it into a
`StrippedStateEvent`.
"""
if isinstance(raw_stripped_event, dict):
# All of these fields are required
type = raw_stripped_event.get("type")
state_key = raw_stripped_event.get("state_key")
sender = raw_stripped_event.get("sender")
content = raw_stripped_event.get("content")
if (
isinstance(type, str)
and isinstance(state_key, str)
and isinstance(sender, str)
and isinstance(content, dict)
):
return StrippedStateEvent(
type=type,
state_key=state_key,
sender=sender,
content=content,
)
return None
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
from typing import Iterable, Type, Union, cast
from typing import List, Type, Union, cast
import jsonschema
from synapse.api.constants import MAX_ALIAS_LENGTH, EventTypes, Membership
from synapse._pydantic_compat import Field, StrictBool, StrictStr
from synapse.api.constants import (
MAX_ALIAS_LENGTH,
EventContentFields,
EventTypes,
Membership,
)
from synapse.api.errors import Codes, SynapseError
from synapse.api.room_versions import EventFormatVersions
from synapse.config.homeserver import HomeServerConfig
......@@ -27,8 +40,10 @@ from synapse.events.utils import (
CANONICALJSON_MIN_INT,
validate_canonicaljson,
)
from synapse.federation.federation_server import server_matches_acl_event
from synapse.types import EventID, JsonDict, RoomID, UserID
from synapse.http.servlet import validate_json_object
from synapse.storage.controllers.state import server_acl_evaluator_from_event
from synapse.types import EventID, JsonDict, RoomID, StrCollection, UserID
from synapse.types.rest import RequestBodyModel
class EventValidator:
......@@ -43,7 +58,7 @@ class EventValidator:
event: The event to validate.
config: The homeserver's configuration.
"""
self.validate_builder(event)
self.validate_builder(event, config)
if event.format_version == EventFormatVersions.ROOM_V1_V2:
EventID.from_string(event.event_id)
......@@ -71,9 +86,14 @@ class EventValidator:
# Depending on the room version, ensure the data is spec compliant JSON.
if event.room_version.strict_canonicaljson:
# Note that only the client controlled portion of the event is
# checked, since we trust the portions of the event we created.
validate_canonicaljson(event.content)
validate_canonicaljson(event.get_pdu_json())
if not 0 < event.origin_server_ts < 2**53:
raise SynapseError(400, "Event timestamp is out of range")
# meow: allow specific users to send potentially dangerous events.
if event.sender in config.meow.validation_override:
return
if event.type == EventTypes.Aliases:
if "aliases" in event.content:
......@@ -88,27 +108,30 @@ class EventValidator:
Codes.INVALID_PARAM,
)
if event.type == EventTypes.Retention:
elif event.type == EventTypes.Retention:
self._validate_retention(event)
if event.type == EventTypes.ServerACL:
if not server_matches_acl_event(config.server.server_name, event):
elif event.type == EventTypes.ServerACL:
server_acl_evaluator = server_acl_evaluator_from_event(event)
if not server_acl_evaluator.server_matches_acl_event(
config.server.server_name
):
raise SynapseError(
400, "Can't create an ACL event that denies the local server"
)
if event.type == EventTypes.PowerLevels:
elif event.type == EventTypes.PowerLevels:
try:
jsonschema.validate(
instance=event.content,
schema=POWER_LEVELS_SCHEMA,
cls=plValidator,
cls=POWER_LEVELS_VALIDATOR,
)
except jsonschema.ValidationError as e:
if e.path:
# example: "users_default": '0' is not of type 'integer'
# cast safety: path entries can be integers, if we fail to validate
# items in an array. However the POWER_LEVELS_SCHEMA doesn't expect
# items in an array. However, the POWER_LEVELS_SCHEMA doesn't expect
# to see any arrays.
message = (
'"' + cast(str, e.path[-1]) + '": ' + e.message # noqa: B306
......@@ -125,6 +148,10 @@ class EventValidator:
errcode=Codes.BAD_JSON,
)
# If the event contains a mentions key, validate it.
if EventContentFields.MENTIONS in event.content:
validate_json_object(event.content[EventContentFields.MENTIONS], Mentions)
def _validate_retention(self, event: EventBase) -> None:
"""Checks that an event that defines the retention policy for a room respects the
format enforced by the spec.
......@@ -139,7 +166,7 @@ class EventValidator:
max_lifetime = event.content.get("max_lifetime")
if min_lifetime is not None:
if not isinstance(min_lifetime, int):
if type(min_lifetime) is not int: # noqa: E721
raise SynapseError(
code=400,
msg="'min_lifetime' must be an integer",
......@@ -147,7 +174,7 @@ class EventValidator:
)
if max_lifetime is not None:
if not isinstance(max_lifetime, int):
if type(max_lifetime) is not int: # noqa: E721
raise SynapseError(
code=400,
msg="'max_lifetime' must be an integer",
......@@ -165,7 +192,9 @@ class EventValidator:
errcode=Codes.BAD_JSON,
)
def validate_builder(self, event: Union[EventBase, EventBuilder]) -> None:
def validate_builder(
self, event: Union[EventBase, EventBuilder], config: HomeServerConfig
) -> None:
"""Validates that the builder/event has roughly the right format. Only
checks values that we expect a proto event to have, rather than all the
fields an event would have
......@@ -183,6 +212,10 @@ class EventValidator:
RoomID.from_string(event.room_id)
UserID.from_string(event.sender)
# meow: allow specific users to send so-called invalid events
if event.sender in config.meow.validation_override:
return
if event.type == EventTypes.Message:
strings = ["body", "msgtype"]
......@@ -213,7 +246,7 @@ class EventValidator:
self._ensure_state_event(event)
def _ensure_strings(self, d: JsonDict, keys: Iterable[str]) -> None:
def _ensure_strings(self, d: JsonDict, keys: StrCollection) -> None:
for s in keys:
if s not in d:
raise SynapseError(400, "'%s' not in content" % (s,))
......@@ -253,12 +286,17 @@ POWER_LEVELS_SCHEMA = {
}
class Mentions(RequestBodyModel):
user_ids: List[StrictStr] = Field(default_factory=list)
room: StrictBool = False
# This could return something newer than Draft 7, but that's the current "latest"
# validator.
def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]:
validator = jsonschema.validators.validator_for(POWER_LEVELS_SCHEMA)
def _create_validator(schema: JsonDict) -> Type[jsonschema.Draft7Validator]:
validator = jsonschema.validators.validator_for(schema)
# by default jsonschema does not consider a frozendict to be an object so
# by default jsonschema does not consider a immutabledict to be an object so
# we need to use a custom type checker
# https://python-jsonschema.readthedocs.io/en/stable/validate/?highlight=object#validating-with-additional-types
type_checker = validator.TYPE_CHECKER.redefine(
......@@ -268,4 +306,4 @@ def _create_power_level_validator() -> Type[jsonschema.Draft7Validator]:
return jsonschema.validators.extend(validator, type_checker=type_checker)
plValidator = _create_power_level_validator()
POWER_LEVELS_VALIDATOR = _create_validator(POWER_LEVELS_SCHEMA)
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" This package includes all the federation specific logic.
"""
"""This package includes all the federation specific logic."""
# Copyright 2015, 2016 OpenMarket Ltd
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2020 The Matrix.org Foundation C.I.C.
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import TYPE_CHECKING, Awaitable, Callable, Optional
from typing import TYPE_CHECKING, Awaitable, Callable, List, Optional, Sequence
from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
from synapse.api.errors import Codes, SynapseError
......@@ -22,6 +29,7 @@ from synapse.crypto.event_signing import check_event_content_hash
from synapse.crypto.keyring import Keyring
from synapse.events import EventBase, make_event_from_dict
from synapse.events.utils import prune_event, validate_canonicaljson
from synapse.federation.units import filter_pdus_for_valid_depth
from synapse.http.servlet import assert_params_in_dict
from synapse.logging.opentracing import log_kv, trace
from synapse.types import JsonDict, get_domain_from_id
......@@ -49,9 +57,9 @@ class FederationBase:
def __init__(self, hs: "HomeServer"):
self.hs = hs
self.server_name = hs.hostname
self._is_mine_server_name = hs.is_mine_server_name
self.keyring = hs.get_keyring()
self.spam_checker = hs.get_spam_checker()
self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
self.store = hs.get_datastores().main
self._clock = hs.get_clock()
self._storage_controllers = hs.get_storage_controllers()
......@@ -137,9 +145,9 @@ class FederationBase:
)
return redacted_event
spam_check = await self.spam_checker.check_event_for_spam(pdu)
spam_check = await self._spam_checker_module_callbacks.check_event_for_spam(pdu)
if spam_check != self.spam_checker.NOT_SPAM:
if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
logger.warning("Event contains spam, soft-failing %s", pdu.event_id)
log_kv(
{
......@@ -231,7 +239,7 @@ async def _check_sigs_on_pdu(
# If this is a join event for a restricted room it may have been authorised
# via a different server from the sending server. Check those signatures.
if (
room_version.msc3083_join_rules
room_version.restricted_join_rule
and pdu.type == EventTypes.Member
and pdu.membership == Membership.JOIN
and EventContentFields.AUTHORISING_USER in pdu.content
......@@ -260,6 +268,15 @@ def _is_invite_via_3pid(event: EventBase) -> bool:
)
def parse_events_from_pdu_json(
pdus_json: Sequence[JsonDict], room_version: RoomVersion
) -> List[EventBase]:
return [
event_from_pdu_json(pdu_json, room_version)
for pdu_json in filter_pdus_for_valid_depth(pdus_json)
]
def event_from_pdu_json(pdu_json: JsonDict, room_version: RoomVersion) -> EventBase:
"""Construct an EventBase from an event json received over federation
......@@ -280,7 +297,7 @@ def event_from_pdu_json(pdu_json: JsonDict, room_version: RoomVersion) -> EventB
_strip_unsigned_values(pdu_json)
depth = pdu_json["depth"]
if not isinstance(depth, int):
if type(depth) is not int: # noqa: E721
raise SynapseError(400, "Depth %r not an intger" % (depth,), Codes.BAD_JSON)
if depth < 0:
......
# Copyright 2015-2022 The Matrix.org Foundation C.I.C.
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2020 Sorunome
# Copyright 2015-2022 The Matrix.org Foundation C.I.C.
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
......@@ -19,7 +26,9 @@ import itertools
import logging
from typing import (
TYPE_CHECKING,
AbstractSet,
Awaitable,
BinaryIO,
Callable,
Collection,
Container,
......@@ -37,7 +46,7 @@ from typing import (
import attr
from prometheus_client import Counter
from synapse.api.constants import EventContentFields, EventTypes, Membership
from synapse.api.constants import Direction, EventContentFields, EventTypes, Membership
from synapse.api.errors import (
CodeMessageException,
Codes,
......@@ -47,6 +56,7 @@ from synapse.api.errors import (
SynapseError,
UnsupportedRoomVersionError,
)
from synapse.api.ratelimiting import Ratelimiter
from synapse.api.room_versions import (
KNOWN_ROOM_VERSIONS,
EventFormatVersions,
......@@ -58,11 +68,13 @@ from synapse.federation.federation_base import (
FederationBase,
InvalidEventSignatureError,
event_from_pdu_json,
parse_events_from_pdu_json,
)
from synapse.federation.transport.client import SendJoinResponse
from synapse.http.client import is_unknown_endpoint
from synapse.http.types import QueryParams
from synapse.logging.opentracing import SynapseTags, log_kv, set_tag, tag_args, trace
from synapse.types import JsonDict, UserID, get_domain_from_id
from synapse.types import JsonDict, StrCollection, UserID, get_domain_from_id
from synapse.util.async_helpers import concurrently_execute
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.retryutils import NotRetryingDestination
......@@ -80,6 +92,18 @@ PDU_RETRY_TIME_MS = 1 * 60 * 1000
T = TypeVar("T")
@attr.s(frozen=True, slots=True, auto_attribs=True)
class PulledPduInfo:
"""
A result object that stores the PDU and info about it like which homeserver we
pulled it from (`pull_origin`)
"""
pdu: EventBase
# Which homeserver we pulled the PDU from
pull_origin: str
class InvalidResponseError(RuntimeError):
"""Helper for _try_destination_list: indicates that the server returned a response
we couldn't parse
......@@ -98,8 +122,9 @@ class SendJoinResult:
# True if 'state' elides non-critical membership events
partial_state: bool
# if 'partial_state' is set, a list of the servers in the room (otherwise empty)
servers_in_room: List[str]
# If 'partial_state' is set, a set of the servers in the room (otherwise empty).
# Always contains the server we joined off.
servers_in_room: AbstractSet[str]
class FederationClient(FederationBase):
......@@ -114,7 +139,9 @@ class FederationClient(FederationBase):
self.hostname = hs.hostname
self.signing_key = hs.signing_key
self._get_pdu_cache: ExpiringCache[str, EventBase] = ExpiringCache(
# Cache mapping `event_id` to a tuple of the event itself and the `pull_origin`
# (which server we pulled the event from)
self._get_pdu_cache: ExpiringCache[str, Tuple[EventBase, str]] = ExpiringCache(
cache_name="get_pdu_cache",
clock=self._clock,
max_len=1000,
......@@ -218,11 +245,16 @@ class FederationClient(FederationBase):
)
async def claim_client_keys(
self, destination: str, content: JsonDict, timeout: Optional[int]
self,
user: UserID,
destination: str,
query: Dict[str, Dict[str, Dict[str, int]]],
timeout: Optional[int],
) -> JsonDict:
"""Claims one-time keys for a device hosted on a remote server.
Args:
user: The user id of the requesting user
destination: Domain name of the remote homeserver
content: The query content.
......@@ -230,8 +262,55 @@ class FederationClient(FederationBase):
The JSON object from the response
"""
sent_queries_counter.labels("client_one_time_keys").inc()
# Convert the query with counts into a stable and unstable query and check
# if attempting to claim more than 1 OTK.
content: Dict[str, Dict[str, str]] = {}
unstable_content: Dict[str, Dict[str, List[str]]] = {}
use_unstable = False
for user_id, one_time_keys in query.items():
for device_id, algorithms in one_time_keys.items():
# If more than one algorithm is requested, attempt to use the unstable
# endpoint.
if sum(algorithms.values()) > 1:
use_unstable = True
if algorithms:
# For the stable query, choose only the first algorithm.
content.setdefault(user_id, {})[device_id] = next(iter(algorithms))
# For the unstable query, repeat each algorithm by count, then
# splat those into chain to get a flattened list of all algorithms.
#
# Converts from {"algo1": 2, "algo2": 2} to ["algo1", "algo1", "algo2"].
unstable_content.setdefault(user_id, {})[device_id] = list(
itertools.chain(
*(
itertools.repeat(algorithm, count)
for algorithm, count in algorithms.items()
)
)
)
if use_unstable:
try:
return await self.transport_layer.claim_client_keys_unstable(
user, destination, unstable_content, timeout
)
except HttpResponseException as e:
# If an error is received that is due to an unrecognised endpoint,
# fallback to the v1 endpoint. Otherwise, consider it a legitimate error
# and raise.
if not is_unknown_endpoint(e):
raise
logger.debug(
"Couldn't claim client keys with the unstable API, falling back to the v1 API"
)
else:
logger.debug("Skipping unstable claim client keys API")
# TODO Potentially attempt multiple queries and combine the results?
return await self.transport_layer.claim_client_keys(
destination, content, timeout
user, destination, content, timeout
)
@trace
......@@ -263,19 +342,15 @@ class FederationClient(FederationBase):
logger.debug("backfill transaction_data=%r", transaction_data)
if not isinstance(transaction_data, dict):
# TODO we probably want an exception type specific to federation
# client validation.
raise TypeError("Backfill transaction_data is not a dict.")
raise InvalidResponseError("Backfill transaction_data is not a dict.")
transaction_data_pdus = transaction_data.get("pdus")
if not isinstance(transaction_data_pdus, list):
# TODO we probably want an exception type specific to federation
# client validation.
raise TypeError("transaction_data.pdus is not a list.")
raise InvalidResponseError("transaction_data.pdus is not a list.")
room_version = await self.store.get_room_version(room_id)
pdus = [event_from_pdu_json(p, room_version) for p in transaction_data_pdus]
pdus = parse_events_from_pdu_json(transaction_data_pdus, room_version)
# Check signatures and hash of pdus, removing any from the list that fail checks
pdus[:] = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
......@@ -319,9 +394,7 @@ class FederationClient(FederationBase):
transaction_data,
)
pdu_list: List[EventBase] = [
event_from_pdu_json(p, room_version) for p in transaction_data["pdus"]
]
pdu_list = parse_events_from_pdu_json(transaction_data["pdus"], room_version)
if pdu_list and pdu_list[0]:
pdu = pdu_list[0]
......@@ -352,11 +425,11 @@ class FederationClient(FederationBase):
@tag_args
async def get_pdu(
self,
destinations: Iterable[str],
destinations: Collection[str],
event_id: str,
room_version: RoomVersion,
timeout: Optional[int] = None,
) -> Optional[EventBase]:
) -> Optional[PulledPduInfo]:
"""Requests the PDU with given origin and ID from the remote home
servers.
......@@ -371,11 +444,11 @@ class FederationClient(FederationBase):
moving to the next destination. None indicates no timeout.
Returns:
The requested PDU, or None if we were unable to find it.
The requested PDU wrapped in `PulledPduInfo`, or None if we were unable to find it.
"""
logger.debug(
"get_pdu: event_id=%s from destinations=%s", event_id, destinations
"get_pdu(event_id=%s): from destinations=%s", event_id, destinations
)
# TODO: Rate limit the number of times we try and get the same event.
......@@ -384,19 +457,25 @@ class FederationClient(FederationBase):
# it gets persisted to the database), so we cache the results of the lookup.
# Note that this is separate to the regular get_event cache which caches
# events once they have been persisted.
event = self._get_pdu_cache.get(event_id)
get_pdu_cache_entry = self._get_pdu_cache.get(event_id)
event = None
pull_origin = None
if get_pdu_cache_entry:
event, pull_origin = get_pdu_cache_entry
# If we don't see the event in the cache, go try to fetch it from the
# provided remote federated destinations
if not event:
else:
pdu_attempts = self.pdu_destination_tried.setdefault(event_id, {})
# TODO: We can probably refactor this to use `_try_destination_list`
for destination in destinations:
now = self._clock.time_msec()
last_attempt = pdu_attempts.get(destination, 0)
if last_attempt + PDU_RETRY_TIME_MS > now:
logger.debug(
"get_pdu: skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)",
"get_pdu(event_id=%s): skipping destination=%s because we tried it recently last_attempt=%s and we only check every %s (now=%s)",
event_id,
destination,
last_attempt,
PDU_RETRY_TIME_MS,
......@@ -411,43 +490,48 @@ class FederationClient(FederationBase):
room_version=room_version,
timeout=timeout,
)
pull_origin = destination
pdu_attempts[destination] = now
if event:
# Prime the cache
self._get_pdu_cache[event.event_id] = event
self._get_pdu_cache[event.event_id] = (event, pull_origin)
# Now that we have an event, we can break out of this
# loop and stop asking other destinations.
break
except NotRetryingDestination as e:
logger.info("get_pdu(event_id=%s): %s", event_id, e)
continue
except FederationDeniedError:
logger.info(
"get_pdu(event_id=%s): Not attempting to fetch PDU from %s because the homeserver is not on our federation whitelist",
event_id,
destination,
)
continue
except SynapseError as e:
logger.info(
"Failed to get PDU %s from %s because %s",
"get_pdu(event_id=%s): Failed to get PDU from %s because %s",
event_id,
destination,
e,
)
continue
except NotRetryingDestination as e:
logger.info(str(e))
continue
except FederationDeniedError as e:
logger.info(str(e))
continue
except Exception as e:
pdu_attempts[destination] = now
logger.info(
"Failed to get PDU %s from %s because %s",
"get_pdu(event_id=%s): Failed to get PDU from %s because %s",
event_id,
destination,
e,
)
continue
if not event:
if not event or not pull_origin:
return None
# `event` now refers to an object stored in `get_pdu_cache`. Our
......@@ -459,7 +543,7 @@ class FederationClient(FederationBase):
event.room_version,
)
return event_copy
return PulledPduInfo(event_copy, pull_origin)
@trace
@tag_args
......@@ -699,12 +783,14 @@ class FederationClient(FederationBase):
pdu_origin = get_domain_from_id(pdu.sender)
if not res and pdu_origin != origin:
try:
res = await self.get_pdu(
pulled_pdu_info = await self.get_pdu(
destinations=[pdu_origin],
event_id=pdu.event_id,
room_version=room_version,
timeout=10000,
)
if pulled_pdu_info is not None:
res = pulled_pdu_info.pdu
except SynapseError:
pass
......@@ -722,7 +808,7 @@ class FederationClient(FederationBase):
room_version = await self.store.get_room_version(room_id)
auth_chain = [event_from_pdu_json(p, room_version) for p in res["auth_chain"]]
auth_chain = parse_events_from_pdu_json(res["auth_chain"], room_version)
signed_auth = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
destination, auth_chain, room_version=room_version
......@@ -730,32 +816,6 @@ class FederationClient(FederationBase):
return signed_auth
def _is_unknown_endpoint(
self, e: HttpResponseException, synapse_error: Optional[SynapseError] = None
) -> bool:
"""
Returns true if the response was due to an endpoint being unimplemented.
Args:
e: The error response received from the remote server.
synapse_error: The above error converted to a SynapseError. This is
automatically generated if not provided.
"""
if synapse_error is None:
synapse_error = e.to_synapse_error()
# There is no good way to detect an "unknown" endpoint.
#
# Dendrite returns a 404 (with a body of "404 page not found");
# Conduit returns a 404 (with no body); and Synapse returns a 400
# with M_UNRECOGNIZED.
#
# This needs to be rather specific as some endpoints truly do return 404
# errors.
return (
e.code == 404 and (not e.response or e.response == b"404 page not found")
) or (e.code == 400 and synapse_error.errcode == Codes.UNRECOGNIZED)
async def _try_destination_list(
self,
description: str,
......@@ -806,7 +866,8 @@ class FederationClient(FederationBase):
)
for destination in destinations:
if destination == self.server_name:
# We don't want to ask our own server for information we don't have
if self._is_mine_server_name(destination):
continue
try:
......@@ -814,9 +875,21 @@ class FederationClient(FederationBase):
except (
RequestSendFailed,
InvalidResponseError,
NotRetryingDestination,
) as e:
logger.warning("Failed to %s via %s: %s", description, destination, e)
# Skip to the next homeserver in the list to try.
continue
except NotRetryingDestination as e:
logger.info("%s: %s", description, e)
continue
except FederationDeniedError:
logger.info(
"%s: Not attempting to %s from %s because the homeserver is not on our federation whitelist",
description,
description,
destination,
)
continue
except UnsupportedRoomVersionError:
raise
except HttpResponseException as e:
......@@ -831,10 +904,10 @@ class FederationClient(FederationBase):
if 500 <= e.code < 600:
failover = True
elif e.code == 400 and synapse_error.errcode in failover_errcodes:
elif 400 <= e.code < 500 and synapse_error.errcode in failover_errcodes:
failover = True
elif failover_on_unknown_endpoint and self._is_unknown_endpoint(
elif failover_on_unknown_endpoint and is_unknown_endpoint(
e, synapse_error
):
failover = True
......@@ -918,7 +991,7 @@ class FederationClient(FederationBase):
if not room_version:
raise UnsupportedRoomVersionError()
if not room_version.msc2403_knocking and membership == Membership.KNOCK:
if not room_version.knock_join_rule and membership == Membership.KNOCK:
raise SynapseError(
400,
"This room version does not support knocking",
......@@ -946,14 +1019,13 @@ class FederationClient(FederationBase):
return destination, ev, room_version
failover_errcodes = {Codes.NOT_FOUND}
# MSC3083 defines additional error codes for room joins. Unfortunately
# we do not yet know the room version, assume these will only be returned
# by valid room versions.
failover_errcodes = (
(Codes.UNABLE_AUTHORISE_JOIN, Codes.UNABLE_TO_GRANT_JOIN)
if membership == Membership.JOIN
else None
)
if membership == Membership.JOIN:
failover_errcodes.add(Codes.UNABLE_AUTHORISE_JOIN)
failover_errcodes.add(Codes.UNABLE_TO_GRANT_JOIN)
return await self._try_destination_list(
"make_" + membership,
......@@ -963,7 +1035,11 @@ class FederationClient(FederationBase):
)
async def send_join(
self, destinations: Iterable[str], pdu: EventBase, room_version: RoomVersion
self,
destinations: Iterable[str],
pdu: EventBase,
room_version: RoomVersion,
partial_state: bool = True,
) -> SendJoinResult:
"""Sends a join event to one of a list of homeservers.
......@@ -976,6 +1052,10 @@ class FederationClient(FederationBase):
pdu: event to be sent
room_version: the version of the room (according to the server that
did the make_join)
partial_state: whether to ask the remote server to omit membership state
events from the response. If the remote server complies,
`partial_state` in the send join result will be set. Defaults to
`True`.
Returns:
The result of the send join request.
......@@ -986,7 +1066,9 @@ class FederationClient(FederationBase):
"""
async def send_request(destination: str) -> SendJoinResult:
response = await self._do_send_join(room_version, destination, pdu)
response = await self._do_send_join(
room_version, destination, pdu, omit_members=partial_state
)
# If an event was returned (and expected to be returned):
#
......@@ -995,7 +1077,7 @@ class FederationClient(FederationBase):
# * Ensure the signatures are good.
#
# Otherwise, fallback to the provided event.
if room_version.msc3083_join_rules and response.event:
if room_version.restricted_join_rule and response.event:
event = response.event
valid_pdu = await self._check_sigs_and_hash_and_fetch_one(
......@@ -1075,7 +1157,7 @@ class FederationClient(FederationBase):
# NB: We *need* to copy to ensure that we don't have multiple
# references being passed on, as that causes... issues.
for s in signed_state:
s.internal_metadata = copy.deepcopy(s.internal_metadata)
s.internal_metadata = s.internal_metadata.copy()
# double-check that the auth chain doesn't include a different create event
auth_chain_create_events = [
......@@ -1091,23 +1173,37 @@ class FederationClient(FederationBase):
% (auth_chain_create_events,)
)
if response.partial_state and not response.servers_in_room:
raise InvalidResponseError(
"partial_state was set, but no servers were listed in the room"
)
servers_in_room = None
if response.servers_in_room is not None:
servers_in_room = set(response.servers_in_room)
if response.members_omitted:
if not servers_in_room:
raise InvalidResponseError(
"members_omitted was set, but no servers were listed in the room"
)
if not partial_state:
raise InvalidResponseError(
"members_omitted was set, but we asked for full state"
)
# `servers_in_room` is supposed to be a complete list.
# Fix things up in case the remote homeserver is badly behaved.
servers_in_room.add(destination)
return SendJoinResult(
event=event,
state=signed_state,
auth_chain=signed_auth,
origin=destination,
partial_state=response.partial_state,
servers_in_room=response.servers_in_room or [],
partial_state=response.members_omitted,
servers_in_room=servers_in_room or frozenset(),
)
# MSC3083 defines additional error codes for room joins.
failover_errcodes = None
if room_version.msc3083_join_rules:
if room_version.restricted_join_rule:
failover_errcodes = (
Codes.UNABLE_AUTHORISE_JOIN,
Codes.UNABLE_TO_GRANT_JOIN,
......@@ -1126,7 +1222,11 @@ class FederationClient(FederationBase):
)
async def _do_send_join(
self, room_version: RoomVersion, destination: str, pdu: EventBase
self,
room_version: RoomVersion,
destination: str,
pdu: EventBase,
omit_members: bool,
) -> SendJoinResponse:
time_now = self._clock.time_msec()
......@@ -1137,12 +1237,13 @@ class FederationClient(FederationBase):
room_id=pdu.room_id,
event_id=pdu.event_id,
content=pdu.get_pdu_json(time_now),
omit_members=omit_members,
)
except HttpResponseException as e:
# If an error is received that is due to an unrecognised endpoint,
# fallback to the v1 endpoint. Otherwise, consider it a legitimate error
# and raise.
if not self._is_unknown_endpoint(e):
if not is_unknown_endpoint(e):
raise
logger.debug("Couldn't send_join with the v2 API, falling back to the v1 API")
......@@ -1216,7 +1317,7 @@ class FederationClient(FederationBase):
# fallback to the v1 endpoint if the room uses old-style event IDs.
# Otherwise, consider it a legitimate error and raise.
err = e.to_synapse_error()
if self._is_unknown_endpoint(e, err):
if is_unknown_endpoint(e, err):
if room_version.event_format != EventFormatVersions.ROOM_V1_V2:
raise SynapseError(
400,
......@@ -1277,7 +1378,7 @@ class FederationClient(FederationBase):
# If an error is received that is due to an unrecognised endpoint,
# fallback to the v1 endpoint. Otherwise, consider it a legitimate error
# and raise.
if not self._is_unknown_endpoint(e):
if not is_unknown_endpoint(e):
raise
logger.debug("Couldn't send_leave with the v2 API, falling back to the v1 API")
......@@ -1309,7 +1410,7 @@ class FederationClient(FederationBase):
The remote homeserver return some state from the room. The response
dictionary is in the form:
{"knock_state_events": [<state event dict>, ...]}
{"knock_room_state": [<state event dict>, ...]}
The list of state events may be empty.
......@@ -1336,7 +1437,7 @@ class FederationClient(FederationBase):
The remote homeserver can optionally return some state from the room. The response
dictionary is in the form:
{"knock_state_events": [<state event dict>, ...]}
{"knock_room_state": [<state event dict>, ...]}
The list of state events may be empty.
"""
......@@ -1427,9 +1528,7 @@ class FederationClient(FederationBase):
room_version = await self.store.get_room_version(room_id)
events = [
event_from_pdu_json(e, room_version) for e in content.get("events", [])
]
events = parse_events_from_pdu_json(content.get("events", []), room_version)
signed_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
destination, events, room_version=room_version
......@@ -1448,7 +1547,7 @@ class FederationClient(FederationBase):
self, destinations: Iterable[str], room_id: str, event_dict: JsonDict
) -> None:
for destination in destinations:
if destination == self.server_name:
if self._is_mine_server_name(destination):
continue
try:
......@@ -1548,7 +1647,7 @@ class FederationClient(FederationBase):
# If an error is received that is due to an unrecognised endpoint,
# fallback to the unstable endpoint. Otherwise, consider it a
# legitimate error and raise.
if not self._is_unknown_endpoint(e):
if not is_unknown_endpoint(e):
raise
logger.debug(
......@@ -1609,7 +1708,70 @@ class FederationClient(FederationBase):
return result
async def timestamp_to_event(
self, destination: str, room_id: str, timestamp: int, direction: str
self,
*,
destinations: StrCollection,
room_id: str,
timestamp: int,
direction: Direction,
) -> Optional["TimestampToEventResponse"]:
"""
Calls each remote federating server from `destinations` asking for their closest
event to the given timestamp in the given direction until we get a response.
Also validates the response to always return the expected keys or raises an
error.
Args:
destinations: The domains of homeservers to try fetching from
room_id: Room to fetch the event from
timestamp: The point in time (inclusive) we should navigate from in
the given direction to find the closest event.
direction: indicates whether we should navigate forward
or backward from the given timestamp to find the closest event.
Returns:
A parsed TimestampToEventResponse including the closest event_id
and origin_server_ts or None if no destination has a response.
"""
async def _timestamp_to_event_from_destination(
destination: str,
) -> TimestampToEventResponse:
return await self._timestamp_to_event_from_destination(
destination, room_id, timestamp, direction
)
try:
# Loop through each homeserver candidate until we get a succesful response
timestamp_to_event_response = await self._try_destination_list(
"timestamp_to_event",
destinations,
# TODO: The requested timestamp may lie in a part of the
# event graph that the remote server *also* didn't have,
# in which case they will have returned another event
# which may be nowhere near the requested timestamp. In
# the future, we may need to reconcile that gap and ask
# other homeservers, and/or extend `/timestamp_to_event`
# to return events on *both* sides of the timestamp to
# help reconcile the gap faster.
_timestamp_to_event_from_destination,
# Since this endpoint is new, we should try other servers before giving up.
# We can safely remove this in a year (remove after 2023-11-16).
failover_on_unknown_endpoint=True,
)
return timestamp_to_event_response
except SynapseError as e:
logger.warn(
"timestamp_to_event(room_id=%s, timestamp=%s, direction=%s): encountered error when trying to fetch from destinations: %s",
room_id,
timestamp,
direction,
e,
)
return None
async def _timestamp_to_event_from_destination(
self, destination: str, room_id: str, timestamp: int, direction: Direction
) -> "TimestampToEventResponse":
"""
Calls a remote federating server at `destination` asking for their
......@@ -1622,7 +1784,7 @@ class FederationClient(FederationBase):
room_id: Room to fetch the event from
timestamp: The point in time (inclusive) we should navigate from in
the given direction to find the closest event.
direction: ["f"|"b"] to indicate whether we should navigate forward
direction: indicates whether we should navigate forward
or backward from the given timestamp to find the closest event.
Returns:
......@@ -1706,6 +1868,95 @@ class FederationClient(FederationBase):
return filtered_statuses, filtered_failures
async def federation_download_media(
self,
destination: str,
media_id: str,
output_stream: BinaryIO,
max_size: int,
max_timeout_ms: int,
download_ratelimiter: Ratelimiter,
ip_address: str,
) -> Union[
Tuple[int, Dict[bytes, List[bytes]], bytes],
Tuple[int, Dict[bytes, List[bytes]]],
]:
try:
return await self.transport_layer.federation_download_media(
destination,
media_id,
output_stream=output_stream,
max_size=max_size,
max_timeout_ms=max_timeout_ms,
download_ratelimiter=download_ratelimiter,
ip_address=ip_address,
)
except HttpResponseException as e:
# If an error is received that is due to an unrecognised endpoint,
# fallback to the _matrix/media/v3/download endpoint. Otherwise, consider it a legitimate error
# and raise.
if not is_unknown_endpoint(e):
raise
logger.debug(
"Couldn't download media %s/%s over _matrix/federation/v1/media/download, falling back to _matrix/media/v3/download path",
destination,
media_id,
)
return await self.transport_layer.download_media_v3(
destination,
media_id,
output_stream=output_stream,
max_size=max_size,
max_timeout_ms=max_timeout_ms,
download_ratelimiter=download_ratelimiter,
ip_address=ip_address,
)
async def download_media(
self,
destination: str,
media_id: str,
output_stream: BinaryIO,
max_size: int,
max_timeout_ms: int,
download_ratelimiter: Ratelimiter,
ip_address: str,
) -> Tuple[int, Dict[bytes, List[bytes]]]:
try:
return await self.transport_layer.download_media_v3(
destination,
media_id,
output_stream=output_stream,
max_size=max_size,
max_timeout_ms=max_timeout_ms,
download_ratelimiter=download_ratelimiter,
ip_address=ip_address,
)
except HttpResponseException as e:
# If an error is received that is due to an unrecognised endpoint,
# fallback to the r0 endpoint. Otherwise, consider it a legitimate error
# and raise.
if not is_unknown_endpoint(e):
raise
logger.debug(
"Couldn't download media %s/%s with the v3 API, falling back to the r0 API",
destination,
media_id,
)
return await self.transport_layer.download_media_r0(
destination,
media_id,
output_stream=output_stream,
max_size=max_size,
max_timeout_ms=max_timeout_ms,
download_ratelimiter=download_ratelimiter,
ip_address=ip_address,
)
@attr.s(frozen=True, slots=True, auto_attribs=True)
class TimestampToEventResponse:
......@@ -1735,7 +1986,7 @@ class TimestampToEventResponse:
)
origin_server_ts = d.get("origin_server_ts")
if not isinstance(origin_server_ts, int):
if type(origin_server_ts) is not int: # noqa: E721
raise ValueError(
"Invalid response: 'origin_server_ts' must be a int but received %r"
% origin_server_ts
......
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright 2018 New Vector Ltd
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2019-2021 Matrix.org Federation C.I.C
# Copyright 2015, 2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import random
from typing import (
......@@ -23,24 +29,30 @@ from typing import (
Collection,
Dict,
List,
Mapping,
Optional,
Tuple,
Union,
)
from matrix_common.regex import glob_to_regex
from prometheus_client import Counter, Gauge, Histogram
from twisted.internet.abstract import isIPAddress
from twisted.python import failure
from synapse.api.constants import EduTypes, EventContentFields, EventTypes, Membership
from synapse.api.constants import (
Direction,
EduTypes,
EventContentFields,
EventTypes,
Membership,
)
from synapse.api.errors import (
AuthError,
Codes,
FederationError,
IncompatibleRoomVersionError,
NotFoundError,
PartialStateConflictError,
SynapseError,
UnsupportedRoomVersionError,
)
......@@ -54,7 +66,8 @@ from synapse.federation.federation_base import (
event_from_pdu_json,
)
from synapse.federation.persistence import TransactionActions
from synapse.federation.units import Edu, Transaction
from synapse.federation.units import Edu, Transaction, serialize_and_filter_pdus
from synapse.handlers.worker_lock import NEW_EVENT_DURING_PURGE_LOCK_NAME
from synapse.http.servlet import assert_params_in_dict
from synapse.logging.context import (
make_deferred_yieldable,
......@@ -62,7 +75,9 @@ from synapse.logging.context import (
run_in_background,
)
from synapse.logging.opentracing import (
SynapseTags,
log_kv,
set_tag,
start_active_span_from_edu,
tag_args,
trace,
......@@ -72,10 +87,11 @@ from synapse.replication.http.federation import (
ReplicationFederationSendEduRestServlet,
ReplicationGetQueryRestServlet,
)
from synapse.storage.databases.main.events import PartialStateConflictError
from synapse.storage.databases.main.lock import Lock
from synapse.types import JsonDict, StateMap, get_domain_from_id
from synapse.util import json_decoder, unwrapFirstError
from synapse.storage.databases.main.roommember import extract_heroes_from_room_summary
from synapse.storage.roommember import MemberSummary
from synapse.types import JsonDict, StateMap, UserID, get_domain_from_id
from synapse.util import unwrapFirstError
from synapse.util.async_helpers import Linearizer, concurrently_execute, gather_results
from synapse.util.caches.response_cache import ResponseCache
from synapse.util.stringutils import parse_server_name
......@@ -118,12 +134,15 @@ class FederationServer(FederationBase):
def __init__(self, hs: "HomeServer"):
super().__init__(hs)
self.server_name = hs.hostname
self.handler = hs.get_federation_handler()
self._spam_checker = hs.get_spam_checker()
self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
self._federation_event_handler = hs.get_federation_event_handler()
self.state = hs.get_state_handler()
self._event_auth_handler = hs.get_event_auth_handler()
self._room_member_handler = hs.get_room_member_handler()
self._e2e_keys_handler = hs.get_e2e_keys_handler()
self._worker_lock_handler = hs.get_worker_locks_handler()
self._state_storage_controller = hs.get_storage_controllers().state
......@@ -150,9 +169,9 @@ class FederationServer(FederationBase):
# We cache responses to state queries, as they take a while and often
# come in waves.
self._state_resp_cache: ResponseCache[
Tuple[str, Optional[str]]
] = ResponseCache(hs.get_clock(), "state_resp", timeout_ms=30000)
self._state_resp_cache: ResponseCache[Tuple[str, Optional[str]]] = (
ResponseCache(hs.get_clock(), "state_resp", timeout_ms=30000)
)
self._state_ids_resp_cache: ResponseCache[Tuple[str, str]] = ResponseCache(
hs.get_clock(), "state_ids_resp", timeout_ms=30000
)
......@@ -214,7 +233,7 @@ class FederationServer(FederationBase):
return 200, res
async def on_timestamp_to_event_request(
self, origin: str, room_id: str, timestamp: int, direction: str
self, origin: str, room_id: str, timestamp: int, direction: Direction
) -> Tuple[int, Dict[str, Any]]:
"""When we receive a federated `/timestamp_to_event` request,
handle all of the logic for validating and fetching the event.
......@@ -224,7 +243,7 @@ class FederationServer(FederationBase):
room_id: Room to fetch the event from
timestamp: The point in time (inclusive) we should navigate from in
the given direction to find the closest event.
direction: ["f"|"b"] to indicate whether we should navigate forward
direction: indicates whether we should navigate forward
or backward from the given timestamp to find the closest event.
Returns:
......@@ -450,7 +469,12 @@ class FederationServer(FederationBase):
logger.info("Ignoring PDU: %s", e)
continue
event = event_from_pdu_json(p, room_version)
try:
event = event_from_pdu_json(p, room_version)
except SynapseError as e:
logger.info("Ignoring PDU for failing to deserialize: %s", e)
continue
pdus_by_room.setdefault(room_id, []).append(event)
if event.origin_server_ts > newest_pdu_ts:
......@@ -481,6 +505,14 @@ class FederationServer(FederationBase):
pdu_results[pdu.event_id] = await process_pdu(pdu)
async def process_pdu(pdu: EventBase) -> JsonDict:
"""
Processes a pushed PDU sent to us via a `/send` transaction
Returns:
JsonDict representing a "PDU Processing Result" that will be bundled up
with the other processed PDU's in the `/send` transaction and sent back
to remote homeserver.
"""
event_id = pdu.event_id
with nested_logging_context(event_id):
try:
......@@ -494,7 +526,7 @@ class FederationServer(FederationBase):
logger.error(
"Failed to handle PDU %s",
event_id,
exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore
exc_info=(f.type, f.value, f.getTracebackObject()),
)
return {"error": str(e)}
......@@ -519,7 +551,25 @@ class FederationServer(FederationBase):
edu_type=edu_dict["edu_type"],
content=edu_dict["content"],
)
await self.registry.on_edu(edu.edu_type, origin, edu.content)
try:
await self.registry.on_edu(edu.edu_type, origin, edu.content)
except Exception:
# If there was an error handling the EDU, we must reject the
# transaction.
#
# Some EDU types (notably, to-device messages) are, despite their name,
# expected to be reliable; if we weren't able to do something with it,
# we have to tell the sender that, and the only way the protocol gives
# us to do so is by sending an HTTP error back on the transaction.
#
# We log the exception now, and then raise a new SynapseError to cause
# the transaction to be failed.
logger.exception("Error handling EDU of type %s", edu.edu_type)
raise SynapseError(500, f"Error handing EDU of type {edu.edu_type}")
# TODO: if the first EDU fails, we should probably abort the whole
# thing rather than carrying on with the rest of them. That would
# probably be best done inside `concurrently_execute`.
await concurrently_execute(
_process_edu,
......@@ -591,8 +641,8 @@ class FederationServer(FederationBase):
)
return {
"pdus": [pdu.get_pdu_json() for pdu in pdus],
"auth_chain": [pdu.get_pdu_json() for pdu in auth_chain],
"pdus": serialize_and_filter_pdus(pdus),
"auth_chain": serialize_and_filter_pdus(auth_chain),
}
async def on_pdu_request(
......@@ -629,7 +679,7 @@ class FederationServer(FederationBase):
# This is in addition to the HS-level rate limiting applied by
# BaseFederationServlet.
# type-ignore: mypy doesn't seem able to deduce the type of the limiter(!?)
await self._room_member_handler._join_rate_per_room_limiter.ratelimit( # type: ignore[has-type]
await self._room_member_handler._join_rate_per_room_limiter.ratelimit(
requester=None,
key=room_id,
update=False,
......@@ -668,7 +718,11 @@ class FederationServer(FederationBase):
room_id: str,
caller_supports_partial_state: bool = False,
) -> Dict[str, Any]:
await self._room_member_handler._join_rate_per_room_limiter.ratelimit( # type: ignore[has-type]
set_tag(
SynapseTags.SEND_JOIN_RESPONSE_IS_PARTIAL_STATE,
caller_supports_partial_state,
)
await self._room_member_handler._join_rate_per_room_limiter.ratelimit(
requester=None,
key=room_id,
update=False,
......@@ -683,8 +737,9 @@ class FederationServer(FederationBase):
state_event_ids: Collection[str]
servers_in_room: Optional[Collection[str]]
if caller_supports_partial_state:
summary = await self.store.get_room_summary(room_id)
state_event_ids = _get_event_ids_for_partial_state_join(
event, prev_state_ids
event, prev_state_ids, summary
)
servers_in_room = await self.state.get_hosts_in_room_at_events(
room_id, event_ids=event.prev_event_ids()
......@@ -711,13 +766,13 @@ class FederationServer(FederationBase):
event_json = event.get_pdu_json(time_now)
resp = {
"event": event_json,
"state": [p.get_pdu_json(time_now) for p in state_events],
"auth_chain": [p.get_pdu_json(time_now) for p in auth_chain_events],
"org.matrix.msc3706.partial_state": caller_supports_partial_state,
"state": serialize_and_filter_pdus(state_events, time_now),
"auth_chain": serialize_and_filter_pdus(auth_chain_events, time_now),
"members_omitted": caller_supports_partial_state,
}
if servers_in_room is not None:
resp["org.matrix.msc3706.servers_in_room"] = list(servers_in_room)
resp["servers_in_room"] = list(servers_in_room)
return resp
......@@ -780,7 +835,7 @@ class FederationServer(FederationBase):
raise IncompatibleRoomVersionError(room_version=room_version.identifier)
# Check that this room supports knocking as defined by its room version
if not room_version.msc2403_knocking:
if not room_version.knock_join_rule:
raise SynapseError(
403,
"This room version does not support knocking",
......@@ -824,14 +879,7 @@ class FederationServer(FederationBase):
context, self._room_prejoin_state_types
)
)
return {
"knock_room_state": stripped_room_state,
# Since v1.37, Synapse incorrectly used "knock_state_events" for this field.
# Thus, we also populate a 'knock_state_events' with the same content to
# support old instances.
# See https://github.com/matrix-org/synapse/issues/14088.
"knock_state_events": stripped_room_state,
}
return {"knock_room_state": stripped_room_state}
async def _on_send_membership_event(
self, origin: str, content: JsonDict, membership_type: str, room_id: str
......@@ -883,7 +931,7 @@ class FederationServer(FederationBase):
errcode=Codes.NOT_FOUND,
)
if membership_type == Membership.KNOCK and not room_version.msc2403_knocking:
if membership_type == Membership.KNOCK and not room_version.knock_join_rule:
raise SynapseError(
403,
"This room version does not support knocking",
......@@ -907,7 +955,7 @@ class FederationServer(FederationBase):
# the event is valid to be sent into the room. Currently this is only done
# if the user is being joined via restricted join rules.
if (
room_version.msc3083_join_rules
room_version.restricted_join_rule
and event.membership == Membership.JOIN
and EventContentFields.AUTHORISING_USER in event.content
):
......@@ -915,10 +963,10 @@ class FederationServer(FederationBase):
authorising_server = get_domain_from_id(
event.content[EventContentFields.AUTHORISING_USER]
)
if authorising_server != self.server_name:
if not self._is_mine_server_name(authorising_server):
raise SynapseError(
400,
f"Cannot authorise request from resident server: {authorising_server}",
f"Cannot authorise membership event for {authorising_server}. We can only authorise requests from our own homeserver",
)
event.signatures.update(
......@@ -962,7 +1010,7 @@ class FederationServer(FederationBase):
time_now = self._clock.time_msec()
auth_pdus = await self.handler.on_event_auth(event_id)
res = {"auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus]}
res = {"auth_chain": serialize_and_filter_pdus(auth_pdus, time_now)}
return 200, res
async def on_query_client_keys(
......@@ -978,23 +1026,27 @@ class FederationServer(FederationBase):
@trace
async def on_claim_client_keys(
self, origin: str, content: JsonDict
self, query: List[Tuple[str, str, str, int]], always_include_fallback_keys: bool
) -> Dict[str, Any]:
query = []
for user_id, device_keys in content.get("one_time_keys", {}).items():
for device_id, algorithm in device_keys.items():
query.append((user_id, device_id, algorithm))
if any(
not self.hs.is_mine(UserID.from_string(user_id))
for user_id, _, _, _ in query
):
raise SynapseError(400, "User is not hosted on this homeserver")
log_kv({"message": "Claiming one time keys.", "user, device pairs": query})
results = await self.store.claim_e2e_one_time_keys(query)
results = await self._e2e_keys_handler.claim_local_one_time_keys(
query, always_include_fallback_keys=always_include_fallback_keys
)
json_result: Dict[str, Dict[str, dict]] = {}
for user_id, device_keys in results.items():
for device_id, keys in device_keys.items():
for key_id, json_str in keys.items():
json_result.setdefault(user_id, {})[device_id] = {
key_id: json_decoder.decode(json_str)
}
json_result: Dict[str, Dict[str, Dict[str, JsonDict]]] = {}
for result in results:
for user_id, device_keys in result.items():
for device_id, keys in device_keys.items():
for key_id, key in keys.items():
json_result.setdefault(user_id, {}).setdefault(device_id, {})[
key_id
] = key
logger.info(
"Claimed one-time-keys: %s",
......@@ -1043,7 +1095,7 @@ class FederationServer(FederationBase):
time_now = self._clock.time_msec()
return {"events": [ev.get_pdu_json(time_now) for ev in missing_events]}
return {"events": serialize_and_filter_pdus(missing_events, time_now)}
async def on_openid_userinfo(self, token: str) -> Optional[str]:
ts_now_ms = self._clock.time_msec()
......@@ -1103,7 +1155,7 @@ class FederationServer(FederationBase):
logger.warning("event id %s: %s", pdu.event_id, e)
raise FederationError("ERROR", 403, str(e), affected=pdu.event_id)
if await self._spam_checker.should_drop_federated_event(pdu):
if await self._spam_checker_module_callbacks.should_drop_federated_event(pdu):
logger.warning(
"Unstaged federated event contains spam, dropping %s", pdu.event_id
)
......@@ -1148,7 +1200,9 @@ class FederationServer(FederationBase):
origin, event = next
if await self._spam_checker.should_drop_federated_event(event):
if await self._spam_checker_module_callbacks.should_drop_federated_event(
event
):
logger.warning(
"Staged federated event contains spam, dropping %s",
event.event_id,
......@@ -1210,9 +1264,18 @@ class FederationServer(FederationBase):
logger.info("handling received PDU in room %s: %s", room_id, event)
try:
with nested_logging_context(event.event_id):
await self._federation_event_handler.on_receive_pdu(
origin, event
)
# We're taking out a lock within a lock, which could
# lead to deadlocks if we're not careful. However, it is
# safe on this occasion as we only ever take a write
# lock when deleting a room, which we would never do
# while holding the `_INBOUND_EVENT_HANDLING_LOCK_NAME`
# lock.
async with self._worker_lock_handler.acquire_read_write_lock(
NEW_EVENT_DURING_PURGE_LOCK_NAME, room_id, write=False
):
await self._federation_event_handler.on_receive_pdu(
origin, event
)
except FederationError as e:
# XXX: Ideally we'd inform the remote we failed to process
# the event, but we can't return an error in the transaction
......@@ -1223,7 +1286,7 @@ class FederationServer(FederationBase):
logger.error(
"Failed to handle PDU %s",
event.event_id,
exc_info=(f.type, f.value, f.getTracebackObject()), # type: ignore
exc_info=(f.type, f.value, f.getTracebackObject()),
)
received_ts = await self.store.remove_received_event_from_staging(
......@@ -1267,9 +1330,6 @@ class FederationServer(FederationBase):
return
lock = new_lock
def __str__(self) -> str:
return "<ReplicationLayer(%s)>" % self.server_name
async def exchange_third_party_invite(
self, sender_user_id: str, target_user_id: str, room_id: str, signed: Dict
) -> None:
......@@ -1290,75 +1350,13 @@ class FederationServer(FederationBase):
Raises:
AuthError if the server does not match the ACL
"""
acl_event = await self._storage_controllers.state.get_current_state_event(
room_id, EventTypes.ServerACL, ""
)
if not acl_event or server_matches_acl_event(server_name, acl_event):
return
raise AuthError(code=403, msg="Server is banned from room")
def server_matches_acl_event(server_name: str, acl_event: EventBase) -> bool:
"""Check if the given server is allowed by the ACL event
Args:
server_name: name of server, without any port part
acl_event: m.room.server_acl event
Returns:
True if this server is allowed by the ACLs
"""
logger.debug("Checking %s against acl %s", server_name, acl_event.content)
# first of all, check if literal IPs are blocked, and if so, whether the
# server name is a literal IP
allow_ip_literals = acl_event.content.get("allow_ip_literals", True)
if not isinstance(allow_ip_literals, bool):
logger.warning("Ignoring non-bool allow_ip_literals flag")
allow_ip_literals = True
if not allow_ip_literals:
# check for ipv6 literals. These start with '['.
if server_name[0] == "[":
return False
# check for ipv4 literals. We can just lift the routine from twisted.
if isIPAddress(server_name):
return False
# next, check the deny list
deny = acl_event.content.get("deny", [])
if not isinstance(deny, (list, tuple)):
logger.warning("Ignoring non-list deny ACL %s", deny)
deny = []
for e in deny:
if _acl_entry_matches(server_name, e):
# logger.info("%s matched deny rule %s", server_name, e)
return False
# then the allow list.
allow = acl_event.content.get("allow", [])
if not isinstance(allow, (list, tuple)):
logger.warning("Ignoring non-list allow ACL %s", allow)
allow = []
for e in allow:
if _acl_entry_matches(server_name, e):
# logger.info("%s matched allow rule %s", server_name, e)
return True
# everything else should be rejected.
# logger.info("%s fell through", server_name)
return False
def _acl_entry_matches(server_name: str, acl_entry: Any) -> bool:
if not isinstance(acl_entry, str):
logger.warning(
"Ignoring non-str ACL entry '%s' (is %s)", acl_entry, type(acl_entry)
server_acl_evaluator = (
await self._storage_controllers.state.get_server_acl_for_room(room_id)
)
return False
regex = glob_to_regex(acl_entry)
return bool(regex.match(server_name))
if server_acl_evaluator and not server_acl_evaluator.server_matches_acl_event(
server_name
):
raise AuthError(code=403, msg="Server is banned from room")
class FederationHandlerRegistry:
......@@ -1432,19 +1430,14 @@ class FederationHandlerRegistry:
self._edu_type_to_instance[edu_type] = instance_names
async def on_edu(self, edu_type: str, origin: str, content: dict) -> None:
if not self.config.server.use_presence and edu_type == EduTypes.PRESENCE:
if not self.config.server.track_presence and edu_type == EduTypes.PRESENCE:
return
# Check if we have a handler on this instance
handler = self.edu_handlers.get(edu_type)
if handler:
with start_active_span_from_edu(content, "handle_edu"):
try:
await handler(origin, content)
except SynapseError as e:
logger.info("Failed to handle edu %r: %r", edu_type, e)
except Exception:
logger.exception("Failed to handle edu %r", edu_type)
await handler(origin, content)
return
# Check if we can route it somewhere else that isn't us
......@@ -1453,17 +1446,12 @@ class FederationHandlerRegistry:
# Pick an instance randomly so that we don't overload one.
route_to = random.choice(instances)
try:
await self._send_edu(
instance_name=route_to,
edu_type=edu_type,
origin=origin,
content=content,
)
except SynapseError as e:
logger.info("Failed to handle edu %r: %r", edu_type, e)
except Exception:
logger.exception("Failed to handle edu %r", edu_type)
await self._send_edu(
instance_name=route_to,
edu_type=edu_type,
origin=origin,
content=content,
)
return
# Oh well, let's just log and move on.
......@@ -1487,8 +1475,9 @@ class FederationHandlerRegistry:
def _get_event_ids_for_partial_state_join(
join_event: EventBase,
prev_state_ids: StateMap[str],
summary: Mapping[str, MemberSummary],
) -> Collection[str]:
"""Calculate state to be retuned in a partial_state send_join
"""Calculate state to be returned in a partial_state send_join
Args:
join_event: the join event being send_joined
......@@ -1513,8 +1502,19 @@ def _get_event_ids_for_partial_state_join(
if current_membership_event_id is not None:
state_event_ids.add(current_membership_event_id)
# TODO: return a few more members:
# - those with invites
# - those that are kicked? / banned
name_id = prev_state_ids.get((EventTypes.Name, ""))
canonical_alias_id = prev_state_ids.get((EventTypes.CanonicalAlias, ""))
if not name_id and not canonical_alias_id:
# Also include the hero members of the room (for DM rooms without a title).
# To do this properly, we should select the correct subset of membership events
# from `prev_state_ids`. Instead, we are lazier and use the (cached)
# `get_room_summary` function, which is based on the current state of the room.
# This introduces races; we choose to ignore them because a) they should be rare
# and b) even if it's wrong, joining servers will get the full state eventually.
heroes = extract_heroes_from_room_summary(summary, join_event.state_key)
for hero in heroes:
membership_event_id = prev_state_ids.get((EventTypes.Member, hero))
if membership_event_id:
state_event_ids.add(membership_event_id)
return state_event_ids
# Copyright 2014-2016 OpenMarket Ltd
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2021 The Matrix.org Foundation C.I.C.
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" This module contains all the persistence actions done by the federation
"""This module contains all the persistence actions done by the federation
package.
These actions are mostly only used by the :py:mod:`.replication` module.
......
# Copyright 2014-2016 OpenMarket Ltd
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2021 The Matrix.org Foundation C.I.C.
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A federation sender that forwards things to be sent across replication to
a worker process.
......@@ -49,7 +56,7 @@ from synapse.api.presence import UserPresenceState
from synapse.federation.sender import AbstractFederationSender, FederationSender
from synapse.metrics import LaterGauge
from synapse.replication.tcp.streams.federation import FederationStream
from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
from synapse.types import JsonDict, ReadReceipt, RoomStreamToken, StrCollection
from synapse.util.metrics import Measure
from .units import Edu
......@@ -68,6 +75,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
self.clock = hs.get_clock()
self.notifier = hs.get_notifier()
self.is_mine_id = hs.is_mine_id
self.is_mine_server_name = hs.is_mine_server_name
# We may have multiple federation sender instances, so we need to track
# their positions separately.
......@@ -80,9 +88,9 @@ class FederationRemoteSendQueue(AbstractFederationSender):
# Stores the destinations we need to explicitly send presence to about a
# given user.
# Stream position -> (user_id, destinations)
self.presence_destinations: SortedDict[
int, Tuple[str, Iterable[str]]
] = SortedDict()
self.presence_destinations: SortedDict[int, Tuple[str, Iterable[str]]] = (
SortedDict()
)
# (destination, key) -> EDU
self.keyed_edu: Dict[Tuple[str, tuple], Edu] = {}
......@@ -198,7 +206,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
key: Optional[Hashable] = None,
) -> None:
"""As per FederationSender"""
if destination == self.server_name:
if self.is_mine_server_name(destination):
logger.info("Not sending EDU to ourselves")
return
......@@ -228,7 +236,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
"""
# nothing to do here: the replication listener will handle it.
def send_presence_to_destinations(
async def send_presence_to_destinations(
self, states: Iterable[UserPresenceState], destinations: Iterable[str]
) -> None:
"""As per FederationSender
......@@ -244,7 +252,9 @@ class FederationRemoteSendQueue(AbstractFederationSender):
self.notifier.on_new_replication_data()
def send_device_messages(self, destination: str, immediate: bool = False) -> None:
async def send_device_messages(
self, destinations: StrCollection, immediate: bool = True
) -> None:
"""As per FederationSender"""
# We don't need to replicate this as it gets sent down a different
# stream.
......@@ -314,7 +324,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
# stream position.
keyed_edus = {v: k for k, v in self.keyed_edu_changed.items()[i:j]}
for ((destination, edu_key), pos) in keyed_edus.items():
for (destination, edu_key), pos in keyed_edus.items():
rows.append(
(
pos,
......@@ -329,7 +339,7 @@ class FederationRemoteSendQueue(AbstractFederationSender):
j = self.edus.bisect_right(to_token) + 1
edus = self.edus.items()[i:j]
for (pos, edu) in edus:
for pos, edu in edus:
rows.append((pos, EduRow(edu)))
# Sort rows based on pos
......@@ -392,7 +402,7 @@ class PresenceDestinationsRow(BaseFederationRow):
@staticmethod
def from_data(data: JsonDict) -> "PresenceDestinationsRow":
return PresenceDestinationsRow(
state=UserPresenceState.from_dict(data["state"]), destinations=data["dests"]
state=UserPresenceState(**data["state"]), destinations=data["dests"]
)
def to_data(self) -> JsonDict:
......@@ -462,7 +472,7 @@ class ParsedFederationStreamData:
edus: Dict[str, List[Edu]]
def process_rows_for_federation(
async def process_rows_for_federation(
transaction_queue: FederationSender,
rows: List[FederationStream.FederationStreamRow],
) -> None:
......@@ -495,7 +505,7 @@ def process_rows_for_federation(
parsed_row.add_to_buffer(buff)
for state, destinations in buff.presence_destinations:
transaction_queue.send_presence_to_destinations(
await transaction_queue.send_presence_to_destinations(
states=[state], destinations=destinations
)
......
# Copyright 2019 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# http://www.apache.org/licenses/LICENSE-2.0
# Copyright (C) 2023 New Vector, Ltd
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
#
"""
The Federation Sender is responsible for sending Persistent Data Units (PDUs)
and Ephemeral Data Units (EDUs) to other homeservers using
the `/send` Federation API.
## How do PDUs get sent?
The Federation Sender is made aware of new PDUs due to `FederationSender.notify_new_events`.
When the sender is notified about a newly-persisted PDU that originates from this homeserver
and is not an out-of-band event, we pass the PDU to the `_PerDestinationQueue` for each
remote homeserver that is in the room at that point in the DAG.
### Per-Destination Queues
There is one `PerDestinationQueue` per 'destination' homeserver.
The `PerDestinationQueue` maintains the following information about the destination:
- whether the destination is currently in [catch-up mode (see below)](#catch-up-mode);
- a queue of PDUs to be sent to the destination; and
- a queue of EDUs to be sent to the destination (not considered in this section).
Upon a new PDU being enqueued, `attempt_new_transaction` is called to start a new
transaction if there is not already one in progress.
### Transactions and the Transaction Transmission Loop
Each federation HTTP request to the `/send` endpoint is referred to as a 'transaction'.
The body of the HTTP request contains a list of PDUs and EDUs to send to the destination.
The *Transaction Transmission Loop* (`_transaction_transmission_loop`) is responsible
for emptying the queued PDUs (and EDUs) from a `PerDestinationQueue` by sending
them to the destination.
There can only be one transaction in flight for a given destination at any time.
(Other than preventing us from overloading the destination, this also makes it easier to
reason about because we process events sequentially for each destination.
This is useful for *Catch-Up Mode*, described later.)
The loop continues so long as there is anything to send. At each iteration of the loop, we:
- dequeue up to 50 PDUs (and up to 100 EDUs).
- make the `/send` request to the destination homeserver with the dequeued PDUs and EDUs.
- if successful, make note of the fact that we succeeded in transmitting PDUs up to
the given `stream_ordering` of the latest PDU by
- if unsuccessful, back off from the remote homeserver for some time.
If we have been unsuccessful for too long (when the backoff interval grows to exceed 1 hour),
the in-memory queues are emptied and we enter [*Catch-Up Mode*, described below](#catch-up-mode).
### Catch-Up Mode
When the `PerDestinationQueue` has the catch-up flag set, the *Catch-Up Transmission Loop*
(`_catch_up_transmission_loop`) is used in lieu of the regular `_transaction_transmission_loop`.
(Only once the catch-up mode has been exited can the regular transaction transmission behaviour
be resumed.)
*Catch-Up Mode*, entered upon Synapse startup or once a homeserver has fallen behind due to
connection problems, is responsible for sending PDUs that have been missed by the destination
homeserver. (PDUs can be missed because the `PerDestinationQueue` is volatile — i.e. resets
on startup — and it does not hold PDUs forever if `/send` requests to the destination fail.)
The catch-up mechanism makes use of the `last_successful_stream_ordering` column in the
`destinations` table (which gives the `stream_ordering` of the most recent successfully
sent PDU) and the `stream_ordering` column in the `destination_rooms` table (which gives,
for each room, the `stream_ordering` of the most recent PDU that needs to be sent to this
destination).
Each iteration of the loop pulls out 50 `destination_rooms` entries with the oldest
`stream_ordering`s that are greater than the `last_successful_stream_ordering`.
In other words, from the set of latest PDUs in each room to be sent to the destination,
the 50 oldest such PDUs are pulled out.
These PDUs could, in principle, now be directly sent to the destination. However, as an
optimisation intended to prevent overloading destination homeservers, we instead attempt
to send the latest forward extremities so long as the destination homeserver is still
eligible to receive those.
This reduces load on the destination **in aggregate** because all Synapse homeservers
will behave according to this principle and therefore avoid sending lots of different PDUs
at different points in the DAG to a recovering homeserver.
*This optimisation is not currently valid in rooms which are partial-state on this homeserver,
since we are unable to determine whether the destination homeserver is eligible to receive
the latest forward extremities unless this homeserver sent those PDUs — in this case, we
just send the latest PDUs originating from this server and skip this optimisation.*
Whilst PDUs are sent through this mechanism, the position of `last_successful_stream_ordering`
is advanced as normal.
Once there are no longer any rooms containing outstanding PDUs to be sent to the destination
*that are not already in the `PerDestinationQueue` because they arrived since Catch-Up Mode
was enabled*, Catch-Up Mode is exited and we return to `_transaction_transmission_loop`.
#### A note on failures and back-offs
If a remote server is unreachable over federation, we back off from that server,
with an exponentially-increasing retry interval.
We automatically retry after the retry interval expires (roughly, the logic to do so
being triggered every minute).
If the backoff grows too large (> 1 hour), the in-memory queue is emptied (to prevent
unbounded growth) and Catch-Up Mode is entered.
It is worth noting that the back-off for a remote server is cleared once an inbound
request from that remote server is received (see `notify_remote_server_up`).
At this point, the transaction transmission loop is also started up, to proactively
send missed PDUs and EDUs to the destination (i.e. you don't need to wait for a new PDU
or EDU, destined for that destination, to be created in order to send out missed PDUs and
EDUs).
"""
import abc
import logging
......@@ -22,22 +139,23 @@ from typing import (
Hashable,
Iterable,
List,
Literal,
Optional,
Set,
Tuple,
)
import attr
from prometheus_client import Counter
from typing_extensions import Literal
from twisted.internet import defer
from twisted.internet.interfaces import IDelayedCall
import synapse.metrics
from synapse.api.presence import UserPresenceState
from synapse.events import EventBase
from synapse.federation.sender.per_destination_queue import PerDestinationQueue
from synapse.federation.sender.per_destination_queue import (
CATCHUP_RETRY_INTERVAL,
PerDestinationQueue,
)
from synapse.federation.sender.transaction_manager import TransactionManager
from synapse.federation.units import Edu
from synapse.logging.context import make_deferred_yieldable, run_in_background
......@@ -51,9 +169,16 @@ from synapse.metrics.background_process_metrics import (
run_as_background_process,
wrap_as_background_process,
)
from synapse.types import JsonDict, ReadReceipt, RoomStreamToken
from synapse.types import (
JsonDict,
ReadReceipt,
RoomStreamToken,
StrCollection,
get_domain_from_id,
)
from synapse.util import Clock
from synapse.util.metrics import Measure
from synapse.util.retryutils import filter_destinations_by_retry_limiter
if TYPE_CHECKING:
from synapse.events.presence_router import PresenceRouter
......@@ -71,14 +196,17 @@ sent_pdus_destination_dist_total = Counter(
"Total number of PDUs queued for sending across all destinations",
)
# Time (in s) after Synapse's startup that we will begin to wake up destinations
# that have catch-up outstanding.
CATCH_UP_STARTUP_DELAY_SEC = 15
# Time (in s) to wait before trying to wake up destinations that have
# catch-up outstanding.
# Please note that rate limiting still applies, so while the loop is
# executed every X seconds the destinations may not be woken up because
# they are being rate limited following previous attempt failures.
WAKEUP_RETRY_PERIOD_SEC = 60
# Time (in s) to wait in between waking up each destination, i.e. one destination
# will be woken up every <x> seconds after Synapse's startup until we have woken
# every destination has outstanding catch-up.
CATCH_UP_STARTUP_INTERVAL_SEC = 5
# will be woken up every <x> seconds until we have woken every destination
# has outstanding catch-up.
WAKEUP_INTERVAL_BETWEEN_DESTINATIONS_SEC = 5
class AbstractFederationSender(metaclass=abc.ABCMeta):
......@@ -99,7 +227,7 @@ class AbstractFederationSender(metaclass=abc.ABCMeta):
raise NotImplementedError()
@abc.abstractmethod
def send_presence_to_destinations(
async def send_presence_to_destinations(
self, states: Iterable[UserPresenceState], destinations: Iterable[str]
) -> None:
"""Send the given presence states to the given destinations.
......@@ -128,9 +256,11 @@ class AbstractFederationSender(metaclass=abc.ABCMeta):
raise NotImplementedError()
@abc.abstractmethod
def send_device_messages(self, destination: str, immediate: bool = True) -> None:
async def send_device_messages(
self, destinations: StrCollection, immediate: bool = True
) -> None:
"""Tells the sender that a new device message is ready to be sent to the
destination. The `immediate` flag specifies whether the messages should
destinations. The `immediate` flag specifies whether the messages should
be tried to be sent immediately, or whether it can be delayed for a
short while (to aid performance).
"""
......@@ -172,12 +302,10 @@ class _DestinationWakeupQueue:
# being woken up.
_MAX_TIME_IN_QUEUE = 30.0
# The maximum duration in seconds between waking up consecutive destination
# queues.
_MAX_DELAY = 0.1
sender: "FederationSender" = attr.ib()
clock: Clock = attr.ib()
max_delay_s: int = attr.ib()
queue: "OrderedDict[str, Literal[None]]" = attr.ib(factory=OrderedDict)
processing: bool = attr.ib(default=False)
......@@ -207,7 +335,7 @@ class _DestinationWakeupQueue:
# We also add an upper bound to the delay, to gracefully handle the
# case where the queue only has a few entries in it.
current_sleep_seconds = min(
self._MAX_DELAY, self._MAX_TIME_IN_QUEUE / len(self.queue)
self.max_delay_s, self._MAX_TIME_IN_QUEUE / len(self.queue)
)
while self.queue:
......@@ -249,6 +377,7 @@ class FederationSender(AbstractFederationSender):
self.clock = hs.get_clock()
self.is_mine_id = hs.is_mine_id
self.is_mine_server_name = hs.is_mine_server_name
self._presence_router: Optional["PresenceRouter"] = None
self._transaction_manager = TransactionManager(hs)
......@@ -290,31 +419,23 @@ class FederationSender(AbstractFederationSender):
self._is_processing = False
self._last_poked_id = -1
# map from room_id to a set of PerDestinationQueues which we believe are
# awaiting a call to flush_read_receipts_for_room. The presence of an entry
# here for a given room means that we are rate-limiting RR flushes to that room,
# and that there is a pending call to _flush_rrs_for_room in the system.
self._queues_awaiting_rr_flush_by_room: Dict[str, Set[PerDestinationQueue]] = {}
self._external_cache = hs.get_external_cache()
self._rr_txn_interval_per_room_ms = (
1000.0
/ hs.config.ratelimiting.federation_rr_transactions_per_room_per_second
rr_txn_interval_per_room_s = (
1.0 / hs.config.ratelimiting.federation_rr_transactions_per_room_per_second
)
self._destination_wakeup_queue = _DestinationWakeupQueue(
self, self.clock, max_delay_s=rr_txn_interval_per_room_s
)
# wake up destinations that have outstanding PDUs to be caught up
self._catchup_after_startup_timer: Optional[
IDelayedCall
] = self.clock.call_later(
CATCH_UP_STARTUP_DELAY_SEC,
# Regularly wake up destinations that have outstanding PDUs to be caught up
self.clock.looping_call_now(
run_as_background_process,
WAKEUP_RETRY_PERIOD_SEC * 1000.0,
"wake_destinations_needing_catchup",
self._wake_destinations_needing_catchup,
)
self._external_cache = hs.get_external_cache()
self._destination_wakeup_queue = _DestinationWakeupQueue(self, self.clock)
def _get_per_destination_queue(self, destination: str) -> PerDestinationQueue:
"""Get or create a PerDestinationQueue for the given destination
......@@ -434,7 +555,23 @@ class FederationSender(AbstractFederationSender):
# If there are no prev event IDs then the state is empty
# and so no remote servers in the room
destinations = set()
else:
if destinations is None:
# During partial join we use the set of servers that we got
# when beginning the join. It's still possible that we send
# events to servers that left the room in the meantime, but
# we consider that an acceptable risk since it is only our own
# events that we leak and not other server's ones.
partial_state_destinations = (
await self.store.get_partial_state_servers_at_join(
event.room_id
)
)
if partial_state_destinations is not None:
destinations = partial_state_destinations
if destinations is None:
# We check the external cache for the destinations, which is
# stored per state group.
......@@ -446,14 +583,14 @@ class FederationSender(AbstractFederationSender):
"get_joined_hosts", str(sg)
)
if destinations is None:
# Add logging to help track down #13444
# Add logging to help track down https://github.com/matrix-org/synapse/issues/13444
logger.info(
"Unexpectedly did not have cached destinations for %s / %s",
sg,
event.event_id,
)
else:
# Add logging to help track down #13444
# Add logging to help track down https://github.com/matrix-org/synapse/issues/13444
logger.info(
"Unexpectedly did not have cached prev group for %s",
event.event_id,
......@@ -587,6 +724,13 @@ class FederationSender(AbstractFederationSender):
pdu.internal_metadata.stream_ordering,
)
destinations = await filter_destinations_by_retry_limiter(
destinations,
clock=self.clock,
store=self.store,
retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
)
for destination in destinations:
self._get_per_destination_queue(destination).send_pdu(pdu)
......@@ -599,102 +743,123 @@ class FederationSender(AbstractFederationSender):
# Some background on the rate-limiting going on here.
#
# It turns out that if we attempt to send out RRs as soon as we get them from
# a client, then we end up trying to do several hundred Hz of federation
# transactions. (The number of transactions scales as O(N^2) on the size of a
# room, since in a large room we have both more RRs coming in, and more servers
# to send them to.)
# It turns out that if we attempt to send out RRs as soon as we get them
# from a client, then we end up trying to do several hundred Hz of
# federation transactions. (The number of transactions scales as O(N^2)
# on the size of a room, since in a large room we have both more RRs
# coming in, and more servers to send them to.)
#
# This leads to a lot of CPU load, and we end up getting behind. The solution
# currently adopted is as follows:
# This leads to a lot of CPU load, and we end up getting behind. The
# solution currently adopted is to differentiate between receipts and
# destinations we should immediately send to, and those we can trickle
# the receipts to.
#
# The first receipt in a given room is sent out immediately, at time T0. Any
# further receipts are, in theory, batched up for N seconds, where N is calculated
# based on the number of servers in the room to achieve a transaction frequency
# of around 50Hz. So, for example, if there were 100 servers in the room, then
# N would be 100 / 50Hz = 2 seconds.
# The current logic is to send receipts out immediately if:
# - the room is "small", i.e. there's only N servers to send receipts
# to, and so sending out the receipts immediately doesn't cause too
# much load; or
# - the receipt is for an event that happened recently, as users
# notice if receipts are delayed when they know other users are
# currently reading the room; or
# - the receipt is being sent to the server that sent the event, so
# that users see receipts for their own receipts quickly.
#
# Then, after T+N, we flush out any receipts that have accumulated, and restart
# the timer to flush out more receipts at T+2N, etc. If no receipts accumulate,
# we stop the cycle and go back to the start.
# For destinations that we should delay sending the receipt to, we queue
# the receipts up to be sent in the next transaction, but don't trigger
# a new transaction to be sent. We then add the destination to the
# `DestinationWakeupQueue`, which will slowly iterate over each
# destination and trigger a new transaction to be sent.
#
# However, in practice, it is often possible to flush out receipts earlier: in
# particular, if we are sending a transaction to a given server anyway (for
# example, because we have a PDU or a RR in another room to send), then we may
# as well send out all of the pending RRs for that server. So it may be that
# by the time we get to T+N, we don't actually have any RRs left to send out.
# Nevertheless we continue to buffer up RRs for the room in question until we
# reach the point that no RRs arrive between timer ticks.
# However, in practice, it is often possible to send out delayed
# receipts earlier: in particular, if we are sending a transaction to a
# given server anyway (for example, because we have a PDU or a RR in
# another room to send), then we may as well send out all of the pending
# RRs for that server. So it may be that by the time we get to waking up
# the destination, we don't actually have any RRs left to send out.
#
# For even more background, see https://github.com/matrix-org/synapse/issues/4730.
# For even more background, see
# https://github.com/matrix-org/synapse/issues/4730.
room_id = receipt.room_id
# Local read receipts always have 1 event ID.
event_id = receipt.event_ids[0]
# Work out which remote servers should be poked and poke them.
domains_set = await self._storage_controllers.state.get_current_hosts_in_room(
domains_set = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation(
room_id
)
domains = [
domains: StrCollection = [
d
for d in domains_set
if d != self.server_name
if not self.is_mine_server_name(d)
and self._federation_shard_config.should_handle(self._instance_name, d)
]
domains = await filter_destinations_by_retry_limiter(
domains,
clock=self.clock,
store=self.store,
retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
)
if not domains:
return
queues_pending_flush = self._queues_awaiting_rr_flush_by_room.get(room_id)
# We now split which domains we want to wake up immediately vs which we
# want to delay waking up.
immediate_domains: StrCollection
delay_domains: StrCollection
# if there is no flush yet scheduled, we will send out these receipts with
# immediate flushes, and schedule the next flush for this room.
if queues_pending_flush is not None:
logger.debug("Queuing receipt for: %r", domains)
if len(domains) < 10:
# For "small" rooms send to all domains immediately
immediate_domains = domains
delay_domains = ()
else:
logger.debug("Sending receipt to: %r", domains)
self._schedule_rr_flush_for_room(room_id, len(domains))
metadata = await self.store.get_metadata_for_event(
receipt.room_id, event_id
)
assert metadata is not None
for domain in domains:
queue = self._get_per_destination_queue(domain)
queue.queue_read_receipt(receipt)
sender_domain = get_domain_from_id(metadata.sender)
# if there is already a RR flush pending for this room, then make sure this
# destination is registered for the flush
if queues_pending_flush is not None:
queues_pending_flush.add(queue)
if self.clock.time_msec() - metadata.received_ts < 60_000:
# We always send receipts for recent messages immediately
immediate_domains = domains
delay_domains = ()
else:
queue.flush_read_receipts_for_room(room_id)
def _schedule_rr_flush_for_room(self, room_id: str, n_domains: int) -> None:
# that is going to cause approximately len(domains) transactions, so now back
# off for that multiplied by RR_TXN_INTERVAL_PER_ROOM
backoff_ms = self._rr_txn_interval_per_room_ms * n_domains
logger.debug("Scheduling RR flush in %s in %d ms", room_id, backoff_ms)
self.clock.call_later(backoff_ms, self._flush_rrs_for_room, room_id)
self._queues_awaiting_rr_flush_by_room[room_id] = set()
def _flush_rrs_for_room(self, room_id: str) -> None:
queues = self._queues_awaiting_rr_flush_by_room.pop(room_id)
logger.debug("Flushing RRs in %s to %s", room_id, queues)
# Otherwise, we delay waking up all destinations except for the
# sender's domain.
immediate_domains = []
delay_domains = []
for domain in domains:
if domain == sender_domain:
immediate_domains.append(domain)
else:
delay_domains.append(domain)
if not queues:
# no more RRs arrived for this room; we are done.
return
for domain in immediate_domains:
# Add to destination queue and wake the destination up
queue = self._get_per_destination_queue(domain)
queue.queue_read_receipt(receipt)
queue.attempt_new_transaction()
# schedule the next flush
self._schedule_rr_flush_for_room(room_id, len(queues))
for domain in delay_domains:
# Add to destination queue...
queue = self._get_per_destination_queue(domain)
queue.queue_read_receipt(receipt)
for queue in queues:
queue.flush_read_receipts_for_room(room_id)
# ... and schedule the destination to be woken up.
self._destination_wakeup_queue.add_to_queue(domain)
def send_presence_to_destinations(
async def send_presence_to_destinations(
self, states: Iterable[UserPresenceState], destinations: Iterable[str]
) -> None:
"""Send the given presence states to the given destinations.
destinations (list[str])
"""
if not states or not self.hs.config.server.use_presence:
if not states or not self.hs.config.server.track_presence:
# No-op if presence is disabled.
return
......@@ -702,12 +867,19 @@ class FederationSender(AbstractFederationSender):
for state in states:
assert self.is_mine_id(state.user_id)
destinations = await filter_destinations_by_retry_limiter(
[
d
for d in destinations
if self._federation_shard_config.should_handle(self._instance_name, d)
],
clock=self.clock,
store=self.store,
retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
)
for destination in destinations:
if destination == self.server_name:
continue
if not self._federation_shard_config.should_handle(
self._instance_name, destination
):
if self.is_mine_server_name(destination):
continue
self._get_per_destination_queue(destination).send_presence(
......@@ -731,7 +903,7 @@ class FederationSender(AbstractFederationSender):
content: content of EDU
key: clobbering key for this edu
"""
if destination == self.server_name:
if self.is_mine_server_name(destination):
logger.info("Not sending EDU to ourselves")
return
......@@ -767,21 +939,29 @@ class FederationSender(AbstractFederationSender):
else:
queue.send_edu(edu)
def send_device_messages(self, destination: str, immediate: bool = False) -> None:
if destination == self.server_name:
logger.warning("Not sending device update to ourselves")
return
if not self._federation_shard_config.should_handle(
self._instance_name, destination
):
return
async def send_device_messages(
self, destinations: StrCollection, immediate: bool = True
) -> None:
destinations = await filter_destinations_by_retry_limiter(
[
destination
for destination in destinations
if self._federation_shard_config.should_handle(
self._instance_name, destination
)
and not self.is_mine_server_name(destination)
],
clock=self.clock,
store=self.store,
retry_due_within_ms=CATCHUP_RETRY_INTERVAL,
)
if immediate:
self._get_per_destination_queue(destination).attempt_new_transaction()
else:
self._get_per_destination_queue(destination).mark_new_data()
self._destination_wakeup_queue.add_to_queue(destination)
for destination in destinations:
if immediate:
self._get_per_destination_queue(destination).attempt_new_transaction()
else:
self._get_per_destination_queue(destination).mark_new_data()
self._destination_wakeup_queue.add_to_queue(destination)
def wake_destination(self, destination: str) -> None:
"""Called when we want to retry sending transactions to a remote.
......@@ -790,7 +970,7 @@ class FederationSender(AbstractFederationSender):
might have come back.
"""
if destination == self.server_name:
if self.is_mine_server_name(destination):
logger.warning("Not waking up ourselves")
return
......@@ -836,7 +1016,6 @@ class FederationSender(AbstractFederationSender):
if not destinations_to_wake:
# finished waking all destinations!
self._catchup_after_startup_timer = None
break
last_processed = destinations_to_wake[-1]
......@@ -853,4 +1032,4 @@ class FederationSender(AbstractFederationSender):
last_processed,
)
self.wake_destination(destination)
await self.clock.sleep(CATCH_UP_STARTUP_INTERVAL_SEC)
await self.clock.sleep(WAKEUP_INTERVAL_BETWEEN_DESTINATIONS_SEC)
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2019 New Vector Ltd
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2021 The Matrix.org Foundation C.I.C.
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import logging
from collections import OrderedDict
from types import TracebackType
from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Tuple, Type
......@@ -35,7 +42,7 @@ from synapse.logging import issue9533_logger
from synapse.logging.opentracing import SynapseTags, set_tag
from synapse.metrics import sent_transactions_counter
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.types import ReadReceipt
from synapse.types import JsonDict, ReadReceipt
from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
from synapse.visibility import filter_events_for_server
......@@ -59,6 +66,14 @@ sent_edus_by_type = Counter(
)
# If the retry interval is larger than this then we enter "catchup" mode
CATCHUP_RETRY_INTERVAL = 60 * 60 * 1000
# Limit how many presence states we add to each presence EDU, to ensure that
# they are bounded in size.
MAX_PRESENCE_STATES_PER_EDU = 50
class PerDestinationQueue:
"""
Manages the per-destination transmission queues.
......@@ -134,11 +149,13 @@ class PerDestinationQueue:
# Map of user_id -> UserPresenceState of pending presence to be sent to this
# destination
self._pending_presence: Dict[str, UserPresenceState] = {}
self._pending_presence: OrderedDict[str, UserPresenceState] = OrderedDict()
# room_id -> receipt_type -> user_id -> receipt_dict
self._pending_rrs: Dict[str, Dict[str, Dict[str, dict]]] = {}
self._rrs_pending_flush = False
# List of room_id -> receipt_type -> user_id -> receipt_dict,
#
# Each receipt can only have a single receipt per
# (room ID, receipt type, user ID, thread ID) tuple.
self._pending_receipt_edus: List[Dict[str, Dict[str, Dict[str, dict]]]] = []
# stream_id of last successfully sent to-device message.
# NB: may be a long or an int.
......@@ -202,17 +219,45 @@ class PerDestinationQueue:
Args:
receipt: receipt to be queued
"""
self._pending_rrs.setdefault(receipt.room_id, {}).setdefault(
receipt.receipt_type, {}
)[receipt.user_id] = {"event_ids": receipt.event_ids, "data": receipt.data}
def flush_read_receipts_for_room(self, room_id: str) -> None:
# if we don't have any read-receipts for this room, it may be that we've already
# sent them out, so we don't need to flush.
if room_id not in self._pending_rrs:
return
self._rrs_pending_flush = True
self.attempt_new_transaction()
serialized_receipt: JsonDict = {
"event_ids": receipt.event_ids,
"data": receipt.data,
}
if receipt.thread_id is not None:
serialized_receipt["data"]["thread_id"] = receipt.thread_id
# Find which EDU to add this receipt to. There's three situations depending
# on the (room ID, receipt type, user, thread ID) tuple:
#
# 1. If it fully matches, clobber the information.
# 2. If it is missing, add the information.
# 3. If the subset tuple of (room ID, receipt type, user) matches, check
# the next EDU (or add a new EDU).
for edu in self._pending_receipt_edus:
receipt_content = edu.setdefault(receipt.room_id, {}).setdefault(
receipt.receipt_type, {}
)
# If this room ID, receipt type, user ID is not in this EDU, OR if
# the full tuple matches, use the current EDU.
if (
receipt.user_id not in receipt_content
or receipt_content[receipt.user_id].get("thread_id")
== receipt.thread_id
):
receipt_content[receipt.user_id] = serialized_receipt
break
# If no matching EDU was found, create a new one.
else:
self._pending_receipt_edus.append(
{
receipt.room_id: {
receipt.receipt_type: {receipt.user_id: serialized_receipt}
}
}
)
self.mark_new_data()
def send_keyed_edu(self, edu: Edu, key: Hashable) -> None:
self._pending_edus_keyed[(edu.edu_type, key)] = edu
......@@ -284,12 +329,11 @@ class PerDestinationQueue:
# not caught up yet
return
pending_pdus = []
while True:
self._new_data_to_send = False
async with _TransactionQueueManager(self) as (
pending_pdus,
pending_pdus, # noqa: F811
pending_edus,
):
if not pending_pdus and not pending_edus:
......@@ -331,7 +375,7 @@ class PerDestinationQueue:
),
)
if e.retry_interval > 60 * 60 * 1000:
if e.retry_interval > CATCHUP_RETRY_INTERVAL:
# we won't retry for another hour!
# (this suggests a significant outage)
# We drop pending EDUs because otherwise they will
......@@ -350,8 +394,8 @@ class PerDestinationQueue:
# through another mechanism, because this is all volatile!
self._pending_edus = []
self._pending_edus_keyed = {}
self._pending_presence = {}
self._pending_rrs = {}
self._pending_presence.clear()
self._pending_receipt_edus = []
self._start_catching_up()
except FederationDeniedError as e:
......@@ -458,8 +502,8 @@ class PerDestinationQueue:
#
# Note: `catchup_pdus` will have exactly one PDU per room.
for pdu in catchup_pdus:
# The PDU from the DB will be the last PDU in the room from
# *this server* that wasn't sent to the remote. However, other
# The PDU from the DB will be the newest PDU in the room from
# *this server* that we tried---but were unable---to send to the remote.
# servers may have sent lots of events since then, and we want
# to try and tell the remote only about the *latest* events in
# the room. This is so that it doesn't get inundated by events
......@@ -477,6 +521,11 @@ class PerDestinationQueue:
# If the event is in the extremities, then great! We can just
# use that without having to do further checks.
room_catchup_pdus = [pdu]
elif await self._store.is_partial_state_room(pdu.room_id):
# We can't be sure which events the destination should
# see using only partial state. Avoid doing so, and just retry
# sending our the newest PDU the remote is missing from us.
room_catchup_pdus = [pdu]
else:
# If not, fetch the extremities and figure out which we can
# send.
......@@ -505,8 +554,11 @@ class PerDestinationQueue:
new_pdus = await filter_events_for_server(
self._storage_controllers,
self._destination,
self._server_name,
new_pdus,
redact=False,
filter_out_erased_senders=True,
filter_out_remote_partial_state_events=True,
)
# If we've filtered out all the extremities, fall back to
......@@ -542,22 +594,24 @@ class PerDestinationQueue:
self._destination, last_successful_stream_ordering
)
def _get_rr_edus(self, force_flush: bool) -> Iterable[Edu]:
if not self._pending_rrs:
return
if not force_flush and not self._rrs_pending_flush:
# not yet time for this lot
def _get_receipt_edus(self, limit: int) -> Iterable[Edu]:
if not self._pending_receipt_edus:
return
edu = Edu(
origin=self._server_name,
destination=self._destination,
edu_type=EduTypes.RECEIPT,
content=self._pending_rrs,
)
self._pending_rrs = {}
self._rrs_pending_flush = False
yield edu
# Send at most limit EDUs for receipts.
for content in self._pending_receipt_edus[:limit]:
yield Edu(
origin=self._server_name,
destination=self._destination,
edu_type=EduTypes.RECEIPT,
content=content,
)
self._pending_receipt_edus = self._pending_receipt_edus[limit:]
# If there are still pending read-receipts, don't reset the pending flush
# flag.
if not self._pending_receipt_edus:
self._rrs_pending_flush = False
def _pop_pending_edus(self, limit: int) -> List[Edu]:
pending_edus = self._pending_edus
......@@ -596,7 +650,7 @@ class PerDestinationQueue:
if not message_id:
continue
set_tag(SynapseTags.TO_DEVICE_MESSAGE_ID, message_id)
set_tag(SynapseTags.TO_DEVICE_EDU_ID, message_id)
edus = [
Edu(
......@@ -644,27 +698,65 @@ class _TransactionQueueManager:
async def __aenter__(self) -> Tuple[List[EventBase], List[Edu]]:
# First we calculate the EDUs we want to send, if any.
# We start by fetching device related EDUs, i.e device updates and to
# device messages. We have to keep 2 free slots for presence and rr_edus.
device_edu_limit = MAX_EDUS_PER_TRANSACTION - 2
# There's a maximum number of EDUs that can be sent with a transaction,
# generally device updates and to-device messages get priority, but we
# want to ensure that there's room for some other EDUs as well.
#
# This is done by:
#
# * Add a presence EDU, if one exists.
# * Add up-to a small limit of read receipt EDUs.
# * Add to-device EDUs, but leave some space for device list updates.
# * Add device list updates EDUs.
# * If there's any remaining room, add other EDUs.
pending_edus = []
# Add presence EDU.
if self.queue._pending_presence:
# Only send max 50 presence entries in the EDU, to bound the amount
# of data we're sending.
presence_to_add: List[JsonDict] = []
while (
self.queue._pending_presence
and len(presence_to_add) < MAX_PRESENCE_STATES_PER_EDU
):
_, presence = self.queue._pending_presence.popitem(last=False)
presence_to_add.append(
format_user_presence_state(presence, self.queue._clock.time_msec())
)
# We prioritize to-device messages so that existing encryption channels
pending_edus.append(
Edu(
origin=self.queue._server_name,
destination=self.queue._destination,
edu_type=EduTypes.PRESENCE,
content={"push": presence_to_add},
)
)
# Add read receipt EDUs.
pending_edus.extend(self.queue._get_receipt_edus(limit=5))
edu_limit = MAX_EDUS_PER_TRANSACTION - len(pending_edus)
# Next, prioritize to-device messages so that existing encryption channels
# work. We also keep a few slots spare (by reducing the limit) so that
# we can still trickle out some device list updates.
(
to_device_edus,
device_stream_id,
) = await self.queue._get_to_device_message_edus(device_edu_limit - 10)
) = await self.queue._get_to_device_message_edus(edu_limit - 10)
if to_device_edus:
self._device_stream_id = device_stream_id
else:
self.queue._last_device_stream_id = device_stream_id
device_edu_limit -= len(to_device_edus)
pending_edus.extend(to_device_edus)
edu_limit -= len(to_device_edus)
# Add device list update EDUs.
device_update_edus, dev_list_id = await self.queue._get_device_update_edus(
device_edu_limit
edu_limit
)
if device_update_edus:
......@@ -672,40 +764,17 @@ class _TransactionQueueManager:
else:
self.queue._last_device_list_stream_id = dev_list_id
pending_edus = device_update_edus + to_device_edus
# Now add the read receipt EDU.
pending_edus.extend(self.queue._get_rr_edus(force_flush=False))
# And presence EDU.
if self.queue._pending_presence:
pending_edus.append(
Edu(
origin=self.queue._server_name,
destination=self.queue._destination,
edu_type=EduTypes.PRESENCE,
content={
"push": [
format_user_presence_state(
presence, self.queue._clock.time_msec()
)
for presence in self.queue._pending_presence.values()
]
},
)
)
self.queue._pending_presence = {}
pending_edus.extend(device_update_edus)
edu_limit -= len(device_update_edus)
# Finally add any other types of EDUs if there is room.
pending_edus.extend(
self.queue._pop_pending_edus(MAX_EDUS_PER_TRANSACTION - len(pending_edus))
)
while (
len(pending_edus) < MAX_EDUS_PER_TRANSACTION
and self.queue._pending_edus_keyed
):
other_edus = self.queue._pop_pending_edus(edu_limit)
pending_edus.extend(other_edus)
edu_limit -= len(other_edus)
while edu_limit > 0 and self.queue._pending_edus_keyed:
_, val = self.queue._pending_edus_keyed.popitem()
pending_edus.append(val)
edu_limit -= 1
# Now we look for any PDUs to send, by getting up to 50 PDUs from the
# queue
......@@ -714,11 +783,6 @@ class _TransactionQueueManager:
if not self._pdus and not pending_edus:
return [], []
# if we've decided to send a transaction anyway, and we have room, we
# may as well send any pending RRs
if len(pending_edus) < MAX_EDUS_PER_TRANSACTION:
pending_edus.extend(self.queue._get_rr_edus(force_flush=True))
if self._pdus:
self._last_stream_ordering = self._pdus[
-1
......
# Copyright 2019 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import TYPE_CHECKING, List
......
#
# This file is licensed under the Affero General Public License (AGPL) version 3.
#
# Copyright 2014-2016 OpenMarket Ltd
# Copyright (C) 2023 New Vector, Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# See the GNU Affero General Public License for more details:
# <https://www.gnu.org/licenses/agpl-3.0.html>.
#
# Originally licensed under the Apache License, Version 2.0:
# <http://www.apache.org/licenses/LICENSE-2.0>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# [This file includes modifications made by New Vector Limited]
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The transport layer is responsible for both sending transactions to remote
homeservers and receiving a variety of requests from other homeservers.
......