diff --git a/changelog.d/17688.misc b/changelog.d/17688.misc new file mode 100644 index 00000000000..7ba8d48fbe7 --- /dev/null +++ b/changelog.d/17688.misc @@ -0,0 +1 @@ +Speed up sync by pulling out fewer events from the database. diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index c874d22eace..65b3f153da8 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -200,6 +200,7 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> ( events, _, + _, ) = await self._store.paginate_room_events_by_topological_ordering( room_id=room_id, from_key=from_key, diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 3c44458fa3c..4070b74b7af 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -510,6 +510,7 @@ async def get_messages( ( events, next_key, + _, ) = await self.store.paginate_room_events_by_topological_ordering( room_id=room_id, from_key=from_token.room_key, @@ -588,6 +589,7 @@ async def get_messages( ( events, next_key, + _, ) = await self.store.paginate_room_events_by_topological_ordering( room_id=room_id, from_key=from_token.room_key, diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 35c88f1b919..386375d64b2 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1753,7 +1753,7 @@ async def get_new_events( ) events = list(room_events) - events.extend(e for evs, _ in room_to_events.values() for e in evs) + events.extend(e for evs, _, _ in room_to_events.values() for e in evs) # We know stream_ordering must be not None here, as its been # persisted, but mypy doesn't know that diff --git a/synapse/handlers/sliding_sync/__init__.py b/synapse/handlers/sliding_sync/__init__.py index 7340c6ec053..cf368be9d9c 100644 --- a/synapse/handlers/sliding_sync/__init__.py +++ b/synapse/handlers/sliding_sync/__init__.py @@ -47,7 +47,6 @@ MutableStateMap, PersistedEventPosition, Requester, - RoomStreamToken, SlidingSyncStreamToken, StateMap, StreamKeyType, @@ -632,7 +631,7 @@ async def get_room_sync_data( # Use `stream_ordering` for updates else paginate_room_events_by_stream_ordering ) - timeline_events, new_room_key = await pagination_method( + timeline_events, new_room_key, limited = await pagination_method( room_id=room_id, # The bounds are reversed so we can paginate backwards # (from newer to older events) starting at to_bound. @@ -640,28 +639,13 @@ async def get_room_sync_data( from_key=to_bound, to_key=timeline_from_bound, direction=Direction.BACKWARDS, - # We add one so we can determine if there are enough events to saturate - # the limit or not (see `limited`) - limit=room_sync_config.timeline_limit + 1, + limit=room_sync_config.timeline_limit, ) # We want to return the events in ascending order (the last event is the # most recent). timeline_events.reverse() - # Determine our `limited` status based on the timeline. We do this before - # filtering the events so we can accurately determine if there is more to - # paginate even if we filter out some/all events. - if len(timeline_events) > room_sync_config.timeline_limit: - limited = True - # Get rid of that extra "+ 1" event because we only used it to determine - # if we hit the limit or not - timeline_events = timeline_events[-room_sync_config.timeline_limit :] - assert timeline_events[0].internal_metadata.stream_ordering - new_room_key = RoomStreamToken( - stream=timeline_events[0].internal_metadata.stream_ordering - 1 - ) - # Make sure we don't expose any events that the client shouldn't see timeline_events = await filter_events_for_client( self.storage_controllers, diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 609840bfe9d..f4ea90fbd78 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -906,7 +906,7 @@ async def _load_filtered_recents( # Use `stream_ordering` for updates else paginate_room_events_by_stream_ordering ) - events, end_key = await pagination_method( + events, end_key, limited = await pagination_method( room_id=room_id, # The bounds are reversed so we can paginate backwards # (from newer to older events) starting at to_bound. @@ -914,9 +914,7 @@ async def _load_filtered_recents( from_key=end_key, to_key=since_key, direction=Direction.BACKWARDS, - # We add one so we can determine if there are enough events to saturate - # the limit or not (see `limited`) - limit=load_limit + 1, + limit=load_limit, ) # We want to return the events in ascending order (the last event is the # most recent). @@ -971,9 +969,6 @@ async def _load_filtered_recents( loaded_recents.extend(recents) recents = loaded_recents - if len(events) <= load_limit: - limited = False - break max_repeat -= 1 if len(recents) > timeline_limit: @@ -2608,7 +2603,7 @@ async def _get_room_changes_for_incremental_sync( newly_joined = room_id in newly_joined_rooms if room_entry: - events, start_key = room_entry + events, start_key, _ = room_entry # We want to return the events in ascending order (the last event is the # most recent). events.reverse() diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 68d41686219..459436e304e 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -108,7 +108,7 @@ async def __call__( to_key: Optional[RoomStreamToken] = None, direction: Direction = Direction.BACKWARDS, limit: int = 0, - ) -> Tuple[List[EventBase], RoomStreamToken]: ... + ) -> Tuple[List[EventBase], RoomStreamToken, bool]: ... # Used as return values for pagination APIs @@ -679,7 +679,7 @@ async def get_room_events_stream_for_rooms( to_key: Optional[RoomStreamToken] = None, direction: Direction = Direction.BACKWARDS, limit: int = 0, - ) -> Dict[str, Tuple[List[EventBase], RoomStreamToken]]: + ) -> Dict[str, Tuple[List[EventBase], RoomStreamToken, bool]]: """Get new room events in stream ordering since `from_key`. Args: @@ -695,6 +695,7 @@ async def get_room_events_stream_for_rooms( A map from room id to a tuple containing: - list of recent events in the room - stream ordering key for the start of the chunk of events returned. + - a boolean to indicate if there were more events but we hit the limit When Direction.FORWARDS: from_key < x <= to_key, (ascending order) When Direction.BACKWARDS: from_key >= x > to_key, (descending order) @@ -758,7 +759,7 @@ async def paginate_room_events_by_stream_ordering( to_key: Optional[RoomStreamToken] = None, direction: Direction = Direction.BACKWARDS, limit: int = 0, - ) -> Tuple[List[EventBase], RoomStreamToken]: + ) -> Tuple[List[EventBase], RoomStreamToken, bool]: """ Paginate events by `stream_ordering` in the room from the `from_key` in the given `direction` to the `to_key` or `limit`. @@ -773,8 +774,9 @@ async def paginate_room_events_by_stream_ordering( limit: Maximum number of events to return Returns: - The results as a list of events and a token that points to the end - of the result set. If no events are returned then the end of the + The results as a list of events, a token that points to the end of + the result set, and a boolean to indicate if there were more events + but we hit the limit. If no events are returned then the end of the stream has been reached (i.e. there are no events between `from_key` and `to_key`). @@ -798,7 +800,7 @@ async def paginate_room_events_by_stream_ordering( and to_key.is_before_or_eq(from_key) ): # Token selection matches what we do below if there are no rows - return [], to_key if to_key else from_key + return [], to_key if to_key else from_key, False # Or vice-versa, if we're looking backwards and our `from_key` is already before # our `to_key`. elif ( @@ -807,7 +809,7 @@ async def paginate_room_events_by_stream_ordering( and from_key.is_before_or_eq(to_key) ): # Token selection matches what we do below if there are no rows - return [], to_key if to_key else from_key + return [], to_key if to_key else from_key, False # We can do a quick sanity check to see if any events have been sent in the room # since the earlier token. @@ -826,7 +828,7 @@ async def paginate_room_events_by_stream_ordering( if not has_changed: # Token selection matches what we do below if there are no rows - return [], to_key if to_key else from_key + return [], to_key if to_key else from_key, False order, from_bound, to_bound = generate_pagination_bounds( direction, from_key, to_key @@ -842,7 +844,7 @@ async def paginate_room_events_by_stream_ordering( engine=self.database_engine, ) - def f(txn: LoggingTransaction) -> List[_EventDictReturn]: + def f(txn: LoggingTransaction) -> Tuple[List[_EventDictReturn], bool]: sql = f""" SELECT event_id, instance_name, stream_ordering FROM events @@ -854,9 +856,13 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: """ txn.execute(sql, (room_id, 2 * limit)) + # Get all the rows and check if we hit the limit. + fetched_rows = txn.fetchall() + limited = len(fetched_rows) >= 2 * limit + rows = [ _EventDictReturn(event_id, None, stream_ordering) - for event_id, instance_name, stream_ordering in txn + for event_id, instance_name, stream_ordering in fetched_rows if _filter_results_by_stream( lower_token=( to_key if direction == Direction.BACKWARDS else from_key @@ -867,10 +873,17 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: instance_name=instance_name, stream_ordering=stream_ordering, ) - ][:limit] - return rows + ] + + if len(rows) > limit: + limited = True - rows = await self.db_pool.runInteraction("get_room_events_stream_for_room", f) + rows = rows[:limit] + return rows, limited + + rows, limited = await self.db_pool.runInteraction( + "get_room_events_stream_for_room", f + ) ret = await self.get_events_as_list( [r.event_id for r in rows], get_prev_content=True @@ -887,7 +900,7 @@ def f(txn: LoggingTransaction) -> List[_EventDictReturn]: # `_paginate_room_events_by_topological_ordering_txn(...)`) next_key = to_key if to_key else from_key - return ret, next_key + return ret, next_key, limited @trace async def get_current_state_delta_membership_changes_for_user( @@ -1191,7 +1204,7 @@ async def get_recent_event_ids_for_room( if limit == 0: return [], end_token - rows, token = await self.db_pool.runInteraction( + rows, token, _ = await self.db_pool.runInteraction( "get_recent_event_ids_for_room", self._paginate_room_events_by_topological_ordering_txn, room_id, @@ -1765,7 +1778,7 @@ def _get_events_around_txn( topological=topological_ordering, stream=stream_ordering ) - rows, start_token = self._paginate_room_events_by_topological_ordering_txn( + rows, start_token, _ = self._paginate_room_events_by_topological_ordering_txn( txn, room_id, before_token, @@ -1775,7 +1788,7 @@ def _get_events_around_txn( ) events_before = [r.event_id for r in rows] - rows, end_token = self._paginate_room_events_by_topological_ordering_txn( + rows, end_token, _ = self._paginate_room_events_by_topological_ordering_txn( txn, room_id, after_token, @@ -1947,7 +1960,7 @@ def _paginate_room_events_by_topological_ordering_txn( direction: Direction = Direction.BACKWARDS, limit: int = 0, event_filter: Optional[Filter] = None, - ) -> Tuple[List[_EventDictReturn], RoomStreamToken]: + ) -> Tuple[List[_EventDictReturn], RoomStreamToken, bool]: """Returns list of events before or after a given token. Args: @@ -1962,10 +1975,11 @@ def _paginate_room_events_by_topological_ordering_txn( those that match the filter. Returns: - A list of _EventDictReturn and a token that points to the end of the - result set. If no events are returned then the end of the stream has - been reached (i.e. there are no events between `from_token` and - `to_token`), or `limit` is zero. + A list of _EventDictReturn, a token that points to the end of the + result set, and a boolean to indicate if there were more events but + we hit the limit. If no events are returned then the end of the + stream has been reached (i.e. there are no events between + `from_token` and `to_token`), or `limit` is zero. """ # We can bail early if we're looking forwards, and our `to_key` is already # before our `from_token`. @@ -1975,7 +1989,7 @@ def _paginate_room_events_by_topological_ordering_txn( and to_token.is_before_or_eq(from_token) ): # Token selection matches what we do below if there are no rows - return [], to_token if to_token else from_token + return [], to_token if to_token else from_token, False # Or vice-versa, if we're looking backwards and our `from_token` is already before # our `to_token`. elif ( @@ -1984,7 +1998,7 @@ def _paginate_room_events_by_topological_ordering_txn( and from_token.is_before_or_eq(to_token) ): # Token selection matches what we do below if there are no rows - return [], to_token if to_token else from_token + return [], to_token if to_token else from_token, False args: List[Any] = [room_id] @@ -2007,6 +2021,7 @@ def _paginate_room_events_by_topological_ordering_txn( args.extend(filter_args) # We fetch more events as we'll filter the result set + requested_limit = int(limit) * 2 args.append(int(limit) * 2) select_keywords = "SELECT" @@ -2071,10 +2086,14 @@ def _paginate_room_events_by_topological_ordering_txn( } txn.execute(sql, args) + # Get all the rows and check if we hit the limit. + fetched_rows = txn.fetchall() + limited = len(fetched_rows) >= requested_limit + # Filter the result set. rows = [ _EventDictReturn(event_id, topological_ordering, stream_ordering) - for event_id, instance_name, topological_ordering, stream_ordering in txn + for event_id, instance_name, topological_ordering, stream_ordering in fetched_rows if _filter_results( lower_token=( to_token if direction == Direction.BACKWARDS else from_token @@ -2086,7 +2105,12 @@ def _paginate_room_events_by_topological_ordering_txn( topological_ordering=topological_ordering, stream_ordering=stream_ordering, ) - ][:limit] + ] + + if len(rows) > limit: + limited = True + + rows = rows[:limit] if rows: assert rows[-1].topological_ordering is not None @@ -2097,7 +2121,7 @@ def _paginate_room_events_by_topological_ordering_txn( # TODO (erikj): We should work out what to do here instead. next_token = to_token if to_token else from_token - return rows, next_token + return rows, next_token, limited @trace @tag_args @@ -2110,7 +2134,7 @@ async def paginate_room_events_by_topological_ordering( direction: Direction = Direction.BACKWARDS, limit: int = 0, event_filter: Optional[Filter] = None, - ) -> Tuple[List[EventBase], RoomStreamToken]: + ) -> Tuple[List[EventBase], RoomStreamToken, bool]: """ Paginate events by `topological_ordering` (tie-break with `stream_ordering`) in the room from the `from_key` in the given `direction` to the `to_key` or @@ -2127,8 +2151,9 @@ async def paginate_room_events_by_topological_ordering( event_filter: If provided filters the events to those that match the filter. Returns: - The results as a list of events and a token that points to the end - of the result set. If no events are returned then the end of the + The results as a list of events, a token that points to the end of + the result set, and a boolean to indicate if there were more events + but we hit the limit. If no events are returned then the end of the stream has been reached (i.e. there are no events between `from_key` and `to_key`). @@ -2152,7 +2177,7 @@ async def paginate_room_events_by_topological_ordering( ): # Token selection matches what we do in `_paginate_room_events_txn` if there # are no rows - return [], to_key if to_key else from_key + return [], to_key if to_key else from_key, False # Or vice-versa, if we're looking backwards and our `from_key` is already before # our `to_key`. elif ( @@ -2162,9 +2187,9 @@ async def paginate_room_events_by_topological_ordering( ): # Token selection matches what we do in `_paginate_room_events_txn` if there # are no rows - return [], to_key if to_key else from_key + return [], to_key if to_key else from_key, False - rows, token = await self.db_pool.runInteraction( + rows, token, limited = await self.db_pool.runInteraction( "paginate_room_events_by_topological_ordering", self._paginate_room_events_by_topological_ordering_txn, room_id, @@ -2179,7 +2204,7 @@ async def paginate_room_events_by_topological_ordering( [r.event_id for r in rows], get_prev_content=True ) - return events, token + return events, token, limited @cached() async def get_id_for_instance(self, instance_name: str) -> int: diff --git a/tests/rest/client/sliding_sync/test_rooms_timeline.py b/tests/rest/client/sliding_sync/test_rooms_timeline.py index 0e027ff39d6..22939947936 100644 --- a/tests/rest/client/sliding_sync/test_rooms_timeline.py +++ b/tests/rest/client/sliding_sync/test_rooms_timeline.py @@ -22,7 +22,7 @@ from synapse.api.constants import EventTypes from synapse.rest.client import login, room, sync from synapse.server import HomeServer -from synapse.types import StreamToken, StrSequence +from synapse.types import StrSequence from synapse.util import Clock from tests.rest.client.sliding_sync.test_sliding_sync import SlidingSyncBase @@ -149,16 +149,10 @@ def test_rooms_limited_initial_sync(self) -> None: user2_tok = self.login(user2_id, "pass") room_id1 = self.helper.create_room_as(user2_id, tok=user2_tok) - self.helper.send(room_id1, "activity1", tok=user2_tok) - self.helper.send(room_id1, "activity2", tok=user2_tok) + event_response1 = self.helper.send(room_id1, "activity1", tok=user2_tok) + event_response2 = self.helper.send(room_id1, "activity2", tok=user2_tok) event_response3 = self.helper.send(room_id1, "activity3", tok=user2_tok) - event_pos3 = self.get_success( - self.store.get_position_for_event(event_response3["event_id"]) - ) event_response4 = self.helper.send(room_id1, "activity4", tok=user2_tok) - event_pos4 = self.get_success( - self.store.get_position_for_event(event_response4["event_id"]) - ) event_response5 = self.helper.send(room_id1, "activity5", tok=user2_tok) user1_join_response = self.helper.join(room_id1, user1_id, tok=user1_tok) @@ -196,27 +190,23 @@ def test_rooms_limited_initial_sync(self) -> None: ) # Check to make sure the `prev_batch` points at the right place - prev_batch_token = self.get_success( - StreamToken.from_string( - self.store, response_body["rooms"][room_id1]["prev_batch"] - ) - ) - prev_batch_room_stream_token_serialized = self.get_success( - prev_batch_token.room_key.to_string(self.store) - ) - # If we use the `prev_batch` token to look backwards, we should see `event3` - # next so make sure the token encompasses it - self.assertEqual( - event_pos3.persisted_after(prev_batch_token.room_key), - False, - f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be >= event_pos3={self.get_success(event_pos3.to_room_stream_token().to_string(self.store))}", - ) - # If we use the `prev_batch` token to look backwards, we shouldn't see `event4` - # anymore since it was just returned in this response. - self.assertEqual( - event_pos4.persisted_after(prev_batch_token.room_key), - True, - f"`prev_batch` token {prev_batch_room_stream_token_serialized} should be < event_pos4={self.get_success(event_pos4.to_room_stream_token().to_string(self.store))}", + prev_batch_token = response_body["rooms"][room_id1]["prev_batch"] + + # If we use the `prev_batch` token to look backwards we should see + # `event3` and older next. + channel = self.make_request( + "GET", + f"/rooms/{room_id1}/messages?from={prev_batch_token}&dir=b&limit=3", + access_token=user1_tok, + ) + self.assertEqual(channel.code, 200, channel.json_body) + self.assertListEqual( + [ + event_response3["event_id"], + event_response2["event_id"], + event_response1["event_id"], + ], + [ev["event_id"] for ev in channel.json_body["chunk"]], ) # With no `from_token` (initial sync), it's all historical since there is no diff --git a/tests/storage/test_stream.py b/tests/storage/test_stream.py index 7b7590da76a..837eb434aa4 100644 --- a/tests/storage/test_stream.py +++ b/tests/storage/test_stream.py @@ -147,7 +147,7 @@ def prepare( def _filter_messages(self, filter: JsonDict) -> List[str]: """Make a request to /messages with a filter, returns the chunk of events.""" - events, next_key = self.get_success( + events, next_key, _ = self.get_success( self.hs.get_datastores().main.paginate_room_events_by_topological_ordering( room_id=self.room_id, from_key=self.from_token.room_key,