Skip to content
Snippets Groups Projects
Unverified Commit 49d72dea authored by Dirk Klimpel's avatar Dirk Klimpel Committed by GitHub
Browse files

Add an admin api to delete local media. (#8519)

Related to: #6459, #3479

Add `DELETE /_synapse/admin/v1/media/<server_name>/<media_id>` to delete
a single file from server.
parent f6a3859a
No related branches found
No related tags found
No related merge requests found
Add an admin API to delete a single file, or files that have not been used for a defined time, from the server. Contributed by @dklimpel.
\ No newline at end of file
......@@ -100,3 +100,82 @@ Response:
"num_quarantined": 10 # The number of media items successfully quarantined
}
```
# Delete local media
This API deletes the *local* media from the disk of your own server.
This includes any local thumbnails and copies of media downloaded from
remote homeservers.
This API will not affect media that has been uploaded to external
media repositories (e.g https://github.com/turt2live/matrix-media-repo/).
See also [purge_remote_media.rst](purge_remote_media.rst).
## Delete a specific local media
Delete a specific `media_id`.
Request:
```
DELETE /_synapse/admin/v1/media/<server_name>/<media_id>
{}
```
URL Parameters
* `server_name`: string - The name of your local server (e.g `matrix.org`)
* `media_id`: string - The ID of the media (e.g `abcdefghijklmnopqrstuvwx`)
Response:
```json
{
"deleted_media": [
"abcdefghijklmnopqrstuvwx"
],
"total": 1
}
```
The following fields are returned in the JSON response body:
* `deleted_media`: an array of strings - List of deleted `media_id`
* `total`: integer - Total number of deleted `media_id`
## Delete local media by date or size
Request:
```
POST /_synapse/admin/v1/media/<server_name>/delete?before_ts=<before_ts>
{}
```
URL Parameters
* `server_name`: string - The name of your local server (e.g `matrix.org`).
* `before_ts`: string representing a positive integer - Unix timestamp in ms.
Files that were last used before this timestamp will be deleted. It is the timestamp of
last access and not the timestamp of creation. Files that have never been accessed are
matched on their creation timestamp instead.
* `size_gt`: Optional - string representing a positive integer - Size of the media in bytes.
Files that are larger will be deleted. Defaults to `0`.
* `keep_profiles`: Optional - string representing a boolean - Whether to keep files
that are still used in image data (e.g. user profile, room avatar).
If `false`, these files will be deleted as well. Defaults to `true`.
Response:
```json
{
"deleted_media": [
"abcdefghijklmnopqrstuvwx",
"abcdefghijklmnopqrstuvwz"
],
"total": 2
}
```
The following fields are returned in the JSON response body:
* `deleted_media`: an array of strings - List of deleted `media_id`
* `total`: integer - Total number of deleted `media_id`
......@@ -16,9 +16,10 @@
import logging
from synapse.api.errors import AuthError
from synapse.http.servlet import RestServlet, parse_integer
from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError
from synapse.http.servlet import RestServlet, parse_boolean, parse_integer
from synapse.rest.admin._base import (
admin_patterns,
assert_requester_is_admin,
assert_user_is_admin,
historical_admin_path_patterns,
......@@ -150,6 +151,80 @@ class PurgeMediaCacheRestServlet(RestServlet):
return 200, ret
class DeleteMediaByID(RestServlet):
    """Delete local media by a given ID. Removes it from this server.

    Serves the admin endpoint documented as
    ``DELETE /_synapse/admin/v1/media/<server_name>/<media_id>``.
    """

    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/(?P<media_id>[^/]+)")

    def __init__(self, hs):
        self.store = hs.get_datastore()
        self.auth = hs.get_auth()
        self.server_name = hs.hostname
        self.media_repository = hs.get_media_repository()

    async def on_DELETE(self, request, server_name: str, media_id: str):
        """Delete one piece of local media on behalf of a server admin.

        Args:
            request: The incoming HTTP request; used to check that the
                requester is a server admin.
            server_name: The server name taken from the URL. Must equal this
                homeserver's hostname.
            media_id: The ID of the media to delete.

        Returns:
            A tuple of (200, JSON body) where the body has the keys
            ``deleted_media`` (list of deleted media IDs) and ``total``.

        Raises:
            SynapseError: (400) if ``server_name`` is not this server.
            NotFoundError: if no local media with ``media_id`` is known.
        """
        await assert_requester_is_admin(self.auth, request)

        if self.server_name != server_name:
            raise SynapseError(400, "Can only delete local media")

        if await self.store.get_local_media(media_id) is None:
            raise NotFoundError("Unknown media")

        # Log via a logger named after this module rather than the root
        # logger, so that per-module log levels and filters apply.
        logging.getLogger(__name__).info(
            "Deleting local media by ID: %s", media_id
        )

        deleted_media, total = await self.media_repository.delete_local_media(media_id)
        return 200, {"deleted_media": deleted_media, "total": total}
class DeleteMediaByDateSize(RestServlet):
    """Delete local media and local copies of remote media by
    timestamp and size.

    Serves the admin endpoint documented as
    ``POST /_synapse/admin/v1/media/<server_name>/delete?before_ts=<before_ts>``.
    """

    PATTERNS = admin_patterns("/media/(?P<server_name>[^/]+)/delete")

    def __init__(self, hs):
        self.store = hs.get_datastore()
        self.auth = hs.get_auth()
        self.server_name = hs.hostname
        self.media_repository = hs.get_media_repository()

    async def on_POST(self, request, server_name: str):
        """Delete old and/or large local media on behalf of a server admin.

        Query parameters read from ``request``:
            before_ts: required integer, must not be negative.
            size_gt: optional integer, defaults to 0, must not be negative.
            keep_profiles: optional boolean, defaults to True.

        Args:
            request: The incoming HTTP request.
            server_name: The server name taken from the URL. Must equal this
                homeserver's hostname.

        Returns:
            A tuple of (200, JSON body) where the body has the keys
            ``deleted_media`` (list of deleted media IDs) and ``total``.

        Raises:
            SynapseError: (400) if ``before_ts`` or ``size_gt`` is negative,
                or if ``server_name`` is not this server.
        """
        await assert_requester_is_admin(self.auth, request)

        before_ts = parse_integer(request, "before_ts", required=True)
        size_gt = parse_integer(request, "size_gt", default=0)
        keep_profiles = parse_boolean(request, "keep_profiles", default=True)

        if before_ts < 0:
            raise SynapseError(
                400,
                "Query parameter before_ts must be a string representing a positive integer.",
                errcode=Codes.INVALID_PARAM,
            )
        if size_gt < 0:
            raise SynapseError(
                400,
                "Query parameter size_gt must be a string representing a positive integer.",
                errcode=Codes.INVALID_PARAM,
            )

        if self.server_name != server_name:
            raise SynapseError(400, "Can only delete local media")

        # Use a module-named logger (not the root logger) and hand the values
        # over as arguments so the message is only formatted if it is emitted.
        logging.getLogger(__name__).info(
            "Deleting local media by timestamp: %s, size larger than: %s, keep profile media: %s",
            before_ts,
            size_gt,
            keep_profiles,
        )

        deleted_media, total = await self.media_repository.delete_old_local_media(
            before_ts, size_gt, keep_profiles
        )
        return 200, {"deleted_media": deleted_media, "total": total}
def register_servlets_for_media_repo(hs, http_server):
"""
Media repo specific APIs.
......@@ -159,3 +234,5 @@ def register_servlets_for_media_repo(hs, http_server):
QuarantineMediaByID(hs).register(http_server)
QuarantineMediaByUser(hs).register(http_server)
ListMediaInRoom(hs).register(http_server)
DeleteMediaByID(hs).register(http_server)
DeleteMediaByDateSize(hs).register(http_server)
......@@ -69,6 +69,23 @@ class MediaFilePaths:
local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel)
def local_media_thumbnail_dir(self, media_id: str) -> str:
    """
    Build the directory path that holds the thumbnails of a local media item.

    The media ID is split into three path components: its first two
    characters, its next two characters, and the remainder.

    Args:
        media_id: The media ID to query.

    Returns:
        The path ``<base_path>/local_thumbnails/<id[0:2]>/<id[2:4]>/<id[4:]>``,
        assembled with ``os.path.join``.
    """
    first_shard, second_shard, remainder = media_id[:2], media_id[2:4], media_id[4:]
    return os.path.join(
        self.base_path, "local_thumbnails", first_shard, second_shard, remainder
    )
def remote_media_filepath_rel(self, server_name, file_id):
return os.path.join(
"remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:]
......
......@@ -18,7 +18,7 @@ import errno
import logging
import os
import shutil
from typing import IO, Dict, Optional, Tuple
from typing import IO, Dict, List, Optional, Tuple
import twisted.internet.error
import twisted.web.http
......@@ -767,6 +767,76 @@ class MediaRepository:
return {"deleted": deleted}
async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]:
    """
    Delete a single media item, identified by its ID, from this server.

    Wraps the ID in a one-element list and hands it to
    ``_remove_local_media_from_disk``.

    Args:
        media_id: The media ID to delete.

    Returns:
        A tuple of (list of deleted media IDs, total deleted media IDs).
    """
    targets = [media_id]
    outcome = await self._remove_local_media_from_disk(targets)
    return outcome
async def delete_old_local_media(
    self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True,
) -> Tuple[List[str], int]:
    """
    Delete media from this server, selected by last-use timestamp and size.

    The matching media IDs are looked up in the store and then passed to
    ``_remove_local_media_from_disk``, which removes the media files, any
    thumbnails and cached URLs.

    Args:
        before_ts: Unix timestamp in ms.
                   Files that were last used before this timestamp will be deleted
        size_gt: Size of the media in bytes. Files that are larger will be deleted
        keep_profiles: Switch to delete also files that are still used in image data
                       (e.g user profile, room avatar)
                       If false these files will be deleted

    Returns:
        A tuple of (list of deleted media IDs, total deleted media IDs).
    """
    stale_media_ids = await self.store.get_local_media_before(
        before_ts, size_gt, keep_profiles
    )
    outcome = await self._remove_local_media_from_disk(stale_media_ids)
    return outcome
async def _remove_local_media_from_disk(
    self, media_ids: List[str]
) -> Tuple[List[str], int]:
    """
    Remove the given media from this server: the media file itself, its
    thumbnail directory, and the associated store and URL-cache entries.

    Args:
        media_ids: List of media_id to delete

    Returns:
        A tuple of (list of deleted media IDs, total deleted media IDs).
        A media ID is left out of the result when removing its file failed
        for any reason other than the file not existing.
    """
    deleted_ids = []

    for media_id in media_ids:
        logger.info("Deleting media with ID '%s'", media_id)

        media_path = self.filepaths.local_media_filepath(media_id)
        try:
            os.remove(media_path)
        except OSError as err:
            logger.warning("Failed to remove file: %r: %s", media_path, err)
            # A file that is already gone does not stop the clean-up below;
            # any other failure means this media item is skipped entirely.
            if err.errno != errno.ENOENT:
                continue

        thumbnails_path = self.filepaths.local_media_thumbnail_dir(media_id)
        shutil.rmtree(thumbnails_path, ignore_errors=True)

        await self.store.delete_remote_media(self.server_name, media_id)

        id_tuple = (media_id,)
        await self.store.delete_url_cache(id_tuple)
        await self.store.delete_url_cache_media(id_tuple)

        deleted_ids.append(media_id)

    return deleted_ids, len(deleted_ids)
class MediaRepositoryResource(Resource):
"""File uploading and downloading.
......
......@@ -93,6 +93,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
def __init__(self, database: DatabasePool, db_conn, hs):
    """Set up the store and remember this homeserver's hostname."""
    super().__init__(database, db_conn, hs)

    # Kept so queries can build this server's "mxc://<server_name>/" prefix.
    self.server_name = hs.hostname
async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]:
"""Get the metadata for a local piece of media
......@@ -115,6 +116,58 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore):
desc="get_local_media",
)
async def get_local_media_before(
    self, before_ts: int, size_gt: int, keep_profiles: bool,
) -> List[str]:
    """Find the IDs of local media selected by last-access time and size.

    Args:
        before_ts: Timestamp to compare against. Media whose
            ``last_access_ts`` is lower is selected; media that has never
            been accessed (``last_access_ts IS NULL``) is compared on
            ``created_ts`` instead.
        size_gt: Only media whose ``media_length`` is strictly greater than
            this value is selected.
        keep_profiles: If true, media whose ``mxc://<server_name>/<media_id>``
            URI is still stored as an avatar in ``profiles``, ``groups``,
            ``room_memberships``, ``user_directory`` or ``room_stats_state``
            is excluded from the result.

    Returns:
        The matching media IDs. The list may be empty but is never None.
    """

    # to find files that have never been accessed (last_access_ts IS NULL)
    # compare with `created_ts`
    sql = """
        SELECT media_id
        FROM local_media_repository AS lmr
        WHERE
            ( last_access_ts < ?
            OR ( created_ts < ? AND last_access_ts IS NULL ) )
            AND media_length > ?
    """

    if keep_profiles:
        # Exclude anything still referenced as an avatar. The prefix is built
        # from this server's own name, not from request input.
        sql_keep = """
            AND (
                NOT EXISTS
                    (SELECT 1
                     FROM profiles
                     WHERE profiles.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM groups
                     WHERE groups.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM room_memberships
                     WHERE room_memberships.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM user_directory
                     WHERE user_directory.avatar_url = '{media_prefix}' || lmr.media_id)
                AND NOT EXISTS
                    (SELECT 1
                     FROM room_stats_state
                     WHERE room_stats_state.avatar = '{media_prefix}' || lmr.media_id)
            )
        """.format(
            media_prefix="mxc://%s/" % (self.server_name,),
        )
        sql += sql_keep

    def _get_local_media_before_txn(txn):
        # `before_ts` is bound twice: once for last_access_ts, once for created_ts.
        txn.execute(sql, (before_ts, before_ts, size_gt))
        return [row[0] for row in txn]

    return await self.db_pool.runInteraction(
        "get_local_media_before", _get_local_media_before_txn
    )
async def store_local_media(
self,
media_id,
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment