diff --git a/src/service/globals/migrations.rs b/src/service/globals/migrations.rs index 3e81340e528efb070e92fb94cb732625493a38f6..39d829b4133bcb2c8e7c381b3c7eda717b85a489 100644 --- a/src/service/globals/migrations.rs +++ b/src/service/globals/migrations.rs @@ -1,15 +1,12 @@ use std::{ collections::{HashMap, HashSet}, - ffi::{OsStr, OsString}, fs::{self}, io::Write, mem::size_of, - path::PathBuf, sync::Arc, - time::Instant, }; -use conduit::{debug, debug_info, debug_warn, error, info, utils, warn, Config, Error, Result}; +use conduit::{debug, debug_info, debug_warn, error, info, utils, warn, Error, Result}; use itertools::Itertools; use ruma::{ events::{push_rules::PushRulesEvent, room::member::MembershipState, GlobalAccountDataEventType}, @@ -17,7 +14,7 @@ EventId, OwnedRoomId, RoomId, UserId, }; -use crate::Services; +use crate::{media, Services}; /// The current schema version. /// - If database is opened at greater version we reject with error. The @@ -25,7 +22,7 @@ /// - If database is opened at lesser version we apply migrations up to this. /// Note that named-feature migrations may also be performed when opening at /// equal or lesser version. These are expected to be backward-compatible. -const DATABASE_VERSION: u64 = 13; +pub(crate) const DATABASE_VERSION: u64 = 13; pub(crate) async fn migrations(services: &Services) -> Result<()> { // Matrix resource ownership is based on the server name; changing it @@ -131,9 +128,9 @@ async fn migrate(services: &Services) -> Result<()> { } if db["global"].get(b"feat_sha256_media")?.is_none() { - migrate_sha256_media(services).await?; + media::migrations::migrate_sha256_media(services).await?; } else if config.media_startup_check { - checkup_sha256_media(services).await?; + media::migrations::checkup_sha256_media(services).await?; } if db["global"] @@ -738,152 +735,6 @@ async fn db_lt_13(services: &Services) -> Result<()> { Ok(()) } -/// Migrates a media directory from legacy base64 file names to sha2 file names. -/// All errors are fatal. Upon success the database is keyed to not perform this -/// again. -async fn migrate_sha256_media(services: &Services) -> Result<()> { - let db = &services.db; - let config = &services.server.config; - - warn!("Migrating legacy base64 file names to sha256 file names"); - let mediaid_file = &db["mediaid_file"]; - - // Move old media files to new names - let mut changes = Vec::<(PathBuf, PathBuf)>::new(); - for (key, _) in mediaid_file.iter() { - let old = services.media.get_media_file_b64(&key); - let new = services.media.get_media_file_sha256(&key); - debug!(?key, ?old, ?new, num = changes.len(), "change"); - changes.push((old, new)); - } - // move the file to the new location - for (old_path, path) in changes { - if old_path.exists() { - tokio::fs::rename(&old_path, &path).await?; - if config.media_compat_file_link { - tokio::fs::symlink(&path, &old_path).await?; - } - } - } - - // Apply fix from when sha256_media was backward-incompat and bumped the schema - // version from 13 to 14. For users satisfying these conditions we can go back. - if services.globals.db.database_version()? == 14 && DATABASE_VERSION == 13 { - services.globals.db.bump_database_version(13)?; - } - - db["global"].insert(b"feat_sha256_media", &[])?; - info!("Finished applying sha256_media"); - Ok(()) -} - -/// Check is run on startup for prior-migrated media directories. This handles: -/// - Going back and forth to non-sha256 legacy binaries (e.g. upstream). -/// - Deletion of artifacts in the media directory which will then fall out of -/// sync with the database. -async fn checkup_sha256_media(services: &Services) -> Result<()> { - use crate::media::encode_key; - - debug!("Checking integrity of media directory"); - let db = &services.db; - let media = &services.media; - let config = &services.server.config; - let mediaid_file = &db["mediaid_file"]; - let mediaid_user = &db["mediaid_user"]; - let dbs = (mediaid_file, mediaid_user); - let timer = Instant::now(); - - let dir = media.get_media_dir(); - let files: HashSet<OsString> = fs::read_dir(dir)? - .filter_map(|ent| ent.map_or(None, |ent| Some(ent.path().into_os_string()))) - .collect(); - - for key in media.db.get_all_media_keys() { - let new_path = media.get_media_file_sha256(&key).into_os_string(); - let old_path = media.get_media_file_b64(&key).into_os_string(); - if let Err(e) = handle_media_check(&dbs, config, &files, &key, &new_path, &old_path).await { - error!( - media_id = ?encode_key(&key), ?new_path, ?old_path, - "Failed to resolve media check failure: {e}" - ); - } - } - - debug_info!( - elapsed = ?timer.elapsed(), - "Finished checking media directory" - ); - - Ok(()) -} - -async fn handle_media_check( - dbs: &(&Arc<database::Map>, &Arc<database::Map>), config: &Config, files: &HashSet<OsString>, key: &[u8], - new_path: &OsStr, old_path: &OsStr, -) -> Result<()> { - use crate::media::encode_key; - - let (mediaid_file, mediaid_user) = dbs; - - let new_exists = files.contains(new_path); - let old_exists = files.contains(old_path); - let old_is_symlink = || async { - tokio::fs::symlink_metadata(old_path) - .await - .map_or(false, |md| md.is_symlink()) - }; - - if config.prune_missing_media && !old_exists && !new_exists { - error!( - media_id = ?encode_key(key), ?new_path, ?old_path, - "Media is missing at all paths. Removing from database..." - ); - - mediaid_file.remove(key)?; - mediaid_user.remove(key)?; - } - - if config.media_compat_file_link && !old_exists && new_exists { - debug_warn!( - media_id = ?encode_key(key), ?new_path, ?old_path, - "Media found but missing legacy link. Fixing..." - ); - - tokio::fs::symlink(&new_path, &old_path).await?; - } - - if config.media_compat_file_link && !new_exists && old_exists { - debug_warn!( - media_id = ?encode_key(key), ?new_path, ?old_path, - "Legacy media found without sha256 migration. Fixing..." - ); - - debug_assert!( - old_is_symlink().await, - "Legacy media not expected to be a symlink without an existing sha256 migration." - ); - - tokio::fs::rename(&old_path, &new_path).await?; - tokio::fs::symlink(&new_path, &old_path).await?; - } - - if !config.media_compat_file_link && old_exists && old_is_symlink().await { - debug_warn!( - media_id = ?encode_key(key), ?new_path, ?old_path, - "Legacy link found but compat disabled. Cleansing symlink..." - ); - - debug_assert!( - new_exists, - "sha256 migration into new file expected prior to cleaning legacy symlink here." - ); - - tokio::fs::remove_file(&old_path).await?; - } - - Ok(()) -} - async fn fix_bad_double_separator_in_state_cache(services: &Services) -> Result<()> { warn!("Fixing bad double separator in state_cache roomuserid_joined"); diff --git a/src/service/media/migrations.rs b/src/service/media/migrations.rs new file mode 100644 index 0000000000000000000000000000000000000000..9968d25b78be2d9bb863478486b84d1767a2353a --- /dev/null +++ b/src/service/media/migrations.rs @@ -0,0 +1,158 @@ +use std::{ + collections::HashSet, + ffi::{OsStr, OsString}, + fs::{self}, + path::PathBuf, + sync::Arc, + time::Instant, +}; + +use conduit::{debug, debug_info, debug_warn, error, info, warn, Config, Result}; + +use crate::{globals, Services}; + +/// Migrates a media directory from legacy base64 file names to sha2 file names. +/// All errors are fatal. Upon success the database is keyed to not perform this +/// again. +pub(crate) async fn migrate_sha256_media(services: &Services) -> Result<()> { + let db = &services.db; + let config = &services.server.config; + + warn!("Migrating legacy base64 file names to sha256 file names"); + let mediaid_file = &db["mediaid_file"]; + + // Move old media files to new names + let mut changes = Vec::<(PathBuf, PathBuf)>::new(); + for (key, _) in mediaid_file.iter() { + let old = services.media.get_media_file_b64(&key); + let new = services.media.get_media_file_sha256(&key); + debug!(?key, ?old, ?new, num = changes.len(), "change"); + changes.push((old, new)); + } + // move the file to the new location + for (old_path, path) in changes { + if old_path.exists() { + tokio::fs::rename(&old_path, &path).await?; + if config.media_compat_file_link { + tokio::fs::symlink(&path, &old_path).await?; + } + } + } + + // Apply fix from when sha256_media was backward-incompat and bumped the schema + // version from 13 to 14. For users satisfying these conditions we can go back. + if services.globals.db.database_version()? == 14 && globals::migrations::DATABASE_VERSION == 13 { + services.globals.db.bump_database_version(13)?; + } + + db["global"].insert(b"feat_sha256_media", &[])?; + info!("Finished applying sha256_media"); + Ok(()) +} + +/// Check is run on startup for prior-migrated media directories. This handles: +/// - Going back and forth to non-sha256 legacy binaries (e.g. upstream). +/// - Deletion of artifacts in the media directory which will then fall out of +/// sync with the database. +pub(crate) async fn checkup_sha256_media(services: &Services) -> Result<()> { + use crate::media::encode_key; + + debug!("Checking integrity of media directory"); + let db = &services.db; + let media = &services.media; + let config = &services.server.config; + let mediaid_file = &db["mediaid_file"]; + let mediaid_user = &db["mediaid_user"]; + let dbs = (mediaid_file, mediaid_user); + let timer = Instant::now(); + + let dir = media.get_media_dir(); + let files: HashSet<OsString> = fs::read_dir(dir)? + .filter_map(|ent| ent.map_or(None, |ent| Some(ent.path().into_os_string()))) + .collect(); + + for key in media.db.get_all_media_keys() { + let new_path = media.get_media_file_sha256(&key).into_os_string(); + let old_path = media.get_media_file_b64(&key).into_os_string(); + if let Err(e) = handle_media_check(&dbs, config, &files, &key, &new_path, &old_path).await { + error!( + media_id = ?encode_key(&key), ?new_path, ?old_path, + "Failed to resolve media check failure: {e}" + ); + } + } + + debug_info!( + elapsed = ?timer.elapsed(), + "Finished checking media directory" + ); + + Ok(()) +} + +async fn handle_media_check( + dbs: &(&Arc<database::Map>, &Arc<database::Map>), config: &Config, files: &HashSet<OsString>, key: &[u8], + new_path: &OsStr, old_path: &OsStr, +) -> Result<()> { + use crate::media::encode_key; + + let (mediaid_file, mediaid_user) = dbs; + + let new_exists = files.contains(new_path); + let old_exists = files.contains(old_path); + let old_is_symlink = || async { + tokio::fs::symlink_metadata(old_path) + .await + .map_or(false, |md| md.is_symlink()) + }; + + if config.prune_missing_media && !old_exists && !new_exists { + error!( + media_id = ?encode_key(key), ?new_path, ?old_path, + "Media is missing at all paths. Removing from database..." + ); + + mediaid_file.remove(key)?; + mediaid_user.remove(key)?; + } + + if config.media_compat_file_link && !old_exists && new_exists { + debug_warn!( + media_id = ?encode_key(key), ?new_path, ?old_path, + "Media found but missing legacy link. Fixing..." + ); + + tokio::fs::symlink(&new_path, &old_path).await?; + } + + if config.media_compat_file_link && !new_exists && old_exists { + debug_warn!( + media_id = ?encode_key(key), ?new_path, ?old_path, + "Legacy media found without sha256 migration. Fixing..." + ); + + debug_assert!( + old_is_symlink().await, + "Legacy media not expected to be a symlink without an existing sha256 migration." + ); + + tokio::fs::rename(&old_path, &new_path).await?; + tokio::fs::symlink(&new_path, &old_path).await?; + } + + if !config.media_compat_file_link && old_exists && old_is_symlink().await { + debug_warn!( + media_id = ?encode_key(key), ?new_path, ?old_path, + "Legacy link found but compat disabled. Cleansing symlink..." + ); + + debug_assert!( + new_exists, + "sha256 migration into new file expected prior to cleaning legacy symlink here." + ); + + tokio::fs::remove_file(&old_path).await?; + } + + Ok(()) +} diff --git a/src/service/media/mod.rs b/src/service/media/mod.rs index db0b8459bf99e5bf44f5bbc5b91c5df6ca35545a..04436f7c41e3ed546173a6482a578e3e679a7d51 100644 --- a/src/service/media/mod.rs +++ b/src/service/media/mod.rs @@ -1,4 +1,5 @@ mod data; +pub(super) mod migrations; mod preview; mod remote; mod tests;