From 244c1f71903cbbb3903f3d9b13a5772432355112 Mon Sep 17 00:00:00 2001
From: strawberry <strawberry@puppygock.gay>
Date: Sun, 11 Feb 2024 11:42:55 -0500
Subject: [PATCH] config option to check root domain with URL previews

Signed-off-by: strawberry <strawberry@puppygock.gay>
---
 conduwuit-example.toml         |  5 ++++
 debian/postinst                |  5 ++++
 src/api/client_server/media.rs | 52 ++++++++++++++++++++++++++--------
 src/config/mod.rs              |  6 ++++
 src/service/globals/mod.rs     |  4 +++
 5 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/conduwuit-example.toml b/conduwuit-example.toml
index 376c19d1e..2460b3161 100644
--- a/conduwuit-example.toml
+++ b/conduwuit-example.toml
@@ -183,6 +183,11 @@ url_preview_url_contains_allowlist = []
 # Maximum amount of bytes allowed in a URL preview body size when spidering. Defaults to 1MB (1_000_000 bytes)
 url_preview_max_spider_size = 1_000_000
 
+# Whether to also run the domain allowlist checks (explicit and contains) against the parent domain of the URL's host, i.e. the host with its leftmost label removed. Does not apply to the URL contains allowlist. Defaults to false.
+# Example: If this is enabled and you have "wikipedia.org" in the explicit and/or contains domain allowlist, a subdomain directly under it such as "en.wikipedia.org" is allowed, because its parent domain "wikipedia.org" is checked and matched. Only one label is removed, so a deeper host such as "en.m.wikipedia.org" is checked as "m.wikipedia.org" and does not match "wikipedia.org".
+# Useful if the domain contains allowlist is still too broad for you but you still want to allow the subdomains directly under an allowed domain.
+url_preview_check_root_domain = false
+
 
 
 ### Misc
diff --git a/debian/postinst b/debian/postinst
index ea52b0ca7..cceff8e67 100644
--- a/debian/postinst
+++ b/debian/postinst
@@ -220,6 +220,11 @@ url_preview_url_contains_allowlist = []
 # Maximum amount of bytes allowed in a URL preview body size when spidering. Defaults to 1MB (1_000_000 bytes)
 url_preview_max_spider_size = 1_000_000
 
+# Whether to also run the domain allowlist checks (explicit and contains) against the parent domain of the URL's host, i.e. the host with its leftmost label removed. Does not apply to the URL contains allowlist. Defaults to false.
+# Example: If this is enabled and you have "wikipedia.org" in the explicit and/or contains domain allowlist, a subdomain directly under it such as "en.wikipedia.org" is allowed, because its parent domain "wikipedia.org" is checked and matched. Only one label is removed, so a deeper host such as "en.m.wikipedia.org" is checked as "m.wikipedia.org" and does not match "wikipedia.org".
+# Useful if the domain contains allowlist is still too broad for you but you still want to allow the subdomains directly under an allowed domain.
+url_preview_check_root_domain = false
+
 
 
 ### Misc
diff --git a/src/api/client_server/media.rs b/src/api/client_server/media.rs
index 3e42e6727..d8e5e7fb5 100644
--- a/src/api/client_server/media.rs
+++ b/src/api/client_server/media.rs
@@ -541,34 +541,62 @@ fn url_preview_allowed(url_str: &str) -> bool {
 
     if !host.is_empty() {
         if allowlist_domain_explicit.contains(&host) {
+            debug!(
+                "Host {} is allowed by url_preview_domain_explicit_allowlist (check 1/3)",
+                &host
+            );
             return true;
         }
-        debug!(
-            "Host {} is allowed by url_preview_domain_explicit_allowlist (check 1/3)",
-            &host
-        );
 
         if allowlist_domain_contains
             .iter()
             .any(|domain_s| domain_s.contains(&host.clone()))
         {
+            debug!(
+                "Host {} is allowed by url_preview_domain_contains_allowlist (check 2/3)",
+                &host
+            );
             return true;
         }
-        debug!(
-            "Host {} is allowed by url_preview_domain_contains_allowlist (check 2/3)",
-            &host
-        );
 
         if allowlist_url_contains
             .iter()
             .any(|url_s| url.to_string().contains(&url_s.to_string()))
         {
+            debug!(
+                "URL {} is allowed by url_preview_url_contains_allowlist (check 3/3)",
+                &host
+            );
             return true;
         }
-        debug!(
-            "URL {} is allowed by url_preview_url_contains_allowlist (check 3/3)",
-            &host
-        );
+
+        // check root domain if available and if user has root domain checks
+        if services().globals.url_preview_check_root_domain() {
+            debug!("Checking root domain");
+            match host.split_once('.') {
+                None => return false,
+                Some((_, root_domain)) => {
+                    if allowlist_domain_explicit.contains(&root_domain.to_owned()) {
+                        debug!(
+                        "Root domain {} is allowed by url_preview_domain_explicit_allowlist (check 1/3)",
+                        &root_domain
+                    );
+                        return true;
+                    }
+
+                    if allowlist_domain_contains
+                        .iter()
+                        .any(|domain_s| domain_s.contains(&root_domain.to_owned()))
+                    {
+                        debug!(
+                    "Root domain {} is allowed by url_preview_domain_contains_allowlist (check 2/3)",
+                    &root_domain
+                );
+                        return true;
+                    }
+                }
+            }
+        }
     }
 
     false
diff --git a/src/config/mod.rs b/src/config/mod.rs
index 6f6e24e04..39aa21786 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -142,6 +142,8 @@ pub struct Config {
     pub url_preview_url_contains_allowlist: Vec<String>,
     #[serde(default = "default_url_preview_max_spider_size")]
     pub url_preview_max_spider_size: usize,
+    #[serde(default)]
+    pub url_preview_check_root_domain: bool,
 
     #[serde(default = "RegexSet::empty")]
     #[serde(with = "serde_regex")]
@@ -374,6 +376,10 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                 "URL preview maximum spider size",
                 &self.url_preview_max_spider_size.to_string(),
             ),
+            (
+                "URL preview check root domain",
+                &self.url_preview_check_root_domain.to_string(),
+            ),
         ];
 
         let mut msg: String = "Active config values:\n\n".to_owned();
diff --git a/src/service/globals/mod.rs b/src/service/globals/mod.rs
index 0944cedc5..712d60088 100644
--- a/src/service/globals/mod.rs
+++ b/src/service/globals/mod.rs
@@ -416,6 +416,10 @@ pub fn url_preview_max_spider_size(&self) -> usize {
         self.config.url_preview_max_spider_size
     }
 
+    pub fn url_preview_check_root_domain(&self) -> bool {
+        self.config.url_preview_check_root_domain
+    }
+
     pub fn forbidden_room_names(&self) -> &RegexSet {
         &self.config.forbidden_room_names
     }
-- 
GitLab