Skip to content
Snippets Groups Projects
Commit 48e4b71d authored by 🥺's avatar 🥺 :transgender_flag: Committed by 🥺
Browse files

replace hardcoded 300kB limit on spider size with a config option defaulting to 1MB


Modern websites are sadly massive, so 300kB is pretty low; 1MB should be enough.

Signed-off-by: strawberry <strawberry@puppygock.gay>
parent 2ea89519
No related branches found
No related tags found
No related merge requests found
......@@ -180,6 +180,9 @@ url_preview_domain_explicit_allowlist = []
# Setting this to "*" will allow all URL previews. Please note that this opens up significant attack surface to your server; by doing so, you are expected to be aware of the risks.
url_preview_url_contains_allowlist = []
# Maximum number of bytes allowed in a URL preview response body when spidering. Defaults to 1MB (1_000_000 bytes)
url_preview_max_spider_size = 1_000_000
### Misc
......
......@@ -342,14 +342,13 @@ async fn download_image(client: &reqwest::Client, url: &str) -> Result<UrlPrevie
}
async fn download_html(client: &reqwest::Client, url: &str) -> Result<UrlPreviewData> {
let max_download_size = 300_000; // TODO: is this bytes? kilobytes? megabytes?
let mut response = client.get(url).send().await?;
let mut bytes: Vec<u8> = Vec::new();
while let Some(chunk) = response.chunk().await? {
bytes.extend_from_slice(&chunk);
if bytes.len() > max_download_size {
if bytes.len() > services().globals.url_preview_max_spider_size() {
debug!("Response body from URL {} exceeds url_preview_max_spider_size ({}), not processing the rest of the response body and assuming our necessary data is in this range.", url, services().globals.url_preview_max_spider_size());
break;
}
}
......
......@@ -136,12 +136,12 @@ pub struct Config {
#[serde(default = "Vec::new")]
pub url_preview_domain_contains_allowlist: Vec<String>,
#[serde(default = "Vec::new")]
pub url_preview_domain_explicit_allowlist: Vec<String>,
#[serde(default = "Vec::new")]
pub url_preview_url_contains_allowlist: Vec<String>,
#[serde(default = "default_url_preview_max_spider_size")]
pub url_preview_max_spider_size: usize,
#[serde(default = "RegexSet::empty")]
#[serde(with = "serde_regex")]
......@@ -370,6 +370,10 @@ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
"URL preview URL contains allowlist",
&self.url_preview_url_contains_allowlist.join(", "),
),
(
"URL preview maximum spider size",
&self.url_preview_max_spider_size.to_string(),
),
];
let mut msg: String = "Active config values:\n\n".to_owned();
......@@ -495,3 +499,7 @@ fn default_ip_range_denylist() -> Vec<String> {
"fec0::/10".to_owned(),
]
}
/// Serde default for the `url_preview_max_spider_size` config option:
/// 1 MB, expressed as 1_000_000 bytes.
fn default_url_preview_max_spider_size() -> usize {
    const ONE_MEGABYTE: usize = 1_000_000;
    ONE_MEGABYTE
}
......@@ -412,6 +412,10 @@ pub fn url_preview_url_contains_allowlist(&self) -> &Vec<String> {
&self.config.url_preview_url_contains_allowlist
}
/// Returns the `url_preview_max_spider_size` config value: the maximum number
/// of bytes of a URL preview response body to accumulate when spidering.
pub fn url_preview_max_spider_size(&self) -> usize {
self.config.url_preview_max_spider_size
}
/// Returns a reference to the `forbidden_room_names` regex set held in the
/// loaded config.
pub fn forbidden_room_names(&self) -> &RegexSet {
&self.config.forbidden_room_names
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment