2018-12-19 00:57:45 +01:00
|
|
|
use std::fs::{create_dir_all, remove_file, symlink_metadata, File};
|
2018-02-10 01:00:55 +01:00
|
|
|
use std::io::prelude::*;
|
2019-01-28 23:58:32 +01:00
|
|
|
use std::time::{Duration, SystemTime};
|
2018-02-10 01:00:55 +01:00
|
|
|
|
|
|
|
use rocket::http::ContentType;
|
2018-12-19 00:57:45 +01:00
|
|
|
use rocket::response::Content;
|
|
|
|
use rocket::Route;
|
2018-02-10 01:00:55 +01:00
|
|
|
|
2019-01-29 21:20:59 +01:00
|
|
|
use reqwest::{header::HeaderMap, Client, Response};
|
2018-02-10 01:00:55 +01:00
|
|
|
|
2019-01-31 15:49:58 +01:00
|
|
|
use rocket::http::{Cookie};
|
|
|
|
|
2019-01-27 15:39:19 +01:00
|
|
|
use regex::Regex;
|
|
|
|
use soup::prelude::*;
|
|
|
|
|
2019-01-28 23:58:32 +01:00
|
|
|
use crate::error::Error;
|
|
|
|
use crate::CONFIG;
|
2019-01-27 15:39:19 +01:00
|
|
|
|
2018-02-10 01:00:55 +01:00
|
|
|
pub fn routes() -> Vec<Route> {
|
|
|
|
routes![icon]
|
|
|
|
}
|
|
|
|
|
2018-12-19 00:57:45 +01:00
|
|
|
/// Icon served whenever no real icon can be provided for a domain.
///
/// Declared as a plain byte slice so that replacing the image file with one of a
/// different size does not require touching this declaration.
const FALLBACK_ICON: &[u8] = include_bytes!("../static/fallback-icon.png");
|
|
|
|
|
2019-01-29 21:20:59 +01:00
|
|
|
lazy_static! {
    /// HTTP client shared by all icon requests, so it is built only once.
    ///
    /// It is configured with gzip support, the configured icon download timeout and the
    /// default headers from `_header_map`. Building the client panics if reqwest rejects
    /// the configuration.
    static ref CLIENT: Client = {
        let timeout = Duration::from_secs(CONFIG.icon_download_timeout());

        Client::builder()
            .gzip(true)
            .timeout(timeout)
            .default_headers(_header_map())
            .build()
            .unwrap()
    };
}
|
|
|
|
|
2018-02-10 01:00:55 +01:00
|
|
|
#[get("/<domain>/icon.png")]
|
|
|
|
fn icon(domain: String) -> Content<Vec<u8>> {
|
2018-02-17 18:48:42 +01:00
|
|
|
let icon_type = ContentType::new("image", "x-icon");
|
|
|
|
|
2018-02-10 01:00:55 +01:00
|
|
|
// Validate the domain to avoid directory traversal attacks
|
2018-09-13 21:55:23 +02:00
|
|
|
if domain.contains('/') || domain.contains("..") {
|
2018-12-19 00:57:45 +01:00
|
|
|
return Content(icon_type, FALLBACK_ICON.to_vec());
|
2018-02-10 01:00:55 +01:00
|
|
|
}
|
|
|
|
|
2018-06-12 21:09:42 +02:00
|
|
|
let icon = get_icon(&domain);
|
2018-02-10 01:00:55 +01:00
|
|
|
|
2018-02-17 18:48:42 +01:00
|
|
|
Content(icon_type, icon)
|
2018-02-10 01:00:55 +01:00
|
|
|
}
|
|
|
|
|
2018-12-19 00:57:45 +01:00
|
|
|
fn get_icon(domain: &str) -> Vec<u8> {
|
2019-01-25 18:23:51 +01:00
|
|
|
let path = format!("{}/{}.png", CONFIG.icon_cache_folder(), domain);
|
2018-02-10 01:00:55 +01:00
|
|
|
|
2018-06-12 21:09:42 +02:00
|
|
|
if let Some(icon) = get_cached_icon(&path) {
|
|
|
|
return icon;
|
|
|
|
}
|
2018-02-10 01:00:55 +01:00
|
|
|
|
2019-01-28 23:58:32 +01:00
|
|
|
if CONFIG.disable_icon_download() {
|
|
|
|
return FALLBACK_ICON.to_vec();
|
|
|
|
}
|
|
|
|
|
2018-06-12 21:09:42 +02:00
|
|
|
// Get the icon, or fallback in case of error
|
2019-01-27 16:03:18 +01:00
|
|
|
match download_icon(&domain) {
|
2018-06-12 21:09:42 +02:00
|
|
|
Ok(icon) => {
|
|
|
|
save_icon(&path, &icon);
|
|
|
|
icon
|
2018-12-19 00:57:45 +01:00
|
|
|
}
|
2018-07-01 15:27:42 +02:00
|
|
|
Err(e) => {
|
2018-12-06 20:35:25 +01:00
|
|
|
error!("Error downloading icon: {:?}", e);
|
2018-12-18 13:33:32 -08:00
|
|
|
mark_negcache(&path);
|
2018-12-19 00:57:45 +01:00
|
|
|
FALLBACK_ICON.to_vec()
|
2018-07-01 15:27:42 +02:00
|
|
|
}
|
2018-06-12 21:09:42 +02:00
|
|
|
}
|
2018-02-10 01:00:55 +01:00
|
|
|
}
|
|
|
|
|
2018-06-12 21:09:42 +02:00
|
|
|
fn get_cached_icon(path: &str) -> Option<Vec<u8>> {
|
2018-12-18 13:33:32 -08:00
|
|
|
// Check for expiration of negatively cached copy
|
|
|
|
if icon_is_negcached(path) {
|
2018-12-19 00:57:45 +01:00
|
|
|
return Some(FALLBACK_ICON.to_vec());
|
2018-12-18 13:33:32 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check for expiration of successfully cached copy
|
|
|
|
if icon_is_expired(path) {
|
2018-12-19 00:57:45 +01:00
|
|
|
return None;
|
2018-12-18 13:33:32 -08:00
|
|
|
}
|
|
|
|
|
2018-02-15 00:53:11 +01:00
|
|
|
// Try to read the cached icon, and return it if it exists
|
2018-06-11 15:44:37 +02:00
|
|
|
if let Ok(mut f) = File::open(path) {
|
|
|
|
let mut buffer = Vec::new();
|
|
|
|
|
|
|
|
if f.read_to_end(&mut buffer).is_ok() {
|
2018-06-12 21:09:42 +02:00
|
|
|
return Some(buffer);
|
2018-02-10 01:00:55 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-12 21:09:42 +02:00
|
|
|
None
|
|
|
|
}
|
|
|
|
|
2019-01-20 15:36:33 +01:00
|
|
|
fn file_is_expired(path: &str, ttl: u64) -> Result<bool, Error> {
|
2018-12-18 13:33:32 -08:00
|
|
|
let meta = symlink_metadata(path)?;
|
|
|
|
let modified = meta.modified()?;
|
|
|
|
let age = SystemTime::now().duration_since(modified)?;
|
|
|
|
|
|
|
|
Ok(ttl > 0 && ttl <= age.as_secs())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn icon_is_negcached(path: &str) -> bool {
|
|
|
|
let miss_indicator = path.to_owned() + ".miss";
|
2019-01-25 18:23:51 +01:00
|
|
|
let expired = file_is_expired(&miss_indicator, CONFIG.icon_cache_negttl());
|
2018-12-19 00:57:45 +01:00
|
|
|
|
2018-12-18 13:33:32 -08:00
|
|
|
match expired {
|
|
|
|
// No longer negatively cached, drop the marker
|
|
|
|
Ok(true) => {
|
2018-12-19 00:57:45 +01:00
|
|
|
if let Err(e) = remove_file(&miss_indicator) {
|
|
|
|
error!("Could not remove negative cache indicator for icon {:?}: {:?}", path, e);
|
2018-12-18 13:33:32 -08:00
|
|
|
}
|
|
|
|
false
|
2018-12-19 00:57:45 +01:00
|
|
|
}
|
2018-12-18 13:33:32 -08:00
|
|
|
// The marker hasn't expired yet.
|
2018-12-19 00:57:45 +01:00
|
|
|
Ok(false) => true,
|
2018-12-18 13:33:32 -08:00
|
|
|
// The marker is missing or inaccessible in some way.
|
2018-12-19 00:57:45 +01:00
|
|
|
Err(_) => false,
|
2018-12-18 13:33:32 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn mark_negcache(path: &str) {
|
|
|
|
let miss_indicator = path.to_owned() + ".miss";
|
|
|
|
File::create(&miss_indicator).expect("Error creating negative cache marker");
|
|
|
|
}
|
|
|
|
|
|
|
|
fn icon_is_expired(path: &str) -> bool {
|
2019-01-25 18:23:51 +01:00
|
|
|
let expired = file_is_expired(path, CONFIG.icon_cache_ttl());
|
2018-12-18 13:33:32 -08:00
|
|
|
expired.unwrap_or(true)
|
|
|
|
}
|
|
|
|
|
2019-01-28 23:58:32 +01:00
|
|
|
/// A single icon candidate found for a domain.
#[derive(Debug)]
struct IconList {
    /// Ranking of the candidate; the lower the number, the more it is preferred.
    priority: u8,
    /// Absolute URL the icon can be downloaded from.
    href: String,
}
|
|
|
|
|
2019-02-04 16:59:52 +01:00
|
|
|
/// Returns a Result/Tuple which holds a Vector IconList and a string which holds the cookies from the last response.
|
|
|
|
/// There will always be a result with a string which will contain https://example.com/favicon.ico and an empty string for the cookies.
|
|
|
|
/// This does not mean that that location does exists, but it is the default location browser use.
|
2019-01-27 15:39:19 +01:00
|
|
|
///
|
|
|
|
/// # Argument
|
|
|
|
/// * `domain` - A string which holds the domain with extension.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
/// ```
|
2019-02-04 16:59:52 +01:00
|
|
|
/// let (mut iconlist, cookie_str) = get_icon_url("github.com")?;
|
|
|
|
/// let (mut iconlist, cookie_str) = get_icon_url("gitlab.com")?;
|
2019-01-27 15:39:19 +01:00
|
|
|
/// ```
|
2019-02-04 16:59:52 +01:00
|
|
|
fn get_icon_url(domain: &str) -> Result<(Vec<IconList>, String), Error> {
|
2019-01-27 15:39:19 +01:00
|
|
|
// Default URL with secure and insecure schemes
|
|
|
|
let ssldomain = format!("https://{}", domain);
|
|
|
|
let httpdomain = format!("http://{}", domain);
|
|
|
|
|
|
|
|
// Create the iconlist
|
|
|
|
let mut iconlist: Vec<IconList> = Vec::new();
|
|
|
|
|
2019-01-31 15:49:58 +01:00
|
|
|
// Create the cookie_str to fill it all the cookies from the response
|
|
|
|
// These cookies can be used to request/download the favicon image.
|
|
|
|
// Some sites have extra security in place with for example XSRF Tokens.
|
|
|
|
let mut cookie_str = String::new();
|
|
|
|
|
2019-01-29 21:20:59 +01:00
|
|
|
let resp = get_page(&ssldomain).or_else(|_| get_page(&httpdomain));
|
2019-01-28 23:58:32 +01:00
|
|
|
if let Ok(content) = resp {
|
|
|
|
// Extract the URL from the respose in case redirects occured (like @ gitlab.com)
|
2019-01-29 18:08:23 +01:00
|
|
|
let url = content.url().clone();
|
2019-01-31 15:49:58 +01:00
|
|
|
let raw_cookies = content.headers().get_all("set-cookie");
|
2019-01-31 17:58:03 +01:00
|
|
|
cookie_str = raw_cookies.iter().map(|raw_cookie| {
|
2019-01-31 15:49:58 +01:00
|
|
|
let cookie = Cookie::parse(raw_cookie.to_str().unwrap_or_default()).unwrap();
|
2019-01-31 17:58:03 +01:00
|
|
|
format!("{}={}; ", cookie.name(), cookie.value())
|
|
|
|
}).collect::<String>();
|
2019-01-27 15:39:19 +01:00
|
|
|
|
|
|
|
// Add the default favicon.ico to the list with the domain the content responded from.
|
2019-01-29 18:08:23 +01:00
|
|
|
iconlist.push(IconList { priority: 35, href: url.join("/favicon.ico").unwrap().into_string() });
|
2019-01-27 15:39:19 +01:00
|
|
|
|
2019-01-28 23:58:32 +01:00
|
|
|
let soup = Soup::from_reader(content)?;
|
2019-01-27 15:39:19 +01:00
|
|
|
// Search for and filter
|
|
|
|
let favicons = soup
|
|
|
|
.tag("link")
|
|
|
|
.attr("rel", Regex::new(r"icon$|apple.*icon")?) // Only use icon rels
|
2019-01-28 23:58:32 +01:00
|
|
|
.attr("href", Regex::new(r"(?i)\w+\.(jpg|jpeg|png|ico)(\?.*)?$")?) // Only allow specific extensions
|
2019-01-27 15:39:19 +01:00
|
|
|
.find_all();
|
|
|
|
|
|
|
|
// Loop through all the found icons and determine it's priority
|
|
|
|
for favicon in favicons {
|
2019-01-29 18:08:23 +01:00
|
|
|
let sizes = favicon.get("sizes").unwrap_or_default();
|
|
|
|
let href = url.join(&favicon.get("href").unwrap_or_default()).unwrap().into_string();
|
|
|
|
let priority = get_icon_priority(&href, &sizes);
|
2019-01-27 15:39:19 +01:00
|
|
|
|
2019-01-28 23:58:32 +01:00
|
|
|
iconlist.push(IconList { priority, href })
|
2019-01-27 15:39:19 +01:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Add the default favicon.ico to the list with just the given domain
|
2019-01-28 23:58:32 +01:00
|
|
|
iconlist.push(IconList { priority: 35, href: format!("{}/favicon.ico", ssldomain) });
|
2019-01-27 15:39:19 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Sort the iconlist by priority
|
|
|
|
iconlist.sort_by_key(|x| x.priority);
|
|
|
|
|
|
|
|
// There always is an icon in the list, so no need to check if it exists, and just return the first one
|
2019-02-04 16:59:52 +01:00
|
|
|
Ok((iconlist, cookie_str))
|
2019-01-27 15:39:19 +01:00
|
|
|
}
|
|
|
|
|
2019-01-29 21:20:59 +01:00
|
|
|
fn get_page(url: &str) -> Result<Response, Error> {
|
2019-01-31 15:49:58 +01:00
|
|
|
//CLIENT.get(url).send()?.error_for_status().map_err(Into::into)
|
|
|
|
get_page_with_cookies(url, "")
|
|
|
|
}
|
|
|
|
|
|
|
|
fn get_page_with_cookies(url: &str, cookie_str: &str) -> Result<Response, Error> {
|
|
|
|
CLIENT.get(url).header("cookie", cookie_str).send()?.error_for_status().map_err(Into::into)
|
2019-01-29 21:20:59 +01:00
|
|
|
}
|
|
|
|
|
2019-01-27 15:39:19 +01:00
|
|
|
/// Returns a Integer with the priority of the type of the icon which to prefer.
|
|
|
|
/// The lower the number the better.
|
|
|
|
///
|
|
|
|
/// # Arguments
|
|
|
|
/// * `href` - A string which holds the href value or relative path.
|
|
|
|
/// * `sizes` - The size of the icon if available as a <width>x<height> value like 32x32.
|
|
|
|
///
|
|
|
|
/// # Example
|
|
|
|
/// ```
|
|
|
|
/// priority1 = get_icon_priority("http://example.com/path/to/a/favicon.png", "32x32");
|
|
|
|
/// priority2 = get_icon_priority("https://example.com/path/to/a/favicon.ico", "");
|
|
|
|
/// ```
|
|
|
|
fn get_icon_priority(href: &str, sizes: &str) -> u8 {
|
|
|
|
// Check if there is a dimension set
|
2019-02-04 12:55:39 +01:00
|
|
|
let (width, height) = parse_sizes(sizes);
|
2019-01-27 15:39:19 +01:00
|
|
|
|
2019-02-04 12:55:39 +01:00
|
|
|
// Check if there is a size given
|
|
|
|
if width != 0 && height != 0 {
|
2019-01-27 15:39:19 +01:00
|
|
|
// Only allow square dimensions
|
|
|
|
if width == height {
|
|
|
|
// Change priority by given size
|
|
|
|
if width == 32 {
|
|
|
|
1
|
|
|
|
} else if width == 64 {
|
|
|
|
2
|
|
|
|
} else if width >= 24 && width <= 128 {
|
|
|
|
3
|
|
|
|
} else if width == 16 {
|
|
|
|
4
|
|
|
|
} else {
|
2019-02-04 12:55:39 +01:00
|
|
|
5
|
2019-01-27 15:39:19 +01:00
|
|
|
}
|
2019-02-04 12:55:39 +01:00
|
|
|
// There are dimensions available, but the image is not a square
|
2019-01-27 15:39:19 +01:00
|
|
|
} else {
|
|
|
|
200
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Change priority by file extension
|
|
|
|
if href.ends_with(".png") {
|
|
|
|
10
|
|
|
|
} else if href.ends_with(".jpg") || href.ends_with(".jpeg") {
|
|
|
|
20
|
|
|
|
} else {
|
|
|
|
30
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-04 12:55:39 +01:00
|
|
|
/// Returns a tuple with the width and height extracted from the `sizes` attribute.
///
/// The first number in the value is the width; the height is the next number after at
/// least one non-digit separator. `(0, 0)` is returned when no such pair exists, and a
/// number that does not fit into a `u16` is reported as `0`.
///
/// # Arguments
/// * `sizes` - The size of the icon if available as a <width>x<height> value like 32x32.
///
/// # Example
/// ```
/// let (width, height) = parse_sizes("64x64"); // (64, 64)
/// let (width, height) = parse_sizes("x128x128"); // (128, 128)
/// let (width, height) = parse_sizes("32"); // (0, 0)
/// ```
fn parse_sizes(sizes: &str) -> (u16, u16) {
    fn is_digit(c: char) -> bool {
        c.is_ascii_digit()
    }

    /// Splits `sizes` into the textual width and height, if both are present.
    fn split_dimensions(sizes: &str) -> Option<(&str, &str)> {
        // Skip anything in front of the first number.
        let rest = &sizes[sizes.find(is_digit)?..];

        // The width has to be followed by a separator; a lone number is not a dimension
        // and must not be split into two halves.
        let width_end = rest.find(|c: char| !is_digit(c))?;
        let (width, rest) = rest.split_at(width_end);

        // Skip the separator and take the digits that follow as the height.
        let rest = &rest[rest.find(is_digit)?..];
        let height_end = rest.find(|c: char| !is_digit(c)).unwrap_or_else(|| rest.len());

        Some((width, &rest[..height_end]))
    }

    match split_dimensions(sizes.trim()) {
        Some((width, height)) => (
            width.parse::<u16>().unwrap_or_default(),
            height.parse::<u16>().unwrap_or_default(),
        ),
        None => (0, 0),
    }
}
|
|
|
|
|
2019-01-27 16:03:18 +01:00
|
|
|
fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
|
2019-02-04 16:59:52 +01:00
|
|
|
let (mut iconlist, cookie_str) = get_icon_url(&domain)?;
|
2018-06-12 21:09:42 +02:00
|
|
|
|
2019-01-29 21:20:59 +01:00
|
|
|
let mut buffer = Vec::new();
|
2019-02-04 16:59:52 +01:00
|
|
|
|
2019-02-04 17:27:40 +01:00
|
|
|
iconlist.truncate(5);
|
|
|
|
for icon in iconlist {
|
|
|
|
let url = icon.href;
|
2019-02-04 16:59:52 +01:00
|
|
|
info!("Downloading icon for {} via {}...", domain, url);
|
|
|
|
match get_page_with_cookies(&url, &cookie_str) {
|
|
|
|
Ok(mut res) => {
|
|
|
|
info!("Download finished for {}", url);
|
|
|
|
res.copy_to(&mut buffer)?;
|
|
|
|
break;
|
|
|
|
},
|
|
|
|
Err(_) => info!("Download failed for {}", url),
|
|
|
|
};
|
|
|
|
}
|
2018-06-12 21:09:42 +02:00
|
|
|
|
2019-01-29 21:20:59 +01:00
|
|
|
if buffer.is_empty() {
|
|
|
|
err!("Empty response")
|
|
|
|
}
|
|
|
|
|
2018-06-12 21:09:42 +02:00
|
|
|
Ok(buffer)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn save_icon(path: &str, icon: &[u8]) {
|
2019-01-25 18:23:51 +01:00
|
|
|
create_dir_all(&CONFIG.icon_cache_folder()).expect("Error creating icon cache");
|
2018-02-10 01:00:55 +01:00
|
|
|
|
2018-06-11 15:44:37 +02:00
|
|
|
if let Ok(mut f) = File::create(path) {
|
2018-06-12 21:09:42 +02:00
|
|
|
f.write_all(icon).expect("Error writing icon file");
|
2018-02-10 01:00:55 +01:00
|
|
|
};
|
|
|
|
}
|
2019-01-29 21:20:59 +01:00
|
|
|
|
|
|
|
fn _header_map() -> HeaderMap {
|
|
|
|
// Set some default headers for the request.
|
|
|
|
// Use a browser like user-agent to make sure most websites will return there correct website.
|
|
|
|
use reqwest::header::*;
|
|
|
|
|
|
|
|
macro_rules! headers {
|
2019-02-02 16:47:27 +01:00
|
|
|
($( $name:ident : $value:literal),+ $(,)? ) => {
|
2019-01-29 21:20:59 +01:00
|
|
|
let mut headers = HeaderMap::new();
|
|
|
|
$( headers.insert($name, HeaderValue::from_static($value)); )+
|
|
|
|
headers
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
headers! {
|
|
|
|
USER_AGENT: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299",
|
|
|
|
ACCEPT_LANGUAGE: "en-US,en;q=0.8",
|
|
|
|
CACHE_CONTROL: "no-cache",
|
|
|
|
PRAGMA: "no-cache",
|
|
|
|
ACCEPT: "text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,image/apng,*/*;q=0.8",
|
|
|
|
}
|
|
|
|
}
|