From c7a752b01d704c116d715d75f75ff6fa00ab964d Mon Sep 17 00:00:00 2001 From: BlackDex Date: Tue, 21 Jun 2022 18:47:01 +0200 Subject: [PATCH] Update dep's and small improvements on favicons - Updated dependencies (html5gum for favicon downloading) * Also openssl, time, jsonwebtoken and r2d2 - Small optimizations on downloading favicons. It now only emits tokens/tags which needs to be parsed, all others are being skipped. This prevents unneeded items within the for-loop being parsed. --- Cargo.lock | 155 ++++++++++++++++++++--------------------------- Cargo.toml | 6 +- src/api/icons.rs | 47 ++++++++++---- 3 files changed, 102 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 007b355d..d2d55e47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,7 +378,7 @@ dependencies = [ "rand", "sha2", "subtle", - "time 0.3.9", + "time 0.3.11", "version_check", ] @@ -394,7 +394,7 @@ dependencies = [ "publicsuffix", "serde", "serde_json", - "time 0.3.9", + "time 0.3.11", "url 2.2.2", ] @@ -445,12 +445,12 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" +checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" dependencies = [ "cfg-if", - "lazy_static", + "once_cell", ] [[package]] @@ -526,7 +526,7 @@ dependencies = [ "cfg-if", "hashbrown 0.12.1", "lock_api", - "parking_lot_core 0.9.3", + "parking_lot_core", ] [[package]] @@ -967,7 +967,7 @@ dependencies = [ "futures-timer", "no-std-compat", "nonzero_ext", - "parking_lot 0.12.1", + "parking_lot", "quanta", "rand", "smallvec", @@ -1071,9 +1071,9 @@ dependencies = [ [[package]] name = "html5gum" -version = "0.4.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dad48b66db55322add2819ae1d7bda0c32f3415269a08330679dbc8b0afeb30" +checksum = 
"3404cc217cc3e11d09c8ac9ccf8b1e540f64477c253d6dc70b5a5074782d934d" dependencies = [ "jetscii", ] @@ -1179,12 +1179,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.8.2" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a" +checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" dependencies = [ "autocfg", - "hashbrown 0.11.2", + "hashbrown 0.12.1", "serde", ] @@ -1246,18 +1246,18 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.57" +version = "0.3.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671a26f820db17c2a2750743f1dd03bafd15b98c9f30c7c2628c024c05d73397" +checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" dependencies = [ "wasm-bindgen", ] [[package]] name = "jsonwebtoken" -version = "8.1.0" +version = "8.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9051c17f81bae79440afa041b3a278e1de71bfb96d32454b477fd4703ccb6f" +checksum = "1aa4b4af834c6cfd35d8763d359661b90f2e45d8f750a0849156c7f4671af09c" dependencies = [ "base64", "pem", @@ -1466,9 +1466,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "713d550d9b44d89174e066b7a6217ae06234c10cb47819a88290d2b353c31799" +checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" dependencies = [ "libc", "log", @@ -1678,9 +1678,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "111.20.0+1.1.1o" +version = "111.21.0+1.1.1p" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92892c4f87d56e376e469ace79f1128fdaded07646ddf73aa0be4706ff712dec" +checksum = "6d0a8313729211913936f1b95ca47a5fc7f2e04cd658c115388287f8a8361008" dependencies = [ 
"cc", ] @@ -1699,17 +1699,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.5", -] - [[package]] name = "parking_lot" version = "0.12.1" @@ -1717,21 +1706,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.3", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -1947,9 +1922,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" dependencies = [ "unicode-ident", ] @@ -2009,9 +1984,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" dependencies = [ "proc-macro2", ] @@ -2024,12 +1999,12 @@ checksum = "3fee2dce59f7a43418e3382c766554c614e06a552d53a8f07ef499ea4b332c0f" [[package]] name = "r2d2" -version = "0.8.9" +version = "0.8.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "545c5bc2b880973c9c10e4067418407a0ccaa3091781d1671d46eb35107cb26f" +checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" dependencies = [ "log", - "parking_lot 0.11.2", + "parking_lot", "scheduled-thread-pool", ] @@ -2257,7 +2232,7 @@ dependencies = [ "memchr", "multer", "num_cpus", - "parking_lot 0.12.1", + "parking_lot", "pin-project-lite", "rand", "ref-cast", @@ -2267,7 +2242,7 @@ dependencies = [ "serde_json", "state", "tempfile", - "time 0.3.9", + "time 0.3.11", "tokio", "tokio-stream", "tokio-util 0.7.3", @@ -2316,7 +2291,7 @@ dependencies = [ "smallvec", "stable-pattern", "state", - "time 0.3.9", + "time 0.3.11", "tokio", "tokio-rustls", "uncased", @@ -2351,9 +2326,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" +checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf" [[package]] name = "ryu" @@ -2386,7 +2361,7 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "977a7519bff143a44f842fd07e80ad1329295bd71686457f18e496736f4bf9bf" dependencies = [ - "parking_lot 0.12.1", + "parking_lot", ] [[package]] @@ -2559,7 +2534,7 @@ dependencies = [ "num-bigint", "num-traits", "thiserror", - "time 0.3.9", + "time 0.3.11", ] [[package]] @@ -2576,9 +2551,9 @@ checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" [[package]] name = "smallvec" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +checksum = "cc88c725d61fc6c3132893370cac4a0200e3fedf5da8331c570664b1987f5ca2" [[package]] name = "socket2" @@ -2634,9 +2609,9 @@ checksum = 
"6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.96" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" +checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" dependencies = [ "proc-macro2", "quote", @@ -2653,7 +2628,7 @@ dependencies = [ "hostname", "libc", "log", - "time 0.3.9", + "time 0.3.11", ] [[package]] @@ -2720,9 +2695,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.9" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" +checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217" dependencies = [ "itoa", "libc", @@ -2763,7 +2738,7 @@ dependencies = [ "mio", "num_cpus", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -2889,9 +2864,9 @@ dependencies = [ [[package]] name = "tower-service" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" @@ -2919,9 +2894,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7709595b8878a4965ce5e87ebf880a7d39c9afc6837721b21a5a816a8117d921" +checksum = "7b7358be39f2f274f322d2aaed611acc57f382e8eb1e5b48cb9ae30933495ce7" dependencies = [ "once_cell", "valuable", @@ -2993,7 +2968,7 @@ dependencies = [ "lazy_static", "log", "lru-cache", - "parking_lot 0.12.1", + "parking_lot", "resolv-conf", "smallvec", "thiserror", @@ -3071,9 +3046,9 @@ checksum = 
"5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" [[package]] name = "unicode-normalization" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" +checksum = "81dee68f85cab8cf68dec42158baf3a79a1cdc065a8b103025965d6ccb7f6cbd" dependencies = [ "tinyvec", ] @@ -3190,7 +3165,7 @@ dependencies = [ "serde", "serde_json", "syslog", - "time 0.3.9", + "time 0.3.11", "tokio", "tokio-tungstenite", "totp-lite", @@ -3248,9 +3223,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.80" +version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27370197c907c55e3f1a9fbe26f44e937fe6451368324e009cba39e139dc08ad" +checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -3258,9 +3233,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.80" +version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53e04185bfa3a779273da532f5025e33398409573f348985af9a1cbf3774d3f4" +checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" dependencies = [ "bumpalo", "lazy_static", @@ -3273,9 +3248,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.30" +version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f741de44b75e14c35df886aff5f1eb73aa114fa5d4d00dcd37b5e01259bf3b2" +checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f" dependencies = [ "cfg-if", "js-sys", @@ -3285,9 +3260,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.80" +version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"17cae7ff784d7e83a2fe7611cfe766ecf034111b49deb850a3dc7699c08251f5" +checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3295,9 +3270,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.80" +version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ec0dc7a4756fffc231aab1b9f2f578d23cd391390ab27f952ae0c9b3ece20b" +checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" dependencies = [ "proc-macro2", "quote", @@ -3308,15 +3283,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.80" +version = "0.2.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d554b7f530dee5964d9a9468d95c1f8b8acae4f282807e7d27d4b03099a46744" +checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" [[package]] name = "web-sys" -version = "0.3.57" +version = "0.3.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283" +checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index dbc1fc1a..19db42b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,7 +84,7 @@ uuid = { version = "1.1.2", features = ["v4"] } # Date and time libraries chrono = { version = "0.4.19", features = ["clock", "serde"], default-features = false } chrono-tz = "0.6.1" -time = "0.3.9" +time = "0.3.11" # Job scheduler job_scheduler_ng = "2.0.1" @@ -93,7 +93,7 @@ job_scheduler_ng = "2.0.1" data-encoding = "2.3.2" # JWT library -jsonwebtoken = "8.1.0" +jsonwebtoken = "8.1.1" # TOTP library totp-lite = "2.0.0" @@ -118,7 +118,7 @@ handlebars = { version = "4.3.1", features = ["dir_source"] } reqwest = { version = "0.11.11", features = ["stream", "json", "gzip", "brotli", "socks", 
"cookies", "trust-dns"] } # For favicon extraction from main website -html5gum = "0.4.0" +html5gum = "0.5.2" regex = { version = "1.5.6", features = ["std", "perf", "unicode-perl"], default-features = false } data-url = "0.1.1" bytes = "1.1.0" diff --git a/src/api/icons.rs b/src/api/icons.rs index dc27d13a..90d306fb 100644 --- a/src/api/icons.rs +++ b/src/api/icons.rs @@ -19,7 +19,7 @@ use tokio::{ net::lookup_host, }; -use html5gum::{Emitter, EndTag, InfallibleTokenizer, Readable, StartTag, StringReader, Tokenizer}; +use html5gum::{Emitter, EndTag, HtmlString, InfallibleTokenizer, Readable, StartTag, StringReader, Tokenizer}; use crate::{ error::Error, @@ -433,7 +433,7 @@ async fn get_favicons_node( for token in dom { match token { FaviconToken::StartTag(tag) => { - if tag.name == TAG_LINK + if *tag.name == TAG_LINK && tag.attributes.contains_key(ATTR_REL) && tag.attributes.contains_key(ATTR_HREF) { @@ -443,7 +443,7 @@ async fn get_favicons_node( if rel_value.contains("icon") && !rel_value.contains("mask-icon") { icon_tags.push(tag); } - } else if tag.name == TAG_BASE && tag.attributes.contains_key(ATTR_HREF) { + } else if *tag.name == TAG_BASE && tag.attributes.contains_key(ATTR_HREF) { let href = std::str::from_utf8(tag.attributes.get(ATTR_HREF).unwrap()).unwrap_or_default(); debug!("Found base href: {href}"); base_url = match base_url.join(href) { @@ -453,7 +453,7 @@ async fn get_favicons_node( } } FaviconToken::EndTag(tag) => { - if tag.name == TAG_HEAD { + if *tag.name == TAG_HEAD { break; } } @@ -830,17 +830,18 @@ impl reqwest::cookie::CookieStore for Jar { /// Therefor parsing the HTML content is faster. 
use std::collections::{BTreeSet, VecDeque}; +#[derive(Debug)] enum FaviconToken { StartTag(StartTag), EndTag(EndTag), } -#[derive(Default)] +#[derive(Default, Debug)] struct FaviconEmitter { current_token: Option, - last_start_tag: Vec, - current_attribute: Option<(Vec, Vec)>, - seen_attributes: BTreeSet>, + last_start_tag: HtmlString, + current_attribute: Option<(HtmlString, HtmlString)>, + seen_attributes: BTreeSet, emitted_tokens: VecDeque, } @@ -887,18 +888,38 @@ impl Emitter for FaviconEmitter { self.seen_attributes.clear(); } - fn emit_current_tag(&mut self) { + fn emit_current_tag(&mut self) -> Option { self.flush_current_attribute(); let mut token = self.current_token.take().unwrap(); + let mut emit = false; match token { - FaviconToken::EndTag(_) => { + FaviconToken::EndTag(ref mut tag) => { + // Always clean seen attributes self.seen_attributes.clear(); + + // Only trigger an emit for the </head> tag. + // This is matched, and will break the for-loop. + if *tag.name == b"head" { + emit = true; + } } FaviconToken::StartTag(ref mut tag) => { - self.set_last_start_tag(Some(&tag.name)); + // Only trigger an emit for <link> and <base> tags. + // These are the only tags we want to parse. + if *tag.name == b"link" || *tag.name == b"base" { + self.set_last_start_tag(Some(&tag.name)); + emit = true; + } else { + self.set_last_start_tag(None); + } } } - self.emit_token(token); + + // Only emit the tags we want to parse. + if emit { + self.emit_token(token); + } + None } fn push_tag_name(&mut self, s: &[u8]) { @@ -921,7 +942,7 @@ impl Emitter for FaviconEmitter { fn init_attribute(&mut self) { self.flush_current_attribute(); - self.current_attribute = Some((Vec::new(), Vec::new())); + self.current_attribute = Some(Default::default()); } fn push_attribute_name(&mut self, s: &[u8]) {