use select::{document::Document, predicate::Name}; use reqwest::Url; use base64::{Engine as _, engine::general_purpose}; use murmur3::murmur3_32; use std::{error::Error, io::Cursor}; use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace}; pub fn get_title(body: &String) -> String { let document = Document::from(body.as_str()); let title = document .find(Name("title")) .next() .map(|n| n.text()) .unwrap_or_else(|| "".to_string()); parsetitle(title) } pub fn read_body(body: &String, lim: usize) -> String { let document = Document::from(body.as_str()); let mut bodytext = document.find(Name("body")).next().map(|n| n.text()).unwrap_or_else(|| "".to_string()); bodytext = fmtwhitespace(bodytext); if bodytext.len() > lim { bodytext = bodytext .char_indices() .take_while(|(i, _)| *i < lim) .map(|(_, c)| c) .collect(); } parsebody(bodytext) } fn fmtbase64(s: &str, interval: usize, sep: char) -> String { let mut out = String::with_capacity(s.len() + s.len() / interval); let mut count = 0; for (_, c) in s.chars().enumerate() { out.push(c); count += 1; if count == interval { out.push(sep); count = 0; } } if count != 0 { out.push(sep); } out } fn faviconurl(doc: Document, url: String ) -> Result> { for node in doc.find(Name("link")) { if let Some(rel) = node.attr("rel") { if rel.eq("icon") { if let Some(href) = node.attr("href") { let base_url = Url::parse(&url)?; let favicon_url = base_url.join(href)?; return Ok(favicon_url.to_string()); } } } } Err("".into()) } async fn dl_favicon(url: String) -> Result, Box> { let data = reqwest::get(url).await?.bytes().await?.to_vec(); Ok(data) } pub async fn hash_favicon(body: &String, url: String) -> String { let document = Document::from(body.as_str()); if let Ok(favurl) = faviconurl(document, url) { if let Ok(data) = dl_favicon(favurl).await { // compute hash let b64 = general_purpose::STANDARD.encode(data); let f_b64 = fmtbase64(&b64, 76,'\n'); let hash = murmur3_32(&mut Cursor::new(f_b64.into_bytes()), 0).unwrap_or_else(|_| 0) as i32; return parsehash(hash); } } "".into() }