93 lines
2.5 KiB
Rust
93 lines
2.5 KiB
Rust
use select::{document::Document, predicate::Name};
|
|
use reqwest::Url;
|
|
use base64::{Engine as _, engine::general_purpose};
|
|
use murmur3::murmur3_32;
|
|
use std::{error::Error,
|
|
io::Cursor};
|
|
|
|
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace};
|
|
|
|
pub fn get_title(body: &String) -> String {
|
|
let document = Document::from(body.as_str());
|
|
|
|
let title = document
|
|
.find(Name("title"))
|
|
.next()
|
|
.map(|n| n.text())
|
|
.unwrap_or_else(|| "".to_string());
|
|
|
|
parsetitle(title)
|
|
}
|
|
|
|
pub fn read_body(body: &String, lim: usize) -> String {
|
|
let document = Document::from(body.as_str());
|
|
|
|
let mut bodytext = document.find(Name("body")).next().map(|n| n.text()).unwrap_or_else(|| "".to_string());
|
|
|
|
bodytext = fmtwhitespace(bodytext);
|
|
if bodytext.len() > lim {
|
|
bodytext = bodytext
|
|
.char_indices()
|
|
.take_while(|(i, _)| *i < lim)
|
|
.map(|(_, c)| c)
|
|
.collect();
|
|
}
|
|
|
|
parsebody(bodytext)
|
|
}
|
|
|
|
/// Breaks `s` into groups of `interval` characters, appending `sep` after
/// every group.
///
/// A trailing group shorter than `interval` is terminated with `sep` as
/// well, so any non-empty input yields output that ends in `sep`. An empty
/// input yields an empty string.
///
/// An `interval` of `0` never completes a group: the input is returned
/// followed by a single `sep` (previously this panicked with a division by
/// zero while sizing the buffer).
fn fmtbase64(s: &str, interval: usize, sep: char) -> String {
    // One separator per full group plus one for a possible trailing partial
    // group. `checked_div` avoids the divide-by-zero panic for interval == 0.
    let max_seps = s.len().checked_div(interval).unwrap_or(0) + 1;
    let mut out = String::with_capacity(s.len() + max_seps * sep.len_utf8());
    let mut count = 0;

    for c in s.chars() {
        out.push(c);
        count += 1;
        if count == interval {
            out.push(sep);
            count = 0;
        }
    }

    // Terminate a trailing partial group.
    if count != 0 {
        out.push(sep);
    }

    out
}
|
|
|
|
fn faviconurl(doc: Document, url: String ) -> Result<String, Box <dyn Error>> {
|
|
for node in doc.find(Name("link")) {
|
|
if let Some(rel) = node.attr("rel") {
|
|
if rel.eq("icon") {
|
|
if let Some(href) = node.attr("href") {
|
|
let base_url = Url::parse(&url)?;
|
|
let favicon_url = base_url.join(href)?;
|
|
return Ok(favicon_url.to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Err("".into())
|
|
}
|
|
|
|
async fn dl_favicon(url: String) -> Result<Vec<u8>, Box<dyn Error>> {
|
|
let data = reqwest::get(url).await?.bytes().await?.to_vec();
|
|
Ok(data)
|
|
}
|
|
|
|
pub async fn hash_favicon(body: &String, url: String) -> String {
|
|
let document = Document::from(body.as_str());
|
|
|
|
if let Ok(favurl) = faviconurl(document, url) {
|
|
if let Ok(data) = dl_favicon(favurl).await {
|
|
// compute hash
|
|
let b64 = general_purpose::STANDARD.encode(data);
|
|
let f_b64 = fmtbase64(&b64, 76,'\n');
|
|
let hash = murmur3_32(&mut Cursor::new(f_b64.into_bytes()), 0).unwrap_or_else(|_| 0) as i32;
|
|
return parsehash(hash);
|
|
}
|
|
}
|
|
|
|
"".into()
|
|
} |