implemented mmh3 favicon hashing

This commit is contained in:
delorean 2024-05-24 10:42:52 -05:00
parent d94ca7688c
commit eeb9a32523
7 changed files with 97 additions and 5 deletions

8
Cargo.lock generated
View File

@ -730,6 +730,12 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "murmur3"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b"
[[package]]
name = "native-tls"
version = "0.2.11"
@ -1225,10 +1231,12 @@ dependencies = [
name = "speedboat"
version = "0.1.0"
dependencies = [
"base64",
"chrono",
"clap",
"colored",
"futures",
"murmur3",
"reqwest",
"select",
"tokio",

View File

@ -6,10 +6,12 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
base64 = "0.22.1"
chrono = "0.4.38"
clap = { version = "4.5.4", features = ["derive"] }
colored = "2.1.0"
futures = "0.3.30"
murmur3 = "0.5.2"
reqwest = "0.12.4"
select = "0.6.0"
tokio = { version = "1", features = ["full"] }
tokio = { version = "1", features = ["full"] }

View File

@ -42,6 +42,10 @@ pub struct Config {
/// read n bytes of the response document body
pub bodysize: usize,
#[clap(long = "favicon")]
/// computes an mmh3 favicon hash
pub favicon: bool,
#[clap(long = "ts")]
/// include timestamps of requests
pub timestamps: bool,

View File

@ -34,7 +34,7 @@ pub fn fmtcode(code: u16) -> ColoredString {
/// Returns the current local date and time rendered as `[YYYY-MM-DD][HH:MM:SS]`,
/// colored bright blue for console output.
pub fn tstamp() -> String {
let date = Local::now();
// NOTE(review): this binding is immediately shadowed by the next line, so its
// format string (which closes the time field with `}` instead of `]`) never
// reaches the output — it looks like the pre-fix line kept next to its replacement.
let datestr = format!("{}", date.format("[%Y-%m-%d][%H:%M:%S}"));
let datestr = format!("{}", date.format("[%Y-%m-%d][%H:%M:%S]"));
format!("{}", datestr.bright_blue())
}
@ -56,7 +56,15 @@ pub fn parsebody(s: String) -> String {
)
}
pub fn parsehit(sc: u16, url: String) -> String {
/// Renders a favicon hash for console output as `favicon[<hash>]`.
///
/// The surrounding `favicon[` / `]` markers are bold bright-black and the
/// decimal hash value itself is bold blue.
pub fn parsehash(h: i32) -> String {
    let prefix = "favicon[".bright_black().bold();
    let value = h.to_string().blue().bold();
    let suffix = "]".bright_black().bold();
    format!("{prefix}{value}{suffix}")
}
pub fn parsehit(sc: u16, url: &String) -> String {
format!(
"{} {} {}",
fmtcode(sc),

View File

@ -31,6 +31,7 @@ pub async fn takeoff(args: Config, params: Params) {
scodes,
params.exclude,
args.pulltitles,
args.favicon,
args.timestamps,
args.bodysize
)

View File

@ -1,6 +1,11 @@
use select::{document::Document, predicate::Name};
use reqwest::Url;
use base64::{Engine as _, engine::general_purpose};
use murmur3::murmur3_32;
use std::{error::Error,
io::Cursor};
use super::console::{parsetitle, parsebody, fmtwhitespace};
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace};
pub fn get_title(body: &String) -> String {
let document = Document::from(body.as_str());
@ -29,4 +34,60 @@ pub fn read_body(body: &String, lim: usize) -> String {
}
parsebody(bodytext)
}
/// Wraps `s` into lines of at most `interval` characters, terminating every
/// line (including the last one) with `sep`.
///
/// * `s` - the text to wrap (in this file, a base64 string).
/// * `interval` - maximum number of characters per line. `0` disables
///   wrapping: the whole input is emitted as a single line.
/// * `sep` - the line terminator appended after each line.
///
/// An empty input yields an empty string (no terminator is added).
fn fmtbase64(s: &str, interval: usize, sep: char) -> String {
    if interval == 0 {
        // Guard against the division by zero below: treat a zero width as
        // "do not wrap" and only terminate the single resulting line.
        let mut out = String::with_capacity(s.len() + sep.len_utf8());
        out.push_str(s);
        if !s.is_empty() {
            out.push(sep);
        }
        return out;
    }

    // One separator per full line plus one for a trailing partial line.
    let mut out = String::with_capacity(s.len() + (s.len() / interval + 1) * sep.len_utf8());
    let mut count = 0;
    for c in s.chars() {
        out.push(c);
        count += 1;
        if count == interval {
            out.push(sep);
            count = 0;
        }
    }
    // A partial last line still needs its terminator; a full last line
    // already received one inside the loop.
    if count != 0 {
        out.push(sep);
    }
    out
}
fn faviconurl(doc: Document, url: String ) -> Result<String, Box <dyn Error>> {
for node in doc.find(Name("link")) {
if let Some(rel) = node.attr("rel") {
if rel.eq("icon") {
if let Some(href) = node.attr("href") {
let base_url = Url::parse(&url)?;
let favicon_url = base_url.join(href)?;
return Ok(favicon_url.to_string());
}
}
}
}
Err("".into())
}
/// Downloads the resource at `url` and returns its raw bytes.
///
/// # Errors
///
/// Returns an error if the request fails, if the server answers with a
/// 4xx/5xx status, or if the response body cannot be read.
async fn dl_favicon(url: String) -> Result<Vec<u8>, Box<dyn Error>> {
    // Reject error responses: otherwise the body of e.g. a 404 page would be
    // handed back and hashed as if it were the icon.
    let response = reqwest::get(url).await?.error_for_status()?;
    Ok(response.bytes().await?.to_vec())
}
/// Computes the formatted favicon hash for the page in `body` fetched from `url`.
///
/// The favicon referenced by the document is downloaded, base64-encoded,
/// wrapped at 76 characters with `\n` line terminators, and hashed with
/// 32-bit MurmurHash3 (seed 0). The result is rendered through `parsehash`.
///
/// Returns an empty string when no favicon link is found, when the download
/// fails, or when the hash cannot be computed; callers treat an empty string
/// as "no hash available".
pub async fn hash_favicon(body: &String, url: String) -> String {
    let document = Document::from(body.as_str());

    let Ok(favurl) = faviconurl(document, url) else {
        return String::new();
    };
    let Ok(data) = dl_favicon(favurl).await else {
        return String::new();
    };

    // NOTE(review): the 76-column, newline-terminated wrapping presumably
    // mirrors Python's `base64.encodebytes`, which favicon-hash lookups are
    // conventionally computed over — confirm before changing either value.
    let b64 = general_purpose::STANDARD.encode(data);
    let f_b64 = fmtbase64(&b64, 76, '\n');

    match murmur3_32(&mut Cursor::new(f_b64.into_bytes()), 0) {
        // The cast deliberately reinterprets the unsigned 32-bit digest as a
        // signed value (bit pattern preserved), which is what `parsehash` takes.
        Ok(hash) => parsehash(hash as i32),
        // Do not report a made-up hash of 0 when hashing failed.
        Err(_) => String::new(),
    }
}

View File

@ -30,6 +30,7 @@ pub async fn query(
codes: Vec<u16>,
exclude: bool,
titles: bool,
favicon: bool,
timestamps: bool,
bodysize: usize,
) -> Result<(), reqwest::Error> {
@ -56,7 +57,7 @@ pub async fn query(
let url: String = response.url().to_string();
let body = response.text().await?;
let mut out = parsehit(sc, url);
let mut out = parsehit(sc, &url);
if timestamps {
out = format!("{} {}", tstamp(), out);
@ -66,6 +67,13 @@ pub async fn query(
out = format!("{} {}", out, get_title(&body));
}
if favicon {
let hash = hash_favicon(&body, url).await;
if !hash.is_empty() {
out = format!("{} {}", out, hash);
}
}
if bodysize > 0 {
out = format!("{} {}", out, read_body(&body, bodysize));
}