implemented mmh3 favicon hashing
This commit is contained in:
parent
d94ca7688c
commit
eeb9a32523
8
Cargo.lock
generated
8
Cargo.lock
generated
@ -730,6 +730,12 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "murmur3"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b"
|
||||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.11"
|
||||
@ -1225,10 +1231,12 @@ dependencies = [
|
||||
name = "speedboat"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"chrono",
|
||||
"clap",
|
||||
"colored",
|
||||
"futures",
|
||||
"murmur3",
|
||||
"reqwest",
|
||||
"select",
|
||||
"tokio",
|
||||
|
@ -6,10 +6,12 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.22.1"
|
||||
chrono = "0.4.38"
|
||||
clap = { version = "4.5.4", features = ["derive"] }
|
||||
colored = "2.1.0"
|
||||
futures = "0.3.30"
|
||||
murmur3 = "0.5.2"
|
||||
reqwest = "0.12.4"
|
||||
select = "0.6.0"
|
||||
tokio = { version = "1", features = ["full"] }
|
@ -42,6 +42,10 @@ pub struct Config {
|
||||
/// read n bytes of the response document body
|
||||
pub bodysize: usize,
|
||||
|
||||
#[clap(long = "favicon")]
|
||||
/// computes an mmh3 favicon hash
|
||||
pub favicon: bool,
|
||||
|
||||
#[clap(long = "ts")]
|
||||
/// include timestamps of requests
|
||||
pub timestamps: bool,
|
||||
|
@ -34,7 +34,7 @@ pub fn fmtcode(code: u16) -> ColoredString {
|
||||
pub fn tstamp() -> String {
|
||||
let date = Local::now();
|
||||
|
||||
let datestr = format!("{}", date.format("[%Y-%m-%d][%H:%M:%S}"));
|
||||
let datestr = format!("{}", date.format("[%Y-%m-%d][%H:%M:%S]"));
|
||||
format!("{}", datestr.bright_blue())
|
||||
}
|
||||
|
||||
@ -56,7 +56,15 @@ pub fn parsebody(s: String) -> String {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parsehit(sc: u16, url: String) -> String {
|
||||
pub fn parsehash(h: i32) -> String {
|
||||
format!("{}{}{}",
|
||||
"favicon[".bright_black().bold(),
|
||||
h.to_string().blue().bold(),
|
||||
"]".bright_black().bold()
|
||||
)
|
||||
}
|
||||
|
||||
pub fn parsehit(sc: u16, url: &String) -> String {
|
||||
format!(
|
||||
"{} {} {}",
|
||||
fmtcode(sc),
|
||||
|
@ -31,6 +31,7 @@ pub async fn takeoff(args: Config, params: Params) {
|
||||
scodes,
|
||||
params.exclude,
|
||||
args.pulltitles,
|
||||
args.favicon,
|
||||
args.timestamps,
|
||||
args.bodysize
|
||||
)
|
||||
|
@ -1,6 +1,11 @@
|
||||
use select::{document::Document, predicate::Name};
|
||||
use reqwest::Url;
|
||||
use base64::{Engine as _, engine::general_purpose};
|
||||
use murmur3::murmur3_32;
|
||||
use std::{error::Error,
|
||||
io::Cursor};
|
||||
|
||||
use super::console::{parsetitle, parsebody, fmtwhitespace};
|
||||
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace};
|
||||
|
||||
pub fn get_title(body: &String) -> String {
|
||||
let document = Document::from(body.as_str());
|
||||
@ -30,3 +35,59 @@ pub fn read_body(body: &String, lim: usize) -> String {
|
||||
|
||||
parsebody(bodytext)
|
||||
}
|
||||
|
||||
/// Wraps `s` into lines of `interval` characters, terminating every line
/// (including a shorter final one) with `sep`.
///
/// An empty `s` yields an empty string. An `interval` of zero disables
/// wrapping and returns `s` unchanged; without this guard the capacity
/// computation below would divide by zero and panic.
fn fmtbase64(s: &str, interval: usize, sep: char) -> String {
    if interval == 0 {
        return s.to_owned();
    }

    // One separator per full line, plus one for a possible partial last line.
    let mut out = String::with_capacity(s.len() + s.len() / interval + 1);
    let mut count = 0;

    for c in s.chars() {
        out.push(c);
        count += 1;
        if count == interval {
            out.push(sep);
            count = 0;
        }
    }

    // Terminate a trailing partial line; a full last line was already
    // terminated inside the loop.
    if count != 0 {
        out.push(sep);
    }

    out
}
|
||||
|
||||
fn faviconurl(doc: Document, url: String ) -> Result<String, Box <dyn Error>> {
|
||||
for node in doc.find(Name("link")) {
|
||||
if let Some(rel) = node.attr("rel") {
|
||||
if rel.eq("icon") {
|
||||
if let Some(href) = node.attr("href") {
|
||||
let base_url = Url::parse(&url)?;
|
||||
let favicon_url = base_url.join(href)?;
|
||||
return Ok(favicon_url.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err("".into())
|
||||
}
|
||||
|
||||
async fn dl_favicon(url: String) -> Result<Vec<u8>, Box<dyn Error>> {
|
||||
let data = reqwest::get(url).await?.bytes().await?.to_vec();
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
pub async fn hash_favicon(body: &String, url: String) -> String {
|
||||
let document = Document::from(body.as_str());
|
||||
|
||||
if let Ok(favurl) = faviconurl(document, url) {
|
||||
if let Ok(data) = dl_favicon(favurl).await {
|
||||
// compute hash
|
||||
let b64 = general_purpose::STANDARD.encode(data);
|
||||
let f_b64 = fmtbase64(&b64, 76,'\n');
|
||||
let hash = murmur3_32(&mut Cursor::new(f_b64.into_bytes()), 0).unwrap_or_else(|_| 0) as i32;
|
||||
return parsehash(hash);
|
||||
}
|
||||
}
|
||||
|
||||
"".into()
|
||||
}
|
@ -30,6 +30,7 @@ pub async fn query(
|
||||
codes: Vec<u16>,
|
||||
exclude: bool,
|
||||
titles: bool,
|
||||
favicon: bool,
|
||||
timestamps: bool,
|
||||
bodysize: usize,
|
||||
) -> Result<(), reqwest::Error> {
|
||||
@ -56,7 +57,7 @@ pub async fn query(
|
||||
let url: String = response.url().to_string();
|
||||
let body = response.text().await?;
|
||||
|
||||
let mut out = parsehit(sc, url);
|
||||
let mut out = parsehit(sc, &url);
|
||||
|
||||
if timestamps {
|
||||
out = format!("{} {}", tstamp(), out);
|
||||
@ -66,6 +67,13 @@ pub async fn query(
|
||||
out = format!("{} {}", out, get_title(&body));
|
||||
}
|
||||
|
||||
if favicon {
|
||||
let hash = hash_favicon(&body, url).await;
|
||||
if !hash.is_empty() {
|
||||
out = format!("{} {}", out, hash);
|
||||
}
|
||||
}
|
||||
|
||||
if bodysize > 0 {
|
||||
out = format!("{} {}", out, read_body(&body, bodysize));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user