implemented mmh3 favicon hashing
This commit is contained in:
parent
d94ca7688c
commit
eeb9a32523
8
Cargo.lock
generated
8
Cargo.lock
generated
@ -730,6 +730,12 @@ dependencies = [
|
|||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "murmur3"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "native-tls"
|
name = "native-tls"
|
||||||
version = "0.2.11"
|
version = "0.2.11"
|
||||||
@ -1225,10 +1231,12 @@ dependencies = [
|
|||||||
name = "speedboat"
|
name = "speedboat"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"base64",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"colored",
|
"colored",
|
||||||
"futures",
|
"futures",
|
||||||
|
"murmur3",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"select",
|
"select",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
@ -6,10 +6,12 @@ edition = "2021"
|
|||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
base64 = "0.22.1"
|
||||||
chrono = "0.4.38"
|
chrono = "0.4.38"
|
||||||
clap = { version = "4.5.4", features = ["derive"] }
|
clap = { version = "4.5.4", features = ["derive"] }
|
||||||
colored = "2.1.0"
|
colored = "2.1.0"
|
||||||
futures = "0.3.30"
|
futures = "0.3.30"
|
||||||
|
murmur3 = "0.5.2"
|
||||||
reqwest = "0.12.4"
|
reqwest = "0.12.4"
|
||||||
select = "0.6.0"
|
select = "0.6.0"
|
||||||
tokio = { version = "1", features = ["full"] }
|
tokio = { version = "1", features = ["full"] }
|
@ -42,6 +42,10 @@ pub struct Config {
|
|||||||
/// read n bytes of the response document body
|
/// read n bytes of the response document body
|
||||||
pub bodysize: usize,
|
pub bodysize: usize,
|
||||||
|
|
||||||
|
#[clap(long = "favicon")]
|
||||||
|
/// computes an mmh3 favicon hash
|
||||||
|
pub favicon: bool,
|
||||||
|
|
||||||
#[clap(long = "ts")]
|
#[clap(long = "ts")]
|
||||||
/// include timestamps of requests
|
/// include timestamps of requests
|
||||||
pub timestamps: bool,
|
pub timestamps: bool,
|
||||||
|
@ -34,7 +34,7 @@ pub fn fmtcode(code: u16) -> ColoredString {
|
|||||||
pub fn tstamp() -> String {
|
pub fn tstamp() -> String {
|
||||||
let date = Local::now();
|
let date = Local::now();
|
||||||
|
|
||||||
let datestr = format!("{}", date.format("[%Y-%m-%d][%H:%M:%S}"));
|
let datestr = format!("{}", date.format("[%Y-%m-%d][%H:%M:%S]"));
|
||||||
format!("{}", datestr.bright_blue())
|
format!("{}", datestr.bright_blue())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -56,7 +56,15 @@ pub fn parsebody(s: String) -> String {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parsehit(sc: u16, url: String) -> String {
|
pub fn parsehash(h: i32) -> String {
|
||||||
|
format!("{}{}{}",
|
||||||
|
"favicon[".bright_black().bold(),
|
||||||
|
h.to_string().blue().bold(),
|
||||||
|
"]".bright_black().bold()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parsehit(sc: u16, url: &String) -> String {
|
||||||
format!(
|
format!(
|
||||||
"{} {} {}",
|
"{} {} {}",
|
||||||
fmtcode(sc),
|
fmtcode(sc),
|
||||||
|
@ -31,6 +31,7 @@ pub async fn takeoff(args: Config, params: Params) {
|
|||||||
scodes,
|
scodes,
|
||||||
params.exclude,
|
params.exclude,
|
||||||
args.pulltitles,
|
args.pulltitles,
|
||||||
|
args.favicon,
|
||||||
args.timestamps,
|
args.timestamps,
|
||||||
args.bodysize
|
args.bodysize
|
||||||
)
|
)
|
||||||
|
@ -1,6 +1,11 @@
|
|||||||
use select::{document::Document, predicate::Name};
|
use select::{document::Document, predicate::Name};
|
||||||
|
use reqwest::Url;
|
||||||
|
use base64::{Engine as _, engine::general_purpose};
|
||||||
|
use murmur3::murmur3_32;
|
||||||
|
use std::{error::Error,
|
||||||
|
io::Cursor};
|
||||||
|
|
||||||
use super::console::{parsetitle, parsebody, fmtwhitespace};
|
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace};
|
||||||
|
|
||||||
pub fn get_title(body: &String) -> String {
|
pub fn get_title(body: &String) -> String {
|
||||||
let document = Document::from(body.as_str());
|
let document = Document::from(body.as_str());
|
||||||
@ -30,3 +35,59 @@ pub fn read_body(body: &String, lim: usize) -> String {
|
|||||||
|
|
||||||
parsebody(bodytext)
|
parsebody(bodytext)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Wraps `s` into lines of `interval` characters, each terminated by `sep`.
///
/// `sep` is appended after every full group of `interval` characters and once
/// more after a trailing partial group, so any non-empty input ends with
/// `sep`. An empty input yields an empty string.
///
/// The only caller visible here passes `76` and `'\n'`; presumably this is
/// meant to reproduce MIME-style wrapped Base64 before hashing (TODO confirm
/// against the reference favicon-hash implementation).
///
/// An `interval` of `0` means "never wrap": the input is emitted as a single
/// line followed by one `sep` (previously this panicked with a division by
/// zero while computing the capacity hint).
fn fmtbase64(s: &str, interval: usize, sep: char) -> String {
    // One separator per full line plus one for a possible trailing partial
    // line; `checked_div` keeps `interval == 0` from panicking.
    let separators = s.len().checked_div(interval).unwrap_or(0) + 1;
    let mut out = String::with_capacity(s.len() + separators * sep.len_utf8());
    let mut count = 0;

    for c in s.chars() {
        out.push(c);
        count += 1;
        if count == interval {
            out.push(sep);
            count = 0;
        }
    }

    // Terminate a trailing partial line as well.
    if count != 0 {
        out.push(sep);
    }

    out
}
|
||||||
|
|
||||||
|
fn faviconurl(doc: Document, url: String ) -> Result<String, Box <dyn Error>> {
|
||||||
|
for node in doc.find(Name("link")) {
|
||||||
|
if let Some(rel) = node.attr("rel") {
|
||||||
|
if rel.eq("icon") {
|
||||||
|
if let Some(href) = node.attr("href") {
|
||||||
|
let base_url = Url::parse(&url)?;
|
||||||
|
let favicon_url = base_url.join(href)?;
|
||||||
|
return Ok(favicon_url.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err("".into())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn dl_favicon(url: String) -> Result<Vec<u8>, Box<dyn Error>> {
|
||||||
|
let data = reqwest::get(url).await?.bytes().await?.to_vec();
|
||||||
|
Ok(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn hash_favicon(body: &String, url: String) -> String {
|
||||||
|
let document = Document::from(body.as_str());
|
||||||
|
|
||||||
|
if let Ok(favurl) = faviconurl(document, url) {
|
||||||
|
if let Ok(data) = dl_favicon(favurl).await {
|
||||||
|
// compute hash
|
||||||
|
let b64 = general_purpose::STANDARD.encode(data);
|
||||||
|
let f_b64 = fmtbase64(&b64, 76,'\n');
|
||||||
|
let hash = murmur3_32(&mut Cursor::new(f_b64.into_bytes()), 0).unwrap_or_else(|_| 0) as i32;
|
||||||
|
return parsehash(hash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
"".into()
|
||||||
|
}
|
@ -30,6 +30,7 @@ pub async fn query(
|
|||||||
codes: Vec<u16>,
|
codes: Vec<u16>,
|
||||||
exclude: bool,
|
exclude: bool,
|
||||||
titles: bool,
|
titles: bool,
|
||||||
|
favicon: bool,
|
||||||
timestamps: bool,
|
timestamps: bool,
|
||||||
bodysize: usize,
|
bodysize: usize,
|
||||||
) -> Result<(), reqwest::Error> {
|
) -> Result<(), reqwest::Error> {
|
||||||
@ -56,7 +57,7 @@ pub async fn query(
|
|||||||
let url: String = response.url().to_string();
|
let url: String = response.url().to_string();
|
||||||
let body = response.text().await?;
|
let body = response.text().await?;
|
||||||
|
|
||||||
let mut out = parsehit(sc, url);
|
let mut out = parsehit(sc, &url);
|
||||||
|
|
||||||
if timestamps {
|
if timestamps {
|
||||||
out = format!("{} {}", tstamp(), out);
|
out = format!("{} {}", tstamp(), out);
|
||||||
@ -66,6 +67,13 @@ pub async fn query(
|
|||||||
out = format!("{} {}", out, get_title(&body));
|
out = format!("{} {}", out, get_title(&body));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if favicon {
|
||||||
|
let hash = hash_favicon(&body, url).await;
|
||||||
|
if !hash.is_empty() {
|
||||||
|
out = format!("{} {}", out, hash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if bodysize > 0 {
|
if bodysize > 0 {
|
||||||
out = format!("{} {}", out, read_body(&body, bodysize));
|
out = format!("{} {}", out, read_body(&body, bodysize));
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user