refactoring, user agent customization, title limit, body streaming & limit, improved efficiency

This commit is contained in:
delorean 2024-08-11 12:30:38 -05:00
parent 8ed9cbeb73
commit 2c8a2b4121
Signed by untrusted user who does not match committer: delorean
GPG Key ID: 59DBCEE82E4CF31B
11 changed files with 115 additions and 78 deletions

0
.gitignore vendored Normal file → Executable file
View File

5
Cargo.lock generated Normal file → Executable file
View File

@ -142,9 +142,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.6.0" version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50"
[[package]] [[package]]
name = "cc" name = "cc"
@ -1232,6 +1232,7 @@ name = "speedboat"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"base64", "base64",
"bytes",
"chrono", "chrono",
"clap", "clap",
"colored", "colored",

1
Cargo.toml Normal file → Executable file
View File

@ -7,6 +7,7 @@ edition = "2021"
[dependencies] [dependencies]
base64 = "0.22.1" base64 = "0.22.1"
bytes = "1.7.1"
chrono = "0.4.38" chrono = "0.4.38"
clap = { version = "4.5.4", features = ["derive"] } clap = { version = "4.5.4", features = ["derive"] }
colored = "2.1.0" colored = "2.1.0"

0
README.md Normal file → Executable file
View File

14
src/common/conf.rs Normal file → Executable file
View File

@ -1,10 +1,15 @@
use super::console::fatal; use super::console::fatal;
use clap::Parser; use clap::Parser;
pub const VERSION: &str = "1.0.0"; pub const VERSION: &str = "1.2";
pub struct Params { pub struct Params {
pub statcodes: Vec<u16>, pub statcodes: Vec<u16>,
pub exclude: bool, pub exclude: bool,
pub titles: bool,
pub favicon: bool,
pub timestamps: bool,
pub bodysize: usize
} }
#[derive(Parser, Default)] #[derive(Parser, Default)]
@ -22,6 +27,10 @@ pub struct Config {
/// concurrent workers /// concurrent workers
pub threads: usize, pub threads: usize,
#[clap(long = "ua")]
/// custom user agent to operate with
pub useragent: Option<String>,
#[clap(long = "mc")] #[clap(long = "mc")]
/// status codes to match, comma separated /// status codes to match, comma separated
pub matchcodes: Option<String>, pub matchcodes: Option<String>,
@ -77,5 +86,6 @@ pub fn setparams(c: &Config) -> Params {
exclude = true; exclude = true;
} }
Params { statcodes, exclude } Params { statcodes, exclude, titles: c.pulltitles, favicon: c.favicon,
timestamps: c.timestamps, bodysize: c.bodysize }
} }

27
src/common/console.rs Normal file → Executable file
View File

@ -9,6 +9,16 @@ pub fn fatal(msg: &str) -> ! {
process::exit(-1); process::exit(-1);
} }
/// Shortens `s` so that it keeps only the characters whose starting byte
/// offset is below `lim`.
///
/// The cut always lands on a character boundary. Because the test is on the
/// *starting* offset, a multi-byte character that begins before `lim` is kept
/// whole, so the result can be a few bytes longer than `lim`.
pub fn trunc(mut s: String, lim: usize) -> String {
    // First character that starts at or past the limit; if there is none the
    // whole string is kept.
    let cut = s
        .char_indices()
        .map(|(offset, _)| offset)
        .find(|&offset| offset >= lim)
        .unwrap_or(s.len());
    s.truncate(cut);
    s
}
// strips whitespace while maintaining legibility
pub fn fmtwhitespace(s: String) -> String { pub fn fmtwhitespace(s: String) -> String {
let mut out = String::with_capacity(s.len()); let mut out = String::with_capacity(s.len());
s.split_whitespace().for_each(|w| { s.split_whitespace().for_each(|w| {
@ -27,7 +37,7 @@ pub fn fmtcode(code: u16) -> ColoredString {
300..=399 => code.to_string().yellow(), 300..=399 => code.to_string().yellow(),
400..=499 => code.to_string().bright_red(), 400..=499 => code.to_string().bright_red(),
500..=599 => code.to_string().red().bold(), 500..=599 => code.to_string().red().bold(),
_ => code.to_string().black(), _ => code.to_string().bright_black(),
} }
} }
@ -39,7 +49,7 @@ pub fn tstamp() -> String {
} }
pub fn parsetitle(s: String) -> String { pub fn parsetitle(s: String) -> String {
let title = fmtwhitespace(s); let title: String = trunc(fmtwhitespace(s), 1024);
format!("{}{}{}", format!("{}{}{}",
"title[".bright_black().bold(), "title[".bright_black().bold(),
@ -74,10 +84,11 @@ pub fn parsehit(sc: u16, url: &String) -> String {
} }
pub fn banner() { pub fn banner() {
eprintln!( eprintln!(r#"
"{}{} {}", {}{} |\___..--"/
"speed".bright_cyan().bold(), __..--`"" /
"boat".bright_magenta().bold(), '-._.'._:._'-._____..--' {}
VERSION.bright_black() "#, "speed".bright_cyan().bold(),
); "boat".bright_magenta().bold(),
VERSION.bright_black());
} }

31
src/common/exec.rs Normal file → Executable file
View File

@ -5,35 +5,36 @@ use std::{
}; };
use super::{ use super::{
conf::{Config, Params}, conf::{Config, setparams},
console::fatal, console::fatal,
net::{mkclient, query}, net::{mkclient, query},
}; };
pub async fn takeoff(args: Config, params: Params) { pub async fn takeoff(args: Config) {
let c = mkclient(args.follow).unwrap_or_else(|_| fatal("error instantiating http client")); let params = setparams(&args);
let file = File::open(args.list) let mut ua: String = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/126.0.0.0 Safari/537.36".to_string();
if let Some(custom_ua) = args.useragent {
ua = custom_ua;
}
let client = mkclient(args.follow, ua).unwrap_or_else(|_| fatal("error instantiating http client"));
let file = File::open(&args.list)
.unwrap_or_else(|e| fatal(format!("unable to read file: {e}").as_str())); .unwrap_or_else(|e| fatal(format!("unable to read file: {e}").as_str()));
// Create a buffered reader.
let buf = BufReader::new(file); let buf = BufReader::new(file);
stream::iter(buf.lines()) stream::iter(buf.lines())
.for_each_concurrent(args.threads, |line| { .for_each_concurrent(args.threads, |line| {
let wc = c.clone(); // workers using the same client ref > each worker getting their own? we'll see
let scodes = params.statcodes.clone(); let wc = &client;
let qparams = &params;
async move { async move {
let _ = query( let _ = query(
wc, wc,
line.unwrap_or_else(|_| fatal("error attempting buffered read")) line.unwrap_or_else(|_| fatal("error attempting buffered read")).trim(),
.trim(), qparams
scodes,
params.exclude,
args.pulltitles,
args.favicon,
args.timestamps,
args.bodysize
) )
.await; .await;
} }

0
src/common/mod.rs Normal file → Executable file
View File

42
src/common/modules.rs Normal file → Executable file
View File

@ -1,14 +1,14 @@
use select::{document::Document, predicate::Name}; use select::{document::Document, predicate::Name};
use reqwest::Url; use reqwest::{Response, Url};
use base64::{Engine as _, engine::general_purpose}; use base64::{Engine as _, engine::general_purpose};
use bytes::{Bytes, BytesMut};
use murmur3::murmur3_32; use murmur3::murmur3_32;
use std::{error::Error, use std::{error::Error, io::Cursor};
io::Cursor};
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace}; use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace, trunc};
pub fn get_title(body: &String) -> String { pub fn get_title(body: &str) -> String {
let document = Document::from(body.as_str()); let document = Document::from(body);
let title = document let title = document
.find(Name("title")) .find(Name("title"))
@ -19,18 +19,30 @@ pub fn get_title(body: &String) -> String {
parsetitle(title) parsetitle(title)
} }
pub fn read_body(body: &String, lim: usize) -> String { pub async fn readnbody(mut resp: Response, n: usize) -> Result<Bytes, reqwest::Error> {
let document = Document::from(body.as_str()); let mut body = BytesMut::new();
while let Some(chunk) = resp.chunk().await? {
if !(body.len() + chunk.len()) > n {
body.extend_from_slice(&chunk);
}
}
Ok(body.freeze())
}
/// Converts the raw response bytes into an owned `String`.
///
/// Fails with a boxed error message when the bytes are not valid UTF-8.
pub fn body_str(body: Bytes) -> Result<String, Box<dyn Error>> {
    match String::from_utf8(body.to_vec()) {
        Ok(text) => Ok(text),
        Err(utf8_err) => Err(format!("error reading body into a string: {}", utf8_err).into()),
    }
}
pub fn body_contents(body: &str, lim: usize) -> String {
let document = Document::from(body);
let mut bodytext = document.find(Name("body")).next().map(|n| n.text()).unwrap_or_else(|| "".to_string()); let mut bodytext = document.find(Name("body")).next().map(|n| n.text()).unwrap_or_else(|| "".to_string());
bodytext = fmtwhitespace(bodytext); bodytext = fmtwhitespace(bodytext);
if bodytext.len() > lim { if bodytext.len() > lim {
bodytext = bodytext bodytext = trunc(bodytext, lim);
.char_indices()
.take_while(|(i, _)| *i < lim)
.map(|(_, c)| c)
.collect();
} }
parsebody(bodytext) parsebody(bodytext)
@ -76,8 +88,8 @@ async fn dl_favicon(url: String) -> Result<Vec<u8>, Box<dyn Error>> {
Ok(data) Ok(data)
} }
pub async fn hash_favicon(body: &String, url: String) -> String { pub async fn hash_favicon(body: &str, url: String) -> String {
let document = Document::from(body.as_str()); let document = Document::from(body);
if let Ok(favurl) = faviconurl(document, url) { if let Ok(favurl) = faviconurl(document, url) {
if let Ok(data) = dl_favicon(favurl).await { if let Ok(data) = dl_favicon(favurl).await {

62
src/common/net.rs Normal file → Executable file
View File

@ -2,17 +2,18 @@ use reqwest::{redirect::Policy, Client};
use std::time::Duration; use std::time::Duration;
use super::console::{parsehit, tstamp}; use super::console::{parsehit, tstamp};
use super::conf::Params;
use super::modules::*; use super::modules::*;
pub fn mkclient(redir: bool) -> Result<Client, reqwest::Error> { pub fn mkclient(redir: bool, ua: String) -> Result<Client, reqwest::Error> {
let rpolicy: Policy = if redir { let rpolicy: Policy = if redir {
Policy::limited(5) Policy::limited(3)
} else { } else {
Policy::none() Policy::none()
}; };
Client::builder() Client::builder()
.user_agent("buttplug/1.0") .user_agent(ua)
.redirect(rpolicy) .redirect(rpolicy)
.timeout(Duration::from_secs(2)) .timeout(Duration::from_secs(2))
.connect_timeout(Duration::from_millis(500)) .connect_timeout(Duration::from_millis(500))
@ -25,57 +26,60 @@ async fn sendreq(c: &Client, use_https: bool, url: &str) -> Result<reqwest::Resp
} }
pub async fn query( pub async fn query(
c: Client, c: &Client,
url: &str, url: &str,
codes: Vec<u16>, params: &Params
exclude: bool,
titles: bool,
favicon: bool,
timestamps: bool,
bodysize: usize,
) -> Result<(), reqwest::Error> { ) -> Result<(), reqwest::Error> {
let response: reqwest::Response; let response: reqwest::Response;
if let Ok(res) = sendreq(&c, true, url).await {
response = res;
} else {
response = sendreq(&c, false, url).await?;
}
let statcode = response.status().as_u16(); match sendreq(c, true, url).await {
Ok(res) => response = res,
if codes.len() > 0 { Err(e) => {
if codes.contains(&statcode) { if e.is_request() {
if exclude { response = sendreq(c, false, url).await?;
return Ok(()); } else {
return Err(e);
} }
} else if !exclude {
return Ok(());
} }
} }
let sc = response.status().as_u16(); let sc = response.status().as_u16();
if !params.statcodes.is_empty() {
if params.statcodes.contains(&sc) {
if params.exclude {
return Ok(());
}
} else if !params.exclude {
return Ok(());
}
}
let url: String = response.url().to_string(); let url: String = response.url().to_string();
let body = response.text().await?;
// 10mb response body limit
let body_raw = readnbody(response, 10485760).await?;
let body = body_str(body_raw).unwrap_or("error parsing response body".to_string());
let mut out = parsehit(sc, &url); let mut out = parsehit(sc, &url);
if timestamps { if params.timestamps {
out = format!("{} {}", tstamp(), out); out = format!("{} {}", tstamp(), out);
} }
if titles { if params.titles {
out = format!("{} {}", out, get_title(&body)); out = format!("{} {}", out, get_title(&body));
} }
if favicon { if params.favicon {
let hash = hash_favicon(&body, url).await; let hash = hash_favicon(&body, url).await;
if !hash.is_empty() { if !hash.is_empty() {
out = format!("{} {}", out, hash); out = format!("{} {}", out, hash);
} }
} }
if bodysize > 0 { if params.bodysize > 0 {
out = format!("{} {}", out, read_body(&body, bodysize)); out = format!("{} {}", out, body_contents(&body, params.bodysize));
} }
println!("{}", out); println!("{}", out);

5
src/main.rs Normal file → Executable file
View File

@ -2,9 +2,6 @@ mod common;
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let args = common::conf::load();
let scanparams = common::conf::setparams(&args);
common::console::banner(); common::console::banner();
common::exec::takeoff(args, scanparams).await; common::exec::takeoff(common::conf::load()).await;
} }