From 2c8a2b4121041afdc3a5a83a4bfe8d006bf20c56 Mon Sep 17 00:00:00 2001 From: delorean Date: Sun, 11 Aug 2024 12:30:38 -0500 Subject: [PATCH] refactoring, user agent customization, title limit, body streaming & limit, improved efficiency --- .gitignore | 0 Cargo.lock | 5 ++-- Cargo.toml | 1 + README.md | 0 src/common/conf.rs | 14 ++++++++-- src/common/console.rs | 27 +++++++++++++------ src/common/exec.rs | 31 +++++++++++----------- src/common/mod.rs | 2 +- src/common/modules.rs | 44 +++++++++++++++++++----------- src/common/net.rs | 62 +++++++++++++++++++++++-------------------- src/main.rs | 7 ++--- 11 files changed, 115 insertions(+), 78 deletions(-) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 Cargo.lock mode change 100644 => 100755 Cargo.toml mode change 100644 => 100755 README.md mode change 100644 => 100755 src/common/conf.rs mode change 100644 => 100755 src/common/console.rs mode change 100644 => 100755 src/common/exec.rs mode change 100644 => 100755 src/common/mod.rs mode change 100644 => 100755 src/common/modules.rs mode change 100644 => 100755 src/common/net.rs mode change 100644 => 100755 src/main.rs diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/Cargo.lock b/Cargo.lock old mode 100644 new mode 100755 index 68d2749..0a4102b --- a/Cargo.lock +++ b/Cargo.lock @@ -142,9 +142,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "cc" @@ -1232,6 +1232,7 @@ name = "speedboat" version = "0.1.0" dependencies = [ "base64", + "bytes", "chrono", "clap", "colored", diff --git a/Cargo.toml b/Cargo.toml old mode 100644 new mode 100755 index f2c229b..ae1c449 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] base64 = "0.22.1" +bytes = "1.7.1" chrono = "0.4.38" clap = { version = "4.5.4", features = ["derive"] } colored = "2.1.0" diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/src/common/conf.rs b/src/common/conf.rs old mode 100644 new mode 100755 index 7a0225c..5b67b60 --- a/src/common/conf.rs +++ b/src/common/conf.rs @@ -1,10 +1,15 @@ use super::console::fatal; use clap::Parser; -pub const VERSION: &str = "1.0.0"; +pub const VERSION: &str = "1.2"; + pub struct Params { pub statcodes: Vec, pub exclude: bool, + pub titles: bool, + pub favicon: bool, + pub timestamps: bool, + pub bodysize: usize } #[derive(Parser, Default)] @@ -22,6 +27,10 @@ pub struct Config { /// concurrent workers pub threads: usize, + #[clap(long = "ua")] + /// custom user agent to operate with + pub useragent: Option, + #[clap(long = "mc")] /// status codes to match, comma separated pub matchcodes: Option, @@ -77,5 +86,6 @@ pub fn setparams(c: &Config) -> Params { exclude = true; } - Params { statcodes, exclude } + Params { statcodes, exclude, titles: c.pulltitles, favicon: c.favicon, + timestamps: c.timestamps, bodysize: c.bodysize } } diff --git a/src/common/console.rs b/src/common/console.rs old mode 100644 new mode 100755 index 402a46d..75de2ec --- a/src/common/console.rs +++ b/src/common/console.rs @@ -9,6 +9,16 @@ pub fn fatal(msg: &str) -> ! 
{ process::exit(-1); } +pub fn trunc(s: String, lim: usize) -> String { + let truncatedstr = s.char_indices() + .take_while(|(i, _)| *i < lim) + .map(|(_, c)| c) + .collect(); + + truncatedstr +} + +// strips whitespace while maintaining legibility pub fn fmtwhitespace(s: String) -> String { let mut out = String::with_capacity(s.len()); s.split_whitespace().for_each(|w| { @@ -27,7 +37,7 @@ pub fn fmtcode(code: u16) -> ColoredString { 300..=399 => code.to_string().yellow(), 400..=499 => code.to_string().bright_red(), 500..=599 => code.to_string().red().bold(), - _ => code.to_string().black(), + _ => code.to_string().bright_black(), } } @@ -39,7 +49,7 @@ pub fn tstamp() -> String { } pub fn parsetitle(s: String) -> String { - let title = fmtwhitespace(s); + let title: String = trunc(fmtwhitespace(s), 1024); format!("{}{}{}", "title[".bright_black().bold(), @@ -74,10 +84,11 @@ pub fn parsehit(sc: u16, url: &String) -> String { } pub fn banner() { - eprintln!( - "{}{} {}", - "speed".bright_cyan().bold(), - "boat".bright_magenta().bold(), - VERSION.bright_black() - ); + eprintln!(r#" + {}{} |\___..--"/ + __..--`"" / +'-._.'._:._'-._____..--' {} +"#, "speed".bright_cyan().bold(), +"boat".bright_magenta().bold(), +VERSION.bright_black()); } diff --git a/src/common/exec.rs b/src/common/exec.rs old mode 100644 new mode 100755 index d1ff556..c5417f5 --- a/src/common/exec.rs +++ b/src/common/exec.rs @@ -5,35 +5,36 @@ use std::{ }; use super::{ - conf::{Config, Params}, + conf::{Config, setparams}, console::fatal, net::{mkclient, query}, }; -pub async fn takeoff(args: Config, params: Params) { - let c = mkclient(args.follow).unwrap_or_else(|_| fatal("error instantiating http client")); +pub async fn takeoff(args: Config) { + let params = setparams(&args); + + let mut ua: String = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/126.0.0.0 Safari/537.36".to_string(); + if let Some(custom_ua) = args.useragent { 
+ ua = custom_ua; + } - let file = File::open(args.list) + let client = mkclient(args.follow, ua).unwrap_or_else(|_| fatal("error instantiating http client")); + + let file = File::open(&args.list) .unwrap_or_else(|e| fatal(format!("unable to read file: {e}").as_str())); - // Create a buffered reader. let buf = BufReader::new(file); stream::iter(buf.lines()) .for_each_concurrent(args.threads, |line| { - let wc = c.clone(); - let scodes = params.statcodes.clone(); + // workers using the same client ref > each worker getting their own? we'll see + let wc = &client; + let qparams = &params; async move { let _ = query( wc, - line.unwrap_or_else(|_| fatal("error attempting buffered read")) - .trim(), - scodes, - params.exclude, - args.pulltitles, - args.favicon, - args.timestamps, - args.bodysize + line.unwrap_or_else(|_| fatal("error attempting buffered read")).trim(), + qparams ) .await; } diff --git a/src/common/mod.rs b/src/common/mod.rs old mode 100644 new mode 100755 index 693179a..a2cfbe0 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -2,4 +2,4 @@ pub mod conf; pub mod console; pub mod exec; pub mod net; -pub mod modules; +pub mod modules; \ No newline at end of file diff --git a/src/common/modules.rs b/src/common/modules.rs old mode 100644 new mode 100755 index 86c19ff..48e74e4 --- a/src/common/modules.rs +++ b/src/common/modules.rs @@ -1,14 +1,14 @@ use select::{document::Document, predicate::Name}; -use reqwest::Url; +use reqwest::{Response, Url}; use base64::{Engine as _, engine::general_purpose}; +use bytes::{Bytes, BytesMut}; use murmur3::murmur3_32; -use std::{error::Error, -io::Cursor}; +use std::{error::Error, io::Cursor}; -use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace}; +use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace, trunc}; -pub fn get_title(body: &String) -> String { - let document = Document::from(body.as_str()); +pub fn get_title(body: &str) -> String { + let document = Document::from(body); let title = 
document .find(Name("title")) @@ -19,18 +19,30 @@ pub fn get_title(body: &String) -> String { parsetitle(title) } -pub fn read_body(body: &String, lim: usize) -> String { - let document = Document::from(body.as_str()); +pub async fn readnbody(mut resp: Response, n: usize) -> Result { + let mut body = BytesMut::new(); + while let Some(chunk) = resp.chunk().await? { + if !(body.len() + chunk.len()) > n { + body.extend_from_slice(&chunk); + } + } + + Ok(body.freeze()) +} + +pub fn body_str(body: Bytes) -> Result> { + String::from_utf8(body.to_vec()) + .map_err(|e| format!("error reading body into a string: {}", e).into()) +} + +pub fn body_contents(body: &str, lim: usize) -> String { + let document = Document::from(body); let mut bodytext = document.find(Name("body")).next().map(|n| n.text()).unwrap_or_else(|| "".to_string()); bodytext = fmtwhitespace(bodytext); if bodytext.len() > lim { - bodytext = bodytext - .char_indices() - .take_while(|(i, _)| *i < lim) - .map(|(_, c)| c) - .collect(); + bodytext = trunc(bodytext, lim); } parsebody(bodytext) @@ -76,8 +88,8 @@ async fn dl_favicon(url: String) -> Result, Box> { Ok(data) } -pub async fn hash_favicon(body: &String, url: String) -> String { - let document = Document::from(body.as_str()); +pub async fn hash_favicon(body: &str, url: String) -> String { + let document = Document::from(body); if let Ok(favurl) = faviconurl(document, url) { if let Ok(data) = dl_favicon(favurl).await { @@ -90,4 +102,4 @@ pub async fn hash_favicon(body: &String, url: String) -> String { } "".into() -} \ No newline at end of file +} diff --git a/src/common/net.rs b/src/common/net.rs old mode 100644 new mode 100755 index 7fca2c6..a0903c4 --- a/src/common/net.rs +++ b/src/common/net.rs @@ -2,17 +2,18 @@ use reqwest::{redirect::Policy, Client}; use std::time::Duration; use super::console::{parsehit, tstamp}; +use super::conf::Params; use super::modules::*; -pub fn mkclient(redir: bool) -> Result { +pub fn mkclient(redir: bool, ua: String) -> 
Result { let rpolicy: Policy = if redir { - Policy::limited(5) + Policy::limited(3) } else { Policy::none() }; Client::builder() - .user_agent("buttplug/1.0") + .user_agent(ua) .redirect(rpolicy) .timeout(Duration::from_secs(2)) .connect_timeout(Duration::from_millis(500)) @@ -25,57 +26,60 @@ async fn sendreq(c: &Client, use_https: bool, url: &str) -> Result, - exclude: bool, - titles: bool, - favicon: bool, - timestamps: bool, - bodysize: usize, + params: &Params ) -> Result<(), reqwest::Error> { let response: reqwest::Response; - if let Ok(res) = sendreq(&c, true, url).await { - response = res; - } else { - response = sendreq(&c, false, url).await?; - } - let statcode = response.status().as_u16(); - - if codes.len() > 0 { - if codes.contains(&statcode) { - if exclude { - return Ok(()); + match sendreq(c, true, url).await { + Ok(res) => response = res, + Err(e) => { + if e.is_request() { + response = sendreq(c, false, url).await?; + } else { + return Err(e); } - } else if !exclude { - return Ok(()); } } let sc = response.status().as_u16(); + + if !params.statcodes.is_empty() { + if params.statcodes.contains(&sc) { + if params.exclude { + return Ok(()); + } + } else if !params.exclude { + return Ok(()); + } + } + let url: String = response.url().to_string(); - let body = response.text().await?; + + // 10mb response body limit + let body_raw = readnbody(response, 10485760).await?; + let body = body_str(body_raw).unwrap_or("error parsing response body".to_string()); let mut out = parsehit(sc, &url); - if timestamps { + if params.timestamps { out = format!("{} {}", tstamp(), out); } - if titles { + if params.titles { out = format!("{} {}", out, get_title(&body)); } - if favicon { + if params.favicon { let hash = hash_favicon(&body, url).await; if !hash.is_empty() { out = format!("{} {}", out, hash); } } - if bodysize > 0 { - out = format!("{} {}", out, read_body(&body, bodysize)); + if params.bodysize > 0 { + out = format!("{} {}", out, body_contents(&body, 
params.bodysize)); } println!("{}", out); diff --git a/src/main.rs b/src/main.rs old mode 100644 new mode 100755 index 0f3d66c..0afe5b3 --- a/src/main.rs +++ b/src/main.rs @@ -2,9 +2,6 @@ mod common; #[tokio::main] async fn main() { - let args = common::conf::load(); - let scanparams = common::conf::setparams(&args); - common::console::banner(); - common::exec::takeoff(args, scanparams).await; -} \ No newline at end of file + common::exec::takeoff(common::conf::load()).await; +}