refactoring, user agent customization, title limit, body streaming & limit, improved efficiency

This commit is contained in:
delorean 2024-08-11 12:30:38 -05:00
parent 8ed9cbeb73
commit 2c8a2b4121
Signed by untrusted user who does not match committer: delorean
GPG Key ID: 59DBCEE82E4CF31B
11 changed files with 115 additions and 78 deletions

0
.gitignore vendored Normal file → Executable file
View File

5
Cargo.lock generated Normal file → Executable file
View File

@ -142,9 +142,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "bytes"
version = "1.6.0"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50"
[[package]]
name = "cc"
@ -1232,6 +1232,7 @@ name = "speedboat"
version = "0.1.0"
dependencies = [
"base64",
"bytes",
"chrono",
"clap",
"colored",

1
Cargo.toml Normal file → Executable file
View File

@ -7,6 +7,7 @@ edition = "2021"
[dependencies]
base64 = "0.22.1"
bytes = "1.7.1"
chrono = "0.4.38"
clap = { version = "4.5.4", features = ["derive"] }
colored = "2.1.0"

0
README.md Normal file → Executable file
View File

14
src/common/conf.rs Normal file → Executable file
View File

@ -1,10 +1,15 @@
use super::console::fatal;
use clap::Parser;
pub const VERSION: &str = "1.0.0";
pub const VERSION: &str = "1.2";
/// Per-scan output and filtering settings handed to every query worker.
///
/// Built once from the parsed command-line configuration and then shared by
/// reference, so workers do not each need their own copy.
pub struct Params {
    /// Status codes the response filter checks against. An empty list
    /// disables status filtering entirely.
    pub statcodes: Vec<u16>,
    /// Selects how `statcodes` is applied: when `false`, only responses whose
    /// status is listed are reported; when `true`, listed statuses are
    /// suppressed and everything else is reported.
    pub exclude: bool,
    /// Append the page title to each reported hit.
    pub titles: bool,
    /// Append the favicon hash to each reported hit when one can be computed.
    pub favicon: bool,
    /// Prefix each reported hit with a timestamp.
    pub timestamps: bool,
    /// Length limit for the body-text excerpt appended to each hit; `0`
    /// disables the excerpt.
    pub bodysize: usize,
}
#[derive(Parser, Default)]
@ -22,6 +27,10 @@ pub struct Config {
/// concurrent workers
pub threads: usize,
#[clap(long = "ua")]
/// custom user agent to operate with
pub useragent: Option<String>,
#[clap(long = "mc")]
/// status codes to match, comma separated
pub matchcodes: Option<String>,
@ -77,5 +86,6 @@ pub fn setparams(c: &Config) -> Params {
exclude = true;
}
Params { statcodes, exclude }
Params { statcodes, exclude, titles: c.pulltitles, favicon: c.favicon,
timestamps: c.timestamps, bodysize: c.bodysize }
}

27
src/common/console.rs Normal file → Executable file
View File

@ -9,6 +9,16 @@ pub fn fatal(msg: &str) -> ! {
process::exit(-1);
}
/// Shortens `s` to roughly `lim` bytes without ever splitting a character.
///
/// Every character whose starting byte offset is below `lim` is kept, so the
/// result ends on the first character boundary at or after `lim`. A multi-byte
/// character that straddles the limit is therefore kept whole, and the result
/// may exceed `lim` by up to three bytes. Strings already within the limit are
/// returned unchanged.
///
/// The string is truncated in place, so no new allocation is made.
pub fn trunc(mut s: String, lim: usize) -> String {
    // First character boundary at or past the limit; if there is none the
    // whole string already fits.
    let cut = s
        .char_indices()
        .map(|(idx, _)| idx)
        .find(|&idx| idx >= lim)
        .unwrap_or(s.len());
    s.truncate(cut);
    s
}
// strips whitespace while maintaining legibility
pub fn fmtwhitespace(s: String) -> String {
let mut out = String::with_capacity(s.len());
s.split_whitespace().for_each(|w| {
@ -27,7 +37,7 @@ pub fn fmtcode(code: u16) -> ColoredString {
300..=399 => code.to_string().yellow(),
400..=499 => code.to_string().bright_red(),
500..=599 => code.to_string().red().bold(),
_ => code.to_string().black(),
_ => code.to_string().bright_black(),
}
}
@ -39,7 +49,7 @@ pub fn tstamp() -> String {
}
pub fn parsetitle(s: String) -> String {
let title = fmtwhitespace(s);
let title: String = trunc(fmtwhitespace(s), 1024);
format!("{}{}{}",
"title[".bright_black().bold(),
@ -74,10 +84,11 @@ pub fn parsehit(sc: u16, url: &String) -> String {
}
pub fn banner() {
eprintln!(
"{}{} {}",
"speed".bright_cyan().bold(),
"boat".bright_magenta().bold(),
VERSION.bright_black()
);
eprintln!(r#"
{}{} |\___..--"/
__..--`"" /
'-._.'._:._'-._____..--' {}
"#, "speed".bright_cyan().bold(),
"boat".bright_magenta().bold(),
VERSION.bright_black());
}

31
src/common/exec.rs Normal file → Executable file
View File

@ -5,35 +5,36 @@ use std::{
};
use super::{
conf::{Config, Params},
conf::{Config, setparams},
console::fatal,
net::{mkclient, query},
};
pub async fn takeoff(args: Config, params: Params) {
let c = mkclient(args.follow).unwrap_or_else(|_| fatal("error instantiating http client"));
pub async fn takeoff(args: Config) {
let params = setparams(&args);
let file = File::open(args.list)
let mut ua: String = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/126.0.0.0 Safari/537.36".to_string();
if let Some(custom_ua) = args.useragent {
ua = custom_ua;
}
let client = mkclient(args.follow, ua).unwrap_or_else(|_| fatal("error instantiating http client"));
let file = File::open(&args.list)
.unwrap_or_else(|e| fatal(format!("unable to read file: {e}").as_str()));
// Create a buffered reader.
let buf = BufReader::new(file);
stream::iter(buf.lines())
.for_each_concurrent(args.threads, |line| {
let wc = c.clone();
let scodes = params.statcodes.clone();
// workers using the same client ref > each worker getting their own? we'll see
let wc = &client;
let qparams = &params;
async move {
let _ = query(
wc,
line.unwrap_or_else(|_| fatal("error attempting buffered read"))
.trim(),
scodes,
params.exclude,
args.pulltitles,
args.favicon,
args.timestamps,
args.bodysize
line.unwrap_or_else(|_| fatal("error attempting buffered read")).trim(),
qparams
)
.await;
}

0
src/common/mod.rs Normal file → Executable file
View File

42
src/common/modules.rs Normal file → Executable file
View File

@ -1,14 +1,14 @@
use select::{document::Document, predicate::Name};
use reqwest::Url;
use reqwest::{Response, Url};
use base64::{Engine as _, engine::general_purpose};
use bytes::{Bytes, BytesMut};
use murmur3::murmur3_32;
use std::{error::Error,
io::Cursor};
use std::{error::Error, io::Cursor};
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace};
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace, trunc};
pub fn get_title(body: &String) -> String {
let document = Document::from(body.as_str());
pub fn get_title(body: &str) -> String {
let document = Document::from(body);
let title = document
.find(Name("title"))
@ -19,18 +19,30 @@ pub fn get_title(body: &String) -> String {
parsetitle(title)
}
pub fn read_body(body: &String, lim: usize) -> String {
let document = Document::from(body.as_str());
/// Streams the response body chunk by chunk, keeping at most `n` bytes.
///
/// Reading stops as soon as `n` bytes have been collected, so an oversized
/// body is not downloaded in full. The final chunk is cut to fit the remaining
/// allowance; the cut is byte-based and may fall inside a multi-byte UTF-8
/// sequence.
///
/// # Errors
///
/// Returns the underlying `reqwest::Error` if fetching a chunk fails.
pub async fn readnbody(mut resp: Response, n: usize) -> Result<Bytes, reqwest::Error> {
    let mut body = BytesMut::new();

    while let Some(chunk) = resp.chunk().await? {
        // `body.len()` never exceeds `n`, but saturate anyway so the
        // subtraction can never underflow.
        let remaining = n.saturating_sub(body.len());

        if chunk.len() >= remaining {
            // This chunk reaches (or passes) the cap: keep only what still
            // fits and stop pulling data from the connection.
            body.extend_from_slice(&chunk[..remaining]);
            break;
        }

        body.extend_from_slice(&chunk);
    }

    Ok(body.freeze())
}
/// Converts a raw response body into an owned `String`.
///
/// The buffer is consumed and handed to `String::from_utf8` directly instead
/// of being copied with `to_vec()` first, which matters for bodies near the
/// download cap.
///
/// # Errors
///
/// Returns a boxed error carrying a descriptive message when the body is not
/// valid UTF-8.
pub fn body_str(body: Bytes) -> Result<String, Box<dyn Error>> {
    String::from_utf8(Vec::from(body))
        .map_err(|e| format!("error reading body into a string: {e}").into())
}
pub fn body_contents(body: &str, lim: usize) -> String {
let document = Document::from(body);
let mut bodytext = document.find(Name("body")).next().map(|n| n.text()).unwrap_or_else(|| "".to_string());
bodytext = fmtwhitespace(bodytext);
if bodytext.len() > lim {
bodytext = bodytext
.char_indices()
.take_while(|(i, _)| *i < lim)
.map(|(_, c)| c)
.collect();
bodytext = trunc(bodytext, lim);
}
parsebody(bodytext)
@ -76,8 +88,8 @@ async fn dl_favicon(url: String) -> Result<Vec<u8>, Box<dyn Error>> {
Ok(data)
}
pub async fn hash_favicon(body: &String, url: String) -> String {
let document = Document::from(body.as_str());
pub async fn hash_favicon(body: &str, url: String) -> String {
let document = Document::from(body);
if let Ok(favurl) = faviconurl(document, url) {
if let Ok(data) = dl_favicon(favurl).await {

62
src/common/net.rs Normal file → Executable file
View File

@ -2,17 +2,18 @@ use reqwest::{redirect::Policy, Client};
use std::time::Duration;
use super::console::{parsehit, tstamp};
use super::conf::Params;
use super::modules::*;
pub fn mkclient(redir: bool) -> Result<Client, reqwest::Error> {
pub fn mkclient(redir: bool, ua: String) -> Result<Client, reqwest::Error> {
let rpolicy: Policy = if redir {
Policy::limited(5)
Policy::limited(3)
} else {
Policy::none()
};
Client::builder()
.user_agent("buttplug/1.0")
.user_agent(ua)
.redirect(rpolicy)
.timeout(Duration::from_secs(2))
.connect_timeout(Duration::from_millis(500))
@ -25,57 +26,60 @@ async fn sendreq(c: &Client, use_https: bool, url: &str) -> Result<reqwest::Resp
}
pub async fn query(
c: Client,
c: &Client,
url: &str,
codes: Vec<u16>,
exclude: bool,
titles: bool,
favicon: bool,
timestamps: bool,
bodysize: usize,
params: &Params
) -> Result<(), reqwest::Error> {
let response: reqwest::Response;
if let Ok(res) = sendreq(&c, true, url).await {
response = res;
match sendreq(c, true, url).await {
Ok(res) => response = res,
Err(e) => {
if e.is_request() {
response = sendreq(c, false, url).await?;
} else {
response = sendreq(&c, false, url).await?;
return Err(e);
}
let statcode = response.status().as_u16();
if codes.len() > 0 {
if codes.contains(&statcode) {
if exclude {
return Ok(());
}
} else if !exclude {
return Ok(());
}
}
let sc = response.status().as_u16();
if !params.statcodes.is_empty() {
if params.statcodes.contains(&sc) {
if params.exclude {
return Ok(());
}
} else if !params.exclude {
return Ok(());
}
}
let url: String = response.url().to_string();
let body = response.text().await?;
// 10mb response body limit
let body_raw = readnbody(response, 10485760).await?;
let body = body_str(body_raw).unwrap_or("error parsing response body".to_string());
let mut out = parsehit(sc, &url);
if timestamps {
if params.timestamps {
out = format!("{} {}", tstamp(), out);
}
if titles {
if params.titles {
out = format!("{} {}", out, get_title(&body));
}
if favicon {
if params.favicon {
let hash = hash_favicon(&body, url).await;
if !hash.is_empty() {
out = format!("{} {}", out, hash);
}
}
if bodysize > 0 {
out = format!("{} {}", out, read_body(&body, bodysize));
if params.bodysize > 0 {
out = format!("{} {}", out, body_contents(&body, params.bodysize));
}
println!("{}", out);

5
src/main.rs Normal file → Executable file
View File

@ -2,9 +2,6 @@ mod common;
#[tokio::main]
async fn main() {
let args = common::conf::load();
let scanparams = common::conf::setparams(&args);
common::console::banner();
common::exec::takeoff(args, scanparams).await;
common::exec::takeoff(common::conf::load()).await;
}