refactoring, user agent customization, title limit, body streaming & limit, improved efficiency
This commit is contained in:
parent
8ed9cbeb73
commit
2c8a2b4121
0
.gitignore
vendored
Normal file → Executable file
0
.gitignore
vendored
Normal file → Executable file
5
Cargo.lock
generated
Normal file → Executable file
5
Cargo.lock
generated
Normal file → Executable file
@ -142,9 +142,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytes"
|
name = "bytes"
|
||||||
version = "1.6.0"
|
version = "1.7.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
|
checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
@ -1232,6 +1232,7 @@ name = "speedboat"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64",
|
"base64",
|
||||||
|
"bytes",
|
||||||
"chrono",
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
"colored",
|
"colored",
|
||||||
|
1
Cargo.toml
Normal file → Executable file
1
Cargo.toml
Normal file → Executable file
@ -7,6 +7,7 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
base64 = "0.22.1"
|
base64 = "0.22.1"
|
||||||
|
bytes = "1.7.1"
|
||||||
chrono = "0.4.38"
|
chrono = "0.4.38"
|
||||||
clap = { version = "4.5.4", features = ["derive"] }
|
clap = { version = "4.5.4", features = ["derive"] }
|
||||||
colored = "2.1.0"
|
colored = "2.1.0"
|
||||||
|
14
src/common/conf.rs
Normal file → Executable file
14
src/common/conf.rs
Normal file → Executable file
@ -1,10 +1,15 @@
|
|||||||
use super::console::fatal;
|
use super::console::fatal;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
|
|
||||||
pub const VERSION: &str = "1.0.0";
|
pub const VERSION: &str = "1.2";
|
||||||
|
|
||||||
pub struct Params {
|
pub struct Params {
|
||||||
pub statcodes: Vec<u16>,
|
pub statcodes: Vec<u16>,
|
||||||
pub exclude: bool,
|
pub exclude: bool,
|
||||||
|
pub titles: bool,
|
||||||
|
pub favicon: bool,
|
||||||
|
pub timestamps: bool,
|
||||||
|
pub bodysize: usize
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Parser, Default)]
|
#[derive(Parser, Default)]
|
||||||
@ -22,6 +27,10 @@ pub struct Config {
|
|||||||
/// concurrent workers
|
/// concurrent workers
|
||||||
pub threads: usize,
|
pub threads: usize,
|
||||||
|
|
||||||
|
#[clap(long = "ua")]
|
||||||
|
/// custom user agent to operate with
|
||||||
|
pub useragent: Option<String>,
|
||||||
|
|
||||||
#[clap(long = "mc")]
|
#[clap(long = "mc")]
|
||||||
/// status codes to match, comma separated
|
/// status codes to match, comma separated
|
||||||
pub matchcodes: Option<String>,
|
pub matchcodes: Option<String>,
|
||||||
@ -77,5 +86,6 @@ pub fn setparams(c: &Config) -> Params {
|
|||||||
exclude = true;
|
exclude = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Params { statcodes, exclude }
|
Params { statcodes, exclude, titles: c.pulltitles, favicon: c.favicon,
|
||||||
|
timestamps: c.timestamps, bodysize: c.bodysize }
|
||||||
}
|
}
|
||||||
|
27
src/common/console.rs
Normal file → Executable file
27
src/common/console.rs
Normal file → Executable file
@ -9,6 +9,16 @@ pub fn fatal(msg: &str) -> ! {
|
|||||||
process::exit(-1);
|
process::exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn trunc(s: String, lim: usize) -> String {
|
||||||
|
let truncatedstr = s.char_indices()
|
||||||
|
.take_while(|(i, _)| *i < lim)
|
||||||
|
.map(|(_, c)| c)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
truncatedstr
|
||||||
|
}
|
||||||
|
|
||||||
|
// strips whitespace while maintaining legibility
|
||||||
pub fn fmtwhitespace(s: String) -> String {
|
pub fn fmtwhitespace(s: String) -> String {
|
||||||
let mut out = String::with_capacity(s.len());
|
let mut out = String::with_capacity(s.len());
|
||||||
s.split_whitespace().for_each(|w| {
|
s.split_whitespace().for_each(|w| {
|
||||||
@ -27,7 +37,7 @@ pub fn fmtcode(code: u16) -> ColoredString {
|
|||||||
300..=399 => code.to_string().yellow(),
|
300..=399 => code.to_string().yellow(),
|
||||||
400..=499 => code.to_string().bright_red(),
|
400..=499 => code.to_string().bright_red(),
|
||||||
500..=599 => code.to_string().red().bold(),
|
500..=599 => code.to_string().red().bold(),
|
||||||
_ => code.to_string().black(),
|
_ => code.to_string().bright_black(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -39,7 +49,7 @@ pub fn tstamp() -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn parsetitle(s: String) -> String {
|
pub fn parsetitle(s: String) -> String {
|
||||||
let title = fmtwhitespace(s);
|
let title: String = trunc(fmtwhitespace(s), 1024);
|
||||||
|
|
||||||
format!("{}{}{}",
|
format!("{}{}{}",
|
||||||
"title[".bright_black().bold(),
|
"title[".bright_black().bold(),
|
||||||
@ -74,10 +84,11 @@ pub fn parsehit(sc: u16, url: &String) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn banner() {
|
pub fn banner() {
|
||||||
eprintln!(
|
eprintln!(r#"
|
||||||
"{}{} {}",
|
{}{} |\___..--"/
|
||||||
"speed".bright_cyan().bold(),
|
__..--`"" /
|
||||||
"boat".bright_magenta().bold(),
|
'-._.'._:._'-._____..--' {}
|
||||||
VERSION.bright_black()
|
"#, "speed".bright_cyan().bold(),
|
||||||
);
|
"boat".bright_magenta().bold(),
|
||||||
|
VERSION.bright_black());
|
||||||
}
|
}
|
||||||
|
31
src/common/exec.rs
Normal file → Executable file
31
src/common/exec.rs
Normal file → Executable file
@ -5,35 +5,36 @@ use std::{
|
|||||||
};
|
};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
conf::{Config, Params},
|
conf::{Config, setparams},
|
||||||
console::fatal,
|
console::fatal,
|
||||||
net::{mkclient, query},
|
net::{mkclient, query},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub async fn takeoff(args: Config, params: Params) {
|
pub async fn takeoff(args: Config) {
|
||||||
let c = mkclient(args.follow).unwrap_or_else(|_| fatal("error instantiating http client"));
|
let params = setparams(&args);
|
||||||
|
|
||||||
let file = File::open(args.list)
|
let mut ua: String = "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/126.0.0.0 Safari/537.36".to_string();
|
||||||
|
if let Some(custom_ua) = args.useragent {
|
||||||
|
ua = custom_ua;
|
||||||
|
}
|
||||||
|
|
||||||
|
let client = mkclient(args.follow, ua).unwrap_or_else(|_| fatal("error instantiating http client"));
|
||||||
|
|
||||||
|
let file = File::open(&args.list)
|
||||||
.unwrap_or_else(|e| fatal(format!("unable to read file: {e}").as_str()));
|
.unwrap_or_else(|e| fatal(format!("unable to read file: {e}").as_str()));
|
||||||
|
|
||||||
// Create a buffered reader.
|
|
||||||
let buf = BufReader::new(file);
|
let buf = BufReader::new(file);
|
||||||
|
|
||||||
stream::iter(buf.lines())
|
stream::iter(buf.lines())
|
||||||
.for_each_concurrent(args.threads, |line| {
|
.for_each_concurrent(args.threads, |line| {
|
||||||
let wc = c.clone();
|
// workers using the same client ref > each worker getting their own? we'll see
|
||||||
let scodes = params.statcodes.clone();
|
let wc = &client;
|
||||||
|
let qparams = &params;
|
||||||
async move {
|
async move {
|
||||||
let _ = query(
|
let _ = query(
|
||||||
wc,
|
wc,
|
||||||
line.unwrap_or_else(|_| fatal("error attempting buffered read"))
|
line.unwrap_or_else(|_| fatal("error attempting buffered read")).trim(),
|
||||||
.trim(),
|
qparams
|
||||||
scodes,
|
|
||||||
params.exclude,
|
|
||||||
args.pulltitles,
|
|
||||||
args.favicon,
|
|
||||||
args.timestamps,
|
|
||||||
args.bodysize
|
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
0
src/common/mod.rs
Normal file → Executable file
0
src/common/mod.rs
Normal file → Executable file
42
src/common/modules.rs
Normal file → Executable file
42
src/common/modules.rs
Normal file → Executable file
@ -1,14 +1,14 @@
|
|||||||
use select::{document::Document, predicate::Name};
|
use select::{document::Document, predicate::Name};
|
||||||
use reqwest::Url;
|
use reqwest::{Response, Url};
|
||||||
use base64::{Engine as _, engine::general_purpose};
|
use base64::{Engine as _, engine::general_purpose};
|
||||||
|
use bytes::{Bytes, BytesMut};
|
||||||
use murmur3::murmur3_32;
|
use murmur3::murmur3_32;
|
||||||
use std::{error::Error,
|
use std::{error::Error, io::Cursor};
|
||||||
io::Cursor};
|
|
||||||
|
|
||||||
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace};
|
use super::console::{parsetitle, parsebody, parsehash, fmtwhitespace, trunc};
|
||||||
|
|
||||||
pub fn get_title(body: &String) -> String {
|
pub fn get_title(body: &str) -> String {
|
||||||
let document = Document::from(body.as_str());
|
let document = Document::from(body);
|
||||||
|
|
||||||
let title = document
|
let title = document
|
||||||
.find(Name("title"))
|
.find(Name("title"))
|
||||||
@ -19,18 +19,30 @@ pub fn get_title(body: &String) -> String {
|
|||||||
parsetitle(title)
|
parsetitle(title)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_body(body: &String, lim: usize) -> String {
|
pub async fn readnbody(mut resp: Response, n: usize) -> Result<Bytes, reqwest::Error> {
|
||||||
let document = Document::from(body.as_str());
|
let mut body = BytesMut::new();
|
||||||
|
while let Some(chunk) = resp.chunk().await? {
|
||||||
|
if !(body.len() + chunk.len()) > n {
|
||||||
|
body.extend_from_slice(&chunk);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(body.freeze())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn body_str(body: Bytes) -> Result<String, Box<dyn Error>> {
|
||||||
|
String::from_utf8(body.to_vec())
|
||||||
|
.map_err(|e| format!("error reading body into a string: {}", e).into())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn body_contents(body: &str, lim: usize) -> String {
|
||||||
|
let document = Document::from(body);
|
||||||
|
|
||||||
let mut bodytext = document.find(Name("body")).next().map(|n| n.text()).unwrap_or_else(|| "".to_string());
|
let mut bodytext = document.find(Name("body")).next().map(|n| n.text()).unwrap_or_else(|| "".to_string());
|
||||||
|
|
||||||
bodytext = fmtwhitespace(bodytext);
|
bodytext = fmtwhitespace(bodytext);
|
||||||
if bodytext.len() > lim {
|
if bodytext.len() > lim {
|
||||||
bodytext = bodytext
|
bodytext = trunc(bodytext, lim);
|
||||||
.char_indices()
|
|
||||||
.take_while(|(i, _)| *i < lim)
|
|
||||||
.map(|(_, c)| c)
|
|
||||||
.collect();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
parsebody(bodytext)
|
parsebody(bodytext)
|
||||||
@ -76,8 +88,8 @@ async fn dl_favicon(url: String) -> Result<Vec<u8>, Box<dyn Error>> {
|
|||||||
Ok(data)
|
Ok(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn hash_favicon(body: &String, url: String) -> String {
|
pub async fn hash_favicon(body: &str, url: String) -> String {
|
||||||
let document = Document::from(body.as_str());
|
let document = Document::from(body);
|
||||||
|
|
||||||
if let Ok(favurl) = faviconurl(document, url) {
|
if let Ok(favurl) = faviconurl(document, url) {
|
||||||
if let Ok(data) = dl_favicon(favurl).await {
|
if let Ok(data) = dl_favicon(favurl).await {
|
||||||
|
62
src/common/net.rs
Normal file → Executable file
62
src/common/net.rs
Normal file → Executable file
@ -2,17 +2,18 @@ use reqwest::{redirect::Policy, Client};
|
|||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use super::console::{parsehit, tstamp};
|
use super::console::{parsehit, tstamp};
|
||||||
|
use super::conf::Params;
|
||||||
use super::modules::*;
|
use super::modules::*;
|
||||||
|
|
||||||
pub fn mkclient(redir: bool) -> Result<Client, reqwest::Error> {
|
pub fn mkclient(redir: bool, ua: String) -> Result<Client, reqwest::Error> {
|
||||||
let rpolicy: Policy = if redir {
|
let rpolicy: Policy = if redir {
|
||||||
Policy::limited(5)
|
Policy::limited(3)
|
||||||
} else {
|
} else {
|
||||||
Policy::none()
|
Policy::none()
|
||||||
};
|
};
|
||||||
|
|
||||||
Client::builder()
|
Client::builder()
|
||||||
.user_agent("buttplug/1.0")
|
.user_agent(ua)
|
||||||
.redirect(rpolicy)
|
.redirect(rpolicy)
|
||||||
.timeout(Duration::from_secs(2))
|
.timeout(Duration::from_secs(2))
|
||||||
.connect_timeout(Duration::from_millis(500))
|
.connect_timeout(Duration::from_millis(500))
|
||||||
@ -25,57 +26,60 @@ async fn sendreq(c: &Client, use_https: bool, url: &str) -> Result<reqwest::Resp
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub async fn query(
|
pub async fn query(
|
||||||
c: Client,
|
c: &Client,
|
||||||
url: &str,
|
url: &str,
|
||||||
codes: Vec<u16>,
|
params: &Params
|
||||||
exclude: bool,
|
|
||||||
titles: bool,
|
|
||||||
favicon: bool,
|
|
||||||
timestamps: bool,
|
|
||||||
bodysize: usize,
|
|
||||||
) -> Result<(), reqwest::Error> {
|
) -> Result<(), reqwest::Error> {
|
||||||
let response: reqwest::Response;
|
let response: reqwest::Response;
|
||||||
if let Ok(res) = sendreq(&c, true, url).await {
|
|
||||||
response = res;
|
|
||||||
} else {
|
|
||||||
response = sendreq(&c, false, url).await?;
|
|
||||||
}
|
|
||||||
|
|
||||||
let statcode = response.status().as_u16();
|
match sendreq(c, true, url).await {
|
||||||
|
Ok(res) => response = res,
|
||||||
if codes.len() > 0 {
|
Err(e) => {
|
||||||
if codes.contains(&statcode) {
|
if e.is_request() {
|
||||||
if exclude {
|
response = sendreq(c, false, url).await?;
|
||||||
return Ok(());
|
} else {
|
||||||
|
return Err(e);
|
||||||
}
|
}
|
||||||
} else if !exclude {
|
|
||||||
return Ok(());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let sc = response.status().as_u16();
|
let sc = response.status().as_u16();
|
||||||
|
|
||||||
|
if !params.statcodes.is_empty() {
|
||||||
|
if params.statcodes.contains(&sc) {
|
||||||
|
if params.exclude {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
} else if !params.exclude {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let url: String = response.url().to_string();
|
let url: String = response.url().to_string();
|
||||||
let body = response.text().await?;
|
|
||||||
|
// 10mb response body limit
|
||||||
|
let body_raw = readnbody(response, 10485760).await?;
|
||||||
|
let body = body_str(body_raw).unwrap_or("error parsing response body".to_string());
|
||||||
|
|
||||||
let mut out = parsehit(sc, &url);
|
let mut out = parsehit(sc, &url);
|
||||||
|
|
||||||
if timestamps {
|
if params.timestamps {
|
||||||
out = format!("{} {}", tstamp(), out);
|
out = format!("{} {}", tstamp(), out);
|
||||||
}
|
}
|
||||||
|
|
||||||
if titles {
|
if params.titles {
|
||||||
out = format!("{} {}", out, get_title(&body));
|
out = format!("{} {}", out, get_title(&body));
|
||||||
}
|
}
|
||||||
|
|
||||||
if favicon {
|
if params.favicon {
|
||||||
let hash = hash_favicon(&body, url).await;
|
let hash = hash_favicon(&body, url).await;
|
||||||
if !hash.is_empty() {
|
if !hash.is_empty() {
|
||||||
out = format!("{} {}", out, hash);
|
out = format!("{} {}", out, hash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if bodysize > 0 {
|
if params.bodysize > 0 {
|
||||||
out = format!("{} {}", out, read_body(&body, bodysize));
|
out = format!("{} {}", out, body_contents(&body, params.bodysize));
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("{}", out);
|
println!("{}", out);
|
||||||
|
5
src/main.rs
Normal file → Executable file
5
src/main.rs
Normal file → Executable file
@ -2,9 +2,6 @@ mod common;
|
|||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() {
|
async fn main() {
|
||||||
let args = common::conf::load();
|
|
||||||
let scanparams = common::conf::setparams(&args);
|
|
||||||
|
|
||||||
common::console::banner();
|
common::console::banner();
|
||||||
common::exec::takeoff(args, scanparams).await;
|
common::exec::takeoff(common::conf::load()).await;
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user