From d9b0eef4dfc3ad8d7a1e8c7b5f1e99b57803520c Mon Sep 17 00:00:00 2001
From: delorean
Date: Thu, 23 May 2024 19:41:09 -0500
Subject: [PATCH] fixed body text boundary truncation

---
 src/common/conf.rs    |  2 +-
 src/common/modules.rs | 27 ++++++++++++++++++---------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/common/conf.rs b/src/common/conf.rs
index c596494..189ea09 100644
--- a/src/common/conf.rs
+++ b/src/common/conf.rs
@@ -38,7 +38,7 @@ pub struct Config {
     /// follow redirects
     pub follow: bool,
 
-    #[clap(long = "body")]
+    #[clap(default_value_t = 0, long = "body")]
     /// read n bytes of the response document body
     pub bodysize: usize,
 }
diff --git a/src/common/modules.rs b/src/common/modules.rs
index 886bbd2..1496c4a 100644
--- a/src/common/modules.rs
+++ b/src/common/modules.rs
@@ -17,15 +17,24 @@ pub fn get_title(body: &String) -> String {
 pub fn read_body(body: &String, lim: usize) -> String {
     let document = Document::from(body.as_str());
 
-    let mut text = String::new();
-    for tag in document.find(Name("body")) {
-        text.push_str(&tag.text());
+    // Only the first <body> element is read; a document without one yields "".
+    let mut bodytext = document
+        .find(Name("body"))
+        .next()
+        .map(|n| n.text())
+        .unwrap_or_default();
+
+    bodytext = fmtwhitespace(bodytext);
+    if bodytext.len() > lim {
+        // `String::truncate` panics when the cut falls inside a multi-byte
+        // character, so back up to the nearest char boundary at or below `lim`.
+        // Index 0 is always a boundary, so the loop terminates, and the result
+        // never exceeds `lim` bytes.
+        let mut end = lim;
+        while !bodytext.is_char_boundary(end) {
+            end -= 1;
+        }
+        bodytext.truncate(end);
     }
 
-    text = fmtwhitespace(text);
-    if text.len() > lim {
-        text.truncate(lim);
-    }
-
-    parsebody(text)
+    parsebody(bodytext)
 }
\ No newline at end of file