From ad835d3410c16bb2b1a2341f09fcbe2d1647e71c Mon Sep 17 00:00:00 2001 From: acidvegas Date: Wed, 12 Feb 2025 03:07:42 -0500 Subject: [PATCH] sup tommyrot --- httpz_scanner/__init__.py | 2 +- httpz_scanner/formatters.py | 38 +++++++++-------------- httpz_scanner/scanner.py | 61 +++++++++++++++++++++++++++++++------ setup.py | 2 +- 4 files changed, 68 insertions(+), 35 deletions(-) diff --git a/httpz_scanner/__init__.py b/httpz_scanner/__init__.py index c5f2f58..b56751b 100644 --- a/httpz_scanner/__init__.py +++ b/httpz_scanner/__init__.py @@ -6,4 +6,4 @@ from .colors import Colors from .scanner import HTTPZScanner -__version__ = '2.1.8' \ No newline at end of file +__version__ = '2.1.9' \ No newline at end of file diff --git a/httpz_scanner/formatters.py b/httpz_scanner/formatters.py index d493efe..d12c584 100644 --- a/httpz_scanner/formatters.py +++ b/httpz_scanner/formatters.py @@ -41,6 +41,20 @@ def format_console_output(result: dict, debug: bool = False, show_fields: dict = # Domain/URL parts.append(f"[{result['url']}]") + # IPs (moved up for visibility) + if show_fields.get('ip') and result.get('ips'): + ips_text = ', '.join(result['ips']) + parts.append(f"{Colors.YELLOW}[{ips_text}]{Colors.RESET}") + + # Title (moved up for visibility) + if show_fields.get('title') and result.get('title'): + parts.append(f"{Colors.DARK_GREEN}[{result['title']}]{Colors.RESET}") + + # Body preview (moved up for visibility) + if show_fields.get('body') and result.get('body'): + body = result['body'][:100].replace('\n', ' ') + ('...' if len(result['body']) > 100 else '') + parts.append(f"{Colors.BLUE}[{body}]{Colors.RESET}") + # Content Type if show_fields.get('content_type') and result.get('content_type'): parts.append(f"{Colors.CYAN}[{result['content_type']}]{Colors.RESET}") @@ -48,20 +62,6 @@ def format_console_output(result: dict, debug: bool = False, show_fields: dict = # Content Length if show_fields.get('content_length') and result.get('content_length'): parts.append(f"{Colors.PINK}[{result['content_length']}]{Colors.RESET}") - - # Title - if show_fields.get('title') and result.get('title'): - parts.append(f"{Colors.DARK_GREEN}[{result['title']}]{Colors.RESET}") - - # Body preview - if show_fields.get('body') and result.get('body'): - body = result['body'][:100] + ('...' if len(result['body']) > 100 else '') - parts.append(f"{Colors.BLUE}[{body}]{Colors.RESET}") - - # IPs - if show_fields.get('ip') and result.get('ips'): - ips_text = ', '.join(result['ips']) - parts.append(f"{Colors.YELLOW}[{ips_text}]{Colors.RESET}") # Favicon hash if show_fields.get('favicon') and result.get('favicon_hash'): @@ -71,16 +71,6 @@ def format_console_output(result: dict, debug: bool = False, show_fields: dict = if show_fields.get('headers') and result.get('response_headers'): headers_text = [f"{k}: {v}" for k, v in result['response_headers'].items()] parts.append(f"{Colors.CYAN}[{', '.join(headers_text)}]{Colors.RESET}") - else: - if show_fields.get('content_type') and result.get('content_type'): - parts.append(f"{Colors.HEADER}[{result['content_type']}]{Colors.RESET}") - - if show_fields.get('content_length') and result.get('content_length'): - try: - size = human_size(int(result['content_length'])) - parts.append(f"{Colors.PINK}[{size}]{Colors.RESET}") - except (ValueError, TypeError): - parts.append(f"{Colors.PINK}[{result['content_length']}]{Colors.RESET}") # Redirect Chain if show_fields.get('follow_redirects') and result.get('redirect_chain'): diff --git a/httpz_scanner/scanner.py b/httpz_scanner/scanner.py index 8e569f2..9d2df1f 100644 --- a/httpz_scanner/scanner.py +++ b/httpz_scanner/scanner.py @@ -112,24 +112,67 @@ class HTTPZScanner: debug(f'Making request to {url} with headers: {headers}') async with session.request('GET', url, timeout=self.timeout, - allow_redirects=True, # Always follow redirects + allow_redirects=True, max_redirects=10, - ssl=False, # Don't verify SSL + ssl=False, headers=headers) as response: debug(f'Got response from {url}: status={response.status}, headers={dict(response.headers)}') + # Get domain and parse URL + parsed_url = urllib.parse.urlparse(url) + domain = parsed_url.hostname + + # Basic result structure result = { - 'domain': urllib.parse.urlparse(url).hostname, + 'domain': domain, 'status': response.status, 'url': str(response.url), - 'response_headers': dict(response.headers) + 'response_headers': dict(response.headers), + 'protocol': parsed_url.scheme } - - if response.history: - result['redirect_chain'] = [str(h.url) for h in response.history] + [str(response.url)] - debug(f'Redirect chain for {url}: {result["redirect_chain"]}') - + + try: + # Get response body + body = await response.text() + result['body'] = body[:500] # Limit body preview + + # Parse title using bs4 + if 'text/html' in response.headers.get('content-type', '').lower(): + soup = bs4.BeautifulSoup(body, 'html.parser') + if title_tag := soup.title: + result['title'] = title_tag.string.strip() + + # Get content type and length + result['content_type'] = response.headers.get('content-type') + result['content_length'] = response.headers.get('content-length') + + # Get redirect chain + if response.history: + result['redirect_chain'] = [str(h.url) for h in response.history] + [str(response.url)] + + # Get DNS info + if self.show_fields.get('ip') or self.show_fields.get('cname'): + ips, cname, _, _ = await resolve_all_dns(domain) + if ips: + result['ips'] = ips + if cname: + result['cname'] = cname + + # Get TLS info for HTTPS + if url.startswith('https://') and self.show_fields.get('tls'): + if cert_info := await get_cert_info(response.connection.transport.get_extra_info('ssl_object'), url): + result['tls'] = cert_info + + # Get favicon hash if requested + if self.show_fields.get('favicon'): + if favicon_hash := await get_favicon_hash(session, f"{parsed_url.scheme}://{domain}", body): + result['favicon_hash'] = favicon_hash + + except Exception as e: + debug(f'Error processing response data for {url}: {str(e)}') + # Still return basic result even if additional processing fails + return result except aiohttp.ClientSSLError as e: diff --git a/setup.py b/setup.py index 364d24a..0509e4d 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ with open('README.md', 'r', encoding='utf-8') as f: setup( name='httpz_scanner', - version='2.1.8', + version='2.1.9', author='acidvegas', author_email='acid.vegas@acid.vegas', description='Hyper-fast HTTP Scraping Tool',