httpz/httpz.py

728 lines
25 KiB
Python
Raw Normal View History

2025-02-10 04:56:46 +00:00
#!/usr/bin/env python3
# HTTPZ Web Scanner - Developed by acidvegas in Python (https://github.com/acidvegas/httpz)
2023-12-15 04:48:50 +00:00
'''
2025-02-10 04:56:46 +00:00
BCUZ FUCK PROJECT DISCOVERY PYTHON STILL GO HARD
REAL BAY SHIT FOR REAL BAY MOTHER FUCKERS
2023-12-15 04:48:50 +00:00
'''
import argparse
import asyncio
2025-02-10 04:56:46 +00:00
import itertools
2023-12-15 04:48:50 +00:00
import json
import logging
2025-02-10 05:24:28 +00:00
import os
2025-02-10 06:30:30 +00:00
import random
import sys
2023-12-15 04:48:50 +00:00
try:
2025-02-10 04:56:46 +00:00
import aiohttp
2023-12-15 04:48:50 +00:00
except ImportError:
2025-02-10 04:56:46 +00:00
raise ImportError('missing \'aiohttp\' library (pip install aiohttp)')
2023-12-15 04:48:50 +00:00
try:
2025-02-10 04:56:46 +00:00
import apv
2023-12-15 04:48:50 +00:00
except ImportError:
2025-02-10 04:56:46 +00:00
raise ImportError('missing \'apv\' library (pip install apv)')
try:
import bs4
except ImportError:
raise ImportError('missing \'bs4\' library (pip install beautifulsoup4)')
try:
from cryptography import x509
from cryptography.hazmat.primitives import hashes
from cryptography.x509.oid import NameOID
except ImportError:
raise ImportError('missing \'cryptography\' library (pip install cryptography)')
try:
import dns.asyncresolver
import dns.query
import dns.resolver
import dns.zone
2025-02-10 04:56:46 +00:00
except ImportError:
raise ImportError('missing \'dns\' library (pip install dnspython)')
try:
import mmh3
except ImportError:
raise ImportError('missing \'mmh3\' library (pip install mmh3)')
class Colors:
    '''ANSI escape codes used to colorize terminal output'''
    HEADER = '\033[95m' # Light purple
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    RESET = '\033[0m' # Reset all attributes back to the terminal default
    PURPLE = '\033[35m' # Dark purple
    LIGHT_RED = '\033[38;5;203m' # Light red
    DARK_GREEN = '\033[38;5;22m' # Dark green
    PINK = '\033[38;5;198m' # Bright pink
    GRAY = '\033[90m' # Gray color
    CYAN = '\033[96m' # Cyan color
2025-02-10 04:56:46 +00:00
2025-02-11 05:03:28 +00:00
# Global flag toggled in main(): when True (JSONL console mode) all logging is suppressed
SILENT_MODE = False

def debug(msg: str):
    '''Log *msg* at DEBUG level unless silent mode is active'''
    if SILENT_MODE:
        return
    logging.debug(msg)

def error(msg: str):
    '''Log *msg* at ERROR level unless silent mode is active'''
    if SILENT_MODE:
        return
    logging.error(msg)

def info(msg: str):
    '''Log *msg* at INFO level unless silent mode is active'''
    if SILENT_MODE:
        return
    logging.info(msg)
2025-02-10 05:24:28 +00:00
async def get_cert_info(ssl_object, url: str) -> dict:
    '''
    Extract TLS certificate details from an established connection

    :param ssl_object: SSL object to get certificate info from
    :param url: URL to get certificate info from (used for error reporting only)
    '''
    try:
        # No TLS session at all
        if not ssl_object:
            return None

        # Grab the peer certificate in DER form
        cert_der = ssl_object.getpeercert(binary_form=True)
        if not cert_der:
            return None

        cert = x509.load_der_x509_certificate(cert_der)

        # Subject alternative names (empty list when the extension is absent)
        try:
            san_ext = cert.extensions.get_extension_for_oid(x509.oid.ExtensionOID.SUBJECT_ALTERNATIVE_NAME)
            alt_names = [entry.value for entry in san_ext.value] if san_ext else []
        except x509.extensions.ExtensionNotFound:
            alt_names = []

        # Subject common name (None when missing)
        try:
            common_name = cert.subject.get_attributes_for_oid(NameOID.COMMON_NAME)[0].value
        except IndexError:
            common_name = None

        # Issuer common name (None when missing)
        try:
            issuer = cert.issuer.get_attributes_for_oid(NameOID.COMMON_NAME)[0].value
        except IndexError:
            issuer = None

        return {
            'fingerprint'   : cert.fingerprint(hashes.SHA256()).hex(),
            'common_name'   : common_name,
            'issuer'        : issuer,
            'alt_names'     : alt_names,
            'not_before'    : cert.not_valid_before_utc.isoformat(),
            'not_after'     : cert.not_valid_after_utc.isoformat(),
            'version'       : cert.version.value,
            'serial_number' : format(cert.serial_number, 'x'),
        }
    except Exception as e:
        error(f'Error getting cert info for {url}: {str(e)}')
        return None
2025-02-10 04:56:46 +00:00
async def get_favicon_hash(session: aiohttp.ClientSession, base_url: str, html: str) -> str:
    '''
    Compute the mmh3 hash of a page's favicon

    :param session: aiohttp client session
    :param base_url: base URL of the website
    :param html: HTML content of the page
    '''
    try:
        soup = bs4.BeautifulSoup(html, 'html.parser')

        # Look for an explicit <link rel="...icon..."> tag first
        favicon_url = None
        for tag in soup.find_all('link'):
            rel_values = tag.get('rel')
            if rel_values and any(value.lower() == 'icon' for value in rel_values):
                favicon_url = tag.get('href')
                break

        # Fall back to the conventional default location
        if not favicon_url:
            favicon_url = '/favicon.ico'

        # Normalize protocol-relative and relative URLs to absolute ones
        if favicon_url.startswith('//'):
            favicon_url = 'https:' + favicon_url
        elif favicon_url.startswith('/'):
            favicon_url = base_url + favicon_url
        elif not favicon_url.startswith(('http://', 'https://')):
            favicon_url = base_url + '/' + favicon_url

        # Fetch the favicon (capped at 1MB) and hash it
        async with session.get(favicon_url, timeout=10) as response:
            if response.status == 200:
                data = (await response.read())[:1024*1024]
                digest = mmh3.hash64(data)[0]
                # A zero hash is treated as "no usable favicon"
                if digest != 0:
                    return str(digest)
    except Exception as e:
        debug(f'Error getting favicon for {base_url}: {str(e)}')

    return None
def human_size(size_bytes: int) -> str:
    '''
    Convert a byte count into a human-readable string (capped at GB)

    :param size_bytes: Size in bytes
    '''
    # Zero / None / falsy sizes collapse to a plain '0B'
    if not size_bytes:
        return '0B'

    value = float(size_bytes)
    for unit in ('B', 'KB', 'MB'):
        if value < 1024:
            return f'{value:.1f}{unit}'
        value /= 1024

    # Anything >= 1 GB stays in GB, matching the original unit cap
    return f'{value:.1f}GB'
def input_generator(input_source: str):
    '''
    Yield non-empty, whitespace-stripped lines from a file or stdin

    :param input_source: path to file containing domains, or '-'/None for stdin
    '''
    if input_source in (None, '-'):
        # Read from stdin
        for line in sys.stdin:
            stripped = line.strip()
            if stripped:
                yield stripped
    else:
        # Read from the given file path
        with open(input_source, 'r') as handle:
            for line in handle:
                stripped = line.strip()
                if stripped:
                    yield stripped
async def load_resolvers(resolver_file: str = None) -> list:
    '''
    Load DNS resolvers from a file, falling back to a public default list

    :param resolver_file: Path to file containing resolver IPs
    :return: List of resolver IPs
    '''

    # Prefer the user-supplied resolver file when it yields at least one entry
    if resolver_file:
        try:
            with open(resolver_file) as f:
                resolvers = [line.strip() for line in f if line.strip()]
            if resolvers:
                return resolvers
        except Exception as e:
            debug(f'Error loading resolvers from {resolver_file}: {str(e)}')

    # BUG FIX: previously this fallback only ran when no resolver file was given,
    # so an unreadable/empty file made the function silently return None
    async with aiohttp.ClientSession() as session:
        async with session.get('https://raw.githubusercontent.com/trickest/resolvers/refs/heads/main/resolvers.txt') as response:
            resolvers = await response.text()

    if not SILENT_MODE:
        info(f'Loaded {len(resolvers.splitlines()):,} resolvers.')

    return [resolver.strip() for resolver in resolvers.splitlines()]
async def resolve_all_dns(domain: str, timeout: int = 5, nameserver: str = None, check_axfr: bool = False) -> tuple:
    '''
    Resolve all DNS records (NS, A, AAAA, CNAME) for a domain

    :param domain: Domain to resolve
    :param timeout: Timeout in seconds
    :param nameserver: Specific nameserver to use
    :param check_axfr: Whether to attempt an AXFR zone transfer against each nameserver
    :return: Tuple of (sorted unique A/AAAA IPs, CNAME target, nameserver hostnames, nameserver IP map)
    '''

    # Create the resolver
    resolver = dns.asyncresolver.Resolver()
    resolver.lifetime = timeout

    # Set the nameserver if provided
    if nameserver:
        resolver.nameservers = [nameserver]

    # Do all DNS lookups at once; failures come back as exception objects, not raises
    results = await asyncio.gather(*[resolver.resolve(domain, rtype) for rtype in ('NS', 'A', 'AAAA', 'CNAME')], return_exceptions=True)

    # Parse the results, treating any non-Answer (i.e. exception) as an empty result
    nameservers = [str(ns).rstrip('.') for ns in results[0]] if isinstance(results[0], dns.resolver.Answer) else []
    ips = ([str(ip) for ip in results[1]] if isinstance(results[1], dns.resolver.Answer) else []) + ([str(ip) for ip in results[2]] if isinstance(results[2], dns.resolver.Answer) else [])
    cname = str(results[3][0].target).rstrip('.') if isinstance(results[3], dns.resolver.Answer) else None

    # Resolve each nameserver's own A/AAAA records (pairs of answers per nameserver)
    ns_ips = {}
    if nameservers:
        ns_results = await asyncio.gather(*[resolver.resolve(ns, rtype) for ns in nameservers for rtype in ('A', 'AAAA')], return_exceptions=True)
        for i, ns in enumerate(nameservers):
            ns_ips[ns] = [str(ip) for records in ns_results[i*2:i*2+2] if isinstance(records, dns.resolver.Answer) for ip in records]

    # Try AXFR if enabled (using already resolved nameserver IPs)
    if check_axfr:
        try:
            # Create the axfrout directory if it doesn't exist
            os.makedirs('axfrout', exist_ok=True)

            # Iterate over each nameserver and their IPs
            # BUG FIX: the loop variable was previously named "ips", shadowing the
            # outer A/AAAA list and corrupting the returned IPs whenever AXFR ran
            for ns_host, ns_host_ips in ns_ips.items():
                for ns_ip in ns_host_ips:
                    try:
                        # Perform the AXFR transfer
                        zone = dns.zone.from_xfr(dns.query.xfr(ns_ip, domain, lifetime=timeout))

                        # Write the zone to a file
                        with open(f'axfrout/{domain}_{ns_ip}.zone', 'w') as f:
                            zone.to_text(f)

                        info(f'{Colors.GREEN}[AXFR SUCCESS] {domain} from {ns_host} ({ns_ip}){Colors.RESET}')
                    except Exception as e:
                        debug(f'AXFR failed for {domain} from {ns_ip}: {str(e)}')
        except Exception as e:
            debug(f'Failed AXFR for {domain}: {str(e)}')

    return sorted(set(ips)), cname, nameservers, ns_ips
2025-02-10 04:56:46 +00:00
2025-02-11 05:03:28 +00:00
def parse_domain_url(domain: str) -> tuple:
    '''
    Parse a raw domain string into its base domain, port, and candidate protocol URLs

    :param domain: Raw domain string to parse
    :return: Tuple of (base_domain, port, protocols)
    '''

    port = None
    base_domain = domain.rstrip('/')

    if base_domain.startswith(('http://', 'https://')):
        # An explicit scheme means we only probe that one protocol
        protocol = 'https://' if base_domain.startswith('https://') else 'http://'
        base_domain = base_domain.split('://', 1)[1]
        host_part = base_domain.split('/')[0]
        if ':' in host_part:
            base_domain, port_str = base_domain.split(':', 1)
            try:
                port = int(port_str.split('/')[0])
            except ValueError:
                # Unparseable port falls back to the scheme default
                port = 443 if protocol == 'https://' else 80
        else:
            port = 443 if protocol == 'https://' else 80
        suffix = f':{port}' if port else ''
        protocols = [f'{protocol}{base_domain}{suffix}']
    else:
        # No scheme given: probe https first, then http
        host_part = base_domain.split('/')[0]
        if ':' in host_part:
            base_domain, port_str = base_domain.split(':', 1)
            candidate = port_str.split('/')[0]
            port = int(candidate) if candidate.isdigit() else 443
        else:
            port = 443
        suffix = f':{port}' if port else ''
        protocols = [
            f'https://{base_domain}{suffix}',
            f'http://{base_domain}{suffix}'
        ]

    return base_domain, port, protocols
2025-02-10 04:56:46 +00:00
2025-02-11 05:03:28 +00:00
async def check_domain(session: aiohttp.ClientSession, domain: str, follow_redirects: bool = False, timeout: int = 5, check_axfr: bool = False, resolvers: list = None) -> dict:
    '''
    Check a single domain for its status code, title, and body preview

    :param session: aiohttp client session
    :param domain: domain to check
    :param follow_redirects: whether to follow redirects
    :param timeout: timeout in seconds
    :param check_axfr: whether to check for AXFR
    :param resolvers: list of DNS resolvers to use
    '''

    # Pick a random resolver per domain to spread DNS query load
    nameserver = random.choice(resolvers) if resolvers else None
    base_domain, port, protocols = parse_domain_url(domain)

    # Result skeleton: every key is always present so downstream code can index freely
    result = {
        'domain'         : base_domain,
        'status'         : 0,
        'title'          : None,
        'body'           : None,
        'content_type'   : None,
        'url'            : protocols[0],
        'port'           : port,
        'ips'            : [],
        'cname'          : None,
        'nameservers'    : [],
        'favicon_hash'   : None,
        'headers'        : {},
        'content_length' : None,
        'redirect_chain' : [],
        'tls'            : None
    }

    # Do DNS lookups (the per-nameserver IP map is discarded here; AXFR output goes to disk)
    result['ips'], result['cname'], result['nameservers'], _ = await resolve_all_dns(base_domain, timeout, nameserver, check_axfr)

    # Try each protocol candidate in order until one responds
    for url in protocols:
        try:
            async with session.get(url, timeout=timeout, allow_redirects=follow_redirects, max_redirects=10 if follow_redirects else 0) as response:
                result.update({
                    'status'         : response.status,
                    'url'            : str(response.url),
                    'headers'        : dict(response.headers),
                    'content_type'   : response.headers.get('content-type', '').split(';')[0],
                    'content_length' : response.headers.get('content-length'),
                    'redirect_chain' : [str(h.url) for h in response.history] + [str(response.url)] if follow_redirects and response.history else []
                })

                # Certificate details are only available on https responses
                if response.url.scheme == 'https':
                    try:
                        # NOTE(review): reaches into aiohttp's private _protocol attribute to
                        # obtain the ssl object — may break across aiohttp versions; the
                        # AttributeError guard below is the safety net
                        if ssl_object := response._protocol.transport.get_extra_info('ssl_object'):
                            result['tls'] = await get_cert_info(ssl_object, str(response.url))
                    except AttributeError:
                        debug(f'Failed to get SSL info for {url}')

                # Only parse the body for successful responses (capped at 1MB)
                if response.status == 200:
                    html = (await response.text())[:1024*1024]
                    soup = bs4.BeautifulSoup(html, 'html.parser')
                    result.update({
                        'title'        : ' '.join(soup.title.string.strip().split()).rstrip('.')[:300] if soup.title and soup.title.string else None,
                        'body'         : ' '.join(soup.get_text().split()).rstrip('.')[:500] if soup.get_text() else None,
                        'favicon_hash' : await get_favicon_hash(session, url, html)
                    })

                # First protocol that answered wins; stop probing
                break
        except Exception as e:
            debug(f'Error checking {url}: {str(e)}')
            # Mark as errored; a later protocol candidate may still succeed
            result['status'] = -1
            continue

    return result
2025-02-11 05:03:28 +00:00
def format_console_output(result: dict, debug: bool = False, show_fields: dict = None, match_codes: set = None, exclude_codes: set = None) -> str:
    '''
    Format the output with colored sections

    :param result: Dictionary containing domain check results
    :param debug: Whether to show error states (note: shadows the module-level debug() helper inside this function)
    :param show_fields: Dictionary of fields to show
    :param match_codes: Set of status codes to match
    :param exclude_codes: Set of status codes to exclude
    '''

    # Skip errors unless in debug mode
    if result['status'] < 0 and not debug:
        return ''

    # Skip if status code doesn't match filters
    if match_codes and result['status'] not in match_codes:
        return ''
    if exclude_codes and result['status'] in exclude_codes:
        return ''

    parts = []

    # Status code, colored by response class
    if show_fields['status_code']:
        if result['status'] < 0:
            status = f"{Colors.RED}[{result['status']}]{Colors.RESET}"
        elif 200 <= result['status'] < 300:
            status = f"{Colors.GREEN}[{result['status']}]{Colors.RESET}"
        elif 300 <= result['status'] < 400:
            status = f"{Colors.YELLOW}[{result['status']}]{Colors.RESET}"
        else: # 400+ and 500+ codes
            status = f"{Colors.RED}[{result['status']}]{Colors.RESET}"
        parts.append(status)

    # Domain (always shown)
    parts.append(f"[{result['url']}]")

    # Title
    if show_fields['title'] and result['title']:
        parts.append(f"{Colors.DARK_GREEN}[{result['title']}]{Colors.RESET}")

    # Body (truncated to 100 characters for console display)
    if show_fields['body'] and result['body']:
        body = result['body'][:100] + ('...' if len(result['body']) > 100 else '')
        parts.append(f"{Colors.BLUE}[{body}]{Colors.RESET}")

    # IPs
    if show_fields['ip'] and result['ips']:
        ips_text = ', '.join(result['ips'])
        parts.append(f"{Colors.YELLOW}[{ips_text}]{Colors.RESET}")

    # Favicon hash
    if show_fields['favicon'] and result['favicon_hash']:
        parts.append(f"{Colors.PURPLE}[{result['favicon_hash']}]{Colors.RESET}")

    # Headers (includes content-type and content-length)
    if show_fields['headers'] and result['headers']:
        headers_text = []
        for k, v in result['headers'].items():
            headers_text.append(f"{k}: {v}")
        parts.append(f"{Colors.CYAN}[{', '.join(headers_text)}]{Colors.RESET}")
    else:
        # Only show content-type and content-length if headers aren't shown
        if show_fields['content_type'] and result['content_type']:
            parts.append(f"{Colors.HEADER}[{result['content_type']}]{Colors.RESET}")
        if show_fields['content_length'] and result['content_length']:
            try:
                size = human_size(int(result['content_length']))
                parts.append(f"{Colors.PINK}[{size}]{Colors.RESET}")
            except (ValueError, TypeError):
                parts.append(f"{Colors.PINK}[{result['content_length']}]{Colors.RESET}")

    # CNAME
    if show_fields['cname'] and result['cname']:
        parts.append(f"{Colors.PURPLE}[CNAME: {result['cname']}]{Colors.RESET}")

    # Redirect Chain
    if show_fields['follow_redirects'] and result['redirect_chain']:
        chain = ' -> '.join(result['redirect_chain'])
        parts.append(f"{Colors.YELLOW}[Redirects: {chain}]{Colors.RESET}")

    # TLS Certificate Info - always shown if available
    if result['tls']:
        cert = result['tls']
        tls_parts = []
        # BUG FIX: the cert dict produced by get_cert_info() uses the key
        # 'common_name', not 'subject' — the old lookup never matched
        if cert.get('common_name'):
            tls_parts.append(f"Subject: {cert['common_name']}")
        if cert.get('issuer'):
            tls_parts.append(f"Issuer: {cert['issuer']}")
        if cert.get('fingerprint'):
            tls_parts.append(f"Fingerprint: {cert['fingerprint'][:16]}...")
        if cert.get('alt_names'):
            tls_parts.append(f"SANs: {', '.join(cert['alt_names'][:3])}")
        if cert.get('not_before') and cert.get('not_after'):
            tls_parts.append(f"Valid: {cert['not_before'].split('T')[0]} to {cert['not_after'].split('T')[0]}")
        if cert.get('version'):
            tls_parts.append(f"Version: {cert['version']}")
        if cert.get('serial_number'):
            tls_parts.append(f"Serial: {cert['serial_number'][:16]}...")
        parts.append(f"{Colors.GREEN}[{' | '.join(tls_parts)}]{Colors.RESET}")

    return ' '.join(parts)
def parse_status_codes(codes_str: str) -> set:
    '''
    Parse comma-separated status codes and ranges into a set of integers

    :param codes_str: Comma-separated status codes (e.g., "200,301-399,404,500-503")
    '''
    codes = set()
    try:
        for chunk in codes_str.split(','):
            if '-' in chunk:
                # Inclusive range like "301-399"
                low, high = (int(value) for value in chunk.split('-'))
                codes.update(range(low, high + 1))
            else:
                codes.add(int(chunk))
    except ValueError:
        raise argparse.ArgumentTypeError('Invalid status code format. Use comma-separated numbers or ranges (e.g., 200,301-399,404,500-503)')
    return codes
2025-02-10 06:30:30 +00:00
async def process_domains(input_source: str = None, debug: bool = False, concurrent_limit: int = 100, show_fields: dict = None, output_file: str = None, jsonl: bool = None, timeout: int = 5, match_codes: set = None, exclude_codes: set = None, show_progress: bool = False, check_axfr: bool = False, resolver_file: str = None):
    '''
    Process domains from a file or stdin with concurrent requests

    :param input_source: path to file containing domains, or None for stdin
    :param debug: Whether to show error states
    :param concurrent_limit: maximum number of concurrent requests
    :param show_fields: Dictionary of fields to show
    :param output_file: Path to output file (JSONL format)
    :param jsonl: Whether to print JSON Lines to the console
    :param timeout: Request timeout in seconds
    :param match_codes: Set of status codes to match
    :param exclude_codes: Set of status codes to exclude
    :param show_progress: Whether to show progress counter
    :param check_axfr: Whether to check for AXFR
    :param resolver_file: Path to file containing DNS resolvers
    '''

    # Check if input file exists
    if input_source and input_source != '-' and not os.path.exists(input_source):
        raise FileNotFoundError(f'Domain file not found: {input_source}')

    # In-flight task set and processed-domain counter
    tasks = set()
    processed_domains = 0

    # Load resolvers - await the coroutine
    resolvers = await load_resolvers(resolver_file)

    async def write_result(result: dict):
        '''Write a single result to the output file and/or console'''
        nonlocal processed_domains

        # Create JSON output dict with required fields
        output_dict = {'url': result['url'], 'domain': result['domain'], 'status': result['status'], 'port': result['port']}

        # Add optional fields if they exist (order matches the original output)
        for field in ('title', 'body', 'ips', 'favicon_hash', 'headers', 'cname', 'redirect_chain', 'tls', 'nameservers'):
            if result[field]:
                output_dict[field] = result[field]

        # Get formatted output based on filters
        formatted = format_console_output(result, debug, show_fields, match_codes, exclude_codes)

        if formatted:
            # Write to file if specified
            if output_file:
                if (not match_codes or result['status'] in match_codes) and (not exclude_codes or result['status'] not in exclude_codes):
                    with open(output_file, 'a') as f:
                        json.dump(output_dict, f, ensure_ascii=False)
                        f.write('\n')

            # Console output
            if jsonl:
                print(json.dumps(output_dict))
            else:
                processed_domains += 1 # Increment counter for each domain processed
                # NOTE(review): without --progress nothing is printed to the console in
                # this branch — confirm console output is intentionally progress-only
                if show_progress:
                    info(f"{Colors.GRAY}[{processed_domains:,}]{Colors.RESET} {formatted}")

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        # BUG FIX: use a single generator for both the initial batch and the refill
        # loop. The old code built a second generator and skipped ahead by
        # concurrent_limit; for stdin input both generators consume the same
        # sys.stdin stream, so the skip silently dropped a whole batch of domains.
        domains_iter = input_generator(input_source)

        # Start initial batch of tasks
        for domain in itertools.islice(domains_iter, concurrent_limit):
            task = asyncio.create_task(check_domain(session, domain, follow_redirects=show_fields['follow_redirects'], timeout=timeout, check_axfr=check_axfr, resolvers=resolvers))
            tasks.add(task)

        # Process remaining domains, maintaining concurrent_limit active tasks
        for domain in domains_iter:
            done, pending = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
            tasks = pending

            for task in done:
                await write_result(await task)

            task = asyncio.create_task(check_domain(session, domain, follow_redirects=show_fields['follow_redirects'], timeout=timeout, check_axfr=check_axfr, resolvers=resolvers))
            tasks.add(task)

        # Wait for remaining tasks
        if tasks:
            done, _ = await asyncio.wait(tasks)
            for task in done:
                await write_result(await task)
2023-12-15 04:48:50 +00:00
def main():
    '''Main function to handle command line arguments and run the domain checker'''

    # SILENT_MODE lives at module level so the debug/error/info helpers can see it
    global SILENT_MODE

    # Setup argument parser
    parser = argparse.ArgumentParser(description=f'{Colors.GREEN}Hyper-fast HTTP Scraping Tool{Colors.RESET}', formatter_class=argparse.RawDescriptionHelpFormatter)

    # Add arguments
    parser.add_argument('file', nargs='?', default='-', help='File containing domains to check (one per line), use - for stdin')
    parser.add_argument('-all', '--all-flags', action='store_true', help='Enable all output flags')
    parser.add_argument('-d', '--debug', action='store_true', help='Show error states and debug information')
    parser.add_argument('-c', '--concurrent', type=int, default=100, help='Number of concurrent checks')
    parser.add_argument('-j', '--jsonl', action='store_true', help='Output JSON Lines format to console')
    parser.add_argument('-o', '--output', help='Output file path (JSONL format)')

    # Output field flags
    parser.add_argument('-b', '--body', action='store_true', help='Show body preview')
    parser.add_argument('-cn', '--cname', action='store_true', help='Show CNAME records')
    parser.add_argument('-cl', '--content-length', action='store_true', help='Show content length')
    parser.add_argument('-ct', '--content-type', action='store_true', help='Show content type')
    parser.add_argument('-f', '--favicon', action='store_true', help='Show favicon hash')
    parser.add_argument('-fr', '--follow-redirects', action='store_true', help='Follow redirects (max 10)')
    parser.add_argument('-hr', '--headers', action='store_true', help='Show response headers')
    parser.add_argument('-i', '--ip', action='store_true', help='Show IP addresses')
    parser.add_argument('-sc', '--status-code', action='store_true', help='Show status code')
    parser.add_argument('-ti', '--title', action='store_true', help='Show page title')
    parser.add_argument('-tls', '--tls-info', action='store_true', help='Show TLS certificate information')

    # Other arguments
    parser.add_argument('-ax', '--axfr', action='store_true', help='Try AXFR transfer against nameservers')
    parser.add_argument('-ec', '--exclude-codes', type=parse_status_codes, help='Exclude these status codes (comma-separated, e.g., 404,500)')
    parser.add_argument('-mc', '--match-codes', type=parse_status_codes, help='Only show these status codes (comma-separated, e.g., 200,301,404)')
    parser.add_argument('-p', '--progress', action='store_true', help='Show progress counter')
    parser.add_argument('-r', '--resolvers', help='File containing DNS resolvers (one per line)')
    parser.add_argument('-to', '--timeout', type=int, default=5, help='Request timeout in seconds')

    # Parse arguments
    args = parser.parse_args()

    # JSONL console output implies silent mode (the walrus assigns the module global);
    # logging is only configured when not silent
    if not (SILENT_MODE := args.jsonl):
        # Setup logging
        if args.debug:
            apv.setup_logging(level='DEBUG', log_to_disk=True, log_file_name='havoc', show_details=True)
            logging.debug('Debug logging enabled')
        else:
            apv.setup_logging(level='INFO')

        if args.file == '-':
            logging.info('Reading domains from stdin')
        else:
            logging.info(f'Processing file: {args.file}')

    # Setup show_fields: each field is enabled by its own flag or by --all-flags
    show_fields = {
        'status_code'      : args.all_flags or args.status_code,
        'content_type'     : args.all_flags or args.content_type,
        'content_length'   : args.all_flags or args.content_length,
        'title'            : args.all_flags or args.title,
        'body'             : args.all_flags or args.body,
        'ip'               : args.all_flags or args.ip,
        'favicon'          : args.all_flags or args.favicon,
        'headers'          : args.all_flags or args.headers,
        'follow_redirects' : args.all_flags or args.follow_redirects,
        'cname'            : args.all_flags or args.cname,
        'tls'              : args.all_flags or args.tls_info
    }

    # If no fields specified show all
    if not any(show_fields.values()):
        show_fields = {k: True for k in show_fields}

    try:
        asyncio.run(process_domains(args.file, args.debug, args.concurrent, show_fields, args.output, args.jsonl, args.timeout, args.match_codes, args.exclude_codes, args.progress, check_axfr=args.axfr, resolver_file=args.resolvers))
    except KeyboardInterrupt:
        logging.warning('Process interrupted by user')
        sys.exit(1)
    except Exception as e:
        logging.error(f'Unexpected error: {str(e)}')
        sys.exit(1)
2023-12-15 04:48:50 +00:00
2023-12-15 04:48:50 +00:00
# Script entry point
if __name__ == '__main__':
    main()