httpz/httpz_scanner/cli.py

197 lines
8.2 KiB
Python
Raw Normal View History

2025-02-11 07:15:39 +00:00
#!/usr/bin/env python3
# HTTPZ Web Scanner - Developed by acidvegas in Python (https://github.com/acidvegas/httpz)
2025-02-11 07:46:01 +00:00
# httpz_scanner/cli.py
2025-02-11 07:15:39 +00:00
import argparse
import asyncio
2025-02-12 03:30:22 +00:00
import json
2025-02-11 07:15:39 +00:00
import logging
import os
import sys
2025-02-12 03:30:22 +00:00
from datetime import datetime
2025-02-11 07:15:39 +00:00
2025-02-12 02:08:12 +00:00
from .colors import Colors
2025-02-12 03:30:22 +00:00
from .formatters import format_console_output
from .parsers import parse_status_codes, parse_shard
2025-02-12 02:08:12 +00:00
from .scanner import HTTPZScanner
from .utils import SILENT_MODE, info
2025-02-12 03:30:22 +00:00
2025-02-11 07:15:39 +00:00
def setup_logging(level='INFO', log_to_disk=False):
    '''
    Setup logging configuration

    Builds a colorized console handler and, optionally, a plain-text file
    handler writing to logs/httpz.log, then passes them to logging.basicConfig.
    Note that logging.basicConfig is a no-op when the root logger already has
    handlers configured.

    :param level: Logging level name (INFO or DEBUG), case-insensitive
    :param log_to_disk: Whether to also log to file
    :raises AttributeError: If level does not name an attribute of the logging module
    '''

    class ColoredFormatter(logging.Formatter):
        '''Console formatter: gray short timestamp followed by the raw message'''

        def formatTime(self, record, datefmt=None):
            # Keep the signature compatible with logging.Formatter.formatTime so
            # any caller passing datefmt does not break; default to the short form.
            dt = datetime.fromtimestamp(record.created)
            return f'{Colors.GRAY}{dt.strftime(datefmt or "%m-%d %H:%M")}{Colors.RESET}'

        def format(self, record):
            # Level name and logger name are intentionally left out of console output
            return f'{self.formatTime(record)} {record.getMessage()}'

    # Setup logging handlers
    handlers = []

    # Console handler
    console = logging.StreamHandler()
    console.setFormatter(ColoredFormatter())
    handlers.append(console)

    # File handler
    if log_to_disk:
        os.makedirs('logs', exist_ok=True)
        # Explicit encoding so non-ASCII log content is written consistently
        # regardless of the platform's locale default.
        file_handler = logging.FileHandler('logs/httpz.log', encoding='utf-8')
        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        handlers.append(file_handler)

    # Setup logger
    logging.basicConfig(level=getattr(logging, level.upper()), handlers=handlers)
2025-02-11 07:15:39 +00:00
def _build_parser():
    '''
    Build the command line argument parser

    Every option string must be unique: argparse raises a "conflicting option
    string" error at construction time otherwise. The response-header display
    flag is therefore exposed as -hr/--show-headers and the POST data option as
    -po/--post, leaving -hd/--headers for custom request headers and
    -p/--progress for the progress counter.

    :return: Configured argparse.ArgumentParser
    '''
    parser = argparse.ArgumentParser(description=f'{Colors.GREEN}Hyper-fast HTTP Scraping Tool{Colors.RESET}', formatter_class=argparse.RawDescriptionHelpFormatter)

    # General arguments
    parser.add_argument('file', nargs='?', default='-', help='File containing domains to check (one per line), use - for stdin')
    parser.add_argument('-all', '--all-flags', action='store_true', help='Enable all output flags')
    parser.add_argument('-d', '--debug', action='store_true', help='Show error states and debug information')
    parser.add_argument('-c', '--concurrent', type=int, default=100, help='Number of concurrent checks')
    parser.add_argument('-j', '--jsonl', action='store_true', help='Output JSON Lines format to console')
    parser.add_argument('-o', '--output', help='Output file path (JSONL format)')

    # Output field flags
    parser.add_argument('-b', '--body', action='store_true', help='Show body preview')
    parser.add_argument('-cn', '--cname', action='store_true', help='Show CNAME records')
    parser.add_argument('-cl', '--content-length', action='store_true', help='Show content length')
    parser.add_argument('-ct', '--content-type', action='store_true', help='Show content type')
    parser.add_argument('-f', '--favicon', action='store_true', help='Show favicon hash')
    parser.add_argument('-fr', '--follow-redirects', action='store_true', help='Follow redirects (max 10)')
    parser.add_argument('-hr', '--show-headers', action='store_true', help='Show response headers')
    parser.add_argument('-i', '--ip', action='store_true', help='Show IP addresses')
    parser.add_argument('-sc', '--status-code', action='store_true', help='Show status code')
    parser.add_argument('-ti', '--title', action='store_true', help='Show page title')
    parser.add_argument('-tls', '--tls-info', action='store_true', help='Show TLS certificate information')

    # Other arguments
    parser.add_argument('-ax', '--axfr', action='store_true', help='Try AXFR transfer against nameservers')
    parser.add_argument('-ec', '--exclude-codes', type=parse_status_codes, help='Exclude these status codes (comma-separated, e.g., 404,500)')
    parser.add_argument('-mc', '--match-codes', type=parse_status_codes, help='Only show these status codes (comma-separated, e.g., 200,301,404)')
    parser.add_argument('-p', '--progress', action='store_true', help='Show progress counter')
    parser.add_argument('-r', '--resolvers', help='File containing DNS resolvers (one per line)')
    parser.add_argument('-to', '--timeout', type=int, default=5, help='Request timeout in seconds')

    # Sharding
    parser.add_argument('-sh', '--shard', type=parse_shard, help='Shard index and total shards (e.g., 1/3)')

    # Request customization
    parser.add_argument('-pa', '--paths', help='Additional paths to check (comma-separated, e.g., ".git/config,.env")')
    parser.add_argument('-hd', '--headers', help='Custom headers to send with each request (format: "Header1: value1,Header2: value2")')
    parser.add_argument('-po', '--post', help='Send POST request with this data')

    return parser


def _parse_custom_headers(raw):
    '''
    Parse the custom headers option value into a dict

    Entries are separated by commas and each entry is split on its first
    colon; surrounding whitespace is stripped from names and values. Empty
    entries (e.g. from a trailing comma) are ignored.

    :param raw: String like "Header1: value1,Header2: value2", or None
    :return: Dict of header name to value, or None when nothing was supplied
    :raises ValueError: If an entry has no colon or an empty header name
    '''
    if not raw:
        return None

    headers = {}
    for entry in raw.split(','):
        entry = entry.strip()
        if not entry:
            continue
        name, separator, value = entry.partition(':')
        name = name.strip()
        if not separator or not name:
            raise ValueError(f'Invalid custom header {entry!r} (expected "Name: value")')
        headers[name] = value.strip()

    return headers or None


async def main():
    '''
    Parse the command line, run the scan and emit results

    Results are printed to the console (formatted, or as JSON Lines with
    --jsonl) and appended to the --output file when one is given.

    Exits with status 0 after printing help when invoked without arguments,
    and with status 1 when interrupted or when an unexpected error occurs.
    '''
    # NOTE(review): this rebinds only this module's copy of the name imported
    # from .utils; if helpers in .utils read their own SILENT_MODE they will
    # not see this assignment - confirm against utils.
    global SILENT_MODE

    parser = _build_parser()

    # If no arguments provided, print help and exit
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()

    # Setup logging based on arguments (no logging setup at all in JSONL mode)
    SILENT_MODE = args.jsonl

    if not SILENT_MODE:
        if args.debug:
            setup_logging(level='DEBUG', log_to_disk=True)
        else:
            setup_logging(level='INFO')

        if args.file == '-':
            info('Reading domains from stdin')
        else:
            info(f'Processing file: {args.file}')

    # Setup show_fields
    show_fields = {
        'status_code'      : args.all_flags or args.status_code,
        'content_type'     : args.all_flags or args.content_type,
        'content_length'   : args.all_flags or args.content_length,
        'title'            : args.all_flags or args.title,
        'body'             : args.all_flags or args.body,
        'ip'               : args.all_flags or args.ip,
        'favicon'          : args.all_flags or args.favicon,
        'headers'          : args.all_flags or args.show_headers,
        'follow_redirects' : args.all_flags or args.follow_redirects,
        'cname'            : args.all_flags or args.cname,
        'tls'              : args.all_flags or args.tls_info
    }

    # If no fields specified show all
    if not any(show_fields.values()):
        show_fields = {k: True for k in show_fields}

    # Opened once (inside the try so failures are reported) and closed in finally
    output_handle = None

    try:
        scanner = HTTPZScanner(
            concurrent_limit=args.concurrent,
            timeout=args.timeout,
            follow_redirects=args.all_flags or args.follow_redirects,
            check_axfr=args.axfr,
            resolver_file=args.resolvers,
            output_file=args.output,
            show_progress=args.progress,
            debug_mode=args.debug,
            jsonl_output=args.jsonl,
            show_fields=show_fields,
            match_codes=args.match_codes,
            exclude_codes=args.exclude_codes,
            shard=args.shard,
            paths=args.paths.split(',') if args.paths else None,
            custom_headers=_parse_custom_headers(args.headers),
            post_data=args.post
        )

        # NOTE(review): args.output is also handed to the scanner as
        # output_file; confirm results are not written to the file twice.
        if args.output:
            output_handle = open(args.output, 'a')

        count = 0
        async for result in scanner.scan(args.file):
            # Write to output file if specified
            if output_handle is not None:
                output_handle.write(json.dumps(result) + '\n')
                output_handle.flush() # Ensure file output is immediate

            # Handle JSON output separately
            if args.jsonl:
                print(json.dumps(result), flush=True) # Force flush
                continue

            # Only output and increment counter if we have content to show for normal output
            formatted = format_console_output(result, args.debug, show_fields, args.match_codes, args.exclude_codes)
            if formatted:
                if args.progress:
                    count += 1
                    info(f"[{count}] {formatted}")
                    sys.stdout.flush() # Force flush after each domain
                else:
                    print(formatted, flush=True) # Force flush

    except KeyboardInterrupt:
        logging.warning('Process interrupted by user')
        sys.exit(1)
    except Exception as e:
        logging.error(f'Unexpected error: {str(e)}')
        sys.exit(1)
    finally:
        if output_handle is not None:
            output_handle.close()
2025-02-12 03:30:22 +00:00
2025-02-11 07:15:39 +00:00
def run():
    '''Synchronous CLI entry point: drive the async main() to completion'''
    cli_coroutine = main()
    asyncio.run(cli_coroutine)
2025-02-12 03:30:22 +00:00
2025-02-11 07:15:39 +00:00
# Run the CLI when this module is executed directly as a script
if __name__ == '__main__':
    run()