Code has been refined and improved, README updated, LICENSE added

Dionysus 2023-12-15 23:02:37 -05:00
parent 21ea59bebc
commit 3b96fa7bfd
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
3 changed files with 160 additions and 99 deletions

LICENSE (new file, 15 lines)

@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2023, acidvegas <acid.vegas@acid.vegas>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

README.md

@@ -1,6 +1,24 @@
 # HTTP-Z
+###### This is still a work in progress...stay tuned for updates!
 
 ## Information
-This is still a work in progress & was made because HTTPX fucking sucks.
+This script is developed as a robust alternative to HTTPX, addressing the limitations in customizing JSON outputs and other functionalities that HTTPX lacks. It is specifically designed for asynchronous lookups on a list of domains, efficiently gathering DNS information and web content details such as page titles and body previews.
 
-Stay tuned
+## Usage
+| Argument               | Description                                                  |
+| ---------------------- | ------------------------------------------------------------ |
+| `<input_file>`         | File containing list of domains                              |
+| `-c`, `--concurrency`  | Number of concurrent requests                                |
+| `-m`, `--memory_limit` | Number of results to store in memory before syncing to file  |
+| `-o`, `--output`       | Output file                                                  |
+| `-t`, `--timeout`      | Timeout for HTTP requests                                    |
+| `-u`, `--user_agent`   | User agent to use for HTTP requests                          |
+| `-x`, `--proxy`        | Proxy to use for HTTP requests                               |
+| `-r`, `--retry`        | Number of times to retry failed requests                     |
+| `-v`, `--verbose`      | Increase output verbosity                                    |
+| `-p`, `--preview`      | Preview size in bytes for body & title *(default: 500)*      |
+
+___
+
+###### Mirrors
+[acid.vegas](https://git.acid.vegas/httpz) • [GitHub](https://github.com/acidvegas/httpz) • [GitLab](https://gitlab.com/acidvegas/httpz) • [SuperNETs](https://git.supernets.org/acidvegas/httpz)
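For reference, an invocation assembled from the flags in the usage table above might look like the following; the input and output file names are hypothetical, not part of this commit:

```
python httpz.py domains.txt -c 100 -t 10 -o results.json -p 500
```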

httpz.py (210 changed lines)

@@ -14,190 +14,222 @@ import logging
 import ssl
 import urllib.request
 
 try:
     import aiodns
 except ImportError:
     print('Missing required module \'aiodns\'. (pip install aiodns)')
     exit(1)
 
 try:
     import aiohttp
 except ImportError:
     print('Missing required module \'aiohttp\'. (pip install aiohttp)')
     exit(1)
 
 # ANSI escape codes for colors
 RED = '\033[91m'
 GREEN = '\033[92m'
 DARK_GREY = '\033[90m'
 RESET = '\033[0m'
 
 # Globals
 DNS_SERVERS = None
+args = None # Global args variable
+
+def vlog(msg: str):
+    '''
+    Verbose logging only if enabled
+
+    :param msg: Message to print to console
+    '''
+    if args.verbose:
+        logging.info(msg)
-def get_dns_servers() -> list:
+def get_dns_servers() -> dict:
     '''Get a list of DNS servers to use for lookups.'''
-    source = urllib.request.urlopen('https://public-dns.info/nameservers.txt')
-    results = source.read().decode().split('\n')
+    with urllib.request.urlopen('https://public-dns.info/nameservers.txt') as source:
+        results = source.read().decode().split('\n')
     v4_servers = [server for server in results if ':' not in server]
     v6_servers = [server for server in results if ':' in server]
     return {'4': v4_servers, '6': v6_servers}
 
-async def dns_lookup(domain: str, record_type: str) -> list:
+async def dns_lookup(domain: str, record_type: str, timeout: int) -> list:
     '''
     Resolve DNS information from a domain
 
     :param domain: Domain name to resolve
     :param record_type: DNS record type to resolve
+    :param timeout: Timeout for DNS request
     '''
-    try:
-        version = '4' if record_type == 'A' else '6' if record_type == 'AAAA' else random.choice(['4','6'])
-        resolver = aiodns.DNSResolver(nameservers=[random.choice(DNS_SERVERS[version])])
-        records = await resolver.query(domain, record_type)
-        return [record.host for record in records]
-    except Exception:
-        pass
+    for i in range(args.retry):
+        try:
+            version = '4' if record_type == 'A' else '6' if record_type == 'AAAA' else random.choice(['4','6'])
+            nameserver = random.choice(DNS_SERVERS[version])
+            resolver = aiodns.DNSResolver(nameservers=[nameserver], timeout=timeout)
+            records = await resolver.query(domain, record_type)
+            return records.cname if record_type == 'CNAME' else [record.host for record in records]
+        except Exception as e:
+            vlog(f'{RED}[ERROR]{RESET} {domain} - Failed to resolve {record_type} record using {nameserver} {DARK_GREY}({str(e)}){RESET}')
+    return []
-async def get_title(session: aiohttp.ClientSession, domain: str, max_redirects: int, timeout: int):
-    '''
-    Get the title of a webpage
-
-    :param session: aiohttp session
-    :param domain: URL to get the title of
-    :param max_redirects: Maximum number of redirects to follow
-    :param timeout: Timeout for HTTP requests
-    '''
+async def get_body(source: str, preview: int) -> str:
+    '''
+    Get the body of a webpage
+
+    :param source: HTML source of the webpage
+    :param preview: Number of bytes to preview
+    '''
+    body_content = re.search(r'<body.*?>(.*?)</body>', source, re.DOTALL | re.IGNORECASE)
+    processed_content = body_content.group(1) if body_content else source
+    clean_content = re.sub(r'<[^>]+>', '', processed_content)
+    return clean_content[:preview]
+
+async def get_title(session: aiohttp.ClientSession, domain: str):
+    '''
+    Get the title of a webpage and its status code
+
+    :param session: aiohttp session
+    :param domain: URL to get the title of
+    '''
+    body = None
+    status_code = None
+    title = None
     try:
-        async with session.get(domain, timeout=timeout, allow_redirects=False) as response:
-            if response.status in (200, 201):
+        async with session.get(domain, timeout=args.timeout, allow_redirects=False) as response:
+            status_code = response.status
+            if status_code in (200, 201):
                 html_content = await response.text()
                 match = re.search(r'<title>(.*?)</title>', html_content, re.IGNORECASE | re.DOTALL)
-                return match.group(1).strip() if match else None
-            elif response.status in (301, 302, 303, 307, 308) and max_redirects > 0:
+                title = match.group(1).strip() if match else None
+                title = re.sub(r'[\r\n]+', ' ', title)[:300] if title else None # Fix this ugly shit
+                body = await get_body(html_content, args.preview)
+            elif status_code in (301, 302, 303, 307, 308) and args.retry > 0: # Need to implement a max redirect limit
                 redirect_url = response.headers.get('Location')
                 if redirect_url:
-                    return await get_title(session, redirect_url, max_redirects - 1, timeout)
+                    return await get_title(session, redirect_url)
                 else:
-                    logging.error(f'{RED}[ERROR]{RESET} {domain} - Invalid HTTP status code {DARK_GREY}({response.status}){RESET}')
+                    vlog(f'{RED}[ERROR]{RESET} {domain} - No redirect URL found for {status_code} status code')
+            else:
+                vlog(f'{RED}[ERROR]{RESET} {domain} - Invalid status code {DARK_GREY}{status_code}{RESET}')
+    except asyncio.TimeoutError:
+        vlog(f'{RED}[ERROR]{RESET} {domain} - HTTP request timed out')
     except Exception as e:
-        logging.error(f'{RED}[ERROR]{RESET} {domain} - {e}')
-    return None
+        vlog(f'{RED}[ERROR]{RESET} Failed to get title for {domain} {DARK_GREY}({e}){RESET}')
+    return title, body, status_code
-async def check_url(session: aiohttp.ClientSession, domain: str, timeout: int, retry: int):
+async def check_url(session: aiohttp.ClientSession, domain: str):
     '''
     Process a domain name
 
     :param session: aiohttp session
     :param domain: URL to get the title of
-    :param timeout: Timeout for HTTP requests
-    :param retry: Number of retries for failed requests
     '''
     dns_records = {}
     for record_type in ('A', 'AAAA'):
-        records = await dns_lookup(domain, record_type)
+        records = await dns_lookup(domain, record_type, args.timeout)
         if records:
             dns_records[record_type] = records
+            break
     if not dns_records:
-        cname_records = await dns_lookup(domain, 'CNAME')
-        if cname_records:
-            dns_records['CNAME'] = cname_records
-            domain = cname_records[0]
-    if not dns_records:
-        logging.info(f'{DARK_GREY}[NO DNS RECORDS]{RESET} {domain}')
-        return domain, None, None, None
-    title = await get_title(session, f'https://{domain}', retry, timeout)
-    if not title:
-        title = await get_title(session, f'http://{domain}', retry, timeout)
-    if title:
-        logging.info(f'{GREEN}[SUCCESS]{RESET} {domain} - {title} - DNS: {dns_records}')
-        return domain, 'https', title, dns_records
-    else:
-        logging.error(f'{RED}[ERROR]{RESET} {domain} - Failed to retrieve title')
-        return domain, None, None, None
+        cname_record = await dns_lookup(domain, 'CNAME', args.timeout)
+        if cname_record:
+            dns_records['CNAME'] = cname_record
+            domain = cname_record
+        else:
+            vlog(f'{RED}[ERROR]{RESET} No DNS records found for {domain}')
+            return domain, None, None, None, None, None
+    title, body, status_code = await get_title(session, f'https://{domain}')
+    if not title and not body:
+        title, body, status_code = await get_title(session, f'http://{domain}')
+    if title or body:
+        logging.info(f'[{GREEN}SUCCESS{RESET}] {domain} - {title} - {body}')
+        return domain, 'https', title, body, dns_records, status_code
+    else:
+        vlog(f'{RED}[ERROR]{RESET} {domain} - Failed to retrieve title')
+        return domain, None, None, None, None, status_code
-async def process_file(file_path: str, concurrency: int, memory_limit: int, output_file: str, timeout: int, user_agent: str, proxy: str, retry: int):
+async def process_file():
     '''
     Process a list of domains from file
-
-    :param file_path: Path to the file to read from
-    :param concurrency: Number of domains to look up concurrently
-    :param memory_limit: Number of successful domain lookups to store in memory before syncing to file
-    :param output_file: Output file for results
-    :param timeout: Timeout for HTTP requests
-    :param user_agent: User agent for HTTP requests
-    :param proxy: Proxy for HTTP requests
-    :param retry: Number of retries for failed requests
     '''
-    results = {}
     counter = 0
 
     ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
     ssl_context.check_hostname = False
     ssl_context.verify_mode = ssl.CERT_NONE
 
-    headers = {'User-Agent': user_agent}
+    headers = {'User-Agent': args.user_agent}
     connector = aiohttp.TCPConnector(ssl=ssl_context)
     session_params = {
         'connector': connector,
         'headers': headers,
-        'timeout': aiohttp.ClientTimeout(total=timeout)
+        'timeout': aiohttp.ClientTimeout(total=args.timeout)
     }
-    if proxy:
-        session_params['proxy'] = proxy
+    if args.proxy:
+        session_params['proxy'] = args.proxy
 
     async with aiohttp.ClientSession(**session_params) as session:
         tasks = set()
-        with open(file_path, 'r') as file:
+        with open(args.file, 'r') as file:
             for line in file:
                 domain = line.strip()
                 if domain:
-                    tasks.add(asyncio.create_task(check_url(session, domain, timeout, retry)))
-                    if len(tasks) >= concurrency:
+                    tasks.add(asyncio.create_task(check_url(session, domain)))
+                    if len(tasks) >= args.concurrency:
                         done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
                         for task in done:
-                            domain, protocol, title, dns_records = task.result()
-                            if title:
-                                results[domain] = {'protocol': protocol, 'title': title, 'dns_records': dns_records}
+                            domain, protocol, title, body, dns_records, status_code = task.result()
+                            if title or body: # log results for dns?
+                                write_result_to_file(domain, protocol, title, body, dns_records, status_code)
                                 counter += 1
-                                if counter >= memory_limit:
-                                    with open(output_file, 'w') as f:
-                                        json.dump(results, f, indent=4)
-                                    counter = 0
-                                    results.clear()
+                                if counter % args.memory_limit == 0:
+                                    logging.info(f'Processed {counter} domains')
         if tasks:
-            await asyncio.wait(tasks)
-            for task in tasks:
-                domain, protocol, title, dns_records = task.result()
+            done, _ = await asyncio.wait(tasks)
+            for task in done:
+                domain, protocol, title, body, dns_records, status_code = task.result()
                 if title:
-                    results[domain] = {'protocol': protocol, 'title': title, 'dns_records': dns_records}
-        with open(output_file, 'a') as f:
-            json.dump(results, f, indent=4)
+                    write_result_to_file(domain, protocol, title, body, dns_records, status_code)
+
+def write_result_to_file(domain, protocol, title, body, dns_records, status_code):
+    '''
+    Write a single domain result to file
+
+    :param domain: Domain name
+    :param protocol: Protocol used (http or https)
+    :param title: Title of the domain
+    :param dns_records: DNS records of the domain
+    :param status_code: HTTP status code
+    '''
+    result = {
+        'domain': domain,
+        'protocol': protocol,
+        'status_code': status_code,
+        'title': title,
+        'body': body,
+        'dns_records': dns_records
+    }
+    with open(args.output, 'a') as f:
+        json.dump(result, f)
+        f.write('\n')
 def main():
-    global DNS_SERVERS
+    global DNS_SERVERS, args
 
     parser = argparse.ArgumentParser(description='Check URLs from a file asynchronously, perform DNS lookups and store results in JSON.')
     parser.add_argument('file', help='File containing list of domains')
@@ -209,23 +241,19 @@ def main():
     parser.add_argument('-x', '--proxy', help='Proxy to use for HTTP requests')
     parser.add_argument('-r', '--retry', type=int, default=3, help='Number of times to retry failed requests')
     parser.add_argument('-v', '--verbose', action='store_true', help='Increase output verbosity')
+    parser.add_argument('-p', '--preview', type=int, default=500, help='Preview size in bytes for body & title (default: 500)')
     args = parser.parse_args()
 
-    log_level = logging.INFO if args.verbose else logging.WARNING
-    logging.basicConfig(level=log_level, format=f'{DARK_GREY}%(asctime)s{RESET} - %(message)s', datefmt='%H:%M:%S')
+    log_level = logging.INFO
+    logging.basicConfig(level=log_level, format=f'{DARK_GREY}%(asctime)s{RESET} %(message)s', datefmt='%H:%M:%S')
 
     logging.info('Loading DNS servers...')
     DNS_SERVERS = get_dns_servers()
     if not DNS_SERVERS:
         logging.fatal('Failed to get DNS servers.')
-        exit(1)
     logging.info(f'Found {len(DNS_SERVERS["4"])} IPv4 and {len(DNS_SERVERS["6"])} IPv6 DNS servers.')
 
-    asyncio.run(process_file(args.file, args.concurrency, args.memory_limit, args.output, args.timeout, args.user_agent, args.proxy, args.retry))
+    asyncio.run(process_file())
 
 if __name__ == '__main__':
     main()
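A note on the output-format change: the new `write_result_to_file()` appends one JSON object per line instead of dumping a single JSON document, so the output file is effectively JSON Lines. A minimal sketch of a consumer, assuming a hypothetical `results.json` produced with `-o` and the keys shown in the diff:

```python
import json

# Each line of the output file is a self-contained JSON object
# with the keys built in write_result_to_file().
# 'results.json' is an illustrative path, not from the commit.
with open('results.json', 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        result = json.loads(line)
        print(result['domain'], result['status_code'], result['title'])
```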