Better input processing

This commit is contained in:
Dionysus 2025-02-11 20:57:01 -05:00
parent 718b50b6c2
commit dfb11b0a1c
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
6 changed files with 197 additions and 129 deletions

View File

@ -92,14 +92,29 @@ This allows efficient distribution of large scans across multiple machines.
### Python Library
```python
import asyncio
import aiohttp
import aioboto3
import urllib.request
from httpz_scanner import HTTPZScanner
async def scan_domains():
async def scan_from_list() -> list:
with urllib.request.urlopen('https://example.com/domains.txt') as response:
content = response.read().decode()
return [line.strip() for line in content.splitlines() if line.strip()][:20]
async def scan_from_url():
with urllib.request.urlopen('https://example.com/domains.txt') as response:
for line in response:
if line := line.strip():
yield line.decode().strip()
async def scan_from_file():
with open('domains.txt', 'r') as file:
for line in file:
if line := line.strip():
yield line
async def main():
# Initialize scanner with all possible options (showing defaults)
scanner = HTTPZScanner(
# Core settings
concurrent_limit=100, # Number of concurrent requests
timeout=5, # Request timeout in seconds
follow_redirects=False, # Follow redirects (max 10)
@ -131,61 +146,33 @@ async def scan_domains():
exclude_codes={404,500,503} # Exclude these status codes
)
# Initialize resolvers (required before scanning)
await scanner.init()
# Example 1: Process file
print('\nProcessing file:')
async for result in scanner.scan(scan_from_file()):
print(f"{result['domain']}: {result['status']}")
# Example 1: Stream from S3/MinIO using aioboto3
async with aioboto3.Session().client('s3',
endpoint_url='http://minio.example.com:9000',
aws_access_key_id='access_key',
aws_secret_access_key='secret_key') as s3:
response = await s3.get_object(Bucket='my-bucket', Key='huge-domains.txt')
async with response['Body'] as stream:
async def s3_generator():
while True:
line = await stream.readline()
if not line:
break
yield line.decode().strip()
await scanner.scan(s3_generator())
# Example 2: Stream URLs
print('\nStreaming URLs:')
async for result in scanner.scan(scan_from_url()):
print(f"{result['domain']}: {result['status']}")
# Example 2: Stream from URL using aiohttp
async with aiohttp.ClientSession() as session:
# For large files - stream line by line
async with session.get('https://example.com/huge-domains.txt') as resp:
async def url_generator():
async for line in resp.content:
yield line.decode().strip()
await scanner.scan(url_generator())
# For small files - read all at once
async with session.get('https://example.com/small-domains.txt') as resp:
content = await resp.text()
await scanner.scan(content) # Library handles splitting into lines
# Example 3: Simple list of domains
domains = [
'example1.com',
'example2.com',
'example3.com'
]
await scanner.scan(domains)
# Example 3: Process list
print('\nProcessing list:')
domains = await scan_from_list()
async for result in scanner.scan(domains):
print(f"{result['domain']}: {result['status']}")
if __name__ == '__main__':
asyncio.run(scan_domains())
asyncio.run(main())
```
The scanner accepts various input types:
- Async/sync generators that yield domains
- String content with newlines
- File paths (string)
- Lists/tuples of domains
- File paths
- stdin (using '-')
- Async generators that yield domains
All inputs support sharding for distributed scanning.
All inputs support sharding for distributed scanning using the `shard` parameter.
## Arguments

View File

@ -6,4 +6,4 @@ from .scanner import HTTPZScanner
from .colors import Colors
__version__ = '2.0.0'
__version__ = '2.0.1'

View File

@ -8,9 +8,9 @@ except ImportError:
raise ImportError('missing bs4 module (pip install beautifulsoup4)')
try:
from cryptography import x509
from cryptography import x509
from cryptography.hazmat.primitives import hashes
from cryptography.x509.oid import NameOID
from cryptography.x509.oid import NameOID
except ImportError:
raise ImportError('missing cryptography module (pip install cryptography)')
@ -28,8 +28,8 @@ def parse_domain_url(domain: str) -> tuple:
Parse domain string into base domain, port, and protocol list
:param domain: Raw domain string to parse
:return: Tuple of (base_domain, port, protocols)
'''
port = None
base_domain = domain.rstrip('/')
@ -58,6 +58,7 @@ def parse_domain_url(domain: str) -> tuple:
return base_domain, port, protocols
async def get_cert_info(ssl_object, url: str) -> dict:
'''
Get SSL certificate information for a domain
@ -65,6 +66,7 @@ async def get_cert_info(ssl_object, url: str) -> dict:
:param ssl_object: SSL object to get certificate info from
:param url: URL to get certificate info from
'''
try:
if not ssl_object or not (cert_der := ssl_object.getpeercert(binary_form=True)):
return None
@ -101,6 +103,7 @@ async def get_cert_info(ssl_object, url: str) -> dict:
error(f'Error getting cert info for {url}: {str(e)}')
return None
async def get_favicon_hash(session, base_url: str, html: str) -> str:
'''
Get favicon hash from a webpage
@ -141,6 +144,7 @@ async def get_favicon_hash(session, base_url: str, html: str) -> str:
return None
def parse_status_codes(codes_str: str) -> set:
'''
Parse comma-separated status codes and ranges into a set of integers
@ -174,4 +178,26 @@ def parse_shard(shard_str: str) -> tuple:
raise ValueError
return shard_index - 1, total_shards # Convert to 0-based index
except (ValueError, TypeError):
raise argparse.ArgumentTypeError('Shard must be in format INDEX/TOTAL where INDEX <= TOTAL')
raise argparse.ArgumentTypeError('Shard must be in format INDEX/TOTAL where INDEX <= TOTAL')
def parse_title(html: str, content_type: str = None) -> str:
    '''
    Parse title from HTML content

    :param html: HTML content of the page
    :param content_type: Content-Type header value (when given, only HTML/XHTML types are parsed)
    :return: Title text with surrounding whitespace stripped, or None when the
             content is not HTML, is empty, cannot be parsed or has no title text
    '''

    # Only parse title for HTML content
    if content_type and not any(x in content_type.lower() for x in ('text/html', 'application/xhtml')):
        return None

    # Nothing to parse
    if not html:
        return None

    try:
        # The parser name is passed exactly once: supplying it both positionally and as
        # features= raises TypeError on every call. from_encoding is dropped because it
        # only applies to bytes markup, while html is documented as str.
        soup = bs4.BeautifulSoup(html, 'html.parser')
    except Exception:
        # Title extraction is best-effort: unparseable markup yields no title
        return None

    title = soup.title

    # <title> may be missing, or hold nested markup (in which case .string is None)
    if title is None or title.string is None:
        return None

    return title.string.strip() or None

View File

@ -5,7 +5,6 @@
import asyncio
import json
import random
import sys
try:
import aiohttp
@ -20,7 +19,7 @@ except ImportError:
from .dns import resolve_all_dns, load_resolvers
from .formatters import format_console_output
from .colors import Colors
from .parsers import parse_domain_url, get_cert_info, get_favicon_hash
from .parsers import parse_domain_url, get_cert_info, get_favicon_hash, parse_title
from .utils import debug, info, USER_AGENTS, input_generator
@ -154,12 +153,13 @@ class HTTPZScanner:
except AttributeError:
debug(f'Failed to get SSL info for {url}')
html = (await response.text())[:1024*1024]
soup = bs4.BeautifulSoup(html, 'html.parser')
content_type = response.headers.get('Content-Type', '')
html = await response.text() if any(x in content_type.lower() for x in ['text/html', 'application/xhtml']) else None
# Only add title if it exists
if soup.title and soup.title.string:
result['title'] = ' '.join(soup.title.string.strip().split()).rstrip('.')[:300]
if soup := bs4.BeautifulSoup(html, 'html.parser'):
if soup.title and soup.title.string:
result['title'] = ' '.join(soup.title.string.strip().split()).rstrip('.')[:300]
# Only add body if it exists
if body_text := soup.get_text():
@ -210,32 +210,81 @@ class HTTPZScanner:
async def scan(self, input_source):
    '''
    Scan domains from a file, stdin, list/tuple, or async generator

    :param input_source: Can be:
        - Path to file (str)
        - stdin ('-')
        - List/tuple of domains
        - Async generator yielding domains (str or bytes)
    :yields: Result dictionary for each domain scanned
    '''

    if not self.resolvers:
        await self.init()

    def in_shard(index: int) -> bool:
        '''Return True when the entry at this index belongs to the configured shard'''
        return self.shard is None or index % self.shard[1] == self.shard[0]

    async def iter_domains():
        '''Yield the cleaned, shard-filtered domains from input_source'''

        # File or stdin input: input_generator receives the shard and filters itself
        if isinstance(input_source, str):
            source = input_generator(input_source, self.shard)

            # input_generator may be an async generator; a plain `for` over one raises
            # TypeError, so pick the iteration protocol the returned object supports
            if hasattr(source, '__aiter__'):
                async for domain in source:
                    yield domain
            else:
                for domain in source:
                    yield domain
            return

        # List/tuple input: the shard index is the position in the sequence
        if isinstance(input_source, (list, tuple)):
            for index, domain in enumerate(input_source):
                domain = str(domain).strip()
                if domain and in_shard(index):
                    yield domain
            return

        # Async generator input
        # NOTE(review): the counter advances once per non-empty entry; the original
        # indentation of this increment was not recoverable - confirm against the repo
        index = 0
        async for domain in input_source:
            if isinstance(domain, bytes):
                domain = domain.decode()
            domain = domain.strip()
            if not domain:
                continue
            if in_shard(index):
                yield domain
            index += 1

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        tasks = set()

        async for domain in iter_domains():
            # At the concurrency limit: wait for at least one task to finish and
            # hand its result back before scheduling another check
            if len(tasks) >= self.concurrent_limit:
                done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
                for task in done:
                    result = await task
                    await self.process_result(result)
                    yield result

            tasks.add(asyncio.create_task(self.check_domain(session, domain)))

        # Process remaining tasks
        if tasks:
            done, _ = await asyncio.wait(tasks)
            for task in done:
                result = await task
                await self.process_result(result)
                yield result

View File

@ -5,6 +5,7 @@
import logging
import os
import sys
import asyncio
# Global for silent mode
@ -12,58 +13,58 @@ SILENT_MODE = False
# List of user agents to randomize requests
USER_AGENTS = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.6.5 Chrome/124.0.6367.243 Electron/30.1.2 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:134.0) Gecko/20100101 Firefox/134.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.8.3 Chrome/130.0.6723.191 Electron/33.3.2 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.137 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.12 Chrome/120.0.6099.283 Electron/28.2.3 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.7.7 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36"
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.6.5 Chrome/124.0.6367.243 Electron/30.1.2 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:134.0) Gecko/20100101 Firefox/134.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.8.3 Chrome/130.0.6723.191 Electron/33.3.2 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.137 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.12 Chrome/120.0.6099.283 Electron/28.2.3 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.7.7 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36'
]
@ -98,9 +99,9 @@ def human_size(size_bytes: int) -> str:
return f'{size:.1f}{units[unit_index]}'
def input_generator(input_source, shard: tuple = None):
async def input_generator(input_source, shard: tuple = None):
'''
Generator function to yield domains from various input sources with optional sharding
Async generator function to yield domains from various input sources with optional sharding
:param input_source: Can be:
- string path to local file
@ -116,6 +117,7 @@ def input_generator(input_source, shard: tuple = None):
# Handle stdin
if input_source == '-' or input_source is None:
for line in sys.stdin:
await asyncio.sleep(0) # Yield control
if line := line.strip():
if shard is None or line_num % shard[1] == shard[0]:
yield line
@ -125,6 +127,7 @@ def input_generator(input_source, shard: tuple = None):
elif isinstance(input_source, str) and os.path.exists(input_source):
with open(input_source, 'r') as f:
for line in f:
await asyncio.sleep(0) # Yield control
if line := line.strip():
if shard is None or line_num % shard[1] == shard[0]:
yield line
@ -133,6 +136,7 @@ def input_generator(input_source, shard: tuple = None):
# Handle iterables (generators, lists, etc)
elif hasattr(input_source, '__iter__') and not isinstance(input_source, (str, bytes)):
for line in input_source:
await asyncio.sleep(0) # Yield control
if isinstance(line, bytes):
line = line.decode()
if line := line.strip():
@ -145,6 +149,7 @@ def input_generator(input_source, shard: tuple = None):
if isinstance(input_source, bytes):
input_source = input_source.decode()
for line in input_source.splitlines():
await asyncio.sleep(0) # Yield control
if line := line.strip():
if shard is None or line_num % shard[1] == shard[0]:
yield line

View File

@ -4,12 +4,13 @@
from setuptools import setup, find_packages
with open('README.md', 'r', encoding='utf-8') as f:
long_description = f.read()
setup(
name='httpz_scanner',
version='2.0.0',
version='2.0.1',
author='acidvegas',
author_email='acid.vegas@acid.vegas',
description='Hyper-fast HTTP Scraping Tool',