Better input processing
This commit is contained in:
parent
718b50b6c2
commit
dfb11b0a1c
85
README.md
85
README.md
@ -92,14 +92,29 @@ This allows efficient distribution of large scans across multiple machines.
|
||||
### Python Library
|
||||
```python
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import aioboto3
|
||||
import urllib.request
|
||||
from httpz_scanner import HTTPZScanner
|
||||
|
||||
async def scan_domains():
|
||||
async def scan_from_list() -> list:
|
||||
with urllib.request.urlopen('https://example.com/domains.txt') as response:
|
||||
content = response.read().decode()
|
||||
return [line.strip() for line in content.splitlines() if line.strip()][:20]
|
||||
|
||||
async def scan_from_url():
|
||||
with urllib.request.urlopen('https://example.com/domains.txt') as response:
|
||||
for line in response:
|
||||
if line := line.strip():
|
||||
yield line.decode().strip()
|
||||
|
||||
async def scan_from_file():
|
||||
with open('domains.txt', 'r') as file:
|
||||
for line in file:
|
||||
if line := line.strip():
|
||||
yield line
|
||||
|
||||
async def main():
|
||||
# Initialize scanner with all possible options (showing defaults)
|
||||
scanner = HTTPZScanner(
|
||||
# Core settings
|
||||
concurrent_limit=100, # Number of concurrent requests
|
||||
timeout=5, # Request timeout in seconds
|
||||
follow_redirects=False, # Follow redirects (max 10)
|
||||
@ -131,61 +146,33 @@ async def scan_domains():
|
||||
exclude_codes={404,500,503} # Exclude these status codes
|
||||
)
|
||||
|
||||
# Initialize resolvers (required before scanning)
|
||||
await scanner.init()
|
||||
# Example 1: Process file
|
||||
print('\nProcessing file:')
|
||||
async for result in scanner.scan(scan_from_file()):
|
||||
print(f"{result['domain']}: {result['status']}")
|
||||
|
||||
# Example 1: Stream from S3/MinIO using aioboto3
|
||||
async with aioboto3.Session().client('s3',
|
||||
endpoint_url='http://minio.example.com:9000',
|
||||
aws_access_key_id='access_key',
|
||||
aws_secret_access_key='secret_key') as s3:
|
||||
|
||||
response = await s3.get_object(Bucket='my-bucket', Key='huge-domains.txt')
|
||||
async with response['Body'] as stream:
|
||||
async def s3_generator():
|
||||
while True:
|
||||
line = await stream.readline()
|
||||
if not line:
|
||||
break
|
||||
yield line.decode().strip()
|
||||
|
||||
await scanner.scan(s3_generator())
|
||||
# Example 2: Stream URLs
|
||||
print('\nStreaming URLs:')
|
||||
async for result in scanner.scan(scan_from_url()):
|
||||
print(f"{result['domain']}: {result['status']}")
|
||||
|
||||
# Example 2: Stream from URL using aiohttp
|
||||
async with aiohttp.ClientSession() as session:
|
||||
# For large files - stream line by line
|
||||
async with session.get('https://example.com/huge-domains.txt') as resp:
|
||||
async def url_generator():
|
||||
async for line in resp.content:
|
||||
yield line.decode().strip()
|
||||
|
||||
await scanner.scan(url_generator())
|
||||
|
||||
# For small files - read all at once
|
||||
async with session.get('https://example.com/small-domains.txt') as resp:
|
||||
content = await resp.text()
|
||||
await scanner.scan(content) # Library handles splitting into lines
|
||||
|
||||
# Example 3: Simple list of domains
|
||||
domains = [
|
||||
'example1.com',
|
||||
'example2.com',
|
||||
'example3.com'
|
||||
]
|
||||
await scanner.scan(domains)
|
||||
# Example 3: Process list
|
||||
print('\nProcessing list:')
|
||||
domains = await scan_from_list()
|
||||
async for result in scanner.scan(domains):
|
||||
print(f"{result['domain']}: {result['status']}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(scan_domains())
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
The scanner accepts various input types:
|
||||
- Async/sync generators that yield domains
|
||||
- String content with newlines
|
||||
- File paths (string)
|
||||
- Lists/tuples of domains
|
||||
- File paths
|
||||
- stdin (using '-')
|
||||
- Async generators that yield domains
|
||||
|
||||
All inputs support sharding for distributed scanning.
|
||||
All inputs support sharding for distributed scanning using the `shard` parameter.
|
||||
|
||||
## Arguments
|
||||
|
||||
|
@ -6,4 +6,4 @@ from .scanner import HTTPZScanner
|
||||
from .colors import Colors
|
||||
|
||||
|
||||
__version__ = '2.0.0'
|
||||
__version__ = '2.0.1'
|
@ -8,9 +8,9 @@ except ImportError:
|
||||
raise ImportError('missing bs4 module (pip install beautifulsoup4)')
|
||||
|
||||
try:
|
||||
from cryptography import x509
|
||||
from cryptography import x509
|
||||
from cryptography.hazmat.primitives import hashes
|
||||
from cryptography.x509.oid import NameOID
|
||||
from cryptography.x509.oid import NameOID
|
||||
except ImportError:
|
||||
raise ImportError('missing cryptography module (pip install cryptography)')
|
||||
|
||||
@ -28,8 +28,8 @@ def parse_domain_url(domain: str) -> tuple:
|
||||
Parse domain string into base domain, port, and protocol list
|
||||
|
||||
:param domain: Raw domain string to parse
|
||||
:return: Tuple of (base_domain, port, protocols)
|
||||
'''
|
||||
|
||||
port = None
|
||||
base_domain = domain.rstrip('/')
|
||||
|
||||
@ -58,6 +58,7 @@ def parse_domain_url(domain: str) -> tuple:
|
||||
|
||||
return base_domain, port, protocols
|
||||
|
||||
|
||||
async def get_cert_info(ssl_object, url: str) -> dict:
|
||||
'''
|
||||
Get SSL certificate information for a domain
|
||||
@ -65,6 +66,7 @@ async def get_cert_info(ssl_object, url: str) -> dict:
|
||||
:param ssl_object: SSL object to get certificate info from
|
||||
:param url: URL to get certificate info from
|
||||
'''
|
||||
|
||||
try:
|
||||
if not ssl_object or not (cert_der := ssl_object.getpeercert(binary_form=True)):
|
||||
return None
|
||||
@ -101,6 +103,7 @@ async def get_cert_info(ssl_object, url: str) -> dict:
|
||||
error(f'Error getting cert info for {url}: {str(e)}')
|
||||
return None
|
||||
|
||||
|
||||
async def get_favicon_hash(session, base_url: str, html: str) -> str:
|
||||
'''
|
||||
Get favicon hash from a webpage
|
||||
@ -141,6 +144,7 @@ async def get_favicon_hash(session, base_url: str, html: str) -> str:
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def parse_status_codes(codes_str: str) -> set:
|
||||
'''
|
||||
Parse comma-separated status codes and ranges into a set of integers
|
||||
@ -174,4 +178,26 @@ def parse_shard(shard_str: str) -> tuple:
|
||||
raise ValueError
|
||||
return shard_index - 1, total_shards # Convert to 0-based index
|
||||
except (ValueError, TypeError):
|
||||
raise argparse.ArgumentTypeError('Shard must be in format INDEX/TOTAL where INDEX <= TOTAL')
|
||||
raise argparse.ArgumentTypeError('Shard must be in format INDEX/TOTAL where INDEX <= TOTAL')
|
||||
|
||||
|
||||
def parse_title(html: str, content_type: str = None) -> str:
    '''
    Parse title from HTML content

    :param html: HTML content of the page
    :param content_type: Content-Type header value (when given, only
                         text/html and application/xhtml bodies are parsed)
    :return: Stripped text of the <title> element, or None when the content
             is not HTML, is empty, has no usable title, or cannot be parsed
    '''

    # Only parse title for HTML content
    if content_type and not any(x in content_type.lower() for x in ('text/html', 'application/xhtml')):
        return None

    # Nothing to parse
    if not html:
        return None

    try:
        # The parser name is passed exactly once: supplying it both positionally
        # and as features= raises TypeError, which previously got swallowed and
        # made every call return None.
        soup = bs4.BeautifulSoup(html, 'html.parser')
    except Exception:
        # Title extraction is best-effort; Exception (rather than a bare except)
        # lets KeyboardInterrupt / task cancellation propagate.
        return None

    title = soup.title

    # title.string is None when <title> is empty or contains nested markup
    if title is None or title.string is None:
        return None

    return title.string.strip() or None
|
@ -5,7 +5,6 @@
|
||||
import asyncio
|
||||
import json
|
||||
import random
|
||||
import sys
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
@ -20,7 +19,7 @@ except ImportError:
|
||||
from .dns import resolve_all_dns, load_resolvers
|
||||
from .formatters import format_console_output
|
||||
from .colors import Colors
|
||||
from .parsers import parse_domain_url, get_cert_info, get_favicon_hash
|
||||
from .parsers import parse_domain_url, get_cert_info, get_favicon_hash, parse_title
|
||||
from .utils import debug, info, USER_AGENTS, input_generator
|
||||
|
||||
|
||||
@ -154,12 +153,13 @@ class HTTPZScanner:
|
||||
except AttributeError:
|
||||
debug(f'Failed to get SSL info for {url}')
|
||||
|
||||
html = (await response.text())[:1024*1024]
|
||||
soup = bs4.BeautifulSoup(html, 'html.parser')
|
||||
content_type = response.headers.get('Content-Type', '')
|
||||
html = await response.text() if any(x in content_type.lower() for x in ['text/html', 'application/xhtml']) else None
|
||||
|
||||
# Only add title if it exists
|
||||
if soup.title and soup.title.string:
|
||||
result['title'] = ' '.join(soup.title.string.strip().split()).rstrip('.')[:300]
|
||||
if soup := bs4.BeautifulSoup(html, 'html.parser'):
|
||||
if soup.title and soup.title.string:
|
||||
result['title'] = ' '.join(soup.title.string.strip().split()).rstrip('.')[:300]
|
||||
|
||||
# Only add body if it exists
|
||||
if body_text := soup.get_text():
|
||||
@ -210,32 +210,81 @@ class HTTPZScanner:
|
||||
|
||||
async def scan(self, input_source):
    '''
    Scan domains from a file, stdin, list/tuple, or async generator

    :param input_source: Can be:
        - Path to file (str)
        - stdin ('-')
        - List/tuple of domains
        - Async generator yielding domains
    :yields: Result dictionary for each domain scanned
    '''

    if not self.resolvers:
        await self.init()

    shard = self.shard

    def in_shard(line_num: int) -> bool:
        '''Return True when the given line number belongs to this shard'''
        return shard is None or line_num % shard[1] == shard[0]

    async def iter_domains():
        '''Yield cleaned, shard-filtered domains for every supported input type'''
        if isinstance(input_source, str):
            # File or stdin input. input_generator is an async generator, so it
            # has to be consumed with "async for" (a plain "for" raises
            # TypeError). It receives the shard and filters on its own.
            async for domain in input_generator(input_source, shard):
                yield domain

        elif isinstance(input_source, (list, tuple)):
            # List/tuple input: the position in the sequence is the shard key
            for line_num, domain in enumerate(input_source):
                if (domain := str(domain).strip()) and in_shard(line_num):
                    yield domain

        else:
            # Async generator input
            line_num = 0
            async for domain in input_source:
                if isinstance(domain, bytes):
                    domain = domain.decode()
                if domain := domain.strip():
                    if in_shard(line_num):
                        yield domain
                    # NOTE(review): counting only non-empty entries; the exact
                    # nesting of this increment was reconstructed from a
                    # flattened diff - confirm against the repository.
                    line_num += 1

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        tasks = set()

        async for domain in iter_domains():
            # At the concurrency limit: wait for at least one task to finish
            # and emit its result before scheduling more work
            if len(tasks) >= self.concurrent_limit:
                done, tasks = await asyncio.wait(
                    tasks, return_when=asyncio.FIRST_COMPLETED
                )
                for task in done:
                    result = await task
                    await self.process_result(result)
                    yield result

            tasks.add(asyncio.create_task(self.check_domain(session, domain)))

        # Process remaining tasks
        if tasks:
            done, _ = await asyncio.wait(tasks)
            for task in done:
                result = await task
                await self.process_result(result)
                yield result
|
@ -5,6 +5,7 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
|
||||
|
||||
# Global for silent mode
|
||||
@ -12,58 +13,58 @@ SILENT_MODE = False
|
||||
|
||||
# List of user agents to randomize requests
|
||||
USER_AGENTS = [
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.6.5 Chrome/124.0.6367.243 Electron/30.1.2 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:134.0) Gecko/20100101 Firefox/134.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.8.3 Chrome/130.0.6723.191 Electron/33.3.2 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.137 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.12 Chrome/120.0.6099.283 Electron/28.2.3 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
|
||||
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.0",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.7.7 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36"
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.6.5 Chrome/124.0.6367.243 Electron/30.1.2 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:134.0) Gecko/20100101 Firefox/134.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.8.3 Chrome/130.0.6723.191 Electron/33.3.2 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.137 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.12 Chrome/120.0.6099.283 Electron/28.2.3 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
|
||||
'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.0',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.7.7 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36'
|
||||
]
|
||||
|
||||
|
||||
@ -98,9 +99,9 @@ def human_size(size_bytes: int) -> str:
|
||||
return f'{size:.1f}{units[unit_index]}'
|
||||
|
||||
|
||||
def input_generator(input_source, shard: tuple = None):
|
||||
async def input_generator(input_source, shard: tuple = None):
|
||||
'''
|
||||
Generator function to yield domains from various input sources with optional sharding
|
||||
Async generator function to yield domains from various input sources with optional sharding
|
||||
|
||||
:param input_source: Can be:
|
||||
- string path to local file
|
||||
@ -116,6 +117,7 @@ def input_generator(input_source, shard: tuple = None):
|
||||
# Handle stdin
|
||||
if input_source == '-' or input_source is None:
|
||||
for line in sys.stdin:
|
||||
await asyncio.sleep(0) # Yield control
|
||||
if line := line.strip():
|
||||
if shard is None or line_num % shard[1] == shard[0]:
|
||||
yield line
|
||||
@ -125,6 +127,7 @@ def input_generator(input_source, shard: tuple = None):
|
||||
elif isinstance(input_source, str) and os.path.exists(input_source):
|
||||
with open(input_source, 'r') as f:
|
||||
for line in f:
|
||||
await asyncio.sleep(0) # Yield control
|
||||
if line := line.strip():
|
||||
if shard is None or line_num % shard[1] == shard[0]:
|
||||
yield line
|
||||
@ -133,6 +136,7 @@ def input_generator(input_source, shard: tuple = None):
|
||||
# Handle iterables (generators, lists, etc)
|
||||
elif hasattr(input_source, '__iter__') and not isinstance(input_source, (str, bytes)):
|
||||
for line in input_source:
|
||||
await asyncio.sleep(0) # Yield control
|
||||
if isinstance(line, bytes):
|
||||
line = line.decode()
|
||||
if line := line.strip():
|
||||
@ -145,6 +149,7 @@ def input_generator(input_source, shard: tuple = None):
|
||||
if isinstance(input_source, bytes):
|
||||
input_source = input_source.decode()
|
||||
for line in input_source.splitlines():
|
||||
await asyncio.sleep(0) # Yield control
|
||||
if line := line.strip():
|
||||
if shard is None or line_num % shard[1] == shard[0]:
|
||||
yield line
|
||||
|
3
setup.py
3
setup.py
@ -4,12 +4,13 @@
|
||||
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
|
||||
with open('README.md', 'r', encoding='utf-8') as f:
|
||||
long_description = f.read()
|
||||
|
||||
setup(
|
||||
name='httpz_scanner',
|
||||
version='2.0.0',
|
||||
version='2.0.1',
|
||||
author='acidvegas',
|
||||
author_email='acid.vegas@acid.vegas',
|
||||
description='Hyper-fast HTTP Scraping Tool',
|
||||
|
Loading…
Reference in New Issue
Block a user