Better input processing
This commit is contained in:
parent
718b50b6c2
commit
dfb11b0a1c
85
README.md
85
README.md
@ -92,14 +92,29 @@ This allows efficient distribution of large scans across multiple machines.
|
|||||||
### Python Library
|
### Python Library
|
||||||
```python
|
```python
|
||||||
import asyncio
|
import asyncio
|
||||||
import aiohttp
|
import urllib.request
|
||||||
import aioboto3
|
|
||||||
from httpz_scanner import HTTPZScanner
|
from httpz_scanner import HTTPZScanner
|
||||||
|
|
||||||
async def scan_domains():
|
async def scan_from_list() -> list:
    '''
    Download a newline-separated domain list and return its first 20 non-empty entries

    :return: List of up to 20 whitespace-stripped domain strings
    '''

    def fetch() -> str:
        # urllib is blocking, so this helper is executed in a worker thread below
        with urllib.request.urlopen('https://example.com/domains.txt') as response:
            return response.read().decode()

    # Run the blocking download in the default executor so the event loop stays responsive
    content = await asyncio.get_running_loop().run_in_executor(None, fetch)

    return [line.strip() for line in content.splitlines() if line.strip()][:20]
|
||||||
|
|
||||||
|
async def scan_from_url():
    '''
    Stream domains line by line from a remote text file

    :yields: Each non-empty line of the response, decoded and stripped of surrounding whitespace
    '''

    loop = asyncio.get_running_loop()

    # urllib is blocking: open the connection and read each line in the default executor
    # so that requests already in flight in the scanner keep running between reads
    response = await loop.run_in_executor(None, urllib.request.urlopen, 'https://example.com/domains.txt')

    with response:
        # readline() returns b'' at end of stream, which ends the loop
        while raw := await loop.run_in_executor(None, response.readline):
            # Strip after decoding so a whitespace-only line is never yielded as ''
            if domain := raw.decode().strip():
                yield domain
|
||||||
|
|
||||||
|
async def scan_from_file():
    '''
    Yield domains from the local file domains.txt, one per non-empty line

    :yields: Each non-empty line with surrounding whitespace removed
    '''

    # Explicit encoding: the platform default is not UTF-8 everywhere
    with open('domains.txt', 'r', encoding='utf-8') as file:
        for line in file:
            if line := line.strip():
                yield line
|
||||||
|
|
||||||
|
async def main():
|
||||||
# Initialize scanner with all possible options (showing defaults)
|
# Initialize scanner with all possible options (showing defaults)
|
||||||
scanner = HTTPZScanner(
|
scanner = HTTPZScanner(
|
||||||
# Core settings
|
|
||||||
concurrent_limit=100, # Number of concurrent requests
|
concurrent_limit=100, # Number of concurrent requests
|
||||||
timeout=5, # Request timeout in seconds
|
timeout=5, # Request timeout in seconds
|
||||||
follow_redirects=False, # Follow redirects (max 10)
|
follow_redirects=False, # Follow redirects (max 10)
|
||||||
@ -131,61 +146,33 @@ async def scan_domains():
|
|||||||
exclude_codes={404,500,503} # Exclude these status codes
|
exclude_codes={404,500,503} # Exclude these status codes
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize resolvers (required before scanning)
|
# Example 1: Process file
|
||||||
await scanner.init()
|
print('\nProcessing file:')
|
||||||
|
async for result in scanner.scan(scan_from_file()):
|
||||||
|
print(f"{result['domain']}: {result['status']}")
|
||||||
|
|
||||||
# Example 1: Stream from S3/MinIO using aioboto3
|
# Example 2: Stream URLs
|
||||||
async with aioboto3.Session().client('s3',
|
print('\nStreaming URLs:')
|
||||||
endpoint_url='http://minio.example.com:9000',
|
async for result in scanner.scan(scan_from_url()):
|
||||||
aws_access_key_id='access_key',
|
print(f"{result['domain']}: {result['status']}")
|
||||||
aws_secret_access_key='secret_key') as s3:
|
|
||||||
|
|
||||||
response = await s3.get_object(Bucket='my-bucket', Key='huge-domains.txt')
|
|
||||||
async with response['Body'] as stream:
|
|
||||||
async def s3_generator():
|
|
||||||
while True:
|
|
||||||
line = await stream.readline()
|
|
||||||
if not line:
|
|
||||||
break
|
|
||||||
yield line.decode().strip()
|
|
||||||
|
|
||||||
await scanner.scan(s3_generator())
|
|
||||||
|
|
||||||
# Example 2: Stream from URL using aiohttp
|
# Example 3: Process list
|
||||||
async with aiohttp.ClientSession() as session:
|
print('\nProcessing list:')
|
||||||
# For large files - stream line by line
|
domains = await scan_from_list()
|
||||||
async with session.get('https://example.com/huge-domains.txt') as resp:
|
async for result in scanner.scan(domains):
|
||||||
async def url_generator():
|
print(f"{result['domain']}: {result['status']}")
|
||||||
async for line in resp.content:
|
|
||||||
yield line.decode().strip()
|
|
||||||
|
|
||||||
await scanner.scan(url_generator())
|
|
||||||
|
|
||||||
# For small files - read all at once
|
|
||||||
async with session.get('https://example.com/small-domains.txt') as resp:
|
|
||||||
content = await resp.text()
|
|
||||||
await scanner.scan(content) # Library handles splitting into lines
|
|
||||||
|
|
||||||
# Example 3: Simple list of domains
|
|
||||||
domains = [
|
|
||||||
'example1.com',
|
|
||||||
'example2.com',
|
|
||||||
'example3.com'
|
|
||||||
]
|
|
||||||
await scanner.scan(domains)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
asyncio.run(scan_domains())
|
asyncio.run(main())
|
||||||
```
|
```
|
||||||
|
|
||||||
The scanner accepts various input types:
|
The scanner accepts various input types:
|
||||||
- Async/sync generators that yield domains
|
- File paths (string)
|
||||||
- String content with newlines
|
|
||||||
- Lists/tuples of domains
|
- Lists/tuples of domains
|
||||||
- File paths
|
|
||||||
- stdin (using '-')
|
- stdin (using '-')
|
||||||
|
- Async generators that yield domains
|
||||||
|
|
||||||
All inputs support sharding for distributed scanning.
|
All inputs support sharding for distributed scanning using the `shard` parameter.
|
||||||
|
|
||||||
## Arguments
|
## Arguments
|
||||||
|
|
||||||
|
@ -6,4 +6,4 @@ from .scanner import HTTPZScanner
|
|||||||
from .colors import Colors
|
from .colors import Colors
|
||||||
|
|
||||||
|
|
||||||
__version__ = '2.0.0'
|
__version__ = '2.0.1'
|
@ -8,9 +8,9 @@ except ImportError:
|
|||||||
raise ImportError('missing bs4 module (pip install beautifulsoup4)')
|
raise ImportError('missing bs4 module (pip install beautifulsoup4)')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from cryptography import x509
|
from cryptography import x509
|
||||||
from cryptography.hazmat.primitives import hashes
|
from cryptography.hazmat.primitives import hashes
|
||||||
from cryptography.x509.oid import NameOID
|
from cryptography.x509.oid import NameOID
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError('missing cryptography module (pip install cryptography)')
|
raise ImportError('missing cryptography module (pip install cryptography)')
|
||||||
|
|
||||||
@ -28,8 +28,8 @@ def parse_domain_url(domain: str) -> tuple:
|
|||||||
Parse domain string into base domain, port, and protocol list
|
Parse domain string into base domain, port, and protocol list
|
||||||
|
|
||||||
:param domain: Raw domain string to parse
|
:param domain: Raw domain string to parse
|
||||||
:return: Tuple of (base_domain, port, protocols)
|
|
||||||
'''
|
'''
|
||||||
|
|
||||||
port = None
|
port = None
|
||||||
base_domain = domain.rstrip('/')
|
base_domain = domain.rstrip('/')
|
||||||
|
|
||||||
@ -58,6 +58,7 @@ def parse_domain_url(domain: str) -> tuple:
|
|||||||
|
|
||||||
return base_domain, port, protocols
|
return base_domain, port, protocols
|
||||||
|
|
||||||
|
|
||||||
async def get_cert_info(ssl_object, url: str) -> dict:
|
async def get_cert_info(ssl_object, url: str) -> dict:
|
||||||
'''
|
'''
|
||||||
Get SSL certificate information for a domain
|
Get SSL certificate information for a domain
|
||||||
@ -65,6 +66,7 @@ async def get_cert_info(ssl_object, url: str) -> dict:
|
|||||||
:param ssl_object: SSL object to get certificate info from
|
:param ssl_object: SSL object to get certificate info from
|
||||||
:param url: URL to get certificate info from
|
:param url: URL to get certificate info from
|
||||||
'''
|
'''
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if not ssl_object or not (cert_der := ssl_object.getpeercert(binary_form=True)):
|
if not ssl_object or not (cert_der := ssl_object.getpeercert(binary_form=True)):
|
||||||
return None
|
return None
|
||||||
@ -101,6 +103,7 @@ async def get_cert_info(ssl_object, url: str) -> dict:
|
|||||||
error(f'Error getting cert info for {url}: {str(e)}')
|
error(f'Error getting cert info for {url}: {str(e)}')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def get_favicon_hash(session, base_url: str, html: str) -> str:
|
async def get_favicon_hash(session, base_url: str, html: str) -> str:
|
||||||
'''
|
'''
|
||||||
Get favicon hash from a webpage
|
Get favicon hash from a webpage
|
||||||
@ -141,6 +144,7 @@ async def get_favicon_hash(session, base_url: str, html: str) -> str:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def parse_status_codes(codes_str: str) -> set:
|
def parse_status_codes(codes_str: str) -> set:
|
||||||
'''
|
'''
|
||||||
Parse comma-separated status codes and ranges into a set of integers
|
Parse comma-separated status codes and ranges into a set of integers
|
||||||
@ -174,4 +178,26 @@ def parse_shard(shard_str: str) -> tuple:
|
|||||||
raise ValueError
|
raise ValueError
|
||||||
return shard_index - 1, total_shards # Convert to 0-based index
|
return shard_index - 1, total_shards # Convert to 0-based index
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
raise argparse.ArgumentTypeError('Shard must be in format INDEX/TOTAL where INDEX <= TOTAL')
|
raise argparse.ArgumentTypeError('Shard must be in format INDEX/TOTAL where INDEX <= TOTAL')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_title(html: str, content_type: str = None) -> str:
    '''
    Parse title from HTML content

    :param html: HTML content of the page
    :param content_type: Content-Type header value (when given, only text/html and application/xhtml are parsed)
    :return: Stripped text of the <title> element, or None when there is no usable title
    '''

    # Nothing to parse
    if not html:
        return None

    # Only parse title for HTML content
    if content_type and not any(x in content_type.lower() for x in ('text/html', 'application/xhtml')):
        return None

    try:
        # 'html.parser' already fills the features argument; the previous call also passed
        # features='lxml', which raised a TypeError that the bare except turned into None
        # for every page. from_encoding is dropped because it only applies to bytes markup.
        soup = bs4.BeautifulSoup(html, 'html.parser')
    except Exception:
        # Best effort: markup the parser rejects simply means there is no title
        return None

    # title.string is None when <title> is empty or wraps nested tags
    if soup.title and soup.title.string:
        return soup.title.string.strip() or None

    return None
|
@ -5,7 +5,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import sys
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import aiohttp
|
import aiohttp
|
||||||
@ -20,7 +19,7 @@ except ImportError:
|
|||||||
from .dns import resolve_all_dns, load_resolvers
|
from .dns import resolve_all_dns, load_resolvers
|
||||||
from .formatters import format_console_output
|
from .formatters import format_console_output
|
||||||
from .colors import Colors
|
from .colors import Colors
|
||||||
from .parsers import parse_domain_url, get_cert_info, get_favicon_hash
|
from .parsers import parse_domain_url, get_cert_info, get_favicon_hash, parse_title
|
||||||
from .utils import debug, info, USER_AGENTS, input_generator
|
from .utils import debug, info, USER_AGENTS, input_generator
|
||||||
|
|
||||||
|
|
||||||
@ -154,12 +153,13 @@ class HTTPZScanner:
|
|||||||
except AttributeError:
|
except AttributeError:
|
||||||
debug(f'Failed to get SSL info for {url}')
|
debug(f'Failed to get SSL info for {url}')
|
||||||
|
|
||||||
html = (await response.text())[:1024*1024]
|
content_type = response.headers.get('Content-Type', '')
|
||||||
soup = bs4.BeautifulSoup(html, 'html.parser')
|
html = await response.text() if any(x in content_type.lower() for x in ['text/html', 'application/xhtml']) else None
|
||||||
|
|
||||||
# Only add title if it exists
|
# Only add title if it exists
|
||||||
if soup.title and soup.title.string:
|
if soup := bs4.BeautifulSoup(html, 'html.parser'):
|
||||||
result['title'] = ' '.join(soup.title.string.strip().split()).rstrip('.')[:300]
|
if soup.title and soup.title.string:
|
||||||
|
result['title'] = ' '.join(soup.title.string.strip().split()).rstrip('.')[:300]
|
||||||
|
|
||||||
# Only add body if it exists
|
# Only add body if it exists
|
||||||
if body_text := soup.get_text():
|
if body_text := soup.get_text():
|
||||||
@ -210,32 +210,81 @@ class HTTPZScanner:
|
|||||||
|
|
||||||
async def scan(self, input_source):
    '''
    Scan domains from a file, stdin, list/tuple, or (async) iterable

    :param input_source: Can be:
        - Path to file (str)
        - stdin ('-')
        - List/tuple of domains
        - Async generator yielding domains
        - Any other sync iterable of domains
    :yields: Result dictionary for each domain scanned
    '''

    if not self.resolvers:
        await self.init()

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
        tasks = set()

        async for domain in self._iter_domains(input_source):
            # At the concurrency cap: wait for at least one check to finish before queueing more
            if len(tasks) >= self.concurrent_limit:
                done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
                for task in done:
                    result = await task
                    await self.process_result(result)
                    yield result

            tasks.add(asyncio.create_task(self.check_domain(session, domain)))

        # Process remaining tasks
        if tasks:
            done, _ = await asyncio.wait(tasks)
            for task in done:
                result = await task
                await self.process_result(result)
                yield result


async def _iter_domains(self, input_source):
    '''
    Turn every supported input type into one async stream of domains for this shard

    :param input_source: Same values accepted by scan()
    :yields: Non-empty, stripped domain strings selected by self.shard (all of them when no shard is set)
    '''

    def clean(entry) -> str:
        # Entries may arrive as bytes (e.g. raw lines from a network stream)
        if isinstance(entry, bytes):
            entry = entry.decode()
        return str(entry).strip()

    def in_shard(index: int) -> bool:
        # self.shard is (zero-based index, total shards) or None
        return self.shard is None or index % self.shard[1] == self.shard[0]

    # File path or stdin: input_generator is an async generator and applies the shard
    # filter itself, so it has to be consumed with "async for" (a plain "for" raises TypeError)
    if isinstance(input_source, str):
        async for domain in input_generator(input_source, self.shard):
            yield domain
        return

    # Async generator input
    if hasattr(input_source, '__aiter__'):
        line_num = 0
        async for entry in input_source:
            if domain := clean(entry):
                if in_shard(line_num):
                    yield domain
                # NOTE(review): only non-empty entries advance the shard counter here;
                # the original indentation was lost in this view - confirm against the file
                line_num += 1
        return

    # List/tuple (or any other sync iterable): every position counts towards the
    # shard index, blank entries included
    for line_num, entry in enumerate(input_source):
        if domain := clean(entry):
            if in_shard(line_num):
                yield domain
|
@ -5,6 +5,7 @@
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
|
||||||
# Global for silent mode
|
# Global for silent mode
|
||||||
@ -12,58 +13,58 @@ SILENT_MODE = False
|
|||||||
|
|
||||||
# List of user agents to randomize requests
|
# List of user agents to randomize requests
|
||||||
USER_AGENTS = [
|
USER_AGENTS = [
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:134.0) Gecko/20100101 Firefox/134.0',
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.6.5 Chrome/124.0.6367.243 Electron/30.1.2 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.6.5 Chrome/124.0.6367.243 Electron/30.1.2 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 OPR/116.0.0.0',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:134.0) Gecko/20100101 Firefox/134.0",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:134.0) Gecko/20100101 Firefox/134.0',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.8.3 Chrome/130.0.6723.191 Electron/33.3.2 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.8.3 Chrome/130.0.6723.191 Electron/33.3.2 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.137 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.6613.137 Safari/537.36',
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
|
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
|
'Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:135.0) Gecko/20100101 Firefox/135.0',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.12 Chrome/120.0.6099.283 Electron/28.2.3 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.12 Chrome/120.0.6099.283 Electron/28.2.3 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0',
|
||||||
"Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.0",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 OPR/114.0.0.0',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
|
||||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.7.7 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36"
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.7.7 Chrome/128.0.6613.186 Electron/32.2.5 Safari/537.36'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -98,9 +99,9 @@ def human_size(size_bytes: int) -> str:
|
|||||||
return f'{size:.1f}{units[unit_index]}'
|
return f'{size:.1f}{units[unit_index]}'
|
||||||
|
|
||||||
|
|
||||||
def input_generator(input_source, shard: tuple = None):
|
async def input_generator(input_source, shard: tuple = None):
|
||||||
'''
|
'''
|
||||||
Generator function to yield domains from various input sources with optional sharding
|
Async generator function to yield domains from various input sources with optional sharding
|
||||||
|
|
||||||
:param input_source: Can be:
|
:param input_source: Can be:
|
||||||
- string path to local file
|
- string path to local file
|
||||||
@ -116,6 +117,7 @@ def input_generator(input_source, shard: tuple = None):
|
|||||||
# Handle stdin
|
# Handle stdin
|
||||||
if input_source == '-' or input_source is None:
|
if input_source == '-' or input_source is None:
|
||||||
for line in sys.stdin:
|
for line in sys.stdin:
|
||||||
|
await asyncio.sleep(0) # Yield control
|
||||||
if line := line.strip():
|
if line := line.strip():
|
||||||
if shard is None or line_num % shard[1] == shard[0]:
|
if shard is None or line_num % shard[1] == shard[0]:
|
||||||
yield line
|
yield line
|
||||||
@ -125,6 +127,7 @@ def input_generator(input_source, shard: tuple = None):
|
|||||||
elif isinstance(input_source, str) and os.path.exists(input_source):
|
elif isinstance(input_source, str) and os.path.exists(input_source):
|
||||||
with open(input_source, 'r') as f:
|
with open(input_source, 'r') as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
|
await asyncio.sleep(0) # Yield control
|
||||||
if line := line.strip():
|
if line := line.strip():
|
||||||
if shard is None or line_num % shard[1] == shard[0]:
|
if shard is None or line_num % shard[1] == shard[0]:
|
||||||
yield line
|
yield line
|
||||||
@ -133,6 +136,7 @@ def input_generator(input_source, shard: tuple = None):
|
|||||||
# Handle iterables (generators, lists, etc)
|
# Handle iterables (generators, lists, etc)
|
||||||
elif hasattr(input_source, '__iter__') and not isinstance(input_source, (str, bytes)):
|
elif hasattr(input_source, '__iter__') and not isinstance(input_source, (str, bytes)):
|
||||||
for line in input_source:
|
for line in input_source:
|
||||||
|
await asyncio.sleep(0) # Yield control
|
||||||
if isinstance(line, bytes):
|
if isinstance(line, bytes):
|
||||||
line = line.decode()
|
line = line.decode()
|
||||||
if line := line.strip():
|
if line := line.strip():
|
||||||
@ -145,6 +149,7 @@ def input_generator(input_source, shard: tuple = None):
|
|||||||
if isinstance(input_source, bytes):
|
if isinstance(input_source, bytes):
|
||||||
input_source = input_source.decode()
|
input_source = input_source.decode()
|
||||||
for line in input_source.splitlines():
|
for line in input_source.splitlines():
|
||||||
|
await asyncio.sleep(0) # Yield control
|
||||||
if line := line.strip():
|
if line := line.strip():
|
||||||
if shard is None or line_num % shard[1] == shard[0]:
|
if shard is None or line_num % shard[1] == shard[0]:
|
||||||
yield line
|
yield line
|
||||||
|
3
setup.py
3
setup.py
@ -4,12 +4,13 @@
|
|||||||
|
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
|
|
||||||
with open('README.md', 'r', encoding='utf-8') as f:
|
with open('README.md', 'r', encoding='utf-8') as f:
|
||||||
long_description = f.read()
|
long_description = f.read()
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='httpz_scanner',
|
name='httpz_scanner',
|
||||||
version='2.0.0',
|
version='2.0.1',
|
||||||
author='acidvegas',
|
author='acidvegas',
|
||||||
author_email='acid.vegas@acid.vegas',
|
author_email='acid.vegas@acid.vegas',
|
||||||
description='Hyper-fast HTTP Scraping Tool',
|
description='Hyper-fast HTTP Scraping Tool',
|
||||||
|
Loading…
Reference in New Issue
Block a user