From 41d7e53d30a07776137eb3a5846d210e8599170d Mon Sep 17 00:00:00 2001 From: acidvegas Date: Wed, 12 Feb 2025 00:32:28 -0500 Subject: [PATCH] fixed chunk output --- httpz_scanner/__init__.py | 2 +- httpz_scanner/scanner.py | 40 ++++++++++++++++----------------------- setup.py | 2 +- 3 files changed, 18 insertions(+), 26 deletions(-) diff --git a/httpz_scanner/__init__.py b/httpz_scanner/__init__.py index 5f2529d..b2fb36b 100644 --- a/httpz_scanner/__init__.py +++ b/httpz_scanner/__init__.py @@ -6,4 +6,4 @@ from .colors import Colors from .scanner import HTTPZScanner -__version__ = '2.1.1' \ No newline at end of file +__version__ = '2.1.2' \ No newline at end of file diff --git a/httpz_scanner/scanner.py b/httpz_scanner/scanner.py index 5343194..5fb7900 100644 --- a/httpz_scanner/scanner.py +++ b/httpz_scanner/scanner.py @@ -197,7 +197,7 @@ class HTTPZScanner: self.resolvers = await load_resolvers(self.resolver_file) async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session: - tasks = set() + tasks = {} # Change to dict to track domain for each task domain_queue = asyncio.Queue() queue_empty = False @@ -249,52 +249,44 @@ class HTTPZScanner: try: while not queue_empty or tasks: - # Fill up tasks to concurrent_limit + # Start new tasks if needed while len(tasks) < self.concurrent_limit and not queue_empty: try: domain = await domain_queue.get() - if domain is None: # Queue is empty + if domain is None: queue_empty = True break task = asyncio.create_task(process_domain(domain)) - tasks.add(task) - except asyncio.CancelledError: - break + tasks[task] = domain except Exception as e: debug(f'Error creating task: {str(e)}') if not tasks: break - - # Wait for any task to complete with timeout + + # Wait for the FIRST task to complete try: - done, pending = await asyncio.wait( - tasks, + done, _ = await asyncio.wait( + tasks.keys(), timeout=self.timeout, return_when=asyncio.FIRST_COMPLETED ) - # Handle completed tasks + # Process completed task immediately for task in done: - tasks.remove(task) + domain = tasks.pop(task) try: if result := await task: yield result except Exception as e: - debug(f'Error processing task result: {str(e)}') - - # Handle timed out tasks - if not done and pending: - for task in pending: - task.cancel() - try: - await task - except asyncio.CancelledError: - pass - tasks.remove(task) - + debug(f'Error processing result for {domain}: {str(e)}') + except Exception as e: debug(f'Error in task processing loop: {str(e)}') + # Remove any failed tasks + failed_tasks = [t for t in tasks if t.done() and t.exception()] + for task in failed_tasks: + tasks.pop(task) finally: # Clean up diff --git a/setup.py b/setup.py index 9e133fa..5a24466 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ with open('README.md', 'r', encoding='utf-8') as f: setup( name='httpz_scanner', - version='2.1.1', + version='2.1.2', author='acidvegas', author_email='acid.vegas@acid.vegas', description='Hyper-fast HTTP Scraping Tool',