diff --git a/czds/__init__.py b/czds/__init__.py index e40b948..73f5bda 100644 --- a/czds/__init__.py +++ b/czds/__init__.py @@ -4,7 +4,7 @@ from .client import CZDS -__version__ = '1.3.7' +__version__ = '1.3.8' __author__ = 'acidvegas' __email__ = 'acid.vegas@acid.vegas' __github__ = 'https://github.com/acidvegas/czds' \ No newline at end of file diff --git a/czds/utils.py b/czds/utils.py index 31f373e..de6d981 100644 --- a/czds/utils.py +++ b/czds/utils.py @@ -2,6 +2,7 @@ # ICANN API for the Centralized Zones Data Service - developed by acidvegas (https://git.acid.vegas/czds) # czds/utils.py +import asyncio import gzip import logging import os @@ -24,55 +25,30 @@ async def gzip_decompress(filepath: str, cleanup: bool = True): :param filepath: Path to the gzip file :param cleanup: Whether to remove the original gzip file after decompressions ''' - - # Get the original size of the file original_size = os.path.getsize(filepath) - + output_path = filepath[:-3] + logging.debug(f'Decompressing {filepath} ({humanize_bytes(original_size)})...') - # Remove the .gz extension - output_path = filepath[:-3] + # Use a large chunk size (256MB) for maximum throughput + chunk_size = 256 * 1024 * 1024 - # Set the chunk size to 25MB - chunk_size = 100 * 1024 * 1024 - - # Use a decompression object for better memory efficiency - decompressor = gzip.decompressobj() - - # Create progress bar + # Run the actual decompression in a thread pool to prevent blocking with tqdm(total=original_size, unit='B', unit_scale=True, desc=f'Decompressing {os.path.basename(filepath)}', leave=False) as pbar: - # Open the input and output files - async with aiofiles.open(filepath, 'rb') as f_in: - async with aiofiles.open(output_path, 'wb') as f_out: + async with aiofiles.open(output_path, 'wb') as f_out: + # Run gzip decompression in thread pool since it's CPU-bound + loop = asyncio.get_event_loop() + with gzip.open(filepath, 'rb') as gz: while True: - # Read compressed chunk - chunk = await f_in.read(chunk_size) - - # If the chunk is empty, break + chunk = await loop.run_in_executor(None, gz.read, chunk_size) if not chunk: break - - # Decompress chunk - decompressed = decompressor.decompress(chunk) - - # If the decompressed chunk is not empty, write it to the output file - if decompressed: - await f_out.write(decompressed) - - # Update the progress bar + await f_out.write(chunk) pbar.update(len(chunk)) - - # Write any remaining data - remaining = decompressor.flush() - if remaining: - await f_out.write(remaining) - # Get the decompressed size of the file decompressed_size = os.path.getsize(output_path) - logging.debug(f'Decompressed {filepath} ({humanize_bytes(decompressed_size)})') - # If the cleanup flag is set, remove the original gzip file if cleanup: os.remove(filepath) logging.debug(f'Removed original gzip file: {filepath}') diff --git a/setup.py b/setup.py index 18b00db..c171dbe 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ with open('README.md', 'r', encoding='utf-8') as fh: setup( name='czds-api', - version='1.3.7', + version='1.3.8', author='acidvegas', author_email='acid.vegas@acid.vegas', description='ICANN API for the Centralized Zones Data Service',