commit e8cc4e2ddb
parent 2a7b271d4f

    fixed gzip decompression and index name for reports
czds/__init__.py

@@ -4,7 +4,7 @@
 
 from .client import CZDS
 
-__version__ = '1.3.7'
+__version__ = '1.3.8'
 __author__ = 'acidvegas'
 __email__ = 'acid.vegas@acid.vegas'
 __github__ = 'https://github.com/acidvegas/czds'
czds/utils.py

@@ -2,6 +2,7 @@
 # ICANN API for the Centralized Zones Data Service - developed by acidvegas (https://git.acid.vegas/czds)
 # czds/utils.py
 
+import asyncio
 import gzip
 import logging
 import os
@@ -24,55 +25,30 @@ async def gzip_decompress(filepath: str, cleanup: bool = True):
     :param filepath: Path to the gzip file
     :param cleanup: Whether to remove the original gzip file after decompressions
     '''
 
-    # Get the original size of the file
     original_size = os.path.getsize(filepath)
+    output_path = filepath[:-3]
 
     logging.debug(f'Decompressing {filepath} ({humanize_bytes(original_size)})...')
 
-    # Remove the .gz extension
-    output_path = filepath[:-3]
-
-    # Set the chunk size to 25MB
-    chunk_size = 100 * 1024 * 1024
-
-    # Use a decompression object for better memory efficiency
-    decompressor = gzip.decompressobj()
-
-    # Create progress bar
+    # Use a large chunk size (256MB) for maximum throughput
+    chunk_size = 256 * 1024 * 1024
+
+    # Run the actual decompression in a thread pool to prevent blocking
     with tqdm(total=original_size, unit='B', unit_scale=True, desc=f'Decompressing {os.path.basename(filepath)}', leave=False) as pbar:
-        # Open the input and output files
-        async with aiofiles.open(filepath, 'rb') as f_in:
-            async with aiofiles.open(output_path, 'wb') as f_out:
+        async with aiofiles.open(output_path, 'wb') as f_out:
+            # Run gzip decompression in thread pool since it's CPU-bound
+            loop = asyncio.get_event_loop()
+
+            with gzip.open(filepath, 'rb') as gz:
                 while True:
-                    # Read compressed chunk
-                    chunk = await f_in.read(chunk_size)
-
-                    # If the chunk is empty, break
+                    chunk = await loop.run_in_executor(None, gz.read, chunk_size)
+
                     if not chunk:
                         break
 
-                    # Decompress chunk
-                    decompressed = decompressor.decompress(chunk)
-
-                    # If the decompressed chunk is not empty, write it to the output file
-                    if decompressed:
-                        await f_out.write(decompressed)
-
-                    # Update the progress bar
+                    await f_out.write(chunk)
                     pbar.update(len(chunk))
 
-            # Write any remaining data
-            remaining = decompressor.flush()
-            if remaining:
-                await f_out.write(remaining)
-
-    # Get the decompressed size of the file
     decompressed_size = os.path.getsize(output_path)
 
     logging.debug(f'Decompressed {filepath} ({humanize_bytes(decompressed_size)})')
 
-    # If the cleanup flag is set, remove the original gzip file
     if cleanup:
         os.remove(filepath)
         logging.debug(f'Removed original gzip file: {filepath}')
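
For reference, the core of the new approach reduces to the short sketch below (not part of this commit): gzip.open() handles the stream framing, and each blocking read is handed to the default thread-pool executor via run_in_executor so the event loop stays free while chunks are written out asynchronously with aiofiles. The helper name decompress, the file names, and the 1 MB chunk size are illustrative assumptions, not values from the repository.

    import asyncio
    import gzip

    import aiofiles


    async def decompress(src: str, dst: str, chunk_size: int = 1024 * 1024) -> None:
        loop = asyncio.get_event_loop()

        async with aiofiles.open(dst, 'wb') as f_out:
            with gzip.open(src, 'rb') as gz:
                while True:
                    # gz.read() is blocking and CPU-bound, so run it in the executor
                    chunk = await loop.run_in_executor(None, gz.read, chunk_size)
                    if not chunk:
                        break
                    await f_out.write(chunk)


    if __name__ == '__main__':
        asyncio.run(decompress('example.zone.gz', 'example.zone'))

Compared to the removed decompressobj() path, this shape needs no manual flush() of trailing data: gzip.open() only returns fully decompressed bytes and signals completion with an empty read.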
setup.py

@@ -11,7 +11,7 @@ with open('README.md', 'r', encoding='utf-8') as fh:
 
 setup(
     name='czds-api',
-    version='1.3.7',
+    version='1.3.8',
     author='acidvegas',
     author_email='acid.vegas@acid.vegas',
     description='ICANN API for the Centralized Zones Data Service',