fixed progress

This commit is contained in:
Dionysus 2025-03-21 20:46:08 -04:00
parent 6047e0aed7
commit 08ccea270d
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
3 changed files with 63 additions and 30 deletions

View File

@ -5,7 +5,7 @@
from .client import CZDS
__version__ = '1.2.6'
__version__ = '1.2.7'
__author__ = 'acidvegas'
__email__ = 'acid.vegas@acid.vegas'
__github__ = 'https://github.com/acidvegas/czds'

View File

@ -141,7 +141,7 @@ class CZDS:
Decompress a gzip file in place
:param filepath: Path to the gzip file
:param cleanup: Whether to remove the original gzip file after decompression
:param cleanup: Whether to remove the original gzip file after decompressions
'''
logging.debug(f'Decompressing {filepath}')
@ -173,38 +173,71 @@ class CZDS:
tld = url.split('/')[-1].split('.')[0] # Extract TLD from URL
logging.info(f'Starting download of {tld} zone file')
async with self.session.get(url, headers=self.headers) as response:
if response.status != 200:
error_msg = f'Failed to download {tld}: {response.status} {await response.text()}'
logging.error(error_msg)
raise Exception(error_msg)
try:
async with self.session.get(url, headers=self.headers) as response:
if response.status != 200:
error_msg = f'Failed to download {tld}: {response.status} {await response.text()}'
logging.error(error_msg)
raise Exception(error_msg)
if not (content_disposition := response.headers.get('Content-Disposition')):
error_msg = f'Missing Content-Disposition header for {tld}'
logging.error(error_msg)
raise ValueError(error_msg)
# Get expected file size from headers
expected_size = int(response.headers.get('Content-Length', 0))
if not expected_size:
logging.warning(f'No Content-Length header for {tld}')
filename = content_disposition.split('filename=')[-1].strip('"')
filepath = os.path.join(output_directory, filename)
if not (content_disposition := response.headers.get('Content-Disposition')):
error_msg = f'Missing Content-Disposition header for {tld}'
logging.error(error_msg)
raise ValueError(error_msg)
async with aiofiles.open(filepath, 'wb') as file:
total_size = 0
while True:
chunk = await response.content.read(8192)
if not chunk:
break
await file.write(chunk)
total_size += len(chunk)
size_mb = total_size / (1024 * 1024)
logging.info(f'Successfully downloaded {tld} zone file ({size_mb:.2f} MB)')
filename = content_disposition.split('filename=')[-1].strip('"')
filepath = os.path.join(output_directory, filename)
if decompress:
await self.gzip_decompress(filepath, cleanup)
filepath = filepath[:-3] # Remove .gz extension
logging.info(f'Decompressed {tld} zone file')
async with aiofiles.open(filepath, 'wb') as file:
total_size = 0
last_progress = 0
async for chunk in response.content.iter_chunked(8192):
await file.write(chunk)
total_size += len(chunk)
if expected_size:
progress = int((total_size / expected_size) * 100)
if progress >= last_progress + 5: # Log every 5% increase
logging.info(f'Downloading {tld}: {progress}% ({total_size:,}/{expected_size:,} bytes)')
last_progress = progress
return filepath
# Verify file size
if expected_size and total_size != expected_size:
error_msg = f'Incomplete download for {tld}: Got {total_size} bytes, expected {expected_size} bytes'
logging.error(error_msg)
os.remove(filepath) # Clean up incomplete file
raise Exception(error_msg)
size_mb = total_size / (1024 * 1024)
logging.info(f'Successfully downloaded {tld} zone file ({size_mb:.2f} MB)')
if decompress:
try:
# Verify gzip integrity before decompressing
with gzip.open(filepath, 'rb') as test_gzip:
test_gzip.read(1) # Try reading first byte to verify gzip integrity
await self.gzip_decompress(filepath, cleanup)
filepath = filepath[:-3] # Remove .gz extension
logging.info(f'Decompressed {tld} zone file')
except (gzip.BadGzipFile, OSError) as e:
error_msg = f'Failed to decompress {tld}: {str(e)}'
logging.error(error_msg)
os.remove(filepath) # Clean up corrupted file
raise Exception(error_msg)
return filepath
except Exception as e:
logging.error(f'Error downloading {tld}: {str(e)}')
# Clean up any partial downloads
if 'filepath' in locals() and os.path.exists(filepath):
os.remove(filepath)
raise
if semaphore:
async with semaphore:

View File

@ -11,7 +11,7 @@ with open('README.md', 'r', encoding='utf-8') as fh:
setup(
name='czds-api',
version='1.2.6',
version='1.2.7',
author='acidvegas',
author_email='acid.vegas@acid.vegas',
description='ICANN API for the Centralized Zones Data Service',