diff --git a/czds/__init__.py b/czds/__init__.py index a3aa2e6..16a6549 100644 --- a/czds/__init__.py +++ b/czds/__init__.py @@ -5,7 +5,7 @@ from .client import CZDS -__version__ = '1.2.6' +__version__ = '1.2.7' __author__ = 'acidvegas' __email__ = 'acid.vegas@acid.vegas' __github__ = 'https://github.com/acidvegas/czds' \ No newline at end of file diff --git a/czds/client.py b/czds/client.py index 5e8a44c..44716bc 100644 --- a/czds/client.py +++ b/czds/client.py @@ -141,7 +141,7 @@ class CZDS: Decompress a gzip file in place :param filepath: Path to the gzip file - :param cleanup: Whether to remove the original gzip file after decompression + :param cleanup: Whether to remove the original gzip file after decompressions ''' logging.debug(f'Decompressing {filepath}') @@ -173,38 +173,71 @@ class CZDS: tld = url.split('/')[-1].split('.')[0] # Extract TLD from URL logging.info(f'Starting download of {tld} zone file') - async with self.session.get(url, headers=self.headers) as response: - if response.status != 200: - error_msg = f'Failed to download {tld}: {response.status} {await response.text()}' - logging.error(error_msg) - raise Exception(error_msg) + try: + async with self.session.get(url, headers=self.headers) as response: + if response.status != 200: + error_msg = f'Failed to download {tld}: {response.status} {await response.text()}' + logging.error(error_msg) + raise Exception(error_msg) - if not (content_disposition := response.headers.get('Content-Disposition')): - error_msg = f'Missing Content-Disposition header for {tld}' - logging.error(error_msg) - raise ValueError(error_msg) + # Get expected file size from headers + expected_size = int(response.headers.get('Content-Length', 0)) + if not expected_size: + logging.warning(f'No Content-Length header for {tld}') - filename = content_disposition.split('filename=')[-1].strip('"') - filepath = os.path.join(output_directory, filename) + if not (content_disposition := response.headers.get('Content-Disposition')): + error_msg = f'Missing Content-Disposition header for {tld}' + logging.error(error_msg) + raise ValueError(error_msg) - async with aiofiles.open(filepath, 'wb') as file: - total_size = 0 - while True: - chunk = await response.content.read(8192) - if not chunk: - break - await file.write(chunk) - total_size += len(chunk) - - size_mb = total_size / (1024 * 1024) - logging.info(f'Successfully downloaded {tld} zone file ({size_mb:.2f} MB)') + filename = content_disposition.split('filename=')[-1].strip('"') + filepath = os.path.join(output_directory, filename) - if decompress: - await self.gzip_decompress(filepath, cleanup) - filepath = filepath[:-3] # Remove .gz extension - logging.info(f'Decompressed {tld} zone file') + async with aiofiles.open(filepath, 'wb') as file: + total_size = 0 + last_progress = 0 + async for chunk in response.content.iter_chunked(8192): + await file.write(chunk) + total_size += len(chunk) + if expected_size: + progress = int((total_size / expected_size) * 100) + if progress >= last_progress + 5: # Log every 5% increase + logging.info(f'Downloading {tld}: {progress}% ({total_size:,}/{expected_size:,} bytes)') + last_progress = progress - return filepath + # Verify file size + if expected_size and total_size != expected_size: + error_msg = f'Incomplete download for {tld}: Got {total_size} bytes, expected {expected_size} bytes' + logging.error(error_msg) + os.remove(filepath) # Clean up incomplete file + raise Exception(error_msg) + + size_mb = total_size / (1024 * 1024) + logging.info(f'Successfully downloaded {tld} zone file ({size_mb:.2f} MB)') + + if decompress: + try: + # Verify gzip integrity before decompressing + with gzip.open(filepath, 'rb') as test_gzip: + test_gzip.read(1) # Try reading first byte to verify gzip integrity + + await self.gzip_decompress(filepath, cleanup) + filepath = filepath[:-3] # Remove .gz extension + logging.info(f'Decompressed {tld} zone file') + except (gzip.BadGzipFile, OSError) as e: + error_msg = f'Failed to decompress {tld}: {str(e)}' + logging.error(error_msg) + os.remove(filepath) # Clean up corrupted file + raise Exception(error_msg) + + return filepath + + except Exception as e: + logging.error(f'Error downloading {tld}: {str(e)}') + # Clean up any partial downloads + if 'filepath' in locals() and os.path.exists(filepath): + os.remove(filepath) + raise if semaphore: async with semaphore: diff --git a/setup.py b/setup.py index 6ff4d85..48a5cdc 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ with open('README.md', 'r', encoding='utf-8') as fh: setup( name='czds-api', - version='1.2.6', + version='1.2.7', author='acidvegas', author_email='acid.vegas@acid.vegas', description='ICANN API for the Centralized Zones Data Service',