Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
e8cc4e2ddb | |||
2a7b271d4f | |||
ff89f14a85 | |||
e9795a0177 | |||
551df89f3f | |||
9e76ed0684 | |||
656bd50e47 |
@ -39,8 +39,6 @@ czds [-h] [-u USERNAME] [-p PASSWORD] [-z] [-c CONCURRENCY] [-d] [-k] [-r] [-s]
|
|||||||
###### Zone Options
|
###### Zone Options
|
||||||
| `-z`, `--zones` | Download zone files | |
|
| `-z`, `--zones` | Download zone files | |
|
||||||
| `-c`, `--concurrency` | Number of concurrent downloads | `3` |
|
| `-c`, `--concurrency` | Number of concurrent downloads | `3` |
|
||||||
| `-d`, `--decompress` | Decompress zone files after download | |
|
|
||||||
| `-k`, `--keep` | Keep original gzip files after decompression | |
|
|
||||||
|
|
||||||
###### Report Options
|
###### Report Options
|
||||||
| `-r`, `--report` | Download the zone stats report | |
|
| `-r`, `--report` | Download the zone stats report | |
|
||||||
|
@ -4,8 +4,7 @@
|
|||||||
|
|
||||||
from .client import CZDS
|
from .client import CZDS
|
||||||
|
|
||||||
|
__version__ = '1.3.8'
|
||||||
__version__ = '1.3.2'
|
|
||||||
__author__ = 'acidvegas'
|
__author__ = 'acidvegas'
|
||||||
__email__ = 'acid.vegas@acid.vegas'
|
__email__ = 'acid.vegas@acid.vegas'
|
||||||
__github__ = 'https://github.com/acidvegas/czds'
|
__github__ = 'https://github.com/acidvegas/czds'
|
@ -10,11 +10,6 @@ import os
|
|||||||
|
|
||||||
from .client import CZDS
|
from .client import CZDS
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
'''Entry point for the command line interface'''
|
'''Entry point for the command line interface'''
|
||||||
|
@ -6,6 +6,8 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import aiohttp
|
import aiohttp
|
||||||
@ -44,8 +46,21 @@ class CZDS:
|
|||||||
self.username = username
|
self.username = username
|
||||||
self.password = password
|
self.password = password
|
||||||
|
|
||||||
# Set the session with longer timeouts
|
# Configure TCP keepalive
|
||||||
self.session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=None, connect=60, sock_connect=60, sock_read=60))
|
connector = aiohttp.TCPConnector(
|
||||||
|
keepalive_timeout=300, # Keep connections alive for 5 minutes
|
||||||
|
force_close=False, # Don't force close connections
|
||||||
|
enable_cleanup_closed=True, # Cleanup closed connections
|
||||||
|
ttl_dns_cache=300, # Cache DNS results for 5 minutes
|
||||||
|
)
|
||||||
|
|
||||||
|
# Set the session with longer timeouts and keepalive
|
||||||
|
self.session = aiohttp.ClientSession(
|
||||||
|
connector=connector,
|
||||||
|
timeout=aiohttp.ClientTimeout(total=None, connect=60, sock_connect=60, sock_read=None),
|
||||||
|
headers={'Connection': 'keep-alive'},
|
||||||
|
raise_for_status=True
|
||||||
|
)
|
||||||
|
|
||||||
# Placeholder for the headers after authentication
|
# Placeholder for the headers after authentication
|
||||||
self.headers = None
|
self.headers = None
|
||||||
@ -125,15 +140,12 @@ class CZDS:
|
|||||||
Downloads the zone report stats from the API and scrubs the report for privacy
|
Downloads the zone report stats from the API and scrubs the report for privacy
|
||||||
|
|
||||||
:param filepath: Filepath to save the scrubbed report
|
:param filepath: Filepath to save the scrubbed report
|
||||||
:param scrub: Whether to scrub the username from the report
|
|
||||||
:param format: Output format ('csv' or 'json')
|
:param format: Output format ('csv' or 'json')
|
||||||
'''
|
'''
|
||||||
|
|
||||||
logging.info('Downloading zone stats report')
|
logging.info('Downloading zone stats report')
|
||||||
|
|
||||||
# Send the request to the API
|
# Send the request to the API
|
||||||
async with self.session.get('https://czds-api.icann.org/czds/requests/report', headers=self.headers) as response:
|
async with self.session.get('https://czds-api.icann.org/czds/requests/report', headers=self.headers) as response:
|
||||||
# Check if the request was successful
|
|
||||||
if response.status != 200:
|
if response.status != 200:
|
||||||
raise Exception(f'Failed to download the zone stats report: {response.status} {await response.text()}')
|
raise Exception(f'Failed to download the zone stats report: {response.status} {await response.text()}')
|
||||||
|
|
||||||
@ -144,9 +156,21 @@ class CZDS:
|
|||||||
content = content.replace(self.username, 'nobody@no.name')
|
content = content.replace(self.username, 'nobody@no.name')
|
||||||
logging.debug('Scrubbed username from report')
|
logging.debug('Scrubbed username from report')
|
||||||
|
|
||||||
# Convert the report to JSON format if requested (default is CSV)
|
# Convert the report to JSON format if requested
|
||||||
if format.lower() == 'json':
|
if format.lower() == 'json':
|
||||||
content = json.dumps(content, indent=4)
|
# Parse CSV content
|
||||||
|
csv_reader = csv.DictReader(io.StringIO(content))
|
||||||
|
|
||||||
|
# Convert to list of dicts with formatted keys
|
||||||
|
json_data = []
|
||||||
|
for row in csv_reader:
|
||||||
|
formatted_row = {
|
||||||
|
key.lower().replace(' ', '_'): value
|
||||||
|
for key, value in row.items()
|
||||||
|
}
|
||||||
|
json_data.append(formatted_row)
|
||||||
|
|
||||||
|
content = json.dumps(json_data, indent=4)
|
||||||
logging.debug('Converted report to JSON format')
|
logging.debug('Converted report to JSON format')
|
||||||
|
|
||||||
# Save the report to a file if a filepath is provided
|
# Save the report to a file if a filepath is provided
|
||||||
@ -169,9 +193,16 @@ class CZDS:
|
|||||||
|
|
||||||
async def _download():
|
async def _download():
|
||||||
tld_name = url.split('/')[-1].split('.')[0] # Extract TLD from URL
|
tld_name = url.split('/')[-1].split('.')[0] # Extract TLD from URL
|
||||||
max_retries = 10 # Maximum number of retries for failed downloads
|
max_retries = 20 # Maximum number of retries for failed downloads
|
||||||
retry_delay = 5 # Delay between retries in seconds
|
retry_delay = 5 # Delay between retries in seconds
|
||||||
timeout = aiohttp.ClientTimeout(total=120) # Timeout for the download
|
|
||||||
|
# Headers for better connection stability
|
||||||
|
download_headers = {
|
||||||
|
**self.headers,
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'Keep-Alive': 'timeout=600', # 10 minutes
|
||||||
|
'Accept-Encoding': 'gzip'
|
||||||
|
}
|
||||||
|
|
||||||
# Start the attempt loop
|
# Start the attempt loop
|
||||||
for attempt in range(max_retries):
|
for attempt in range(max_retries):
|
||||||
@ -179,7 +210,7 @@ class CZDS:
|
|||||||
logging.info(f'Starting download of {tld_name} zone file{" (attempt " + str(attempt + 1) + ")" if attempt > 0 else ""}')
|
logging.info(f'Starting download of {tld_name} zone file{" (attempt " + str(attempt + 1) + ")" if attempt > 0 else ""}')
|
||||||
|
|
||||||
# Send the request to the API
|
# Send the request to the API
|
||||||
async with self.session.get(url, headers=self.headers, timeout=timeout) as response:
|
async with self.session.get(url, headers=download_headers) as response:
|
||||||
# Check if the request was successful
|
# Check if the request was successful
|
||||||
if response.status != 200:
|
if response.status != 200:
|
||||||
logging.error(f'Failed to download {tld_name}: {response.status} {await response.text()}')
|
logging.error(f'Failed to download {tld_name}: {response.status} {await response.text()}')
|
||||||
@ -219,7 +250,7 @@ class CZDS:
|
|||||||
await file.write(chunk)
|
await file.write(chunk)
|
||||||
total_size += len(chunk)
|
total_size += len(chunk)
|
||||||
pbar.update(len(chunk))
|
pbar.update(len(chunk))
|
||||||
except (asyncio.TimeoutError, aiohttp.ClientError) as e:
|
except Exception as e:
|
||||||
logging.error(f'Connection error while downloading {tld_name}: {str(e)}')
|
logging.error(f'Connection error while downloading {tld_name}: {str(e)}')
|
||||||
if attempt + 1 < max_retries:
|
if attempt + 1 < max_retries:
|
||||||
logging.info(f'Retrying {tld_name} in {retry_delay} seconds...')
|
logging.info(f'Retrying {tld_name} in {retry_delay} seconds...')
|
||||||
@ -246,7 +277,7 @@ class CZDS:
|
|||||||
|
|
||||||
return filepath
|
return filepath
|
||||||
|
|
||||||
except (aiohttp.ClientError, asyncio.TimeoutError) as e:
|
except Exception as e:
|
||||||
if attempt + 1 >= max_retries:
|
if attempt + 1 >= max_retries:
|
||||||
logging.error(f'Failed to download {tld_name} after {max_retries} attempts: {str(e)}')
|
logging.error(f'Failed to download {tld_name} after {max_retries} attempts: {str(e)}')
|
||||||
if 'filepath' in locals() and os.path.exists(filepath):
|
if 'filepath' in locals() and os.path.exists(filepath):
|
||||||
@ -255,12 +286,6 @@ class CZDS:
|
|||||||
logging.warning(f'Download attempt {attempt + 1} failed for {tld_name}: {str(e)}')
|
logging.warning(f'Download attempt {attempt + 1} failed for {tld_name}: {str(e)}')
|
||||||
await asyncio.sleep(retry_delay)
|
await asyncio.sleep(retry_delay)
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f'Error downloading {tld_name}: {str(e)}')
|
|
||||||
if 'filepath' in locals() and os.path.exists(filepath):
|
|
||||||
os.remove(filepath)
|
|
||||||
raise
|
|
||||||
|
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
return await _download()
|
return await _download()
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
# ICANN API for the Centralized Zones Data Service - developed by acidvegas (https://git.acid.vegas/czds)
|
# ICANN API for the Centralized Zones Data Service - developed by acidvegas (https://git.acid.vegas/czds)
|
||||||
# czds/utils.py
|
# czds/utils.py
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import gzip
|
import gzip
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@ -24,43 +25,30 @@ async def gzip_decompress(filepath: str, cleanup: bool = True):
|
|||||||
:param filepath: Path to the gzip file
|
:param filepath: Path to the gzip file
|
||||||
:param cleanup: Whether to remove the original gzip file after decompression
|
:param cleanup: Whether to remove the original gzip file after decompression
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# Get the original size of the file
|
|
||||||
original_size = os.path.getsize(filepath)
|
original_size = os.path.getsize(filepath)
|
||||||
|
output_path = filepath[:-3]
|
||||||
|
|
||||||
logging.debug(f'Decompressing {filepath} ({humanize_bytes(original_size)})...')
|
logging.debug(f'Decompressing {filepath} ({humanize_bytes(original_size)})...')
|
||||||
|
|
||||||
# Remove the .gz extension
|
# Use a large chunk size (256MB) for maximum throughput
|
||||||
output_path = filepath[:-3]
|
chunk_size = 256 * 1024 * 1024
|
||||||
|
|
||||||
# Set the chunk size to 25MB
|
# Run the actual decompression in a thread pool to prevent blocking
|
||||||
chunk_size = 25 * 1024 * 1024
|
|
||||||
|
|
||||||
# Create progress bar for decompression
|
|
||||||
with tqdm(total=original_size, unit='B', unit_scale=True, desc=f'Decompressing {os.path.basename(filepath)}', leave=False) as pbar:
|
with tqdm(total=original_size, unit='B', unit_scale=True, desc=f'Decompressing {os.path.basename(filepath)}', leave=False) as pbar:
|
||||||
# Decompress the file
|
|
||||||
with gzip.open(filepath, 'rb') as gz:
|
|
||||||
async with aiofiles.open(output_path, 'wb') as f_out:
|
async with aiofiles.open(output_path, 'wb') as f_out:
|
||||||
|
# Run gzip decompression in thread pool since it's CPU-bound
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
with gzip.open(filepath, 'rb') as gz:
|
||||||
while True:
|
while True:
|
||||||
# Read the next chunk
|
chunk = await loop.run_in_executor(None, gz.read, chunk_size)
|
||||||
chunk = gz.read(chunk_size)
|
|
||||||
|
|
||||||
# If the chunk is empty, break
|
|
||||||
if not chunk:
|
if not chunk:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Write the chunk to the output file
|
|
||||||
await f_out.write(chunk)
|
await f_out.write(chunk)
|
||||||
|
|
||||||
# Update the progress bar
|
|
||||||
pbar.update(len(chunk))
|
pbar.update(len(chunk))
|
||||||
|
|
||||||
# Get the decompressed size of the file
|
|
||||||
decompressed_size = os.path.getsize(output_path)
|
decompressed_size = os.path.getsize(output_path)
|
||||||
|
|
||||||
logging.debug(f'Decompressed {filepath} ({humanize_bytes(decompressed_size)})')
|
logging.debug(f'Decompressed {filepath} ({humanize_bytes(decompressed_size)})')
|
||||||
|
|
||||||
# If the cleanup flag is set, remove the original gzip file
|
|
||||||
if cleanup:
|
if cleanup:
|
||||||
os.remove(filepath)
|
os.remove(filepath)
|
||||||
logging.debug(f'Removed original gzip file: {filepath}')
|
logging.debug(f'Removed original gzip file: {filepath}')
|
||||||
|
2
setup.py
2
setup.py
@ -11,7 +11,7 @@ with open('README.md', 'r', encoding='utf-8') as fh:
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='czds-api',
|
name='czds-api',
|
||||||
version='1.3.2',
|
version='1.3.8',
|
||||||
author='acidvegas',
|
author='acidvegas',
|
||||||
author_email='acid.vegas@acid.vegas',
|
author_email='acid.vegas@acid.vegas',
|
||||||
description='ICANN API for the Centralized Zones Data Service',
|
description='ICANN API for the Centralized Zones Data Service',
|
||||||
|
Loading…
Reference in New Issue
Block a user