Compare commits
28 Commits
Author | SHA1 | Date | |
---|---|---|---|
e8cc4e2ddb | |||
2a7b271d4f | |||
ff89f14a85 | |||
e9795a0177 | |||
551df89f3f | |||
9e76ed0684 | |||
656bd50e47 | |||
93d2bec204 | |||
ab147238ce | |||
8e84c3e224 | |||
6585a7b081 | |||
aea3ae6a6d | |||
fd5e9542f6 | |||
08ccea270d | |||
6047e0aed7 | |||
6cd0d267d1 | |||
2b7a0c5229 | |||
6f230396d3 | |||
edd1e130e6 | |||
2690f4e50b | |||
![]() |
9ee756e647 | ||
![]() |
1f59a7d20b | ||
![]() |
349dddb29d | ||
![]() |
e5d9d679df | ||
![]() |
afbf17c995 | ||
![]() |
42efe2b80a | ||
![]() |
bcb6546731 | ||
61639141a0 |
3
.gitignore
vendored
3
.gitignore
vendored
@ -4,4 +4,5 @@ logs/*
|
||||
*.log
|
||||
.log.
|
||||
*.egg-info
|
||||
dist/
|
||||
dist/
|
||||
zones/
|
||||
|
@ -28,19 +28,22 @@ czds [-h] [-u USERNAME] [-p PASSWORD] [-z] [-c CONCURRENCY] [-d] [-k] [-r] [-s]
|
||||
```
|
||||
|
||||
#### Arguments
|
||||
###### Basic Options
|
||||
| Argument | Description | Default |
|
||||
|-----------------------|----------------------------------------------|-------------------|
|
||||
| `-h`, `--help` | Show help message and exit | |
|
||||
| `-u`, `--username` | ICANN Username | `$CZDS_USER` |
|
||||
| `-p`, `--password` | ICANN Password | `$CZDS_PASS` |
|
||||
| `-o`, `--output` | Output directory | Current directory |
|
||||
|
||||
###### Zone Options
|
||||
| `-z`, `--zones` | Download zone files | |
|
||||
| `-c`, `--concurrency` | Number of concurrent downloads | `3` |
|
||||
| `-d`, `--decompress` | Decompress zone files after download | |
|
||||
| `-k`, `--keep` | Keep original gzip files after decompression | |
|
||||
|
||||
###### Report Options
|
||||
| `-r`, `--report` | Download the zone stats report | |
|
||||
| `-s`, `--scrub` | Scrub username from the report | |
|
||||
| `-f`, `--format` | Report output format (csv/json) | `csv` |
|
||||
| `-o`, `--output` | Output directory | Current directory |
|
||||
|
||||
### Environment Variables
|
||||
```bash
|
||||
|
@ -4,8 +4,7 @@
|
||||
|
||||
from .client import CZDS
|
||||
|
||||
|
||||
__version__ = '1.2.1'
|
||||
__version__ = '1.3.8'
|
||||
__author__ = 'acidvegas'
|
||||
__email__ = 'acid.vegas@acid.vegas'
|
||||
__github__ = 'https://github.com/acidvegas/czds'
|
@ -7,7 +7,6 @@ import asyncio
|
||||
import getpass
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
from .client import CZDS
|
||||
|
||||
@ -21,14 +20,12 @@ async def main():
|
||||
# Authentication
|
||||
parser.add_argument('-u', '--username', default=os.getenv('CZDS_USER'), help='ICANN Username')
|
||||
parser.add_argument('-p', '--password', default=os.getenv('CZDS_PASS'), help='ICANN Password')
|
||||
parser.add_argument('-o', '--output', default=os.getcwd(), help='Output directory')
|
||||
parser.add_argument('-o', '--output', default=os.getcwd(), help='Output directory')
|
||||
|
||||
# Zone download options
|
||||
zone_group = parser.add_argument_group('Zone download options')
|
||||
zone_group.add_argument('-z', '--zones', action='store_true', help='Download zone files')
|
||||
zone_group.add_argument('-c', '--concurrency', type=int, default=3, help='Number of concurrent downloads')
|
||||
zone_group.add_argument('-d', '--decompress', action='store_true', help='Decompress zone files after download')
|
||||
zone_group.add_argument('-k', '--keep', action='store_true', help='Keep the original gzip files after decompression')
|
||||
|
||||
# Report options
|
||||
report_group = parser.add_argument_group('Report options')
|
||||
@ -39,6 +36,7 @@ async def main():
|
||||
# Parse arguments
|
||||
args = parser.parse_args()
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
|
||||
# Get username and password
|
||||
@ -46,20 +44,25 @@ async def main():
|
||||
password = args.password or getpass.getpass('ICANN Password: ')
|
||||
|
||||
# Create output directory
|
||||
now = time.strftime('%Y-%m-%d')
|
||||
output_directory = os.path.join(args.output, 'zones', now)
|
||||
output_directory = os.path.join(args.output, 'zones')
|
||||
os.makedirs(output_directory, exist_ok=True)
|
||||
|
||||
logging.info('Authenticating with ICANN API...')
|
||||
|
||||
# Create the CZDS client
|
||||
async with CZDS(username, password) as client:
|
||||
# Download zone stats report if requested
|
||||
if args.report:
|
||||
logging.info('Fetching zone stats report...')
|
||||
try:
|
||||
# Create the report directory
|
||||
output = os.path.join(output_directory, '.report.csv')
|
||||
|
||||
# Download the report
|
||||
await client.get_report(output, scrub=args.scrub, format=args.format)
|
||||
|
||||
logging.info(f'Zone stats report saved to {output}')
|
||||
|
||||
return
|
||||
except Exception as e:
|
||||
raise Exception(f'Failed to download zone stats report: {e}')
|
||||
@ -68,15 +71,18 @@ async def main():
|
||||
if args.zones:
|
||||
logging.info('Downloading zone files...')
|
||||
try:
|
||||
await client.download_zones(output_directory, args.concurrency, decompress=args.decompress, cleanup=not args.keep)
|
||||
# Download the zone files
|
||||
await client.download_zones(output_directory, args.concurrency)
|
||||
except Exception as e:
|
||||
raise Exception(f'Failed to download zone files: {e}')
|
||||
|
||||
|
||||
def cli_entry():
|
||||
'''Synchronous entry point for console script'''
|
||||
|
||||
return asyncio.run(main())
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
285
czds/client.py
285
czds/client.py
@ -3,8 +3,11 @@
|
||||
# czds/client.py
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import gzip
|
||||
import csv
|
||||
import io
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
@ -16,6 +19,17 @@ try:
|
||||
except ImportError:
|
||||
raise ImportError('missing aiofiles library (pip install aiofiles)')
|
||||
|
||||
try:
|
||||
from tqdm import tqdm
|
||||
except ImportError:
|
||||
raise ImportError('missing tqdm library (pip install tqdm)')
|
||||
|
||||
from .utils import gzip_decompress, humanize_bytes
|
||||
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
|
||||
|
||||
|
||||
class CZDS:
|
||||
'''Class for the ICANN Centralized Zones Data Service'''
|
||||
@ -28,17 +42,37 @@ class CZDS:
|
||||
:param password: ICANN Password
|
||||
'''
|
||||
|
||||
# Set the username and password
|
||||
self.username = username
|
||||
self.password = password
|
||||
self.headers = None # Store the authorization header for reuse
|
||||
self.session = None # Store the client session for reuse
|
||||
|
||||
# Configure TCP keepalive
|
||||
connector = aiohttp.TCPConnector(
|
||||
keepalive_timeout=300, # Keep connections alive for 5 minutes
|
||||
force_close=False, # Don't force close connections
|
||||
enable_cleanup_closed=True, # Cleanup closed connections
|
||||
ttl_dns_cache=300, # Cache DNS results for 5 minutes
|
||||
)
|
||||
|
||||
# Set the session with longer timeouts and keepalive
|
||||
self.session = aiohttp.ClientSession(
|
||||
connector=connector,
|
||||
timeout=aiohttp.ClientTimeout(total=None, connect=60, sock_connect=60, sock_read=None),
|
||||
headers={'Connection': 'keep-alive'},
|
||||
raise_for_status=True
|
||||
)
|
||||
|
||||
# Placeholder for the headers after authentication
|
||||
self.headers = None
|
||||
|
||||
logging.info('Initialized CZDS client')
|
||||
|
||||
|
||||
async def __aenter__(self):
|
||||
'''Async context manager entry'''
|
||||
|
||||
self.session = aiohttp.ClientSession()
|
||||
self.headers = {'Authorization': f'Bearer {await self.authenticate()}'}
|
||||
# Authenticate with the ICANN API
|
||||
await self.authenticate()
|
||||
|
||||
return self
|
||||
|
||||
@ -46,149 +80,240 @@ class CZDS:
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
'''Async context manager exit'''
|
||||
|
||||
# Close the client session
|
||||
await self.close()
|
||||
|
||||
|
||||
async def close(self):
|
||||
'''Close the client session'''
|
||||
|
||||
# Close the client session if it exists
|
||||
if self.session:
|
||||
await self.session.close()
|
||||
logging.debug('Closed aiohttp session')
|
||||
|
||||
|
||||
async def authenticate(self) -> str:
|
||||
'''Authenticate with the ICANN API and return the access token'''
|
||||
|
||||
try:
|
||||
data = {'username': self.username, 'password': self.password}
|
||||
# Set the data to be sent to the API
|
||||
data = {'username': self.username, 'password': self.password}
|
||||
|
||||
async with self.session.post('https://account-api.icann.org/api/authenticate', json=data) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f'Authentication failed: {response.status} {await response.text()}')
|
||||
logging.info('Authenticating with ICANN API...')
|
||||
|
||||
result = await response.json()
|
||||
# Send the request to the API
|
||||
async with self.session.post('https://account-api.icann.org/api/authenticate', json=data) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f'Authentication failed: {response.status} {await response.text()}')
|
||||
|
||||
return result['accessToken']
|
||||
# Get the result from the API
|
||||
result = await response.json()
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f'Failed to authenticate with ICANN API: {e}')
|
||||
logging.info('Successfully authenticated with ICANN API')
|
||||
|
||||
# Set the headers for the API requests
|
||||
self.headers = {'Authorization': f'Bearer {result["accessToken"]}'}
|
||||
|
||||
return result['accessToken']
|
||||
|
||||
|
||||
async def fetch_zone_links(self) -> list:
|
||||
'''Fetch the list of zone files available for download'''
|
||||
|
||||
logging.info('Fetching zone file links...')
|
||||
|
||||
# Send the request to the API
|
||||
async with self.session.get('https://czds-api.icann.org/czds/downloads/links', headers=self.headers) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f'Failed to fetch zone links: {response.status} {await response.text()}')
|
||||
|
||||
return await response.json()
|
||||
# Get the result from the API
|
||||
links = await response.json()
|
||||
|
||||
logging.info(f'Successfully fetched {len(links):,} zone links')
|
||||
|
||||
return links
|
||||
|
||||
|
||||
async def get_report(self, filepath: str = None, scrub: bool = True, format: str = 'csv') -> str | dict:
|
||||
async def get_report(self, filepath: str = None, format: str = 'csv') -> str | dict:
|
||||
'''
|
||||
Downloads the zone report stats from the API and scrubs the report for privacy
|
||||
|
||||
:param filepath: Filepath to save the scrubbed report
|
||||
:param scrub: Whether to scrub the username from the report
|
||||
:param format: Output format ('csv' or 'json')
|
||||
:return: Report content as CSV string or JSON dict
|
||||
'''
|
||||
logging.info('Downloading zone stats report')
|
||||
|
||||
# Send the request to the API
|
||||
async with self.session.get('https://czds-api.icann.org/czds/requests/report', headers=self.headers) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f'Failed to download the zone stats report: {response.status} {await response.text()}')
|
||||
|
||||
# Get the content of the report
|
||||
content = await response.text()
|
||||
|
||||
if scrub:
|
||||
content = content.replace(self.username, 'nobody@no.name')
|
||||
# Scrub the username from the report
|
||||
content = content.replace(self.username, 'nobody@no.name')
|
||||
logging.debug('Scrubbed username from report')
|
||||
|
||||
# Convert the report to JSON format if requested
|
||||
if format.lower() == 'json':
|
||||
rows = [row.split(',') for row in content.strip().split('\n')]
|
||||
header = rows[0]
|
||||
content = [dict(zip(header, row)) for row in rows[1:]]
|
||||
# Parse CSV content
|
||||
csv_reader = csv.DictReader(io.StringIO(content))
|
||||
|
||||
# Convert to list of dicts with formatted keys
|
||||
json_data = []
|
||||
for row in csv_reader:
|
||||
formatted_row = {
|
||||
key.lower().replace(' ', '_'): value
|
||||
for key, value in row.items()
|
||||
}
|
||||
json_data.append(formatted_row)
|
||||
|
||||
content = json.dumps(json_data, indent=4)
|
||||
logging.debug('Converted report to JSON format')
|
||||
|
||||
# Save the report to a file if a filepath is provided
|
||||
if filepath:
|
||||
async with aiofiles.open(filepath, 'w') as file:
|
||||
if format.lower() == 'json':
|
||||
import json
|
||||
await file.write(json.dumps(content, indent=4))
|
||||
else:
|
||||
await file.write(content)
|
||||
await file.write(content)
|
||||
logging.info(f'Saved report to {filepath}')
|
||||
|
||||
return content
|
||||
|
||||
|
||||
async def gzip_decompress(self, filepath: str, cleanup: bool = True):
|
||||
'''
|
||||
Decompress a gzip file in place
|
||||
|
||||
:param filepath: Path to the gzip file
|
||||
:param cleanup: Whether to remove the original gzip file after decompression
|
||||
'''
|
||||
|
||||
output_path = filepath[:-3] # Remove .gz extension
|
||||
|
||||
async with aiofiles.open(filepath, 'rb') as f_in:
|
||||
content = await f_in.read()
|
||||
with gzip.open(content, 'rb') as gz:
|
||||
async with aiofiles.open(output_path, 'wb') as f_out:
|
||||
await f_out.write(gz.read())
|
||||
|
||||
if cleanup:
|
||||
os.remove(filepath)
|
||||
|
||||
|
||||
async def download_zone(self, url: str, output_directory: str, decompress: bool = False, cleanup: bool = True, semaphore: asyncio.Semaphore = None):
|
||||
async def download_zone(self, url: str, output_directory: str, semaphore: asyncio.Semaphore):
|
||||
'''
|
||||
Download a single zone file
|
||||
|
||||
:param url: URL to download
|
||||
:param output_directory: Directory to save the zone file
|
||||
:param decompress: Whether to decompress the gzip file after download
|
||||
:param cleanup: Whether to remove the original gzip file after decompression
|
||||
:param semaphore: Optional semaphore for controlling concurrency
|
||||
'''
|
||||
|
||||
|
||||
async def _download():
|
||||
async with self.session.get(url, headers=self.headers) as response:
|
||||
if response.status != 200:
|
||||
raise Exception(f'Failed to download {url}: {response.status} {await response.text()}')
|
||||
tld_name = url.split('/')[-1].split('.')[0] # Extract TLD from URL
|
||||
max_retries = 20 # Maximum number of retries for failed downloads
|
||||
retry_delay = 5 # Delay between retries in seconds
|
||||
|
||||
# Headers for better connection stability
|
||||
download_headers = {
|
||||
**self.headers,
|
||||
'Connection': 'keep-alive',
|
||||
'Keep-Alive': 'timeout=600', # 10 minutes
|
||||
'Accept-Encoding': 'gzip'
|
||||
}
|
||||
|
||||
if not (content_disposition := response.headers.get('Content-Disposition')):
|
||||
raise ValueError('Missing Content-Disposition header')
|
||||
# Start the attempt loop
|
||||
for attempt in range(max_retries):
|
||||
try:
|
||||
logging.info(f'Starting download of {tld_name} zone file{" (attempt " + str(attempt + 1) + ")" if attempt > 0 else ""}')
|
||||
|
||||
filename = content_disposition.split('filename=')[-1].strip('"')
|
||||
filepath = os.path.join(output_directory, filename)
|
||||
# Send the request to the API
|
||||
async with self.session.get(url, headers=download_headers) as response:
|
||||
# Check if the request was successful
|
||||
if response.status != 200:
|
||||
logging.error(f'Failed to download {tld_name}: {response.status} {await response.text()}')
|
||||
|
||||
async with aiofiles.open(filepath, 'wb') as file:
|
||||
while True:
|
||||
chunk = await response.content.read(8192)
|
||||
if not chunk:
|
||||
break
|
||||
await file.write(chunk)
|
||||
# Retry the download if there are more attempts
|
||||
if attempt + 1 < max_retries:
|
||||
logging.info(f'Retrying {tld_name} in {retry_delay:,} seconds...')
|
||||
await asyncio.sleep(retry_delay)
|
||||
continue
|
||||
|
||||
if decompress:
|
||||
await self.gzip_decompress(filepath, cleanup)
|
||||
filepath = filepath[:-3] # Remove .gz extension
|
||||
raise Exception(f'Failed to download {tld_name}: {response.status} {await response.text()}')
|
||||
|
||||
return filepath
|
||||
# Get expected file size from headers
|
||||
if not (expected_size := int(response.headers.get('Content-Length', 0))):
|
||||
raise ValueError(f'Missing Content-Length header for {tld_name}')
|
||||
|
||||
if semaphore:
|
||||
async with semaphore:
|
||||
return await _download()
|
||||
else:
|
||||
# Check if the Content-Disposition header is present
|
||||
if not (content_disposition := response.headers.get('Content-Disposition')):
|
||||
raise ValueError(f'Missing Content-Disposition header for {tld_name}')
|
||||
|
||||
# Extract the filename from the Content-Disposition header
|
||||
filename = content_disposition.split('filename=')[-1].strip('"')
|
||||
|
||||
# Create the filepath
|
||||
filepath = os.path.join(output_directory, filename)
|
||||
|
||||
# Create a progress bar to track the download
|
||||
with tqdm(total=expected_size, unit='B', unit_scale=True, desc=f'Downloading {tld_name}', leave=False) as pbar:
|
||||
# Open the file for writing
|
||||
async with aiofiles.open(filepath, 'wb') as file:
|
||||
# Initialize the total size for tracking
|
||||
total_size = 0
|
||||
|
||||
# Write the chunk to the file
|
||||
try:
|
||||
async for chunk in response.content.iter_chunked(8192):
|
||||
await file.write(chunk)
|
||||
total_size += len(chunk)
|
||||
pbar.update(len(chunk))
|
||||
except Exception as e:
|
||||
logging.error(f'Connection error while downloading {tld_name}: {str(e)}')
|
||||
if attempt + 1 < max_retries:
|
||||
logging.info(f'Retrying {tld_name} in {retry_delay} seconds...')
|
||||
await asyncio.sleep(retry_delay)
|
||||
continue
|
||||
raise
|
||||
|
||||
# Verify file size
|
||||
if expected_size and total_size != expected_size:
|
||||
error_msg = f'Incomplete download for {tld_name}: Got {humanize_bytes(total_size)}, expected {humanize_bytes(expected_size)}'
|
||||
logging.error(error_msg)
|
||||
os.remove(filepath)
|
||||
if attempt + 1 < max_retries:
|
||||
logging.info(f'Retrying {tld_name} in {retry_delay} seconds...')
|
||||
await asyncio.sleep(retry_delay)
|
||||
continue
|
||||
raise Exception(error_msg)
|
||||
|
||||
logging.info(f'Successfully downloaded {tld_name} zone file ({humanize_bytes(total_size)})')
|
||||
|
||||
await gzip_decompress(filepath)
|
||||
filepath = filepath[:-3]
|
||||
logging.info(f'Decompressed {tld_name} zone file')
|
||||
|
||||
return filepath
|
||||
|
||||
except Exception as e:
|
||||
if attempt + 1 >= max_retries:
|
||||
logging.error(f'Failed to download {tld_name} after {max_retries} attempts: {str(e)}')
|
||||
if 'filepath' in locals() and os.path.exists(filepath):
|
||||
os.remove(filepath)
|
||||
raise
|
||||
logging.warning(f'Download attempt {attempt + 1} failed for {tld_name}: {str(e)}')
|
||||
await asyncio.sleep(retry_delay)
|
||||
|
||||
async with semaphore:
|
||||
return await _download()
|
||||
|
||||
|
||||
async def download_zones(self, output_directory: str, concurrency: int, decompress: bool = False, cleanup: bool = True):
|
||||
async def download_zones(self, output_directory: str, concurrency: int):
|
||||
'''
|
||||
Download multiple zone files concurrently
|
||||
|
||||
:param output_directory: Directory to save the zone files
|
||||
:param concurrency: Number of concurrent downloads
|
||||
:param decompress: Whether to decompress the gzip files after download
|
||||
:param cleanup: Whether to remove the original gzip files after decompression
|
||||
'''
|
||||
|
||||
|
||||
# Create the output directory if it doesn't exist
|
||||
os.makedirs(output_directory, exist_ok=True)
|
||||
|
||||
semaphore = asyncio.Semaphore(concurrency)
|
||||
|
||||
# Get the zone links
|
||||
zone_links = await self.fetch_zone_links()
|
||||
tasks = [self.download_zone(url, output_directory, decompress, cleanup, semaphore) for url in zone_links]
|
||||
zone_links.sort() # Sort the zone alphabetically for better tracking
|
||||
|
||||
# Create a semaphore to limit the number of concurrent downloads
|
||||
semaphore = asyncio.Semaphore(concurrency)
|
||||
|
||||
logging.info(f'Downloading {len(zone_links):,} zone files...')
|
||||
|
||||
# Create a list of tasks to download the zone files
|
||||
tasks = [self.download_zone(url, output_directory, semaphore) for url in zone_links]
|
||||
|
||||
# Run the tasks concurrently
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
logging.info(f'Completed downloading {len(zone_links):,} zone files')
|
76
czds/utils.py
Normal file
76
czds/utils.py
Normal file
@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env python3
|
||||
# ICANN API for the Centralized Zones Data Service - developed by acidvegas (https://git.acid.vegas/czds)
|
||||
# czds/utils.py
|
||||
|
||||
import asyncio
|
||||
import gzip
|
||||
import logging
|
||||
import os
|
||||
|
||||
try:
|
||||
import aiofiles
|
||||
except ImportError:
|
||||
raise ImportError('missing aiofiles library (pip install aiofiles)')
|
||||
|
||||
try:
|
||||
from tqdm import tqdm
|
||||
except ImportError:
|
||||
raise ImportError('missing tqdm library (pip install tqdm)')
|
||||
|
||||
|
||||
async def gzip_decompress(filepath: str, cleanup: bool = True):
    '''
    Decompress a gzip file in place

    The decompressed data is written next to the archive, at the same path with
    its last three characters (the expected ``.gz`` suffix) removed.

    :param filepath: Path to the gzip file
    :param cleanup: Whether to remove the original gzip file after decompression
    '''

    original_size = os.path.getsize(filepath)
    output_path = filepath[:-3] # Strip the trailing '.gz'

    logging.debug(f'Decompressing {filepath} ({humanize_bytes(original_size)})...')

    # Use a large chunk size (256MB) for maximum throughput
    chunk_size = 256 * 1024 * 1024

    # Inside a coroutine the running loop is the one to schedule executor work on
    loop = asyncio.get_running_loop()

    with tqdm(total=original_size, unit='B', unit_scale=True, desc=f'Decompressing {os.path.basename(filepath)}', leave=False) as pbar:
        async with aiofiles.open(output_path, 'wb') as f_out:
            # Open the archive ourselves so its read position can drive the progress bar
            with open(filepath, 'rb') as raw, gzip.GzipFile(fileobj=raw, mode='rb') as gz:
                consumed = 0 # Compressed bytes already reported to the progress bar

                while True:
                    # Run gzip decompression in the default executor so the event loop is not blocked
                    chunk = await loop.run_in_executor(None, gz.read, chunk_size)

                    if not chunk:
                        break

                    await f_out.write(chunk)

                    # The bar's total is the compressed size, so advance it by the
                    # compressed bytes consumed rather than the decompressed chunk length
                    position = raw.tell()
                    pbar.update(position - consumed)
                    consumed = position

    decompressed_size = os.path.getsize(output_path)
    logging.debug(f'Decompressed {filepath} ({humanize_bytes(decompressed_size)})')

    if cleanup:
        os.remove(filepath)
        logging.debug(f'Removed original gzip file: {filepath}')
|
||||
|
||||
|
||||
def humanize_bytes(bytes: int) -> str:
    '''
    Humanize a number of bytes

    The value is repeatedly divided by 1024 until it drops below 1024 or the
    largest unit (YB) is reached. Plain byte counts are returned unformatted
    (e.g. ``'512 B'``); every larger unit is shown with two decimals
    (e.g. ``'1.50 KB'``).

    :param bytes: The number of bytes to humanize
    :return: The size followed by a space and its unit
    '''

    # List of units
    units = ('B','KB','MB','GB','TB','PB','EB','ZB','YB')

    # Work on a local copy (the parameter name is kept for keyword callers)
    value = bytes

    # Only scale through the units that have a larger successor; the final unit
    # is handled after the loop so the value is never divided past it
    for unit in units[:-1]:
        # If the value is less than 1024, return it with the current unit
        if value < 1024:
            return f'{value:.2f} {unit}' if unit != 'B' else f'{value} {unit}'

        # Move up to the next unit
        value /= 1024

    # Anything left is expressed in the largest unit, however big it is
    return f'{value:.2f} {units[-1]}'
|
@ -1,117 +0,0 @@
|
||||
Metadata-Version: 2.2
|
||||
Name: czds-api
|
||||
Version: 1.2.0
|
||||
Summary: ICANN API for the Centralized Zones Data Service
|
||||
Home-page: https://github.com/acidvegas/czds
|
||||
Author: acidvegas
|
||||
Author-email: acid.vegas@acid.vegas
|
||||
Project-URL: Bug Tracker, https://github.com/acidvegas/czds/issues
|
||||
Project-URL: Documentation, https://github.com/acidvegas/czds#readme
|
||||
Project-URL: Source Code, https://github.com/acidvegas/czds
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: License :: OSI Approved :: ISC License (ISCL)
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3.6
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Topic :: Internet
|
||||
Classifier: Topic :: Security
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Requires-Python: >=3.6
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE
|
||||
Requires-Dist: aiohttp>=3.8.0
|
||||
Requires-Dist: aiofiles>=23.2.1
|
||||
Dynamic: author
|
||||
Dynamic: author-email
|
||||
Dynamic: classifier
|
||||
Dynamic: description
|
||||
Dynamic: description-content-type
|
||||
Dynamic: home-page
|
||||
Dynamic: project-url
|
||||
Dynamic: requires-dist
|
||||
Dynamic: requires-python
|
||||
Dynamic: summary
|
||||
|
||||
# ICANN Centralized Zone Data Service API
|
||||
|
||||
The [ICANN Centralized Zone Data Service](https://czds.icann.org) *(CZDS)* allows *approved* users to request and download DNS zone files in bulk, provided they represent a legitimate company or academic institution and their intended use is legal and ethical. Once ICANN approves the request, this tool streamlines the retrieval of extensive domain name system data, facilitating research and security analysis in the realm of internet infrastructure.
|
||||
|
||||
## Features
|
||||
* Asynchronous downloads with configurable concurrency
|
||||
* Support for both CSV and JSON report formats
|
||||
* Optional gzip decompression of zone files
|
||||
* Environment variable support for credentials
|
||||
* Comprehensive error handling and logging
|
||||
|
||||
## Zone Information
|
||||
Zone files are updated once every 24 hours, specifically from 00:00 UTC to 06:00 UTC. Access to these zones is granted in increments, and the total time for approval across all zones may extend to a month or longer. It is typical for more than 90% of requested zones to receive approval. Access to certain zone files may require additional application forms with the TLD organization. Please be aware that access to certain zones is time-bound, expiring at the beginning of the following year, or up to a decade after the initial approval has been confirmed.
|
||||
|
||||
At the time of writing this repository, the CZDS offers access to 1,151 zones in total.
|
||||
|
||||
1,079 have been approved, 55 are still pending *(after 3 months)*, 10 have been revoked because the TLDs are no longer active, and 6 have been denied. Zones that have expired automatically had the expiration extended for me without doing anything, aside from 13 zones that remained expired. I have included a recent [stats file](./extras/stats.csv) directly from my ICANN account.
|
||||
|
||||
## Installation
|
||||
```bash
|
||||
pip install czds-api
|
||||
```
|
||||
|
||||
## Usage
|
||||
### Command Line Interface
|
||||
```bash
|
||||
czds [-h] [-u USERNAME] [-p PASSWORD] [-z] [-c CONCURRENCY] [-d] [-k] [-r] [-s] [-f {csv,json}] [-o OUTPUT]
|
||||
```
|
||||
|
||||
#### Arguments
|
||||
| Argument | Description | Default |
|
||||
|-----------------------|----------------------------------------------|-------------------|
|
||||
| `-h`, `--help` | Show help message and exit | |
|
||||
| `-u`, `--username` | ICANN Username | `$CZDS_USER` |
|
||||
| `-p`, `--password` | ICANN Password | `$CZDS_PASS` |
|
||||
| `-z`, `--zones` | Download zone files | |
|
||||
| `-c`, `--concurrency` | Number of concurrent downloads | `3` |
|
||||
| `-d`, `--decompress` | Decompress zone files after download | |
|
||||
| `-k`, `--keep` | Keep original gzip files after decompression | |
|
||||
| `-r`, `--report` | Download the zone stats report | |
|
||||
| `-s`, `--scrub` | Scrub username from the report | |
|
||||
| `-f`, `--format` | Report output format (csv/json) | `csv` |
|
||||
| `-o`, `--output` | Output directory | Current directory |
|
||||
|
||||
### Environment Variables
|
||||
```bash
|
||||
export CZDS_USER='your_username'
|
||||
export CZDS_PASS='your_password'
|
||||
```
|
||||
|
||||
### Python Module
|
||||
```python
|
||||
import os
|
||||
from czds import CZDS
|
||||
|
||||
async with CZDS(username, password) as client:
|
||||
# Download zone stats report
|
||||
await client.get_report('report.csv', scrub=True, format='json')
|
||||
|
||||
# Download zone files
|
||||
zone_links = await client.fetch_zone_links()
|
||||
await client.download_zones(zone_links, 'zones', concurrency=3, decompress=True)
|
||||
```
|
||||
|
||||
## Zone Information
|
||||
Zone files are updated once every 24 hours, specifically from 00:00 UTC to 06:00 UTC. Access to these zones is granted in increments, and the total time for approval across all zones may extend to a month or longer. It is typical for more than 90% of requested zones to receive approval. Access to certain zone files may require additional application forms with the TLD organization. Please be aware that access to certain zones is time-bound, expiring at the beginning of the following year, or up to a decade after the initial approval has been confirmed.
|
||||
|
||||
At the time of writing this repository, the CZDS offers access to 1,151 zones in total.
|
||||
|
||||
1,079 have been approved, 55 are still pending *(after 3 months)*, 10 have been revoked because the TLDs are no longer active, and 6 have been denied. Zones that have expired automatically had the expiration extended for me without doing anything, aside from 13 zones that remained expired. I have included a recent [stats file](./extras/stats.csv) directly from my ICANN account.
|
||||
|
||||
## Respects & extras
|
||||
While ICANN does have an official [czds-api-client-python](https://github.com/icann/czds-api-client-python) repository, I rewrote it from scratch to be more streamlined & included a [POSIX version](./extras/czds) for portability. There is some [official documentation](https://raw.githubusercontent.com/icann/czds-api-client-java/master/docs/ICANN_CZDS_api.pdf) that was referenced in the creation of the POSIX version. Either way, big props to ICANN for allowing me to use the CZDS for research purposes!
|
||||
|
||||
___
|
||||
|
||||
###### Mirrors for this repository: [acid.vegas](https://git.acid.vegas/czds) • [SuperNETs](https://git.supernets.org/acidvegas/czds) • [GitHub](https://github.com/acidvegas/czds) • [GitLab](https://gitlab.com/acidvegas/czds) • [Codeberg](https://codeberg.org/acidvegas/czds)
|
@ -1,12 +0,0 @@
|
||||
LICENSE
|
||||
README.md
|
||||
setup.py
|
||||
czds/__init__.py
|
||||
czds/__main__.py
|
||||
czds/client.py
|
||||
czds_api.egg-info/PKG-INFO
|
||||
czds_api.egg-info/SOURCES.txt
|
||||
czds_api.egg-info/dependency_links.txt
|
||||
czds_api.egg-info/entry_points.txt
|
||||
czds_api.egg-info/requires.txt
|
||||
czds_api.egg-info/top_level.txt
|
@ -1 +0,0 @@
|
||||
|
@ -1,2 +0,0 @@
|
||||
[console_scripts]
|
||||
czds = czds.__main__:main
|
@ -1 +0,0 @@
|
||||
czds
|
BIN
dist/czds_api-1.0.0-py3-none-any.whl
vendored
BIN
dist/czds_api-1.0.0-py3-none-any.whl
vendored
Binary file not shown.
BIN
dist/czds_api-1.0.0.tar.gz
vendored
BIN
dist/czds_api-1.0.0.tar.gz
vendored
Binary file not shown.
@ -1,2 +1,3 @@
|
||||
aiohttp
|
||||
aiofiles
|
||||
aiofiles
|
||||
tqdm
|
Loading…
Reference in New Issue
Block a user