diff --git a/czds/__init__.py b/czds/__init__.py index 9da2243..3d36416 100644 --- a/czds/__init__.py +++ b/czds/__init__.py @@ -5,7 +5,7 @@ from .client import CZDS -__version__ = '1.1.0' +__version__ = '1.2.0' __author__ = 'acidvegas' __email__ = 'acid.vegas@acid.vegas' -__github__ = 'https://github.com/acidvegas/czds' \ No newline at end of file +__github__ = 'https://github.com/acidvegas/czds' diff --git a/czds/__main__.py b/czds/__main__.py index a7949d9..ce5aeef 100644 --- a/czds/__main__.py +++ b/czds/__main__.py @@ -21,20 +21,20 @@ async def main(): # Authentication parser.add_argument('-u', '--username', default=os.getenv('CZDS_USER'), help='ICANN Username') parser.add_argument('-p', '--password', default=os.getenv('CZDS_PASS'), help='ICANN Password') + parser.add_argument('-o', '--output', default=os.getcwd(), help='Output directory') # Zone download options - parser.add_argument('-z', '--zones', action='store_true', help='Download zone files') - parser.add_argument('-c', '--concurrency', type=int, default=3, help='Number of concurrent downloads') - parser.add_argument('-d', '--decompress', action='store_true', help='Decompress zone files after download') - parser.add_argument('-k', '--keep', action='store_true', help='Keep the original gzip files after decompression') + zone_group = parser.add_argument_group('Zone download options') + zone_group.add_argument('-z', '--zones', action='store_true', help='Download zone files') + zone_group.add_argument('-c', '--concurrency', type=int, default=3, help='Number of concurrent downloads') + zone_group.add_argument('-d', '--decompress', action='store_true', help='Decompress zone files after download') + zone_group.add_argument('-k', '--keep', action='store_true', help='Keep the original gzip files after decompression') # Report options - parser.add_argument('-r', '--report', action='store_true', help='Download the zone stats report') - parser.add_argument('-s', '--scrub', action='store_true', help='Scrub the 
username from the report') - parser.add_argument('-f', '--format', choices=['csv', 'json'], default='csv', help='Report output format') - - # Output options - parser.add_argument('-o', '--output', default=os.getcwd(), help='Output directory') + report_group = parser.add_argument_group('Report options') + report_group.add_argument('-r', '--report', action='store_true', help='Download the zone stats report') + report_group.add_argument('-s', '--scrub', action='store_true', help='Scrub the username from the report') + report_group.add_argument('-f', '--format', choices=['csv', 'json'], default='csv', help='Report output format') # Parse arguments args = parser.parse_args() @@ -66,14 +66,11 @@ async def main(): # Download zone files if requested if args.zones: - logging.info('Fetching zone links...') + logging.info('Downloading zone files...') try: - zone_links = await client.fetch_zone_links() + await client.download_zones(output_directory, args.concurrency, decompress=args.decompress, cleanup=not args.keep) except Exception as e: - raise Exception(f'Failed to fetch zone links: {e}') - - logging.info(f'Downloading {len(zone_links):,} zone files...') - await client.download_zones(zone_links, output_directory, args.concurrency, decompress=args.decompress, cleanup=not args.keep) + raise Exception(f'Failed to download zone files: {e}') diff --git a/czds/client.py b/czds/client.py index 9e872a7..123f8d0 100644 --- a/czds/client.py +++ b/czds/client.py @@ -175,11 +175,10 @@ class CZDS: return await _download() - async def download_zones(self, zone_links: list, output_directory: str, concurrency: int, decompress: bool = False, cleanup: bool = True): + async def download_zones(self, output_directory: str, concurrency: int, decompress: bool = False, cleanup: bool = True): ''' Download multiple zone files concurrently - :param zone_links: List of zone URLs to download :param output_directory: Directory to save the zone files :param concurrency: Number of concurrent downloads 
:param decompress: Whether to decompress the gzip files after download @@ -188,7 +187,8 @@ class CZDS: os.makedirs(output_directory, exist_ok=True) - semaphore = asyncio.Semaphore(concurrency) - tasks = [self.download_zone(url, output_directory, decompress, cleanup, semaphore) for url in zone_links] + semaphore = asyncio.Semaphore(concurrency) + zone_links = await self.fetch_zone_links() + tasks = [self.download_zone(url, output_directory, decompress, cleanup, semaphore) for url in zone_links] await asyncio.gather(*tasks) diff --git a/czds_api.egg-info/PKG-INFO b/czds_api.egg-info/PKG-INFO index 10348a6..b4463ad 100644 --- a/czds_api.egg-info/PKG-INFO +++ b/czds_api.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.2 Name: czds-api -Version: 1.0.1 +Version: 1.1.0 Summary: ICANN API for the Centralized Zones Data Service Home-page: https://github.com/acidvegas/czds Author: acidvegas @@ -25,6 +25,8 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules Requires-Python: >=3.6 Description-Content-Type: text/markdown License-File: LICENSE +Requires-Dist: aiohttp>=3.8.0 +Requires-Dist: aiofiles>=23.2.1 Dynamic: author Dynamic: author-email Dynamic: classifier @@ -32,6 +34,7 @@ Dynamic: description Dynamic: description-content-type Dynamic: home-page Dynamic: project-url +Dynamic: requires-dist Dynamic: requires-python Dynamic: summary @@ -39,6 +42,13 @@ Dynamic: summary The [ICANN Centralized Zone Data Service](https://czds.icann.org) *(CZDS)* allows *approved* users to request and download DNS zone files in bulk, provided they represent a legitimate company or academic institution and their intended use is legal and ethical. Once ICANN approves the request, this tool streamlines the retrieval of extensive domain name system data, facilitating research and security analysis in the realm of internet infrastructure. 
+## Features +* Asynchronous downloads with configurable concurrency +* Support for both CSV and JSON report formats +* Optional gzip decompression of zone files +* Environment variable support for credentials +* Comprehensive error handling and logging + ## Zone Information Zone files are updated once every 24 hours, specifically from 00:00 UTC to 06:00 UTC. Access to these zones is granted in increments, and the total time for approval across all zones may extend to a month or longer. It is typical for more than 90% of requested zones to receive approval. Access to certain zone files may require additional application forms with the TLD organization. Please be aware that access to certain zones is time-bound, expiring at the beginning of the following year, or up to a decade after the initial approval has been confirmed. @@ -52,36 +62,53 @@ pip install czds-api ``` ## Usage -###### Command line +### Command Line Interface ```bash -czds [--username --password ] [--concurrency ] +czds [-h] [-u USERNAME] [-p PASSWORD] [-z] [-c CONCURRENCY] [-d] [-k] [-r] [-s] [-f {csv,json}] [-o OUTPUT] ``` -You can also set the `CZDS_USER` & `CZDS_PASS` environment variables to automatically authenticate: +#### Arguments +| Argument | Description | Default | +|-----------------------|----------------------------------------------|-------------------| +| `-h`, `--help` | Show help message and exit | | +| `-u`, `--username` | ICANN Username | `$CZDS_USER` | +| `-p`, `--password` | ICANN Password | `$CZDS_PASS` | +| `-z`, `--zones` | Download zone files | | +| `-c`, `--concurrency` | Number of concurrent downloads | `3` | +| `-d`, `--decompress` | Decompress zone files after download | | +| `-k`, `--keep` | Keep original gzip files after decompression | | +| `-r`, `--report` | Download the zone stats report | | +| `-s`, `--scrub` | Scrub username from the report | | +| `-f`, `--format` | Report output format (csv/json) | `csv` | +| `-o`, `--output` | Output directory | Current 
directory | +### Environment Variables ```bash export CZDS_USER='your_username' export CZDS_PASS='your_password' ``` -###### As a Python module +### Python Module ```python import os - from czds import CZDS -CZDS_client = CZDS(username, password) - -CZDS_client.download_report('report.csv') - -zone_links = CZDS_client.fetch_zone_links() - -os.makedirs('zones', exist_ok=True) - -for zone_link in zone_links: - CZDS_client.download_zone(zone_link, 'zones') +async with CZDS(username, password) as client: + # Download zone stats report + await client.get_report('report.csv', scrub=True, format='json') + + # Download zone files + zone_links = await client.fetch_zone_links() + await client.download_zones(zone_links, 'zones', concurrency=3, decompress=True) ``` +## Zone Information +Zone files are updated once every 24 hours, specifically from 00:00 UTC to 06:00 UTC. Access to these zones is granted in increments, and the total time for approval across all zones may extend to a month or longer. It is typical for more than 90% of requested zones to receive approval. Access to certain zone files may require additional application forms with the TLD organization. Please be aware that access to certain zones is time-bound, expiring at the beginning of the following year, or up to a decade after the initial approval has been confirmed. + +At the time of writing this repository, the CZDS offers access to 1,151 zones in total. + +1,079 have been approved, 55 are still pending *(after 3 months)*, 10 have been revoked because the TLDs are no longer active, and 6 have been denied. Zones that have expired automatically had the expiration extended for me without doing anything, aside from 13 zones that remained expired. I have included a recent [stats file](./extras/stats.csv) directly from my ICANN account. 
+ ## Respects & extras While ICANN does have an official [czds-api-client-python](https://github.com/icann/czds-api-client-python) repository, I rewrote it from scratch to be more streamline & included a [POSIX version](./extras/czds) for portability. There is some [official documentation](https://raw.githubusercontent.com/icann/czds-api-client-java/master/docs/ICANN_CZDS_api.pdf) that was referenced in the creation of the POSIX version. Either way, big props to ICANN for allowing me to use the CZDS for research purposes! diff --git a/czds_api.egg-info/SOURCES.txt b/czds_api.egg-info/SOURCES.txt index 4b466c0..a693383 100644 --- a/czds_api.egg-info/SOURCES.txt +++ b/czds_api.egg-info/SOURCES.txt @@ -8,4 +8,5 @@ czds_api.egg-info/PKG-INFO czds_api.egg-info/SOURCES.txt czds_api.egg-info/dependency_links.txt czds_api.egg-info/entry_points.txt +czds_api.egg-info/requires.txt czds_api.egg-info/top_level.txt \ No newline at end of file diff --git a/setup.py b/setup.py index 633a271..a32e796 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ with open('README.md', 'r', encoding='utf-8') as fh: setup( name='czds-api', - version='1.1.0', + version='1.2.0', author='acidvegas', author_email='acid.vegas@acid.vegas', description='ICANN API for the Centralized Zones Data Service',