From 542573a469eaebea6f7dcced6d554d8047feb2be Mon Sep 17 00:00:00 2001
From: acidvegas <acid.vegas@acid.vegas>
Date: Tue, 5 Mar 2024 21:10:57 -0500
Subject: [PATCH] Updated both the Python and POSIX versions with minor
 improvements and restructured code. Both scripts now download a stats report
 from ICANN prior to downloading zones.

---
 czds                              |  89 +++++++++---------
 czds.py                           | 144 +++++++++++++++++++++---------
 stats_2024-01-31.csv => stats.csv |   0
 3 files changed, 146 insertions(+), 87 deletions(-)
 rename stats_2024-01-31.csv => stats.csv (100%)

diff --git a/czds b/czds
index 999a1b2..3bccede 100755
--- a/czds
+++ b/czds
@@ -2,60 +2,63 @@
 # ICANN API for the Centralized Zones Data Service - developed by acidvegas (https://git.acid.vegas/czds)
 # Reference: https://czds.icann.org
 
-# Function to authenticate and get access token
-authenticate() {
-	username="$1"
-	password="$2"
-	# Make an authentication request and inline the URL
-	response=$(curl -s -X POST "https://account-api.icann.org/api/authenticate" \
-		-H "Content-Type: application/json" \
-		-H "Accept: application/json" \
-		-d "{\"username\":\"$username\",\"password\":\"$password\"}")
-
-	# Extract and return the access token
-	echo "$response" | grep -o '"accessToken":"[^"]*' | cut -d '"' -f 4
-}
-
-# Function to download a zone file
-download_zone() {
-	url="$1"
-	token="$2"
-	tld=$(basename "$url" .zone)
-
-
-	# Make the GET request and save the response to a file
-	echo "Downloading $url..."
-	curl --progress-bar -o zonefiles/$tld.txt.gz -H "Authorization: Bearer $token" "$url"
-	echo "Downloaded zone file to zonefiles/$tld.txt.gz"
-}
-
 # Main program starts here
 echo "ICANN Zone Data Service Script"
 
-# Get username and password
-username=${CZDS_USER:-$(read -p "ICANN Username: " user && echo "$user")}
-password=${CZDS_PASS:-$(read -sp "ICANN Password: " pass && echo "$pass" && echo)}
+# Define the current date for data organization
+now=$(date +"%Y-%m-%d")
 
-# Authenticate and get token
-echo "Authenticating..."
-token=$(authenticate "$username" "$password")
+# Get username and password (interactive if not set by environment variables)
+username=${CZDS_USER:-$(read -p  "ICANN Username: " user && echo "$user")}
+password=${CZDS_PASS:-$(read -sp "ICANN Password: " pass && echo "$pass")}
+
+echo "Authenticating as $username..."
+
+# Make an authentication request
+response=$(curl -s -X POST "https://account-api.icann.org/api/authenticate" \
+	-H "Content-Type: application/json" \
+	-H "Accept: application/json" \
+	-d "{\"username\":\"$username\",\"password\":\"$password\"}")
+
+# Extract the access token from the authentication response
+access_token=$(echo "$response" | grep -o '"accessToken":"[^"]*' | cut -d '"' -f 4)
 
 # Check if authentication was successful
-if [ -z "$token" ]; then
-	echo "Authentication failed."
-	exit 1
-fi
+[ -z "$access_token" ] && echo "error: authentication failed" && exit 1
+
+echo "Authenticated successfully & received access token"
+
+# Create output directory
+mkdir -p zonefiles/$now
+
+echo "Fetching zone report..."
+
+# Get your zone report stats from the API
+curl --progress-bar -o zonefiles/$now/.stats.csv -H "Authorization: Bearer $access_token" https://czds-api.icann.org/czds/requests/report
+
+echo "Scrubbing report for privacy..."
+
+# Redact the username from the downloaded report (double quotes so $username expands; | as delimiter so a / in the value cannot break the expression)
+sed -i "s|$username|nobody@no.name|g" "zonefiles/$now/.stats.csv"
 
 echo "Fetching zone file links..."
-# Fetch zone links with inline URL and download zone files
-zone_links=$(curl -s -H "Authorization: Bearer $token" "https://czds-api.icann.org/czds/downloads/links" | grep -o 'https://[^"]*')
 
-# Create output directory if it does not exist
-mkdir -p zonefiles
+# Get the zone file links from the API
+zone_links=$(curl -s -H "Authorization: Bearer $access_token" https://czds-api.icann.org/czds/downloads/links | grep -o 'https://[^"]*')
 
 # Download zone files
 for url in $zone_links; do
-	download_zone "$url" "$token"
+	tld=$(basename "$url" .zone)
+
+	echo "Downloading $url..."
+
+	# Make the GET request and save the response to a file in today's directory
+	curl --progress-bar -o "zonefiles/$now/$tld.txt.gz" -H "Authorization: Bearer $access_token" "$url"
+
+	echo "Downloaded $tld zone file to zonefiles/$now/$tld.txt.gz (extracting...)"
+
+	# Unzip the zone file (-f overwrites a previous extraction so same-day re-runs do not prompt or skip)
+	gunzip -f "zonefiles/$now/$tld.txt.gz"
 done
 
-echo "All zone files downloaded."
+echo "All zone files downloaded."
\ No newline at end of file
diff --git a/czds.py b/czds.py
index 2b36f3b..a6d8448 100644
--- a/czds.py
+++ b/czds.py
@@ -5,13 +5,11 @@
 import argparse
 import concurrent.futures
 import getpass
+import json
 import logging
 import os
-
-try:
-	import requests
-except ImportError:
-	raise ImportError('Missing dependency: requests (pip install requests)')
+import time
+import urllib.request
 
 
 # Setting up logging
@@ -20,21 +18,64 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(level
 
 def authenticate(username: str, password: str) -> str:
 	'''
-	Authenticate with ICANN's API and return the access token.
+	Authenticate with the ICANN API and return the access token.
 
 	:param username: ICANN Username
 	:param password: ICANN Password
 	'''
 
-	response = requests.post('https://account-api.icann.org/api/authenticate', json={'username': username, 'password': password})
-	response.raise_for_status()
+	data	= json.dumps({'username': username, 'password': password}).encode('utf-8')
+	headers = {'Content-Type': 'application/json'}
+	request = urllib.request.Request('https://account-api.icann.org/api/authenticate', data=data, headers=headers)
+
+	with urllib.request.urlopen(request) as response:
+		response = response.read().decode('utf-8')
+		return json.loads(response)['accessToken']
+
+
+def fetch_zone_links(token: str) -> list:
+	'''
+	Fetch the list of zone files available for download.
+
+	:param token: ICANN access token
+	'''
+
+	headers = {'Authorization': f'Bearer {token}'}
+	request = urllib.request.Request('https://czds-api.icann.org/czds/downloads/links', headers=headers)
+
+	with urllib.request.urlopen(request) as response:
+		if response.status == 200:
+			return json.loads(response.read().decode('utf-8'))
+		else:
+			raise Exception(f'Failed to fetch zone links: {response.status} {response.reason}')
+
+
+def download_report(token: str, output_directory: str, username: str):
+	'''
+	Download the zone report stats from the API, redact the username, and save the result as .stats.csv.
+
+	:param token: ICANN access token
+	:param output_directory: Directory to save the scrubbed report
+	:param username: Username to be redacted (every occurrence is replaced with nobody@no.name)
+	'''
+
+	filepath = os.path.join(output_directory, '.stats.csv')
+	headers  = {'Authorization': f'Bearer {token}'}
+	request  = urllib.request.Request('https://czds-api.icann.org/czds/requests/report', headers=headers)
+
+	with urllib.request.urlopen(request) as response:
+		if response.status == 200:
+			report_data = response.read().decode('utf-8').replace(username, 'nobody@no.name')
+			with open(filepath, 'w', encoding='utf-8') as file: # Write with the same encoding used to decode, not the locale default
+				file.write(report_data)
+		else:
+			raise Exception(f'Failed to download the zone stats report: {response.status} {response.reason}')
 
-	return response.json()['accessToken']
 
 
 def download_zone(url: str, token: str, output_directory: str):
 	'''
-	Download a single zone file.
+	Download a single zone file using urllib.request.
 
 	:param url: URL to download
 	:param token: ICANN access token
@@ -42,15 +83,28 @@ def download_zone(url: str, token: str, output_directory: str):
 	'''
 
 	headers = {'Authorization': f'Bearer {token}'}
-	response = requests.get(url, headers=headers)
-	response.raise_for_status()
-	filename = response.headers.get('Content-Disposition').split('filename=')[-1].strip('"')
-	filepath = os.path.join(output_directory, filename)
+	request = urllib.request.Request(url, headers=headers)
 
-	with open(filepath, 'wb') as file:
-		for chunk in response.iter_content(chunk_size=1024):
-			file.write(chunk)
-	return filepath
+	with urllib.request.urlopen(request) as response:
+		if response.status == 200:
+			content_disposition = response.getheader('Content-Disposition')
+			if content_disposition:
+				filename = content_disposition.split('filename=')[-1].strip('"')
+			else:
+				raise ValueError(f'Failed to get filename from Content-Disposition header: {content_disposition}')
+
+			filepath = os.path.join(output_directory, filename)
+
+			with open(filepath, 'wb') as file:
+				while True:
+					chunk = response.read(1024)
+					if not chunk:
+						break
+					file.write(chunk)
+
+			return filepath
+		else:
+			raise Exception(f'Failed to download {url}: {response.status} {response.reason}')
 
 
 def main(username: str, password: str, concurrency: int):
@@ -62,23 +116,36 @@ def main(username: str, password: str, concurrency: int):
 	:param concurrency: Number of concurrent downloads
 	'''
 
-	token = authenticate(username, password)
-	headers = {'Authorization': f'Bearer {token}'}
-	response = requests.get('https://czds-api.icann.org/czds/downloads/links', headers=headers)
-	response.raise_for_status()
-	zone_links = response.json()
-	output_directory = 'zonefiles'
+	now = time.strftime('%Y-%m-%d')
 
+	logging.info(f'Authenticating with ICANN API...')
+	try:
+		token = authenticate(username, password)
+	except Exception as e:
+		raise Exception(f'Failed to authenticate with ICANN API: {e}')
+	#logging.info(f'Authenticated with token: {token}')
+	# The above line is commented out to avoid printing the token to the logs, you can uncomment it for debugging purposes
+
+	output_directory = os.path.join(os.getcwd(), 'zones', now)
 	os.makedirs(output_directory, exist_ok=True)
 
+	logging.info('Fetching zone stats report...')
+	try:
+		download_report(token, output_directory, username)
+	except Exception as e:
+		raise Exception(f'Failed to download zone stats report: {e}')
+
+	logging.info('Fetching zone links...')
+	try:
+		zone_links = fetch_zone_links(token)
+	except Exception as e:
+		raise Exception(f'Failed to fetch zone links: {e}')
+	logging.info(f'Fetched {len(zone_links)} zone links')
+
 	with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as executor:
-
 		future_to_url = {executor.submit(download_zone, url, token, output_directory): url for url in zone_links}
-
 		for future in concurrent.futures.as_completed(future_to_url):
-
 			url = future_to_url[future]
-
 			try:
 				filepath = future.result()
 				logging.info(f'Completed downloading {url} to file {filepath}')
@@ -88,24 +155,13 @@ def main(username: str, password: str, concurrency: int):
 
 
 if __name__ == '__main__':
-	parser = argparse.ArgumentParser(description="ICANN Zone Files Downloader")
+	parser = argparse.ArgumentParser(description='ICANN API for the Centralized Zones Data Service')
 	parser.add_argument('-u', '--username', help='ICANN Username')
 	parser.add_argument('-p', '--password', help='ICANN Password')
-	parser.add_argument('-c', '--concurrency', type=int, default=5, help='Number of concurrent downloads')
+	parser.add_argument('-c', '--concurrency', type=int, default=3, help='Number of concurrent downloads')
 	args = parser.parse_args()
 
-	username = args.username or os.getenv('CZDS_USER')
-	password = args.password or os.getenv('CZDS_PASS')
+	username = args.username or os.getenv('CZDS_USER') or input('ICANN Username: ')
+	password = args.password or os.getenv('CZDS_PASS') or getpass.getpass('ICANN Password: ')
 
-	if not username:
-		username = input('ICANN Username: ')
-
-	if not password:
-		password = getpass.getpass('ICANN Password: ')
-
-	try:
-		main(username, password, args.concurrency)
-	except requests.HTTPError as e:
-		logging.error(f'HTTP error occurred: {e.response.status_code} - {e.response.reason}')
-	except Exception as e:
-		logging.error(f'An error occurred: {e}')
+	main(username, password, args.concurrency)
diff --git a/stats_2024-01-31.csv b/stats.csv
similarity index 100%
rename from stats_2024-01-31.csv
rename to stats.csv