The Certstream ingestor now logs only subdomains, since we have already ingested the zone files. It skips plain www. hosts (e.g. www.example.com) and strips the *. prefix from wildcard entries before checking them.

This commit is contained in:
Dionysus 2024-03-13 22:34:20 -04:00
parent 7f93a4d8de
commit 1ab7199f7d
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
1 changed file with 13 additions and 3 deletions

View File

@@ -54,10 +54,20 @@ async def process_data(place_holder: str = None):
logging.error(f'Invalid line from the websocket: {line}')
continue
# Grab the unique domains from the record (excluding wildcards)
domains = record['data']['leaf_cert']['all_domains']
domains = set([domain[2:] if domain.startswith('*.') else domain for domain in domains])
# Grab the unique domains from the records
all_domains = record['data']['leaf_cert']['all_domains']
domains = list()
# We only care about subdomains (excluding www. and wildcards)
for domain in all_domains:
if domain.startswith('*.'):
domain = domain[2:]
elif domain.startswith('www.') and domain.count('.') == 2:
continue
if domain.count('.') > 1:
if domain not in domains:
domains.append(domain)
# Construct the document
for domain in domains:
struct = {