Added anomaly detection to RIR delegations ingestor

This commit is contained in:
Dionysus 2024-03-12 18:19:47 -04:00
parent 00711fe856
commit b1fa34f3aa
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
2 changed files with 53 additions and 36 deletions

View File

@ -43,41 +43,34 @@ async def process_data(place_holder: str = None):
:param place_holder: Placeholder parameter to match the process_data function signature of other ingestors. :param place_holder: Placeholder parameter to match the process_data function signature of other ingestors.
''' '''
while True: async for websocket in websockets.connect('wss://certstream.calidog.io', ping_interval=15, ping_timeout=60):
try: try:
async with websockets.connect('wss://certstream.calidog.io') as websocket: async for line in websocket:
while True:
# Read a line from the websocket # Parse the JSON record
line = await websocket.recv() try:
record = json.loads(line)
except json.decoder.JSONDecodeError:
logging.error(f'Invalid line from the websocket: {line}')
continue
# Parse the JSON record # Grab the unique domains from the record (excluding wildcards)
try: domains = record['data']['leaf_cert']['all_domains']
record = json.loads(line) domains = set([domain[2:] if domain.startswith('*.') else domain for domain in domains])
except json.decoder.JSONDecodeError:
logging.error(f'Invalid line from the websocket: {line}')
continue
# Grab the unique domains from the record (excluding wildcards) # Construct the document
domains = record['data']['leaf_cert']['all_domains'] for domain in domains:
domains = set([domain[2:] if domain.startswith('*.') else domain for domain in domains]) struct = {
'domain' : domain,
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
}
# Construct the document yield {'_index': default_index, '_source': struct}
for domain in domains:
struct = {
'domain' : domain,
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
}
yield {'_index': default_index, '_source': struct} except websockets.ConnectionClosed as e :
logging.error(f'Connection to Certstream was closed. Attempting to reconnect... ({e})')
except websockets.ConnectionClosed:
logging.error('Connection to Certstream was closed. Attempting to reconnect...')
await asyncio.sleep(3) await asyncio.sleep(3)
except Exception as e:
logging.error(f'An error occurred while processing Certstream records! ({e})')
break
async def test(): async def test():
'''Test the ingestion process.''' '''Test the ingestion process.'''

View File

@ -3,6 +3,7 @@
# ingest_rir_delegations.py # ingest_rir_delegations.py
import csv import csv
import ipaddress
import logging import logging
import time import time
@ -21,7 +22,7 @@ delegation_db = {
'apnic' : 'https://ftp.apnic.net/stats/apnic/delegated-apnic-extended-latest', 'apnic' : 'https://ftp.apnic.net/stats/apnic/delegated-apnic-extended-latest',
'arin' : 'https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest', 'arin' : 'https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest',
'lacnic' : 'https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest', 'lacnic' : 'https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest',
'ripe' : 'https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest' 'ripencc' : 'https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest'
} }
@ -36,7 +37,7 @@ def construct_map() -> dict:
'mappings': { 'mappings': {
'properties': { 'properties': {
'registry' : { 'type': 'keyword' }, 'registry' : { 'type': 'keyword' },
'cc' : { 'type': 'keyword' }, 'cc' : { 'type': 'keyword' }, # ISO 3166 2-letter code
'type' : { 'type': 'keyword' }, 'type' : { 'type': 'keyword' },
'start' : { 'start' : {
'properties': { 'properties': {
@ -106,21 +107,44 @@ async def process_data():
record = { record = {
'registry' : row[0], 'registry' : row[0],
'cc' : row[1],
'type' : row[2], 'type' : row[2],
'start' : { record_type: row[3] }, 'start' : row[3],
'value' : row[4], 'value' : row[4],
'date' : row[5],
'status' : row[6] 'status' : row[6]
} }
if len(row) == 7: if len(row) == 7:
if row[7]: if row[7]:
record['extensions'] = row[7] record['extensions'] = row[7]
if (cc := row[1]): if not record['cc']:
record['cc'] = cc.lower() del record['cc']
elif len(record['cc']) != 2:
raise ValueError(f'Invalid country code: {record["cc"]} ({cache})')
if (date_field := row[5]): if record['type'] == 'asn':
if date_field != '00000000': record['start'] = { record_type : int(record['start']) }
record['date'] = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.strptime(date_field, '%Y%m%d')), elif record['type'] in ('ipv4', 'ipv6'):
try:
ipaddress.ip_address(record['start'])
record['start'] = { record_type : record['start'] }
except ValueError:
raise ValueError(f'Invalid start IP: {record["start"]} ({cache})')
else:
raise ValueError(f'Invalid record type: {record["type"]}')
if not record['value'].isdigit():
raise ValueError(f'Invalid value: {record["value"]} ({cache})')
if not record['date'] or record['date'] == '00000000':
del record['date']
else:
record['date'] = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.strptime(record['date'], '%Y%m%d')),
if record['status'] not in ('allocated', 'assigned', 'available', 'reserved', 'unallocated', 'unknown'):
raise ValueError(f'Invalid status: {record["status"]} ({cache})')
#json_output['records'].append(record) #json_output['records'].append(record)