Added anomaly detection to RIR delegations ingestor
This commit is contained in:
parent
00711fe856
commit
b1fa34f3aa
@ -43,41 +43,34 @@ async def process_data(place_holder: str = None):
|
|||||||
:param place_holder: Placeholder parameter to match the process_data function signature of other ingestors.
|
:param place_holder: Placeholder parameter to match the process_data function signature of other ingestors.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
while True:
|
async for websocket in websockets.connect('wss://certstream.calidog.io', ping_interval=15, ping_timeout=60):
|
||||||
try:
|
try:
|
||||||
async with websockets.connect('wss://certstream.calidog.io') as websocket:
|
async for line in websocket:
|
||||||
while True:
|
|
||||||
# Read a line from the websocket
|
|
||||||
line = await websocket.recv()
|
|
||||||
|
|
||||||
# Parse the JSON record
|
# Parse the JSON record
|
||||||
try:
|
try:
|
||||||
record = json.loads(line)
|
record = json.loads(line)
|
||||||
except json.decoder.JSONDecodeError:
|
except json.decoder.JSONDecodeError:
|
||||||
logging.error(f'Invalid line from the websocket: {line}')
|
logging.error(f'Invalid line from the websocket: {line}')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Grab the unique domains from the record (excluding wildcards)
|
# Grab the unique domains from the record (excluding wildcards)
|
||||||
domains = record['data']['leaf_cert']['all_domains']
|
domains = record['data']['leaf_cert']['all_domains']
|
||||||
domains = set([domain[2:] if domain.startswith('*.') else domain for domain in domains])
|
domains = set([domain[2:] if domain.startswith('*.') else domain for domain in domains])
|
||||||
|
|
||||||
# Construct the document
|
# Construct the document
|
||||||
for domain in domains:
|
for domain in domains:
|
||||||
struct = {
|
struct = {
|
||||||
'domain' : domain,
|
'domain' : domain,
|
||||||
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
|
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
|
||||||
}
|
}
|
||||||
|
|
||||||
yield {'_index': default_index, '_source': struct}
|
yield {'_index': default_index, '_source': struct}
|
||||||
|
|
||||||
except websockets.ConnectionClosed:
|
except websockets.ConnectionClosed as e :
|
||||||
logging.error('Connection to Certstream was closed. Attempting to reconnect...')
|
logging.error(f'Connection to Certstream was closed. Attempting to reconnect... ({e})')
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(3)
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f'An error occurred while processing Certstream records! ({e})')
|
|
||||||
break
|
|
||||||
|
|
||||||
|
|
||||||
async def test():
|
async def test():
|
||||||
'''Test the ingestion process.'''
|
'''Test the ingestion process.'''
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
# ingest_rir_delegations.py
|
# ingest_rir_delegations.py
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
|
import ipaddress
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@ -21,7 +22,7 @@ delegation_db = {
|
|||||||
'apnic' : 'https://ftp.apnic.net/stats/apnic/delegated-apnic-extended-latest',
|
'apnic' : 'https://ftp.apnic.net/stats/apnic/delegated-apnic-extended-latest',
|
||||||
'arin' : 'https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest',
|
'arin' : 'https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest',
|
||||||
'lacnic' : 'https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest',
|
'lacnic' : 'https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest',
|
||||||
'ripe' : 'https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest'
|
'ripencc' : 'https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -36,7 +37,7 @@ def construct_map() -> dict:
|
|||||||
'mappings': {
|
'mappings': {
|
||||||
'properties': {
|
'properties': {
|
||||||
'registry' : { 'type': 'keyword' },
|
'registry' : { 'type': 'keyword' },
|
||||||
'cc' : { 'type': 'keyword' },
|
'cc' : { 'type': 'keyword' }, # ISO 3166 2-letter code
|
||||||
'type' : { 'type': 'keyword' },
|
'type' : { 'type': 'keyword' },
|
||||||
'start' : {
|
'start' : {
|
||||||
'properties': {
|
'properties': {
|
||||||
@ -106,21 +107,44 @@ async def process_data():
|
|||||||
|
|
||||||
record = {
|
record = {
|
||||||
'registry' : row[0],
|
'registry' : row[0],
|
||||||
|
'cc' : row[1],
|
||||||
'type' : row[2],
|
'type' : row[2],
|
||||||
'start' : { record_type: row[3] },
|
'start' : row[3],
|
||||||
'value' : row[4],
|
'value' : row[4],
|
||||||
|
'date' : row[5],
|
||||||
'status' : row[6]
|
'status' : row[6]
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(row) == 7:
|
if len(row) == 7:
|
||||||
if row[7]:
|
if row[7]:
|
||||||
record['extensions'] = row[7]
|
record['extensions'] = row[7]
|
||||||
|
|
||||||
if (cc := row[1]):
|
if not record['cc']:
|
||||||
record['cc'] = cc.lower()
|
del record['cc']
|
||||||
|
elif len(record['cc']) != 2:
|
||||||
|
raise ValueError(f'Invalid country code: {record["cc"]} ({cache})')
|
||||||
|
|
||||||
if (date_field := row[5]):
|
if record['type'] == 'asn':
|
||||||
if date_field != '00000000':
|
record['start'] = { record_type : int(record['start']) }
|
||||||
record['date'] = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.strptime(date_field, '%Y%m%d')),
|
elif record['type'] in ('ipv4', 'ipv6'):
|
||||||
|
try:
|
||||||
|
ipaddress.ip_address(record['start'])
|
||||||
|
record['start'] = { record_type : record['start'] }
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(f'Invalid start IP: {record["start"]} ({cache})')
|
||||||
|
else:
|
||||||
|
raise ValueError(f'Invalid record type: {record["type"]}')
|
||||||
|
|
||||||
|
if not record['value'].isdigit():
|
||||||
|
raise ValueError(f'Invalid value: {record["value"]} ({cache})')
|
||||||
|
|
||||||
|
if not record['date'] or record['date'] == '00000000':
|
||||||
|
del record['date']
|
||||||
|
else:
|
||||||
|
record['date'] = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.strptime(record['date'], '%Y%m%d')),
|
||||||
|
|
||||||
|
if record['status'] not in ('allocated', 'assigned', 'available', 'reserved', 'unallocated', 'unknown'):
|
||||||
|
raise ValueError(f'Invalid status: {record["status"]} ({cache})')
|
||||||
|
|
||||||
#json_output['records'].append(record)
|
#json_output['records'].append(record)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user