#!/usr/bin/env python
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
# ingest_rir_delegations.py

import csv
import ipaddress
import logging
import time

try:
    import aiohttp
except ImportError:
    raise ImportError('Missing required \'aiohttp\' library. (pip install aiohttp)')


# Set a default elasticsearch index if one is not provided
default_index = 'eris-rir-delegations'

# Delegation data sources
delegation_db = {
    'afrinic' : 'https://ftp.afrinic.net/stats/afrinic/delegated-afrinic-extended-latest',
    'apnic'   : 'https://ftp.apnic.net/stats/apnic/delegated-apnic-extended-latest',
    'arin'    : 'https://ftp.arin.net/pub/stats/arin/delegated-arin-extended-latest',
    'lacnic'  : 'https://ftp.lacnic.net/pub/stats/lacnic/delegated-lacnic-extended-latest',
    'ripencc' : 'https://ftp.ripe.net/pub/stats/ripencc/delegated-ripencc-extended-latest'
}
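
# Each "extended" delegation file is pipe-delimited and contains three kinds of
# lines (comment lines starting with '#' are skipped in process_data below):
#   header  : version|registry|serial|records|startdate|enddate|UTCoffset
#   summary : registry|*|type|*|count|summary
#   record  : registry|cc|type|start|value|date|status[|extensions]
# For 'asn' and 'ipv4' records the value field is a count (of ASNs/addresses);
# for 'ipv6' records it is a CIDR prefix length.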


def construct_map() -> dict:
    '''Construct the Elasticsearch index mapping for records'''

    # Match on exact value or full text search
    keyword_mapping = { 'type': 'text', 'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }

    # Construct the index mapping
    mapping = {
        'mappings': {
            'properties': {
                'registry'   : { 'type': 'keyword' },
                'cc'         : { 'type': 'keyword' }, # ISO 3166 2-letter code
                'asn'        : {
                    'properties': {
                        # Note: 'integer' is signed 32-bit; 4-byte ASNs above
                        # 2147483647 would need 'long' instead
                        'start' : { 'type': 'integer' },
                        'end'   : { 'type': 'integer' }
                    }
                },
                'ip'         : {
                    'properties': {
                        'start' : { 'type': 'ip' },
                        'end'   : { 'type': 'ip' }
                    }
                },
                'date'       : { 'type': 'date' },
                'status'     : { 'type': 'keyword' },
                'extensions' : keyword_mapping,
                'seen'       : { 'type': 'date' }
            }
        }
    }

    return mapping
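
# A sketch of applying this mapping when creating the index (not part of the
# ingestor; assumes the official `elasticsearch` client with the 8.x API and a
# reachable cluster at the given URL):
#   from elasticsearch import Elasticsearch
#   es = Elasticsearch('https://localhost:9200')
#   es.indices.create(index=default_index, mappings=construct_map()['mappings'])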


async def process_data(place_holder: str = None):
    '''
    Read and process the delegation data.

    :param place_holder: Placeholder parameter to match the process_data function signature of other ingestors.
    '''

    for registry, url in delegation_db.items():
        try:
            headers = {'Connection': 'keep-alive'} # This is required for AIOHTTP connections to LACNIC

            async with aiohttp.ClientSession(headers=headers) as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        logging.error(f'Failed to fetch {registry} delegation data: {response.status}')
                        continue

                    csv_data   = await response.text()
                    rows       = [line.lower() for line in csv_data.split('\n') if line and not line.startswith('#')]
                    csv_reader = csv.reader(rows, delimiter='|')

                    del rows, csv_data # Cleanup

                    # Process the CSV data
                    for row in csv_reader:
                        cache = '|'.join(row) # Cache the raw row for error messages

                        # Heuristic for the header line (not doing anything with it for now)
                        # (extended-format record lines carry 8 fields, so a 7-field line
                        # whose second field is not '*' can only be the header)
                        if len(row) == 7 and row[1] != '*':
                            header = {
                                'version'   : row[0],
                                'registry'  : row[1],
                                'serial'    : row[2],
                                'records'   : row[3],
                                'startdate' : row[4],
                                'enddate'   : row[5],
                                'UTCoffset' : row[6]
                            }
                            continue

                        # Heuristic for the summary lines (not doing anything with it for now)
                        elif row[2] != '*' and row[3] == '*':
                            summary = {
                                'registry' : row[0],
                                'type'     : row[2],
                                'count'    : row[4]
                            }
                            continue

                        # Record lines (this is what we want)
                        else:
                            record = {
                                'registry' : row[0],
                                'cc'       : row[1],
                                'type'     : row[2],
                                'start'    : row[3],
                                'value'    : row[4],
                                'date'     : row[5],
                                'status'   : row[6]
                            }

                            # Optional 8th field: registry extensions (e.g. an opaque org ID)
                            if len(row) > 7 and row[7]:
                                record['extensions'] = row[7]

                            if not record['cc']:
                                del record['cc']
                            elif len(record['cc']) != 2:
                                raise ValueError(f'Invalid country code: {cache}')

                            if not record['value'].isdigit():
                                raise ValueError(f'Invalid value: {cache}')
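
                            # Worked examples of the range math below (the 'value' field is a
                            # count for asn/ipv4 records and a prefix length for ipv6 records):
                            #   asn  start=64512, value=16         -> asn {'start': 64512, 'end': 64527}
                            #   ipv4 start=76.15.132.0, value=1024 -> ip  {'start': '76.15.132.0', 'end': '76.15.135.255'}
                            #   ipv6 start=2001:db8::, value=32    -> ip  {'start': '2001:db8::', 'end': '2001:db8:ffff:ffff:ffff:ffff:ffff:ffff'}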

                            if record['type'] == 'asn':
                                end = int(record['start']) + int(record['value']) - 1
                                record['asn'] = { 'start': int(record['start']), 'end': end }
                            elif record['type'] in ('ipv4', 'ipv6'):
                                try:
                                    if record['type'] == 'ipv4':
                                        end = ipaddress.ip_address(record['start']) + int(record['value']) - 1
                                    elif record['type'] == 'ipv6':
                                        end = ipaddress.ip_network(f'{record["start"]}/{record["value"]}').broadcast_address
                                    end = end.compressed.lower()
                                    record['ip'] = { 'start': record['start'], 'end': str(end) }
                                except ValueError:
                                    raise ValueError(f'Invalid IP range: {cache}')
                            else:
                                raise ValueError(f'Invalid record type: {cache}')

                            del record['start'], record['value'], record['type'] # Cleanup variables no longer needed

                            if not record['date'] or record['date'] == '00000000':
                                del record['date']
                            else:
                                # Convert YYYYMMDD (e.g. '20070502') to ISO 8601 ('2007-05-02T00:00:00Z')
                                record['date'] = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.strptime(record['date'], '%Y%m%d'))

                            if record['status'] not in ('allocated', 'assigned', 'available', 'reserved', 'unallocated', 'unknown'):
                                raise ValueError(f'Invalid status: {cache}')

                            # Set the seen timestamp
                            record['seen'] = time.strftime('%Y-%m-%dT%H:%M:%SZ')

                            # Let's just index the records themselves (for now)
                            yield {'_index': default_index, '_source': record}

        except Exception as e:
            logging.error(f'Error processing {registry} delegation data: {e}')
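
# Each yielded {'_index': ..., '_source': ...} document is shaped for the
# Elasticsearch bulk helpers. A sketch of how a caller might feed this generator
# into a cluster (assumes the `elasticsearch[async]` package and an existing
# AsyncElasticsearch client named `es`):
#   from elasticsearch.helpers import async_bulk
#   await async_bulk(es, process_data())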


async def test():
    '''Test the ingestion process'''

    async for document in process_data():
        print(document)


if __name__ == '__main__':
    import asyncio

    asyncio.run(test())
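
# Running this file directly streams each parsed document to stdout:
#   python ingest_rir_delegations.py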


'''
Output (sample record lines from the ARIN extended delegation file):
    arin|US|ipv4|76.15.132.0|1024|20070502|allocated|6c065d5b54b877781f05e7d30ebfff28
    arin|US|ipv4|76.15.136.0|2048|20070502|allocated|6c065d5b54b877781f05e7d30ebfff28
    arin|US|ipv4|76.15.144.0|4096|20070502|allocated|6c065d5b54b877781f05e7d30ebfff28
    arin|US|ipv4|76.15.160.0|8192|20070502|allocated|6c065d5b54b877781f05e7d30ebfff28

Input (the document indexed into Elasticsearch for the first record above):
    {
        'registry'   : 'arin',
        'cc'         : 'us',
        'ip'         : { 'start': '76.15.132.0', 'end': '76.15.135.255' },
        'date'       : '2007-05-02T00:00:00Z',
        'status'     : 'allocated',
        'extensions' : '6c065d5b54b877781f05e7d30ebfff28',
        'seen'       : '2024-03-12T00:00:00Z'
    }

Notes:
    Do we handle the database locally on disk, or load it into RAM?
'''