Added IXP ingestor, updated all other ingestors
parent: 24850ea976
commit: 603d005a47
@@ -45,6 +45,7 @@ async def process_data(place_holder: str = None):

     async for websocket in websockets.connect('wss://certstream.calidog.io', ping_interval=15, ping_timeout=60):
         try:
+            # Read the websocket stream
            async for line in websocket:

                # Parse the JSON record
@@ -65,6 +66,7 @@ async def process_data(place_holder: str = None):
                elif domain.startswith('www.') and domain.count('.') == 2:
                    continue
                if domain.count('.') > 1:
+                   # TODO: Add a check for PSL TLDs...domain.co.uk, domain.com.au, etc. (we want to ignore these if they are not subdomains)
                    if domain not in domains:
                        domains.append(domain)

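A minimal sketch of one way the PSL TODO above could be handled, assuming the third-party 'tldextract' library (pip install tldextract) is an acceptable dependency; the helper name is hypothetical and not part of the commit:

import tldextract

def is_bare_psl_domain(domain: str) -> bool:
    '''Return True when the name is only a registrable domain under a PSL suffix (e.g. domain.co.uk), with no subdomain.'''
    parts = tldextract.extract(domain)
    return bool(parts.suffix) and not parts.subdomain

# is_bare_psl_domain('example.co.uk')      -> True  (ignore, not a subdomain)
# is_bare_psl_domain('mail.example.co.uk') -> False (keep, it is a subdomain)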
@@ -81,6 +83,10 @@ async def process_data(place_holder: str = None):
                logging.error(f'Connection to Certstream was closed. Attempting to reconnect... ({e})')
                await asyncio.sleep(3)

+        except Exception as e:
+            logging.error(f'Error processing Certstream data: {e}')
+            await asyncio.sleep(3)
+

 async def test():
     '''Test the ingestion process.'''
@@ -91,8 +97,6 @@ async def test():


 if __name__ == '__main__':
-    import asyncio
-
     asyncio.run(test())


@@ -155,4 +159,7 @@ Output:
     },
     "message_type": "certificate_update"
 }
+
+Notes:
+- Fix the "no close frame received or sent" error
 '''
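A sketch related to the "no close frame received or sent" note added above, under the assumption that the message comes from websockets' ConnectionClosedError (typically a keepalive ping timeout); catching it inside the reconnect loop lets the 'async for websocket in websockets.connect(...)' iterator hand out a fresh connection instead of bubbling the error up. This is not the commit's fix, only one possible shape for it:

import asyncio
import logging
import websockets

async def watch_certstream():
    async for websocket in websockets.connect('wss://certstream.calidog.io', ping_interval=15, ping_timeout=60):
        try:
            async for line in websocket:
                print(line)  # hand the raw record to the real parser here
        except websockets.exceptions.ConnectionClosedError as e:
            logging.error(f'Connection to Certstream was closed. Attempting to reconnect... ({e})')
            await asyncio.sleep(3)
            continue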
@@ -3,151 +3,176 @@
 # ingest_httpx.py

 import json
+import logging

 try:
     import aiofiles
 except ImportError:
     raise ImportError('Missing required \'aiofiles\' library. (pip install aiofiles)')


-default_index = 'httpx-logs'
+# Set a default elasticsearch index if one is not provided
+default_index = 'eris-httpx'


 def construct_map() -> dict:
     '''Construct the Elasticsearch index mapping for Masscan records.'''

-    keyword_mapping = { 'type': 'text', 'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }
-
-    mapping = {
-        'mappings': {
-            'properties': {
-                "timestamp" : { 'type' : 'date' },
-                "hash" : {
-                    "body_md5" : { 'type': 'keyword' },
-                    "body_mmh3" : { 'type': 'keyword' },
-                    "body_sha256" : { 'type': 'keyword' },
-                    "body_simhash" : { 'type': 'keyword' },
-                    "header_md5" : { 'type': 'keyword' },
-                    "header_mmh3" : { 'type': 'keyword' },
-                    "header_sha256" : { 'type': 'keyword' },
-                    "header_simhash" : { 'type': 'keyword' }
-                },
-                "port" : { 'type': 'integer' },
-                "url" : keyword_mapping,
-                "input" : keyword_mapping,
-                "title" : keyword_mapping,
-                "scheme" : { 'type': 'keyword' },
-                "webserver" : { 'type': 'keyword' },
-                "body_preview" : keyword_mapping,
-                "content_type" : { 'type': 'keyword' },
-                "method" : { 'type': 'keyword'},
-                "host" : { 'type': 'ip'},
-                "path" : keyword_mapping,
-                "favicon" : { 'type': 'keyword' },
-                "favicon_path" : keyword_mapping,
-                "a" : { 'type': 'ip'},
-                "aaaa" : { 'type': 'ip'},
-                "tech" : keyword_mapping,
-                "words" : { 'type': 'integer'},
-                "lines" : { 'type': 'integer'},
-                "status_code" : { 'type': 'integer'},
-                "content_length" : { 'type': 'integer'}
-            }
-        }
-    }
+    # Match on exact value or full text search
+    keyword_mapping = { 'type': 'text', 'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }
+
+    # Construct the index mapping
+    mapping = {
+        'mappings': {
+            'properties': {
+                'timestamp' : { 'type' : 'date' },
+                'hash' : {
+                    'properties': {
+                        'body_md5' : { 'type': 'keyword' },
+                        'body_mmh3' : { 'type': 'keyword' },
+                        'body_sha256' : { 'type': 'keyword' },
+                        'body_simhash' : { 'type': 'keyword' },
+                        'header_md5' : { 'type': 'keyword' },
+                        'header_mmh3' : { 'type': 'keyword' },
+                        'header_sha256' : { 'type': 'keyword' },
+                        'header_simhash' : { 'type': 'keyword' }
+                    }
+                },
+                'port' : { 'type': 'integer' },
+                'url' : keyword_mapping,
+                'final_url' : keyword_mapping,
+                'input' : keyword_mapping,
+                'title' : keyword_mapping,
+                'scheme' : { 'type': 'keyword' },
+                'webserver' : { 'type': 'keyword' },
+                'body_preview' : keyword_mapping,
+                'content_type' : { 'type': 'keyword' },
+                'method' : { 'type': 'keyword' },
+                'host' : { 'type': 'ip' },
+                'path' : keyword_mapping,
+                'favicon' : { 'type': 'keyword' },
+                'favicon_path' : keyword_mapping,
+                'a' : { 'type': 'ip' },
+                'cname' : keyword_mapping,
+                'aaaa' : { 'type': 'ip' },
+                'tech' : keyword_mapping,
+                'words' : { 'type': 'integer' },
+                'lines' : { 'type': 'integer' },
+                'status_code' : { 'type': 'integer' },
+                'chain_status_codes' : { 'type': 'integer' },
+                'content_length' : { 'type': 'integer' }
+            }
+        }
+    }

     return mapping
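A minimal sketch (not part of the commit) of how the mapping above might be applied, meant to sit alongside the module so default_index and construct_map() are in scope; it assumes the 8.x async elasticsearch client, and the client URL is a placeholder:

import asyncio
from elasticsearch import AsyncElasticsearch

async def create_index():
    client = AsyncElasticsearch('https://localhost:9200')
    # Only create the index if it does not exist yet
    if not await client.indices.exists(index=default_index):
        await client.indices.create(index=default_index, mappings=construct_map()['mappings'])
    await client.close()

asyncio.run(create_index())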


-async def process_data(file_path: str):
+async def process_data(input_path: str):
     '''
-    Read and process HTTPX records from the log file.
+    Read and process the input file

-    :param file_path: Path to the HTTPX log file
+    :param input_path: Path to the input file
     '''

-    async with aiofiles.open(file_path) as input_file:
-        async for line in input_file:
-            line = line.strip()
-
-            if not line:
-                continue
-
-            record = json.loads(line)
-
-            record['seen'] = record.pop('timestamp').split('.')[0] + 'Z' # Hacky solution to maintain ISO 8601 format without milliseconds or offsets
-            record['domain'] = record.pop('input')
-
-            for item in ('failed', 'knowledgebase', 'time'):
-                del record[item]
-
-            yield {'_id': record['domain'], '_index': default_index, '_source': record}
+    async with aiofiles.open(input_path) as input_file:
+        # Read the input file line by line
+        async for line in input_file:
+            line = line.strip()
+
+            # Sentinel value to indicate the end of a process (for closing out a FIFO stream)
+            if line == '~eof':
+                break
+
+            # Skip empty lines
+            if not line:
+                continue
+
+            # Parse the JSON record
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError:
+                logging.error(f'Failed to parse JSON record: {line}')
+                continue
+
+            # Hacky solution to maintain ISO 8601 format without milliseconds or offsets
+            record['timestamp'] = record['timestamp'].split('.')[0] + 'Z'
+
+            # Remove unnecessary fields we don't care about
+            for item in ('failed', 'knowledgebase', 'time', 'csp'):
+                if item in record:
+                    del record[item]
+
+            yield {'_index': default_index, '_source': record}


 async def test(input_path: str):
     '''
-    Test the HTTPX ingestion process
+    Test the ingestion process

-    :param input_path: Path to the HTTPX log file
+    :param input_path: Path to the input file
     '''
+
     async for document in process_data(input_path):
         print(document)


 if __name__ == '__main__':
     import argparse
     import asyncio

-    parser = argparse.ArgumentParser(description='HTTPX Ingestor for ERIS')
+    parser = argparse.ArgumentParser(description='Ingestor for ERIS')
     parser.add_argument('input_path', help='Path to the input file or directory')
     args = parser.parse_args()

     asyncio.run(test(args.input_path))


-''''
+'''
 Deploy:
     go install -v github.com/projectdiscovery/httpx/cmd/httpx@latest
     curl -s https://public-dns.info/nameservers.txt -o nameservers.txt
-    httpx -l zone.txt -t 200 -sc -location -favicon -title -bp -td -ip -cname -mc 200,201,301,302,303,307,308 -fr -r nameservers.txt -retries 2 -stream -sd -j -o httpx.json -v
+    httpx -l fulldomains.txt -t 200 -sc -location -favicon -title -bp -td -ip -cname -mc 200,201,301,302,303,307,308 -fr -r nameservers.txt -retries 2 -stream -sd -j -o fifo.json -v

 Output:
 {
     "timestamp":"2024-01-14T13:08:15.117348474-05:00", # Rename to seen and remove milliseconds and offset
     "hash": { # Do we need all of these ?
         "body_md5" : "4ae9394eb98233b482508cbda3b33a66",
         "body_mmh3" : "-4111954",
         "body_sha256" : "89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3",
         "body_simhash" : "9814303593401624250",
         "header_md5" : "980366deb2b2fb5df2ad861fc63e79ce",
         "header_mmh3" : "-813072798",
         "header_sha256" : "39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d",
         "header_simhash" : "10962523587435277678"
     },
     "port" : "443",
     "url" : "https://supernets.org", # Remove this and only use the input field as "domain" maybe
     "input" : "supernets.org", # rename to domain
     "title" : "SuperNETs",
     "scheme" : "https",
     "webserver" : "nginx",
     "body_preview" : "SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup",
     "content_type" : "text/html",
     "method" : "GET", # Remove this
     "host" : "51.89.151.158",
     "path" : "/",
     "favicon" : "-674048714",
     "favicon_path" : "/i/favicon.png",
     "time" : "592.907689ms", # Do we need this ?
     "a" : ["6.150.220.23"],
     "tech" : ["Bootstrap:4.0.0", "HSTS", "Nginx"],
     "words" : 436, # Do we need this ?
     "lines" : 79, # Do we need this ?
     "status_code" : 200,
     "content_length" : 4597,
     "failed" : false, # Do we need this ?
     "knowledgebase" : { # Do we need this ?
         "PageType" : "nonerror",
         "pHash" : 0
     }
 }
 '''
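The "hacky" timestamp fix above truncates the string, which keeps the local-time value and silently drops the -05:00 offset. A small sketch of an alternative (an assumption, not what the commit does) that converts the value to real UTC first, using only the standard library and the example value from the Output block above:

from datetime import datetime, timezone

raw = '2024-01-14T13:08:15.117348474-05:00'   # example value from the Output block above
trimmed = raw.split('.')[0] + raw[-6:]         # '2024-01-14T13:08:15-05:00' (drop nanoseconds, keep the offset)
utc = datetime.fromisoformat(trimmed).astimezone(timezone.utc)
print(utc.strftime('%Y-%m-%dT%H:%M:%SZ'))      # 2024-01-14T18:08:15Z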
ingestors/ingest_ixps.py (new file, 111 lines)
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
+# ingest_ixps.py
+
+import json
+import ipaddress
+import time
+
+try:
+    import aiohttp
+except ImportError:
+    raise ImportError('Missing required \'aiohttp\' library. (pip install aiohttp)')
+
+
+# Set a default elasticsearch index if one is not provided
+default_index = 'ixp-' + time.strftime('%Y-%m-%d')
+
+
+def construct_map() -> dict:
+    '''Construct the Elasticsearch index mapping for records'''
+
+    # Match on exact value or full text search
+    keyword_mapping = { 'type': 'text', 'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }
+
+    # Construct the index mapping
+    mapping = {
+        'mappings': {
+            'properties': {
+                'name' : {'type': 'keyword'},
+                'alternatenames' : {'type': 'keyword'},
+                'sources' : {'type': 'keyword'},
+                'prefixes' : { 'properties': { 'ipv4' : {'type': 'ip'}, 'ipv6' : {'type': 'ip_range'} } },
+                'url' : { 'type': 'keyword' },
+                'region' : { 'type': 'keyword' },
+                'country' : { 'type': 'keyword' },
+                'city' : { 'type': 'keyword' },
+                'state' : { 'type': 'keyword' },
+                'zip' : { 'type': 'keyword' },
+                'address' : keyword_mapping,
+                'iata' : { 'type': 'keyword' },
+                'latitude' : { 'type': 'float' },
+                'longitude' : { 'type': 'float' },
+                'geo_id' : { 'type': 'integer' },
+                'ix_id' : { 'type': 'integer' },
+                'org_id' : { 'type': 'integer' },
+                'pdb_id' : { 'type': 'integer' },
+                'pdb_org_id' : { 'type': 'integer' },
+                'pch_id' : { 'type': 'integer' }
+            }
+        }
+    }
+
+    return mapping
+
+
+async def process_data():
+    '''Read and process the transfers data.'''
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get('https://publicdata.caida.org/datasets/ixps/ixs_202401.jsonl') as response:
+                if response.status != 200:
+                    raise Exception(f'Failed to fetch IXP data: {response.status}')
+
+                data = await response.text()
+
+                try:
+                    json_data = json.loads(data)
+                except json.JSONDecodeError as e:
+                    raise Exception(f'Failed to parse IXP data: {e}')
+
+                pass
+
+    except Exception as e:
+        raise Exception(f'Error processing IXP data: {e}')
+
+
+async def test():
+    '''Test the ingestion process'''
+
+    async for document in process_data():
+        print(document)
+
+
+if __name__ == '__main__':
+    import asyncio
+
+    asyncio.run(test())
+
+
+'''
+Output:
+{
+    "pch_id" : 1848,
+    "name" : "ANGONIX",
+    "country" : "AO",
+    "region" : "Africa",
+    "city" : "Luanda",
+    "iata" : "LAD",
+    "alternatenames" : [],
+    "sources" : ["pch"],
+    "prefixes" : {
+        "ipv4" : ["196.11.234.0/24"],
+        "ipv6" : ["2001:43f8:9d0::/48"]
+    },
+    "geo_id" : 2240449,
+    "ix_id" : 10
+}
+'''
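A sketch (an assumption, not part of the commit) of how the fetched .jsonl body might be turned into documents and how the CIDR prefixes could be validated with the already-imported ipaddress module; the sample record stores prefixes as CIDR strings, which fit an ip_range style field better than a plain ip field. The helper names are hypothetical, and the code is meant to sit inside the module so json, ipaddress and default_index are in scope:

def normalize_prefixes(prefixes: dict) -> dict:
    '''Validate the ipv4/ipv6 CIDR lists so malformed entries never reach the bulk indexer.'''
    cleaned = {}
    for family in ('ipv4', 'ipv6'):
        cleaned[family] = [str(ipaddress.ip_network(p, strict=False)) for p in prefixes.get(family, [])]
    return cleaned

async def iter_documents(data: str):
    '''Walk the JSONL body one line at a time and yield bulk-indexable documents.'''
    for line in data.splitlines():
        if not (line := line.strip()):
            continue
        record = json.loads(line)
        if 'prefixes' in record:
            record['prefixes'] = normalize_prefixes(record['prefixes'])
        yield {'_index': default_index, '_source': record}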
@@ -7,171 +7,177 @@ import logging
 import time

 try:
     import aiofiles
 except ImportError:
     raise ImportError('Missing required \'aiofiles\' library. (pip install aiofiles)')


+# Set a default elasticsearch index if one is not provided
 default_index = 'masscan-logs'


 def construct_map() -> dict:
     '''Construct the Elasticsearch index mapping for Masscan records.'''

+    # Match on exact value or full text search
     keyword_mapping = { 'type': 'text', 'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } }

+    # Construct the geoip mapping (Used with the geoip pipeline to enrich the data)
     geoip_mapping = {
         'city_name' : keyword_mapping,
         'continent_name' : keyword_mapping,
         'country_iso_code' : keyword_mapping,
         'country_name' : keyword_mapping,
         'location' : { 'type': 'geo_point' },
         'region_iso_code' : keyword_mapping,
         'region_name' : keyword_mapping,
     }

-    mapping = {
-        'mappings': {
-            'properties': {
-                'ip' : { 'type': 'ip' },
-                'port' : { 'type': 'integer' },
-                'data' : {
-                    'properties': {
-                        'proto' : { 'type': 'keyword' },
-                        'service' : { 'type': 'keyword' },
-                        'banner' : keyword_mapping,
-                        'seen' : { 'type': 'date' }
-                    }
-                },
-                #'geoip' : { 'properties': geoip_mapping } # Used with the geoip pipeline to enrich the data
-                'last_seen' : { 'type': 'date' }
-            }
-        }
-    }
+    # Construct the index mapping
+    mapping = {
+        'mappings': {
+            'properties': {
+                'ip' : { 'type': 'ip' },
+                'port' : { 'type': 'integer' },
+                'proto' : { 'type': 'keyword' },
+                'service' : { 'type': 'keyword' },
+                'banner' : keyword_mapping,
+                'seen' : { 'type': 'date' }
+                #'geoip' : { 'properties': geoip_mapping }
+            }
+        }
+    }

     return mapping
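A sketch (an assumption, not part of the commit) of the kind of geoip ingest pipeline the commented-out 'geoip' field above refers to; the pipeline id and client URL are placeholders, and the call shape assumes the 8.x async elasticsearch client:

import asyncio
from elasticsearch import AsyncElasticsearch

async def create_geoip_pipeline():
    client = AsyncElasticsearch('https://localhost:9200')
    # geoip processor enriches each document from its 'ip' field into 'geoip'
    await client.ingest.put_pipeline(
        id='masscan-geoip',
        description='Enrich masscan records with GeoIP data from the source ip',
        processors=[{'geoip': {'field': 'ip', 'target_field': 'geoip'}}]
    )
    await client.close()

asyncio.run(create_geoip_pipeline())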


-async def process_data(file_path: str):
+async def process_data(input_path: str):
     '''
-    Read and process Masscan records from the log file.
+    Read and process the input file

-    :param file_path: Path to the Masscan log file
+    :param input_path: Path to the input file
     '''

-    async with aiofiles.open(file_path) as input_file:
-        async for line in input_file:
-            line = line.strip()
-
-            if line == '~eof': # Sentinel value to indicate the end of a process (Used with --watch with FIFO)
-                break
-
-            if not line or not line.startswith('{'):
-                continue
-
-            if line.endswith(','): # Do we need this? Masscan JSON output seems with seperate records with a comma between lines for some reason...
-                line = line[:-1]
-
-            try:
-                record = json.loads(line)
-            except json.decoder.JSONDecodeError:
-                # In rare cases, the JSON record may be incomplete or malformed:
-                # { "ip": "51.161.12.223", "timestamp": "1707628302", "ports": [ {"port": 22, "proto": "tcp", "service": {"name": "ssh", "banner":
-                # { "ip": "83.66.211.246", "timestamp": "1706557002"
-                logging.error(f'Failed to parse JSON record! ({line})')
-                input('Press Enter to continue...') # Pause for review & debugging (remove this in production)
-                continue
-
-            if len(record['ports']) > 1:
-                # In rare cases, a single record may contain multiple ports, though I have yet to witness this...
-                logging.warning(f'Multiple ports found for record! ({record})')
-                input('Press Enter to continue...') # Pause for review (remove this in production)
-
-            for port_info in record['ports']:
-                struct = {
-                    'ip' : record['ip'],
-                    'data' : {
-                        'port' : port_info['port'],
-                        'proto' : port_info['proto'],
-                        'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(int(record['timestamp']))),
-                    },
-                    'last_seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(int(record['timestamp']))),
-                }
-
-                if 'service' in port_info:
-                    if 'name' in port_info['service']:
-                        if (service_name := port_info['service']['name']) not in ('unknown',''):
-                            struct['service'] = service_name
-
-                    if 'banner' in port_info['service']:
-                        banner = ' '.join(port_info['service']['banner'].split()) # Remove extra whitespace
-                        if banner:
-                            struct['banner'] = banner
-
-                id = f'{record["ip"]}:{port_info["port"]}' # Store with ip:port as the unique id to allow the record to be reindexed if it exists.
-
-                yield {'_id': id, '_index': default_index, '_source': struct}
+    async with aiofiles.open(input_path) as input_file:
+        # Read the input file line by line
+        async for line in input_file:
+            line = line.strip()
+
+            # Sentinel value to indicate the end of a process (for closing out a FIFO stream)
+            if line == '~eof':
+                break
+
+            # Skip empty lines and lines that do not start with a JSON object
+            if not line or not line.startswith('{'):
+                continue
+
+            # Do we need this? Masscan JSON output seems with seperate records with a comma between lines for some reason...
+            if line.endswith(','):
+                line = line[:-1]
+
+            # Parse the JSON record
+            try:
+                record = json.loads(line)
+            except json.decoder.JSONDecodeError:
+                # In rare cases, the JSON record may be incomplete or malformed:
+                # { "ip": "51.161.12.223", "timestamp": "1707628302", "ports": [ {"port": 22, "proto": "tcp", "service": {"name": "ssh", "banner":
+                # { "ip": "83.66.211.246", "timestamp": "1706557002"
+                logging.error(f'Failed to parse JSON record! ({line})')
+                input('Press Enter to continue...') # Pause for review & debugging (remove this in production)
+                continue
+
+            # In rare cases, a single record may contain multiple ports, though I have yet to witness this...
+            if len(record['ports']) > 1:
+                logging.warning(f'Multiple ports found for record! ({record})')
+                input('Press Enter to continue...') # Pause for review (remove this in production)
+
+            # Process each port in the record
+            for port_info in record['ports']:
+                struct = {
+                    'ip' : record['ip'],
+                    'port' : port_info['port'],
+                    'proto' : port_info['proto'],
+                    'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(int(record['timestamp'])))
+                }
+
+                # Add the service information if available (this field is optional)
+                if 'service' in port_info:
+
+                    # Add the service name if available
+                    if 'name' in port_info['service']:
+                        if (service_name := port_info['service']['name']) not in ('unknown',''):
+                            struct['service'] = service_name
+
+                    # Add the service banner if available
+                    if 'banner' in port_info['service']:
+                        banner = ' '.join(port_info['service']['banner'].split()) # Remove extra whitespace
+                        if banner:
+                            struct['banner'] = banner
+
+                # Yield the record
+                yield {'_index': default_index, '_source': struct}


 async def test(input_path: str):
     '''
-    Test the Masscan ingestion process
+    Test the ingestion process

-    :param input_path: Path to the MassDNS log file
+    :param input_path: Path to the input file
     '''
+
     async for document in process_data(input_path):
         print(document)


 if __name__ == '__main__':
     import argparse
     import asyncio

-    parser = argparse.ArgumentParser(description='Masscan Ingestor for ERIS')
+    parser = argparse.ArgumentParser(description='Ingestor for ERIS')
     parser.add_argument('input_path', help='Path to the input file or directory')
     args = parser.parse_args()

     asyncio.run(test(args.input_path))


 '''
 Deploy:
     apt-get install iptables masscan libpcap-dev screen
     setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan
     /sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent
     printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf
     screen -S scan
-    masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json
+    masscan 0.0.0.0/0 -p18000 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ 18000.json
     masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL

 Output:
 {
     "ip" : "43.134.51.142",
     "timestamp" : "1705255468",
     "ports" : [
         {
             "port" : 22, # We will create a record for each port opened
             "proto" : "tcp",
             "service" : {
                 "name" : "ssh",
                 "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4"
             }
         }
     ]
 }

 Input:
 {
     "_id" : "43.134.51.142:22"
     "_index" : "masscan-logs",
     "_source" : {
         "ip" : "43.134.51.142",
         "port" : 22,
         "proto" : "tcp",
         "service" : "ssh",
         "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4",
         "seen" : "2021-10-08T02:04:28Z"
     }
 }
 '''
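A sketch (an assumption, not part of the commit) of how the documents yielded by process_data() might be pushed into Elasticsearch with the async bulk helper; it is meant to sit alongside the ingestor so process_data and logging are in scope, and the client URL, file name and chunk size are placeholders:

import asyncio
from elasticsearch import AsyncElasticsearch
from elasticsearch.helpers import async_streaming_bulk

async def ingest(input_path: str):
    client = AsyncElasticsearch('https://localhost:9200')
    # Stream the yielded actions in batches instead of buffering the whole file
    async for ok, result in async_streaming_bulk(client, process_data(input_path), chunk_size=1000):
        if not ok:
            logging.error(f'Failed to index document: {result}')
    await client.close()

asyncio.run(ingest('masscan.json'))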
@@ -140,18 +140,21 @@ if __name__ == '__main__':

 '''
 Deployment:
-    sudo apt-get install build-essential gcc make
+    sudo apt-get install build-essential gcc make python3 python3-pip
+    pip install aiofiles aiohttp elasticsearch
+    git clone --depth 1 https://github.com/acidvegas/eris.git $HOME/eris
     git clone --depth 1 https://github.com/blechschmidt/massdns.git $HOME/massdns && cd $HOME/massdns && make
     curl -s https://public-dns.info/nameservers.txt | grep -v ':' > $HOME/massdns/nameservers.txt
-    python3 ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -o S -w $HOME/massdns/fifo.json
-    or...
-    while true; do python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -o S -w $HOME/massdns/fifo.json; done
+    while true; do python3 ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -o S -w $HOME/eris/FIFO; done

 Output:
     0.6.229.47.in-addr.arpa. PTR 047-229-006-000.res.spectrum.com.
     0.6.228.75.in-addr.arpa. PTR 0.sub-75-228-6.myvzw.com.
     0.6.207.73.in-addr.arpa. PTR c-73-207-6-0.hsd1.ga.comcast.net.


 Input:
 {
     '_id' : '47.229.6.0'
@@ -163,6 +166,7 @@ Input:
     }
 }


 Notes:
     Why do some IP addresses return a A/CNAME from a PTR request
     What is dns-servfail.net (Frequent CNAME response from PTR requests)
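A sketch of generating the reverse-DNS query names that get piped into massdns in the pipeline above; this is an assumption about what scripts/ptr.py produces (based on the sample Output), not a copy of it, and the helper name and CIDR are placeholders:

import ipaddress

def ptr_names(cidr: str):
    '''Yield the in-addr.arpa name for every address in the given IPv4 block.'''
    for ip in ipaddress.ip_network(cidr, strict=False):
        yield ip.reverse_pointer  # e.g. 0.6.229.47.in-addr.arpa for 47.229.6.0

for name in ptr_names('47.229.6.0/29'):
    print(name)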
@@ -191,13 +191,12 @@ Input:
     'registry' : 'arin',
     'cc' : 'us',
     'type' : 'ipv4',
-    'start' : { 'ip': '76.15.132.0' },
-    'value' : 1024,
+    'ip' : { 'start': '76.15.132.0', 'end': '76.16.146.0' },
     'date' : '2007-05-02T00:00:00Z',
     'status' : 'allocated',
     'extensions' : '6c065d5b54b877781f05e7d30ebfff28'
 }

 Notes:
     Do we make this handle the database locally or load it into ram?
 '''
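A sketch (an assumption, not necessarily how the ingestor derives it) of turning the old 'start' + 'value' pair from the RIR delegation data into the new inclusive 'ip': { 'start': ..., 'end': ... } shape, where 'value' is the number of delegated addresses:

import ipaddress

def delegation_range(start: str, value: int) -> dict:
    '''The last delegated address is start + value - 1.'''
    first = ipaddress.ip_address(start)
    return {'start': str(first), 'end': str(first + value - 1)}

print(delegation_range('76.15.132.0', 1024))  # {'start': '76.15.132.0', 'end': '76.15.135.255'}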
@@ -82,7 +82,7 @@ async def process_data():

     try:
         json_data = json.loads(data)
-    except aiohttp.ContentTypeError as e:
+    except json.JSONDecodeError as e:
         raise Exception(f'Failed to parse {registry} delegation data: {e}')

     if 'transfers' not in json_data:
@@ -152,36 +152,36 @@ if __name__ == '__main__':
 '''
 Output:
 {
     "transfer_date" : "2017-09-15T19:00:00Z",
     "ip4nets" : {
         "original_set" : [ { "start_address": "94.31.64.0", "end_address": "94.31.127.255" } ],
         "transfer_set" : [ { "start_address": "94.31.64.0", "end_address": "94.31.127.255" } ]
     },
     "type" : "MERGER_ACQUISITION",
     "source_organization" : { "name": "Unser Ortsnetz GmbH" },
     "recipient_organization" : {
         "name" : "Deutsche Glasfaser Wholesale GmbH",
         "country_code" : "DE"
     },
     "source_rir" : "RIPE NCC",
     "recipient_rir" : "RIPE NCC"
 },
 {
     "transfer_date" : "2017-09-18T19:00:00Z",
     "asns" : {
         "original_set" : [ { "start": 198257, "end": 198257 } ],
         "transfer_set" : [ { "start": 198257, "end": 198257 } ]
     },
     "type" : "MERGER_ACQUISITION",
     "source_organization" : { "name": "CERT PLIX Sp. z o.o." },
     "recipient_organization" : {
         "name" : "Equinix (Poland) Sp. z o.o.",
         "country_code" : "PL"
     },
     "source_rir" : "RIPE NCC",
     "recipient_rir" : "RIPE NCC"
 }

 Input:
-    - Nothing changed from the output for now...
+    Nothing changed from the output for now...
 '''
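Why the exception swap above matters: json.loads() raises json.JSONDecodeError, while aiohttp.ContentTypeError is only raised by response.json() when the server reports a non-JSON content type, so the old handler could never fire on a json.loads() failure. A minimal illustration:

import json

try:
    json.loads('not valid json')
except json.JSONDecodeError as e:
    print(f'Failed to parse delegation data: {e}')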