From 84f124b23d29a7b183dc14ddffe948ae4a8d3b95 Mon Sep 17 00:00:00 2001 From: acidvegas Date: Wed, 6 Mar 2024 13:26:45 -0500 Subject: [PATCH] Masscan ingestion script updated to use ip:port as the document id to allow updating records that already exist. Added a sentinel value to trigger an EOF when using --watch with FIFOs --- ingestors/ingest_masscan.py | 117 ++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/ingestors/ingest_masscan.py b/ingestors/ingest_masscan.py index 5eb0660..711112f 100644 --- a/ingestors/ingest_masscan.py +++ b/ingestors/ingest_masscan.py @@ -2,21 +2,8 @@ # Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris) # ingest_masscan.py -''' -apt-get install iptables masscan libpcap-dev screen -setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan -/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP -printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf -screen -S scan -masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json -masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL - -Note: The above iptables rule is not persistent and will be removed on reboot. 
-''' - import json import logging -import re import time try: @@ -31,27 +18,27 @@ def construct_map() -> dict: keyword_mapping = { 'type': 'text', 'fields': { 'keyword': { 'type': 'keyword', 'ignore_above': 256 } } } + geoip_mapping = { + 'city_name' : keyword_mapping, + 'continent_name' : keyword_mapping, + 'country_iso_code' : keyword_mapping, + 'country_name' : keyword_mapping, + 'location' : { 'type': 'geo_point' }, + 'region_iso_code' : keyword_mapping, + 'region_name' : keyword_mapping, + } + mapping = { 'mappings': { 'properties': { - 'ip': { 'type': 'ip' }, - 'port': { 'type': 'integer' }, - 'proto': { 'type': 'keyword' }, - 'service': { 'type': 'keyword' }, - 'banner': keyword_mapping, - 'ref_id': { 'type': 'keyword' }, - 'seen': { 'type': 'date' } - #'geoip': { - # 'properties': { - # 'city_name': keyword_mapping, - # 'continent_name': keyword_mapping, - # 'country_iso_code': keyword_mapping, - # 'country_name': keyword_mapping, - # 'location': { 'type': 'geo_point' }, - # 'region_iso_code': keyword_mapping, - # 'region_name': keyword_mapping, - # } - #} + 'ip' : { 'type': 'ip' }, + 'port' : { 'type': 'integer' }, + 'proto' : { 'type': 'keyword' }, + 'service' : { 'type': 'keyword' }, + 'banner' : keyword_mapping, + #'geoip' : { 'properties': geoip_mapping } # Used with the geoip pipeline to enrich the data + 'seen' : { 'type': 'date' } + } } } @@ -70,10 +57,13 @@ async def process_data(file_path: str): async for line in input_file: line = line.strip() + if line == '~eof': # Sentinel value to indicate the end of a process (Used with --watch with FIFO) + break + if not line or not line.startswith('{'): continue - if line.endswith(','): + if line.endswith(','): # Do we need this? Masscan JSON output seems to separate records with a comma between lines for some reason... 
line = line[:-1] try: @@ -83,12 +73,13 @@ async def process_data(file_path: str): # { "ip": "51.161.12.223", "timestamp": "1707628302", "ports": [ {"port": 22, "proto": "tcp", "service": {"name": "ssh", "banner": # { "ip": "83.66.211.246", "timestamp": "1706557002" logging.error(f'Failed to parse JSON record! ({line})') - input('Press Enter to continue...') # Pause for review & debugging (Will remove pausing in production, still investigating the cause of this issue.) + input('Press Enter to continue...') # Pause for review & debugging (remove this in production) continue if len(record['ports']) > 1: + # In rare cases, a single record may contain multiple ports, though I have yet to witness this... logging.warning(f'Multiple ports found for record! ({record})') - input('Press Enter to continue...') # Pause for review (Will remove pausing in production, still investigating if you ever seen more than one port in a record.) + input('Press Enter to continue...') # Pause for review (remove this in production) for port_info in record['ports']: struct = { @@ -106,30 +97,26 @@ async def process_data(file_path: str): if 'banner' in port_info['service']: banner = ' '.join(port_info['service']['banner'].split()) # Remove extra whitespace if banner: - match = re.search(r'\(Ref\.Id: (.*?)\)', banner) - if match: - struct['ref_id'] = match.group(1) - else: - struct['banner'] = banner + struct['banner'] = banner - yield {'_index': default_index, '_source': struct} - - return None # EOF + id = f'{record["ip"]}:{port_info["port"]}' # Store with ip:port as the unique id to allow the record to be reindexed if it exists. 
+ + yield {'_id': id, '_index': default_index, '_source': struct} ''' Example record: { - "ip": "43.134.51.142", - "timestamp": "1705255468", # Convert to ZULU BABY - "ports": [ # We will create a record for each port opened + "ip" : "43.134.51.142", + "timestamp" : "1705255468", # Convert to ZULU BABY + "ports" : [ # We will create a record for each port opened { - "port": 22, - "proto": "tcp", - "service": { # This field is optional - "name": "ssh", - "banner": "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4" + "port" : 22, + "proto" : "tcp", + "service" : { # This field is optional + "name" : "ssh", + "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4" } } ] @@ -137,12 +124,28 @@ Example record: Will be indexed as: { - "ip": "43.134.51.142", - "port": 22, - "proto": "tcp", - "service": "ssh", - "banner": "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4", - "seen": "2021-10-08T02:04:28Z", - "ref_id": "?sKfOvsC4M4a2W8PaC4zF?" # TCP RST Payload, Might be useful.. + "_id" : "43.134.51.142:22" + "_index" : "masscan-logs", + "_source" : { + "ip" : "43.134.51.142", + "port" : 22, + "proto" : "tcp", + "service" : "ssh", + "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4", + "seen" : "2021-10-08T02:04:28Z" } +''' + + + +''' +Notes: + +apt-get install iptables masscan libpcap-dev screen +setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan +/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent +printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf +screen -S scan +masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json +masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only 
--rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL ''' \ No newline at end of file