From 45f878285cfc3ce604176afa8405d87507384832 Mon Sep 17 00:00:00 2001 From: acidvegas Date: Thu, 7 Mar 2024 23:31:30 -0500 Subject: [PATCH] Testing function added to every ingestor to debug directly. No more --dry-run needed. --- README.md | 1 + ingestors/ingest_certs.py | 137 ++++++++++++++++++++---------------- ingestors/ingest_httpx.py | 103 +++++++++++++++++---------- ingestors/ingest_masscan.py | 97 ++++++++++++++----------- ingestors/ingest_massdns.py | 62 ++++++++-------- ingestors/ingest_zone.py | 66 ++++++++++------- 6 files changed, 273 insertions(+), 193 deletions(-) diff --git a/README.md b/README.md index 2947c12..962497b 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ The is a suite of tools to aid in the ingestion of recon data from various sourc - [elasticsearch](https://pypi.org/project/elasticsearch/) *(`pip install elasticsearch`)* - [aiofiles](https://pypi.org/project/aiofiles) *(`pip install aiofiles`)* - [aiohttp](https://pypi.org/projects/aiohttp) *(`pip install aiohttp`)* + - [websockets](https://pypi.org/project/websockets/) *(`pip install websockets`) (only required for `--certs` ingestion)* ## Usage ```shell diff --git a/ingestors/ingest_certs.py b/ingestors/ingest_certs.py index af5a6be..796c5eb 100644 --- a/ingestors/ingest_certs.py +++ b/ingestors/ingest_certs.py @@ -91,11 +91,11 @@ def construct_map() -> dict: return mapping -async def process_data(file_path: str = None): +async def process_data(place_holder: str = None): ''' Read and process Certsream records live from the Websocket stream. - :param file_path: Path to the Certstream log file (unused, placeholder for consistency with other ingestors) + :param place_holder: Placeholder parameter to match the process_data function signature of other ingestors. 
''' while True: @@ -154,63 +154,82 @@ async def strip_struct_empty(data: dict) -> dict: return data +async def test(): + '''Test the Cert stream ingestion process''' + + async for document in process_data(): + print(document) + + + +if __name__ == '__main__': + import argparse + import asyncio + + parser = argparse.ArgumentParser(description='Certstream Ingestor for ERIS') + parser.add_argument('input_path', help='Path to the input file or directory') + args = parser.parse_args() + + asyncio.run(test(args.input_path)) + + ''' -Example record: -{ - "data": { - "cert_index": 43061646, - "cert_link": "https://yeti2025.ct.digicert.com/log/ct/v1/get-entries?start=43061646&end=43061646", - "leaf_cert": { - "all_domains": [ - "*.d7zdnegbre53n.amplifyapp.com", - "d7zdnegbre53n.amplifyapp.com" - ], - "extensions": { - "authorityInfoAccess": "CA Issuers - URI:http://crt.r2m02.amazontrust.com/r2m02.cer\nOCSP - URI:http://ocsp.r2m02.amazontrust.com\n", - "authorityKeyIdentifier": "keyid:C0:31:52:CD:5A:50:C3:82:7C:74:71:CE:CB:E9:9C:F9:7A:EB:82:E2\n", - "basicConstraints": "CA:FALSE", - "certificatePolicies": "Policy: 2.23.140.1.2.1", - "crlDistributionPoints": "Full Name:\n URI:http://crl.r2m02.amazontrust.com/r2m02.crl", - "ctlPoisonByte": true, - "extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication", - "keyUsage": "Digital Signature, Key Encipherment", - "subjectAltName": "DNS:d7zdnegbre53n.amplifyapp.com, DNS:*.d7zdnegbre53n.amplifyapp.com", - "subjectKeyIdentifier": "59:32:78:2A:11:03:62:55:BB:3B:B9:80:24:76:28:90:2E:D1:A4:56" - }, - "fingerprint": "D9:05:A3:D5:AA:F9:68:BC:0C:0A:15:69:C9:5E:11:92:32:67:4F:FA", - "issuer": { - "C": "US", - "CN": "Amazon RSA 2048 M02", - "L": null, - "O": "Amazon", - "OU": null, - "ST": null, - "aggregated": "/C=US/CN=Amazon RSA 2048 M02/O=Amazon", - "emailAddress": null - }, - "not_after": 1743811199, - "not_before": 1709596800, - "serial_number": "FDB450C1942E3D30A18737063449E62", - "signature_algorithm": 
"sha256, rsa", - "subject": { - "C": null, - "CN": "*.d7zdnegbre53n.amplifyapp.com", - "L": null, - "O": null, - "OU": null, - "ST": null, - "aggregated": "/CN=*.d7zdnegbre53n.amplifyapp.com", - "emailAddress": null - } - }, - "seen": 1709651773.594684, - "source": { - "name": "DigiCert Yeti2025 Log", - "url": "https://yeti2025.ct.digicert.com/log/" - }, - "update_type": "PrecertLogEntry" - }, - "message_type": "certificate_update" -} +Output: + { + "data": { + "cert_index": 43061646, + "cert_link": "https://yeti2025.ct.digicert.com/log/ct/v1/get-entries?start=43061646&end=43061646", + "leaf_cert": { + "all_domains": [ + "*.d7zdnegbre53n.amplifyapp.com", + "d7zdnegbre53n.amplifyapp.com" + ], + "extensions": { + "authorityInfoAccess" : "CA Issuers - URI:http://crt.r2m02.amazontrust.com/r2m02.cer\nOCSP - URI:http://ocsp.r2m02.amazontrust.com\n", + "authorityKeyIdentifier" : "keyid:C0:31:52:CD:5A:50:C3:82:7C:74:71:CE:CB:E9:9C:F9:7A:EB:82:E2\n", + "basicConstraints" : "CA:FALSE", + "certificatePolicies" : "Policy: 2.23.140.1.2.1", + "crlDistributionPoints" : "Full Name:\n URI:http://crl.r2m02.amazontrust.com/r2m02.crl", + "ctlPoisonByte" : true, + "extendedKeyUsage" : "TLS Web server authentication, TLS Web client authentication", + "keyUsage" : "Digital Signature, Key Encipherment", + "subjectAltName" : "DNS:d7zdnegbre53n.amplifyapp.com, DNS:*.d7zdnegbre53n.amplifyapp.com", + "subjectKeyIdentifier" : "59:32:78:2A:11:03:62:55:BB:3B:B9:80:24:76:28:90:2E:D1:A4:56" + }, + "fingerprint": "D9:05:A3:D5:AA:F9:68:BC:0C:0A:15:69:C9:5E:11:92:32:67:4F:FA", + "issuer": { + "C" : "US", + "CN" : "Amazon RSA 2048 M02", + "L" : null, + "O" : "Amazon", + "OU" : null, + "ST" : null, + "aggregated" : "/C=US/CN=Amazon RSA 2048 M02/O=Amazon", + "emailAddress" : null + }, + "not_after" : 1743811199, + "not_before" : 1709596800, + "serial_number" : "FDB450C1942E3D30A18737063449E62", + "signature_algorithm" : "sha256, rsa", + "subject": { + "C" : null, + "CN" : 
"*.d7zdnegbre53n.amplifyapp.com", + "L" : null, + "O" : null, + "OU" : null, + "ST" : null, + "aggregated" : "/CN=*.d7zdnegbre53n.amplifyapp.com", + "emailAddress" : null + } + }, + "seen": 1709651773.594684, + "source": { + "name" : "DigiCert Yeti2025 Log", + "url" : "https://yeti2025.ct.digicert.com/log/" + }, + "update_type": "PrecertLogEntry" + }, + "message_type": "certificate_update" + } ''' \ No newline at end of file diff --git a/ingestors/ingest_httpx.py b/ingestors/ingest_httpx.py index 0bb9bc2..795b6ff 100644 --- a/ingestors/ingest_httpx.py +++ b/ingestors/ingest_httpx.py @@ -82,45 +82,72 @@ async def process_data(file_path: str): yield {'_id': record['domain'], '_index': default_index, '_source': record} +async def test(input_path: str): + ''' + Test the HTTPX ingestion process + + :param input_path: Path to the HTTPX log file + ''' + async for document in process_data(input_path): + print(document) + + + +if __name__ == '__main__': + import argparse + import asyncio + + parser = argparse.ArgumentParser(description='HTTPX Ingestor for ERIS') + parser.add_argument('input_path', help='Path to the input file or directory') + args = parser.parse_args() + + asyncio.run(test(args.input_path)) + + '''' -Example record: -{ - "timestamp":"2024-01-14T13:08:15.117348474-05:00", # Rename to seen and remove milliseconds and offset - "hash": { # Do we need all of these ? 
- "body_md5" : "4ae9394eb98233b482508cbda3b33a66", - "body_mmh3" : "-4111954", - "body_sha256" : "89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3", - "body_simhash" : "9814303593401624250", - "header_md5" : "980366deb2b2fb5df2ad861fc63e79ce", - "header_mmh3" : "-813072798", - "header_sha256" : "39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d", - "header_simhash" : "10962523587435277678" - }, - "port" : "443", - "url" : "https://supernets.org", # Remove this and only use the input field as "domain" maybe - "input" : "supernets.org", # rename to domain - "title" : "SuperNETs", - "scheme" : "https", - "webserver" : "nginx", - "body_preview" : "SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup", - "content_type" : "text/html", - "method" : "GET", # Remove this - "host" : "51.89.151.158", - "path" : "/", - "favicon" : "-674048714", - "favicon_path" : "/i/favicon.png", - "time" : "592.907689ms", # Do we need this ? - "a" : ["6.150.220.23"], - "tech" : ["Bootstrap:4.0.0", "HSTS", "Nginx"], - "words" : 436, # Do we need this ? - "lines" : 79, # Do we need this ? - "status_code" : 200, - "content_length" : 4597, - "failed" : false, # Do we need this ? - "knowledgebase" : { # Do we need this ? - "PageType" : "nonerror", - "pHash" : 0 +Deploy: + go install -v github.com/projectdiscovery/httpx/cmd/httpx@latest + curl -s https://public-dns.info/nameservers.txt -o nameservers.txt + httpx -l zone.txt -t 200 -sc -location -favicon -title -bp -td -ip -cname -mc 200,201,301,302,303,307,308 -fr -r nameservers.txt -retries 2 -stream -sd -j -o httpx.json -v + +Output: + { + "timestamp":"2024-01-14T13:08:15.117348474-05:00", # Rename to seen and remove milliseconds and offset + "hash": { # Do we need all of these ? 
+ "body_md5" : "4ae9394eb98233b482508cbda3b33a66", + "body_mmh3" : "-4111954", + "body_sha256" : "89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3", + "body_simhash" : "9814303593401624250", + "header_md5" : "980366deb2b2fb5df2ad861fc63e79ce", + "header_mmh3" : "-813072798", + "header_sha256" : "39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d", + "header_simhash" : "10962523587435277678" + }, + "port" : "443", + "url" : "https://supernets.org", # Remove this and only use the input field as "domain" maybe + "input" : "supernets.org", # rename to domain + "title" : "SuperNETs", + "scheme" : "https", + "webserver" : "nginx", + "body_preview" : "SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup", + "content_type" : "text/html", + "method" : "GET", # Remove this + "host" : "51.89.151.158", + "path" : "/", + "favicon" : "-674048714", + "favicon_path" : "/i/favicon.png", + "time" : "592.907689ms", # Do we need this ? + "a" : ["6.150.220.23"], + "tech" : ["Bootstrap:4.0.0", "HSTS", "Nginx"], + "words" : 436, # Do we need this ? + "lines" : 79, # Do we need this ? + "status_code" : 200, + "content_length" : 4597, + "failed" : false, # Do we need this ? + "knowledgebase" : { # Do we need this ? 
+ "PageType" : "nonerror", + "pHash" : 0 + } } -} ''' \ No newline at end of file diff --git a/ingestors/ingest_masscan.py b/ingestors/ingest_masscan.py index e048830..546ae01 100644 --- a/ingestors/ingest_masscan.py +++ b/ingestors/ingest_masscan.py @@ -113,48 +113,65 @@ async def process_data(file_path: str): yield {'_id': id, '_index': default_index, '_source': struct} +async def test(input_path: str): + ''' + Test the Masscan ingestion process + + :param input_path: Path to the MassDNS log file + ''' + async for document in process_data(input_path): + print(document) + + + +if __name__ == '__main__': + import argparse + import asyncio + + parser = argparse.ArgumentParser(description='Masscan Ingestor for ERIS') + parser.add_argument('input_path', help='Path to the input file or directory') + args = parser.parse_args() + + asyncio.run(test(args.input_path)) + + ''' -Example record: -{ - "ip" : "43.134.51.142", - "timestamp" : "1705255468", # Convert to ZULU BABY - "ports" : [ # We will create a record for each port opened - { +Deploy: + apt-get install iptables masscan libpcap-dev screen + setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan + /sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent + printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf + screen -S scan + masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json + masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL + +Output: + { + "ip" : "43.134.51.142", + "timestamp" : "1705255468", + "ports" : [ + { + "port" 
: 22, # We will create a record for each port opened + "proto" : "tcp", + "service" : { + "name" : "ssh", + "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4" + } + } + ] + } + +Input: + { + "_id" : "43.134.51.142:22" + "_index" : "masscan-logs", + "_source" : { + "ip" : "43.134.51.142", "port" : 22, "proto" : "tcp", - "service" : { # This field is optional - "name" : "ssh", - "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4" - } - } - ] -} - -Will be indexed as: -{ - "_id" : "43.134.51.142:22" - "_index" : "masscan-logs", - "_source" : { - "ip" : "43.134.51.142", - "port" : 22, - "proto" : "tcp", - "service" : "ssh", - "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4", - "seen" : "2021-10-08T02:04:28Z" -} -''' - - - -''' -Notes: - -apt-get install iptables masscan libpcap-dev screen -setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan -/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent -printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf -screen -S scan -masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json -masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL + "service" : "ssh", + "banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4", + "seen" : "2021-10-08T02:04:28Z" + } ''' \ No newline at end of file diff --git a/ingestors/ingest_massdns.py b/ingestors/ingest_massdns.py index 862d7f9..91ae3c7 100644 --- a/ingestors/ingest_massdns.py +++ b/ingestors/ingest_massdns.py @@ -2,35 +2,6 @@ # Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas 
(https://git.acid.vegas/eris) # ingest_massdns.py -''' -Deployment: - git clone https://github.com/blechschmidt/massdns.git $HOME/massdns && cd $HOME/massdns && make - curl -s https://public-dns.info/nameservers.txt | grep -v ':' > $HOME/massdns/nameservers.txt - pythons ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR-s 1000 -o S -w $HOME/massdns/fifo.json - or... - while true; do python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json; done - -Output: - 0.6.229.47.in-addr.arpa. PTR 047-229-006-000.res.spectrum.com. - 0.6.228.75.in-addr.arpa. PTR 0.sub-75-228-6.myvzw.com. - 0.6.207.73.in-addr.arpa. PTR c-73-207-6-0.hsd1.ga.comcast.net. - -Input: - { - "_id" : "47.229.6.0" - "_index" : "ptr-records", - "_source" : { - "ip" : "47.229.6.0", - "record" : "047-229-006-000.res.spectrum.com", # This will be a list if there are more than one PTR record - "seen" : "2021-06-30T18:31:00Z" - } - } - -Notes: -- Why do some IP addresses return a CNAME from a PTR request -- What is dns-servfail.net (Frequent CNAME response from PTR requests) -''' - import logging import time @@ -161,4 +132,35 @@ if __name__ == '__main__': parser.add_argument('input_path', help='Path to the input file or directory') args = parser.parse_args() - asyncio.run(test(args.input_path)) \ No newline at end of file + asyncio.run(test(args.input_path)) + + + +''' +Deployment: + git clone --depth 1 https://github.com/blechschmidt/massdns.git $HOME/massdns && cd $HOME/massdns && make + curl -s https://public-dns.info/nameservers.txt | grep -v ':' > $HOME/massdns/nameservers.txt + python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json + or... 
+ while true; do python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json; done + +Output: + 0.6.229.47.in-addr.arpa. PTR 047-229-006-000.res.spectrum.com. + 0.6.228.75.in-addr.arpa. PTR 0.sub-75-228-6.myvzw.com. + 0.6.207.73.in-addr.arpa. PTR c-73-207-6-0.hsd1.ga.comcast.net. + +Input: + { + "_id" : "47.229.6.0" + "_index" : "ptr-records", + "_source" : { + "ip" : "47.229.6.0", + "record" : "047-229-006-000.res.spectrum.com", # This will be a list if there are more than one PTR record + "seen" : "2021-06-30T18:31:00Z" + } + } + +Notes: +- Why do some IP addresses return a CNAME from a PTR request +- What is dns-servfail.net (Frequent CNAME response from PTR requests) +''' \ No newline at end of file diff --git a/ingestors/ingest_zone.py b/ingestors/ingest_zone.py index 077355a..35c995b 100644 --- a/ingestors/ingest_zone.py +++ b/ingestors/ingest_zone.py @@ -119,36 +119,50 @@ async def process_data(file_path: str): domain_records[domain][record_type].append({'ttl': ttl, 'data': data}) +async def test(input_path: str): + ''' + Test the Zone file ingestion process + + :param input_path: Path to the zone file + ''' + async for document in process_data(input_path): + print(document) + + + +if __name__ == '__main__': + import argparse + import asyncio + + parser = argparse.ArgumentParser(description='Zone file Ingestor for ERIS') + parser.add_argument('input_path', help='Path to the input file or directory') + args = parser.parse_args() + + asyncio.run(test(args.input_path)) + + ''' -Example record: -0so9l9nrl425q3tf7dkv1nmv2r3is6vm.vegas. 3600 in nsec3 1 1 100 332539EE7F95C32A 10MHUKG4FHIAVEFDOTF6NKU5KFCB2J3A NS DS RRSIG -0so9l9nrl425q3tf7dkv1nmv2r3is6vm.vegas. 3600 in rrsig NSEC3 8 2 3600 20240122151947 20240101141947 4125 vegas. 
hzIvQrZIxBSwRWyiHkb5M2W0R3ikNehv884nilkvTt9DaJSDzDUrCtqwQb3jh6+BesByBqfMQK+L2n9c//ZSmD5/iPqxmTPCuYIB9uBV2qSNSNXxCY7uUt5w7hKUS68SLwOSjaQ8GRME9WQJhY6gck0f8TT24enjXXRnQC8QitY= -1-800-flowers.vegas. 3600 in ns dns1.cscdns.net. -1-800-flowers.vegas. 3600 in ns dns2.cscdns.net. -100.vegas. 3600 in ns ns51.domaincontrol.com. -100.vegas. 3600 in ns ns52.domaincontrol.com. -1001.vegas. 3600 in ns ns11.waterrockdigital.com. -1001.vegas. 3600 in ns ns12.waterrockdigital.com. +Output: + 1001.vegas. 3600 in ns ns11.waterrockdigital.com. + 1001.vegas. 3600 in ns ns12.waterrockdigital.com. -Will be indexed as: -{ - "_id" : "1001.vegas" - "_index" : "dns-zones", - "_source" : { - "domain" : "1001.vegas", - "records" : { # All records are stored in a single dictionary - "ns": [ - {"ttl": 3600, "data": "ns11.waterrockdigital.com"}, - {"ttl": 3600, "data": "ns12.waterrockdigital.com"} - ] - }, - "seen" : "2021-09-01T00:00:00Z" # Zulu time added upon indexing +Input: + { + "_id" : "1001.vegas" + "_index" : "dns-zones", + "_source" : { + "domain" : "1001.vegas", + "records" : { + "ns": [ + {"ttl": 3600, "data": "ns11.waterrockdigital.com"}, + {"ttl": 3600, "data": "ns12.waterrockdigital.com"} + ] + }, + "seen" : "2021-09-01T00:00:00Z" + } } -} -''' -''' Notes: -- How do we want to handle hashed NSEC3 records? Do we ignest them as they are, or crack the NSEC3 hashes first and ingest? + How do we want to handle hashed NSEC3 records? Do we ingest them as they are, or crack the NSEC3 hashes first and ingest? ''' \ No newline at end of file