Testing function added to every ingestor to debug directly. No more --dry-run needed.

This commit is contained in:
Dionysus 2024-03-07 23:31:30 -05:00
parent b78b99e060
commit 45f878285c
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
6 changed files with 273 additions and 193 deletions

View File

@ -8,6 +8,7 @@ This is a suite of tools to aid in the ingestion of recon data from various sourc
- [elasticsearch](https://pypi.org/project/elasticsearch/) *(`pip install elasticsearch`)*
- [aiofiles](https://pypi.org/project/aiofiles) *(`pip install aiofiles`)*
- [aiohttp](https://pypi.org/project/aiohttp) *(`pip install aiohttp`)*
- [websockets](https://pypi.org/project/websockets/) *(`pip install websockets`) (only required for `--certs` ingestion)*
## Usage
```shell

View File

@ -91,11 +91,11 @@ def construct_map() -> dict:
return mapping
async def process_data(file_path: str = None):
async def process_data(place_holder: str = None):
'''
Read and process Certstream records live from the Websocket stream.
:param file_path: Path to the Certstream log file (unused, placeholder for consistency with other ingestors)
:param place_holder: Placeholder parameter to match the process_data function signature of other ingestors.
'''
while True:
@ -154,63 +154,82 @@ async def strip_struct_empty(data: dict) -> dict:
return data
async def test():
'''Test the Certstream ingestion process'''
async for document in process_data():
print(document)
if __name__ == '__main__':
import argparse
import asyncio
parser = argparse.ArgumentParser(description='Certstream Ingestor for ERIS')
parser.add_argument('input_path', help='Path to the input file or directory')
args = parser.parse_args()
asyncio.run(test(args.input_path))
'''
Example record:
{
"data": {
"cert_index": 43061646,
"cert_link": "https://yeti2025.ct.digicert.com/log/ct/v1/get-entries?start=43061646&end=43061646",
"leaf_cert": {
"all_domains": [
"*.d7zdnegbre53n.amplifyapp.com",
"d7zdnegbre53n.amplifyapp.com"
],
"extensions": {
"authorityInfoAccess": "CA Issuers - URI:http://crt.r2m02.amazontrust.com/r2m02.cer\nOCSP - URI:http://ocsp.r2m02.amazontrust.com\n",
"authorityKeyIdentifier": "keyid:C0:31:52:CD:5A:50:C3:82:7C:74:71:CE:CB:E9:9C:F9:7A:EB:82:E2\n",
"basicConstraints": "CA:FALSE",
"certificatePolicies": "Policy: 2.23.140.1.2.1",
"crlDistributionPoints": "Full Name:\n URI:http://crl.r2m02.amazontrust.com/r2m02.crl",
"ctlPoisonByte": true,
"extendedKeyUsage": "TLS Web server authentication, TLS Web client authentication",
"keyUsage": "Digital Signature, Key Encipherment",
"subjectAltName": "DNS:d7zdnegbre53n.amplifyapp.com, DNS:*.d7zdnegbre53n.amplifyapp.com",
"subjectKeyIdentifier": "59:32:78:2A:11:03:62:55:BB:3B:B9:80:24:76:28:90:2E:D1:A4:56"
},
"fingerprint": "D9:05:A3:D5:AA:F9:68:BC:0C:0A:15:69:C9:5E:11:92:32:67:4F:FA",
"issuer": {
"C": "US",
"CN": "Amazon RSA 2048 M02",
"L": null,
"O": "Amazon",
"OU": null,
"ST": null,
"aggregated": "/C=US/CN=Amazon RSA 2048 M02/O=Amazon",
"emailAddress": null
},
"not_after": 1743811199,
"not_before": 1709596800,
"serial_number": "FDB450C1942E3D30A18737063449E62",
"signature_algorithm": "sha256, rsa",
"subject": {
"C": null,
"CN": "*.d7zdnegbre53n.amplifyapp.com",
"L": null,
"O": null,
"OU": null,
"ST": null,
"aggregated": "/CN=*.d7zdnegbre53n.amplifyapp.com",
"emailAddress": null
}
},
"seen": 1709651773.594684,
"source": {
"name": "DigiCert Yeti2025 Log",
"url": "https://yeti2025.ct.digicert.com/log/"
},
"update_type": "PrecertLogEntry"
},
"message_type": "certificate_update"
}
Output:
{
"data": {
"cert_index": 43061646,
"cert_link": "https://yeti2025.ct.digicert.com/log/ct/v1/get-entries?start=43061646&end=43061646",
"leaf_cert": {
"all_domains": [
"*.d7zdnegbre53n.amplifyapp.com",
"d7zdnegbre53n.amplifyapp.com"
],
"extensions": {
"authorityInfoAccess" : "CA Issuers - URI:http://crt.r2m02.amazontrust.com/r2m02.cer\nOCSP - URI:http://ocsp.r2m02.amazontrust.com\n",
"authorityKeyIdentifier" : "keyid:C0:31:52:CD:5A:50:C3:82:7C:74:71:CE:CB:E9:9C:F9:7A:EB:82:E2\n",
"basicConstraints" : "CA:FALSE",
"certificatePolicies" : "Policy: 2.23.140.1.2.1",
"crlDistributionPoints" : "Full Name:\n URI:http://crl.r2m02.amazontrust.com/r2m02.crl",
"ctlPoisonByte" : true,
"extendedKeyUsage" : "TLS Web server authentication, TLS Web client authentication",
"keyUsage" : "Digital Signature, Key Encipherment",
"subjectAltName" : "DNS:d7zdnegbre53n.amplifyapp.com, DNS:*.d7zdnegbre53n.amplifyapp.com",
"subjectKeyIdentifier" : "59:32:78:2A:11:03:62:55:BB:3B:B9:80:24:76:28:90:2E:D1:A4:56"
},
"fingerprint": "D9:05:A3:D5:AA:F9:68:BC:0C:0A:15:69:C9:5E:11:92:32:67:4F:FA",
"issuer": {
"C" : "US",
"CN" : "Amazon RSA 2048 M02",
"L" : null,
"O" : "Amazon",
"OU" : null,
"ST" : null,
"aggregated" : "/C=US/CN=Amazon RSA 2048 M02/O=Amazon",
"emailAddress" : null
},
"not_after" : 1743811199,
"not_before" : 1709596800,
"serial_number" : "FDB450C1942E3D30A18737063449E62",
"signature_algorithm" : "sha256, rsa",
"subject": {
"C" : null,
"CN" : "*.d7zdnegbre53n.amplifyapp.com",
"L" : null,
"O" : null,
"OU" : null,
"ST" : null,
"aggregated" : "/CN=*.d7zdnegbre53n.amplifyapp.com",
"emailAddress" : null
}
},
"seen": 1709651773.594684,
"source": {
"name" : "DigiCert Yeti2025 Log",
"url" : "https://yeti2025.ct.digicert.com/log/"
},
"update_type": "PrecertLogEntry"
},
"message_type": "certificate_update"
}
'''

View File

@ -82,45 +82,72 @@ async def process_data(file_path: str):
yield {'_id': record['domain'], '_index': default_index, '_source': record}
async def test(input_path: str):
'''
Test the HTTPX ingestion process
:param input_path: Path to the HTTPX log file
'''
async for document in process_data(input_path):
print(document)
if __name__ == '__main__':
import argparse
import asyncio
parser = argparse.ArgumentParser(description='HTTPX Ingestor for ERIS')
parser.add_argument('input_path', help='Path to the input file or directory')
args = parser.parse_args()
asyncio.run(test(args.input_path))
'''
Example record:
{
"timestamp":"2024-01-14T13:08:15.117348474-05:00", # Rename to seen and remove milliseconds and offset
"hash": { # Do we need all of these ?
"body_md5" : "4ae9394eb98233b482508cbda3b33a66",
"body_mmh3" : "-4111954",
"body_sha256" : "89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3",
"body_simhash" : "9814303593401624250",
"header_md5" : "980366deb2b2fb5df2ad861fc63e79ce",
"header_mmh3" : "-813072798",
"header_sha256" : "39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d",
"header_simhash" : "10962523587435277678"
},
"port" : "443",
"url" : "https://supernets.org", # Remove this and only use the input field as "domain" maybe
"input" : "supernets.org", # rename to domain
"title" : "SuperNETs",
"scheme" : "https",
"webserver" : "nginx",
"body_preview" : "SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup",
"content_type" : "text/html",
"method" : "GET", # Remove this
"host" : "51.89.151.158",
"path" : "/",
"favicon" : "-674048714",
"favicon_path" : "/i/favicon.png",
"time" : "592.907689ms", # Do we need this ?
"a" : ["6.150.220.23"],
"tech" : ["Bootstrap:4.0.0", "HSTS", "Nginx"],
"words" : 436, # Do we need this ?
"lines" : 79, # Do we need this ?
"status_code" : 200,
"content_length" : 4597,
"failed" : false, # Do we need this ?
"knowledgebase" : { # Do we need this ?
"PageType" : "nonerror",
"pHash" : 0
Deploy:
go install -v github.com/projectdiscovery/httpx/cmd/httpx@latest
curl -s https://public-dns.info/nameservers.txt -o nameservers.txt
httpx -l zone.txt -t 200 -sc -location -favicon -title -bp -td -ip -cname -mc 200,201,301,302,303,307,308 -fr -r nameservers.txt -retries 2 -stream -sd -j -o httpx.json -v
Output:
{
"timestamp":"2024-01-14T13:08:15.117348474-05:00", # Rename to seen and remove milliseconds and offset
"hash": { # Do we need all of these ?
"body_md5" : "4ae9394eb98233b482508cbda3b33a66",
"body_mmh3" : "-4111954",
"body_sha256" : "89e06e8374353469c65adb227b158b265641b424fba7ddb2c67eef0c4c1280d3",
"body_simhash" : "9814303593401624250",
"header_md5" : "980366deb2b2fb5df2ad861fc63e79ce",
"header_mmh3" : "-813072798",
"header_sha256" : "39aea75ad548e38b635421861641ad1919ed3b103b17a33c41e7ad46516f736d",
"header_simhash" : "10962523587435277678"
},
"port" : "443",
"url" : "https://supernets.org", # Remove this and only use the input field as "domain" maybe
"input" : "supernets.org", # rename to domain
"title" : "SuperNETs",
"scheme" : "https",
"webserver" : "nginx",
"body_preview" : "SUPERNETS Home About Contact Donate Docs Network IRC Git Invidious Jitsi LibreX Mastodon Matrix Sup",
"content_type" : "text/html",
"method" : "GET", # Remove this
"host" : "51.89.151.158",
"path" : "/",
"favicon" : "-674048714",
"favicon_path" : "/i/favicon.png",
"time" : "592.907689ms", # Do we need this ?
"a" : ["6.150.220.23"],
"tech" : ["Bootstrap:4.0.0", "HSTS", "Nginx"],
"words" : 436, # Do we need this ?
"lines" : 79, # Do we need this ?
"status_code" : 200,
"content_length" : 4597,
"failed" : false, # Do we need this ?
"knowledgebase" : { # Do we need this ?
"PageType" : "nonerror",
"pHash" : 0
}
}
}
'''

View File

@ -113,48 +113,65 @@ async def process_data(file_path: str):
yield {'_id': id, '_index': default_index, '_source': struct}
async def test(input_path: str):
'''
Test the Masscan ingestion process
:param input_path: Path to the MassDNS log file
'''
async for document in process_data(input_path):
print(document)
if __name__ == '__main__':
import argparse
import asyncio
parser = argparse.ArgumentParser(description='Masscan Ingestor for ERIS')
parser.add_argument('input_path', help='Path to the input file or directory')
args = parser.parse_args()
asyncio.run(test(args.input_path))
'''
Example record:
{
"ip" : "43.134.51.142",
"timestamp" : "1705255468", # Convert to ZULU BABY
"ports" : [ # We will create a record for each port opened
{
Deploy:
apt-get install iptables masscan libpcap-dev screen
setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan
/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent
printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf
screen -S scan
masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json
masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL
Output:
{
"ip" : "43.134.51.142",
"timestamp" : "1705255468",
"ports" : [
{
"port" : 22, # We will create a record for each port opened
"proto" : "tcp",
"service" : {
"name" : "ssh",
"banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4"
}
}
]
}
Input:
{
"_id" : "43.134.51.142:22"
"_index" : "masscan-logs",
"_source" : {
"ip" : "43.134.51.142",
"port" : 22,
"proto" : "tcp",
"service" : { # This field is optional
"name" : "ssh",
"banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4"
}
}
]
}
Will be indexed as:
{
"_id" : "43.134.51.142:22"
"_index" : "masscan-logs",
"_source" : {
"ip" : "43.134.51.142",
"port" : 22,
"proto" : "tcp",
"service" : "ssh",
"banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4",
"seen" : "2021-10-08T02:04:28Z"
}
'''
'''
Notes:
apt-get install iptables masscan libpcap-dev screen
setcap 'CAP_NET_RAW+eip CAP_NET_ADMIN+eip' /bin/masscan
/sbin/iptables -A INPUT -p tcp --dport 61010 -j DROP # Not persistent
printf "0.0.0.0/8\n10.0.0.0/8\n100.64.0.0/10\n127.0.0.0/8\n169.254.0.0/16\n172.16.0.0/12\n192.0.0.0/24\n192.0.2.0/24\n192.31.196.0/24\n192.52.193.0/24\n192.88.99.0/24\n192.168.0.0/16\n192.175.48.0/24\n198.18.0.0/15\n198.51.100.0/24\n203.0.113.0/24\n224.0.0.0/3\n255.255.255.255/32" > exclude.conf
screen -S scan
masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61010 --open-only --rate 30000 --excludefile exclude.conf -oJ output.json
masscan 0.0.0.0/0 -p21,22,23 --banners --http-user-agent "USER_AGENT" --source-port 61000-65503 --open-only --rate 30000 --excludefile exclude.conf -oJ output_new.json --shard $i/$TOTAL
"service" : "ssh",
"banner" : "SSH-2.0-OpenSSH_8.9p1 Ubuntu-3ubuntu0.4",
"seen" : "2021-10-08T02:04:28Z"
}
'''

View File

@ -2,35 +2,6 @@
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
# ingest_massdns.py
'''
Deployment:
git clone https://github.com/blechschmidt/massdns.git $HOME/massdns && cd $HOME/massdns && make
curl -s https://public-dns.info/nameservers.txt | grep -v ':' > $HOME/massdns/nameservers.txt
python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json
or...
while true; do python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json; done
Output:
0.6.229.47.in-addr.arpa. PTR 047-229-006-000.res.spectrum.com.
0.6.228.75.in-addr.arpa. PTR 0.sub-75-228-6.myvzw.com.
0.6.207.73.in-addr.arpa. PTR c-73-207-6-0.hsd1.ga.comcast.net.
Input:
{
"_id" : "47.229.6.0"
"_index" : "ptr-records",
"_source" : {
"ip" : "47.229.6.0",
"record" : "047-229-006-000.res.spectrum.com", # This will be a list if there are more than one PTR record
"seen" : "2021-06-30T18:31:00Z"
}
}
Notes:
- Why do some IP addresses return a CNAME from a PTR request
- What is dns-servfail.net (Frequent CNAME response from PTR requests)
'''
import logging
import time
@ -161,4 +132,35 @@ if __name__ == '__main__':
parser.add_argument('input_path', help='Path to the input file or directory')
args = parser.parse_args()
asyncio.run(test(args.input_path))
asyncio.run(test(args.input_path))
'''
Deployment:
git clone --depth 1 https://github.com/blechschmidt/massdns.git $HOME/massdns && cd $HOME/massdns && make
curl -s https://public-dns.info/nameservers.txt | grep -v ':' > $HOME/massdns/nameservers.txt
python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json
or...
while true; do python ./scripts/ptr.py | ./bin/massdns -r $HOME/massdns/nameservers.txt -t PTR --filter NOERROR -s 1000 -o S -w $HOME/massdns/fifo.json; done
Output:
0.6.229.47.in-addr.arpa. PTR 047-229-006-000.res.spectrum.com.
0.6.228.75.in-addr.arpa. PTR 0.sub-75-228-6.myvzw.com.
0.6.207.73.in-addr.arpa. PTR c-73-207-6-0.hsd1.ga.comcast.net.
Input:
{
"_id" : "47.229.6.0"
"_index" : "ptr-records",
"_source" : {
"ip" : "47.229.6.0",
"record" : "047-229-006-000.res.spectrum.com", # This will be a list if there are more than one PTR record
"seen" : "2021-06-30T18:31:00Z"
}
}
Notes:
- Why do some IP addresses return a CNAME from a PTR request
- What is dns-servfail.net (Frequent CNAME response from PTR requests)
'''

View File

@ -119,36 +119,50 @@ async def process_data(file_path: str):
domain_records[domain][record_type].append({'ttl': ttl, 'data': data})
async def test(input_path: str):
'''
Test the Zone file ingestion process
:param input_path: Path to the MassDNS log file
'''
async for document in process_data(input_path):
print(document)
if __name__ == '__main__':
import argparse
import asyncio
parser = argparse.ArgumentParser(description='Zone file Ingestor for ERIS')
parser.add_argument('input_path', help='Path to the input file or directory')
args = parser.parse_args()
asyncio.run(test(args.input_path))
'''
Example record:
0so9l9nrl425q3tf7dkv1nmv2r3is6vm.vegas. 3600 in nsec3 1 1 100 332539EE7F95C32A 10MHUKG4FHIAVEFDOTF6NKU5KFCB2J3A NS DS RRSIG
0so9l9nrl425q3tf7dkv1nmv2r3is6vm.vegas. 3600 in rrsig NSEC3 8 2 3600 20240122151947 20240101141947 4125 vegas. hzIvQrZIxBSwRWyiHkb5M2W0R3ikNehv884nilkvTt9DaJSDzDUrCtqwQb3jh6+BesByBqfMQK+L2n9c//ZSmD5/iPqxmTPCuYIB9uBV2qSNSNXxCY7uUt5w7hKUS68SLwOSjaQ8GRME9WQJhY6gck0f8TT24enjXXRnQC8QitY=
1-800-flowers.vegas. 3600 in ns dns1.cscdns.net.
1-800-flowers.vegas. 3600 in ns dns2.cscdns.net.
100.vegas. 3600 in ns ns51.domaincontrol.com.
100.vegas. 3600 in ns ns52.domaincontrol.com.
1001.vegas. 3600 in ns ns11.waterrockdigital.com.
1001.vegas. 3600 in ns ns12.waterrockdigital.com.
Output:
1001.vegas. 3600 in ns ns11.waterrockdigital.com.
1001.vegas. 3600 in ns ns12.waterrockdigital.com.
Will be indexed as:
{
"_id" : "1001.vegas"
"_index" : "dns-zones",
"_source" : {
"domain" : "1001.vegas",
"records" : { # All records are stored in a single dictionary
"ns": [
{"ttl": 3600, "data": "ns11.waterrockdigital.com"},
{"ttl": 3600, "data": "ns12.waterrockdigital.com"}
]
},
"seen" : "2021-09-01T00:00:00Z" # Zulu time added upon indexing
Input:
{
"_id" : "1001.vegas"
"_index" : "dns-zones",
"_source" : {
"domain" : "1001.vegas",
"records" : {
"ns": [
{"ttl": 3600, "data": "ns11.waterrockdigital.com"},
{"ttl": 3600, "data": "ns12.waterrockdigital.com"}
]
},
"seen" : "2021-09-01T00:00:00Z"
}
}
}
'''
'''
Notes:
- How do we want to handle hashed NSEC3 records? Do we ingest them as they are, or crack the NSEC3 hashes first and ingest?
How do we want to handle hashed NSEC3 records? Do we ingest them as they are, or crack the NSEC3 hashes first and ingest?
'''