ECS formatting added to eris, certstream ingestor now caches to prevent duplication, elastic connections properly closed now

This commit is contained in:
Dionysus 2024-03-23 22:47:30 -04:00
parent 510f7db07e
commit 124e4b0cf3
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
5 changed files with 60 additions and 21 deletions

39
eris.py
View File

@ -19,11 +19,6 @@ try:
except ImportError:
raise ImportError('Missing required \'elasticsearch\' library. (pip install elasticsearch)')
try:
from ecs_logging import StdlibFormatter
except ImportError:
raise ImportError('Missing required \'ecs-logging\' library. (pip install ecs-logging)')
class ElasticIndexer:
def __init__(self, args: argparse.Namespace):
@ -61,6 +56,12 @@ class ElasticIndexer:
self.es = AsyncElasticsearch(**es_config)
async def close_connect(self):
'''Close the Elasticsearch connection.'''
await self.es.close()
async def create_index(self, map_body: dict, pipeline: str = None, replicas: int = 1, shards: int = 1):
'''
Create the Elasticsearch index with the defined mapping.
@ -131,7 +132,7 @@ class ElasticIndexer:
raise Exception(f'Failed to index records to {self.es_index} from {file_path} ({e})')
def setup_logger(console_level: int = logging.INFO, file_level: int = None, log_file: str = 'debug.json', max_file_size: int = 10*1024*1024, backups: int = 5):
def setup_logger(console_level: int = logging.INFO, file_level: int = None, log_file: str = 'debug.json', max_file_size: int = 10*1024*1024, backups: int = 5, ecs_format: bool = False):
'''
Setup the global logger for the application.
@ -140,13 +141,14 @@ def setup_logger(console_level: int = logging.INFO, file_level: int = None, log_
:param log_file: File to write logs to.
:param max_file_size: Maximum size of the log file before it is rotated.
:param backups: Number of backup log files to keep.
:param ecs_format: Use the Elastic Common Schema (ECS) format for logs.
'''
# Configure the root logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG) # Minimum level to capture all logs
# Clear existing handlers
# Clear existing handlersaise Exception(f'Failed to fetch zone links: {e}')
logger.handlers = []
# Setup console handler
@ -160,8 +162,18 @@ def setup_logger(console_level: int = logging.INFO, file_level: int = None, log_
if file_level is not None:
file_handler = logging.handlers.RotatingFileHandler(log_file, maxBytes=max_file_size, backupCount=backups)
file_handler.setLevel(file_level)
ecs_formatter = StdlibFormatter() # ECS (Elastic Common Schema) formatter
file_handler.setFormatter(ecs_formatter)
# Setup formatter to use ECS format if enabled or default format
if ecs_format:
try:
from ecs_logging import StdlibFormatter
except ImportError:
raise ImportError('Missing required \'ecs-logging\' library. (pip install ecs-logging)')
file_formatter = StdlibFormatter() # ECS formatter
else:
file_formatter = logging.Formatter('%(asctime)s | %(levelname)9s | %(message)s', '%Y-%m-%d %H:%M:%S')
file_handler.setFormatter(file_formatter)
logger.addHandler(file_handler)
@ -174,6 +186,7 @@ async def main():
parser.add_argument('input_path', help='Path to the input file or directory') # Required
parser.add_argument('--watch', action='store_true', help='Create or watch a FIFO for real-time indexing')
parser.add_argument('--log', choices=['debug', 'info', 'warning', 'error', 'critical'], help='Logging file level (default: disabled)')
parser.add_argument('--ecs', action='store_true', default=False, help='Use the Elastic Common Schema (ECS) for logging')
# Elasticsearch arguments
parser.add_argument('--host', default='http://localhost', help='Elasticsearch host')
@ -202,14 +215,14 @@ async def main():
parser.add_argument('--massdns', action='store_true', help='Index Massdns records')
parser.add_argument('--zone', action='store_true', help='Index Zone records')
args = parser.parse_args()
if args.log:
levels = {'debug': logging.DEBUG, 'info': logging.INFO, 'warning': logging.WARNING, 'error': logging.ERROR, 'critical': logging.CRITICAL}
setup_logger(file_level=levels[args.log], log_file='eris.log')
setup_logger(file_level=levels[args.log], log_file='eris.log', ecs_format=args.ecs)
else:
setup_logger()
args = parser.parse_args()
if args.host.endswith('/'):
args.host = args.host[:-1]
@ -270,6 +283,8 @@ async def main():
else:
logging.warning(f'[{count:,}/{total:,}] Skipping non-file: {file_path}')
await edx.close_connect() # Close the Elasticsearch connection to stop "Unclosed client session" warnings
if __name__ == '__main__':

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
# ingest_certs.py
# ingest_certstream.py
import asyncio
import json
@ -16,6 +16,10 @@ except ImportError:
# Set a default elasticsearch index if one is not provided
default_index = 'eris-certstream'
# Set the cache size for the Certstream records to prevent duplicates
cache = []
cache_size = 5000
def construct_map() -> dict:
'''Construct the Elasticsearch index mapping for Certstream records.'''
@ -56,14 +60,14 @@ async def process_data(place_holder: str = None):
continue
# Grab the unique domains from the records
all_domains = record['data']['leaf_cert']['all_domains']
all_domains = set(record['data']['leaf_cert']['all_domains'])
domains = list()
# We only care about subdomains (excluding www. and wildcards)
for domain in all_domains:
if domain.startswith('*.'):
domain = domain[2:]
elif domain.startswith('www.') and domain.count('.') == 2:
if domain.startswith('www.') and domain.count('.') == 2:
continue
if domain.count('.') > 1:
# TODO: Add a check for PSL TLDs...domain.co.uk, domain.com.au, etc. (we want to ignore these if they are not subdomains)
@ -72,6 +76,14 @@ async def process_data(place_holder: str = None):
# Construct the document
for domain in domains:
if domain in cache:
continue # Skip the domain if it is already in the cache
if len(cache) >= cache_size:
cache.pop(0) # Remove the oldest domain from the cache
cache.append(domain) # Add the domain to the cache
struct = {
'domain' : domain,
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
@ -160,6 +172,13 @@ Output:
"message_type": "certificate_update"
}
Input:
{
"domain" : "d7zdnegbre53n.amplifyapp.com",
"seen" : "2022-01-02T12:00:00Z"
}
Notes:
- Fix the "no close frame received or sent" error
'''

View File

@ -29,7 +29,12 @@ def construct_map() -> dict:
'name' : { 'type': 'keyword' },
'alternatenames' : { 'type': 'keyword' },
'sources' : { 'type': 'keyword' },
'prefixes' : { 'properties': { 'ipv4' : {'type': 'ip'}, 'ipv6' : {'type': 'ip_range'} } },
'prefixes' : {
'properties': {
'ipv4' : { 'type': 'ip' },
'ipv6' : { 'type': 'ip_range' }
}
},
'url' : { 'type': 'keyword' },
'region' : { 'type': 'keyword' },
'country' : { 'type': 'keyword' },

View File

@ -104,7 +104,7 @@ async def process_data(input_path: str):
'_index' : default_index,
'doc' : {
'ip' : ip,
'record' : [record], # Consider using painless script to add to list if it exists (Use 'seen' per-record and 'last_seen' for the IP address)
'record' : [record],
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
},
'doc_as_upsert' : True # Create the document if it does not exist