ECS formatting added to eris, certstream ingestor now caches to prevent duplication, elastic connections properly closed now
This commit is contained in:
parent
510f7db07e
commit
124e4b0cf3
39
eris.py
39
eris.py
@ -19,11 +19,6 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError('Missing required \'elasticsearch\' library. (pip install elasticsearch)')
|
raise ImportError('Missing required \'elasticsearch\' library. (pip install elasticsearch)')
|
||||||
|
|
||||||
try:
|
|
||||||
from ecs_logging import StdlibFormatter
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError('Missing required \'ecs-logging\' library. (pip install ecs-logging)')
|
|
||||||
|
|
||||||
|
|
||||||
class ElasticIndexer:
|
class ElasticIndexer:
|
||||||
def __init__(self, args: argparse.Namespace):
|
def __init__(self, args: argparse.Namespace):
|
||||||
@ -61,6 +56,12 @@ class ElasticIndexer:
|
|||||||
self.es = AsyncElasticsearch(**es_config)
|
self.es = AsyncElasticsearch(**es_config)
|
||||||
|
|
||||||
|
|
||||||
|
async def close_connect(self):
|
||||||
|
'''Close the Elasticsearch connection.'''
|
||||||
|
|
||||||
|
await self.es.close()
|
||||||
|
|
||||||
|
|
||||||
async def create_index(self, map_body: dict, pipeline: str = None, replicas: int = 1, shards: int = 1):
|
async def create_index(self, map_body: dict, pipeline: str = None, replicas: int = 1, shards: int = 1):
|
||||||
'''
|
'''
|
||||||
Create the Elasticsearch index with the defined mapping.
|
Create the Elasticsearch index with the defined mapping.
|
||||||
@ -131,7 +132,7 @@ class ElasticIndexer:
|
|||||||
raise Exception(f'Failed to index records to {self.es_index} from {file_path} ({e})')
|
raise Exception(f'Failed to index records to {self.es_index} from {file_path} ({e})')
|
||||||
|
|
||||||
|
|
||||||
def setup_logger(console_level: int = logging.INFO, file_level: int = None, log_file: str = 'debug.json', max_file_size: int = 10*1024*1024, backups: int = 5):
|
def setup_logger(console_level: int = logging.INFO, file_level: int = None, log_file: str = 'debug.json', max_file_size: int = 10*1024*1024, backups: int = 5, ecs_format: bool = False):
|
||||||
'''
|
'''
|
||||||
Setup the global logger for the application.
|
Setup the global logger for the application.
|
||||||
|
|
||||||
@ -140,13 +141,14 @@ def setup_logger(console_level: int = logging.INFO, file_level: int = None, log_
|
|||||||
:param log_file: File to write logs to.
|
:param log_file: File to write logs to.
|
||||||
:param max_file_size: Maximum size of the log file before it is rotated.
|
:param max_file_size: Maximum size of the log file before it is rotated.
|
||||||
:param backups: Number of backup log files to keep.
|
:param backups: Number of backup log files to keep.
|
||||||
|
:param ecs_format: Use the Elastic Common Schema (ECS) format for logs.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# Configure the root logger
|
# Configure the root logger
|
||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
logger.setLevel(logging.DEBUG) # Minimum level to capture all logs
|
logger.setLevel(logging.DEBUG) # Minimum level to capture all logs
|
||||||
|
|
||||||
# Clear existing handlers
|
# Clear existing handlersaise Exception(f'Failed to fetch zone links: {e}')
|
||||||
logger.handlers = []
|
logger.handlers = []
|
||||||
|
|
||||||
# Setup console handler
|
# Setup console handler
|
||||||
@ -160,8 +162,18 @@ def setup_logger(console_level: int = logging.INFO, file_level: int = None, log_
|
|||||||
if file_level is not None:
|
if file_level is not None:
|
||||||
file_handler = logging.handlers.RotatingFileHandler(log_file, maxBytes=max_file_size, backupCount=backups)
|
file_handler = logging.handlers.RotatingFileHandler(log_file, maxBytes=max_file_size, backupCount=backups)
|
||||||
file_handler.setLevel(file_level)
|
file_handler.setLevel(file_level)
|
||||||
ecs_formatter = StdlibFormatter() # ECS (Elastic Common Schema) formatter
|
|
||||||
file_handler.setFormatter(ecs_formatter)
|
# Setup formatter to use ECS format if enabled or default format
|
||||||
|
if ecs_format:
|
||||||
|
try:
|
||||||
|
from ecs_logging import StdlibFormatter
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError('Missing required \'ecs-logging\' library. (pip install ecs-logging)')
|
||||||
|
file_formatter = StdlibFormatter() # ECS formatter
|
||||||
|
else:
|
||||||
|
file_formatter = logging.Formatter('%(asctime)s | %(levelname)9s | %(message)s', '%Y-%m-%d %H:%M:%S')
|
||||||
|
|
||||||
|
file_handler.setFormatter(file_formatter)
|
||||||
logger.addHandler(file_handler)
|
logger.addHandler(file_handler)
|
||||||
|
|
||||||
|
|
||||||
@ -174,6 +186,7 @@ async def main():
|
|||||||
parser.add_argument('input_path', help='Path to the input file or directory') # Required
|
parser.add_argument('input_path', help='Path to the input file or directory') # Required
|
||||||
parser.add_argument('--watch', action='store_true', help='Create or watch a FIFO for real-time indexing')
|
parser.add_argument('--watch', action='store_true', help='Create or watch a FIFO for real-time indexing')
|
||||||
parser.add_argument('--log', choices=['debug', 'info', 'warning', 'error', 'critical'], help='Logging file level (default: disabled)')
|
parser.add_argument('--log', choices=['debug', 'info', 'warning', 'error', 'critical'], help='Logging file level (default: disabled)')
|
||||||
|
parser.add_argument('--ecs', action='store_true', default=False, help='Use the Elastic Common Schema (ECS) for logging')
|
||||||
|
|
||||||
# Elasticsearch arguments
|
# Elasticsearch arguments
|
||||||
parser.add_argument('--host', default='http://localhost', help='Elasticsearch host')
|
parser.add_argument('--host', default='http://localhost', help='Elasticsearch host')
|
||||||
@ -202,14 +215,14 @@ async def main():
|
|||||||
parser.add_argument('--massdns', action='store_true', help='Index Massdns records')
|
parser.add_argument('--massdns', action='store_true', help='Index Massdns records')
|
||||||
parser.add_argument('--zone', action='store_true', help='Index Zone records')
|
parser.add_argument('--zone', action='store_true', help='Index Zone records')
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.log:
|
if args.log:
|
||||||
levels = {'debug': logging.DEBUG, 'info': logging.INFO, 'warning': logging.WARNING, 'error': logging.ERROR, 'critical': logging.CRITICAL}
|
levels = {'debug': logging.DEBUG, 'info': logging.INFO, 'warning': logging.WARNING, 'error': logging.ERROR, 'critical': logging.CRITICAL}
|
||||||
setup_logger(file_level=levels[args.log], log_file='eris.log')
|
setup_logger(file_level=levels[args.log], log_file='eris.log', ecs_format=args.ecs)
|
||||||
else:
|
else:
|
||||||
setup_logger()
|
setup_logger()
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
if args.host.endswith('/'):
|
if args.host.endswith('/'):
|
||||||
args.host = args.host[:-1]
|
args.host = args.host[:-1]
|
||||||
|
|
||||||
@ -270,6 +283,8 @@ async def main():
|
|||||||
else:
|
else:
|
||||||
logging.warning(f'[{count:,}/{total:,}] Skipping non-file: {file_path}')
|
logging.warning(f'[{count:,}/{total:,}] Skipping non-file: {file_path}')
|
||||||
|
|
||||||
|
await edx.close_connect() # Close the Elasticsearch connection to stop "Unclosed client session" warnings
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
|
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
|
||||||
# ingest_certs.py
|
# ingest_certstream.py
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
@ -16,6 +16,10 @@ except ImportError:
|
|||||||
# Set a default elasticsearch index if one is not provided
|
# Set a default elasticsearch index if one is not provided
|
||||||
default_index = 'eris-certstream'
|
default_index = 'eris-certstream'
|
||||||
|
|
||||||
|
# Set the cache size for the Certstream records to prevent duplicates
|
||||||
|
cache = []
|
||||||
|
cache_size = 5000
|
||||||
|
|
||||||
|
|
||||||
def construct_map() -> dict:
|
def construct_map() -> dict:
|
||||||
'''Construct the Elasticsearch index mapping for Certstream records.'''
|
'''Construct the Elasticsearch index mapping for Certstream records.'''
|
||||||
@ -56,14 +60,14 @@ async def process_data(place_holder: str = None):
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
# Grab the unique domains from the records
|
# Grab the unique domains from the records
|
||||||
all_domains = record['data']['leaf_cert']['all_domains']
|
all_domains = set(record['data']['leaf_cert']['all_domains'])
|
||||||
domains = list()
|
domains = list()
|
||||||
|
|
||||||
# We only care about subdomains (excluding www. and wildcards)
|
# We only care about subdomains (excluding www. and wildcards)
|
||||||
for domain in all_domains:
|
for domain in all_domains:
|
||||||
if domain.startswith('*.'):
|
if domain.startswith('*.'):
|
||||||
domain = domain[2:]
|
domain = domain[2:]
|
||||||
elif domain.startswith('www.') and domain.count('.') == 2:
|
if domain.startswith('www.') and domain.count('.') == 2:
|
||||||
continue
|
continue
|
||||||
if domain.count('.') > 1:
|
if domain.count('.') > 1:
|
||||||
# TODO: Add a check for PSL TLDs...domain.co.uk, domain.com.au, etc. (we want to ignore these if they are not subdomains)
|
# TODO: Add a check for PSL TLDs...domain.co.uk, domain.com.au, etc. (we want to ignore these if they are not subdomains)
|
||||||
@ -72,6 +76,14 @@ async def process_data(place_holder: str = None):
|
|||||||
|
|
||||||
# Construct the document
|
# Construct the document
|
||||||
for domain in domains:
|
for domain in domains:
|
||||||
|
if domain in cache:
|
||||||
|
continue # Skip the domain if it is already in the cache
|
||||||
|
|
||||||
|
if len(cache) >= cache_size:
|
||||||
|
cache.pop(0) # Remove the oldest domain from the cache
|
||||||
|
|
||||||
|
cache.append(domain) # Add the domain to the cache
|
||||||
|
|
||||||
struct = {
|
struct = {
|
||||||
'domain' : domain,
|
'domain' : domain,
|
||||||
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
|
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
|
||||||
@ -160,6 +172,13 @@ Output:
|
|||||||
"message_type": "certificate_update"
|
"message_type": "certificate_update"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Input:
|
||||||
|
{
|
||||||
|
"domain" : "d7zdnegbre53n.amplifyapp.com",
|
||||||
|
"seen" : "2022-01-02T12:00:00Z"
|
||||||
|
}
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
- Fix the "no close frame received or sent" error
|
- Fix the "no close frame received or sent" error
|
||||||
'''
|
'''
|
||||||
|
@ -26,10 +26,15 @@ def construct_map() -> dict:
|
|||||||
mapping = {
|
mapping = {
|
||||||
'mappings': {
|
'mappings': {
|
||||||
'properties': {
|
'properties': {
|
||||||
'name' : {'type': 'keyword'},
|
'name' : { 'type': 'keyword' },
|
||||||
'alternatenames' : {'type': 'keyword'},
|
'alternatenames' : { 'type': 'keyword' },
|
||||||
'sources' : {'type': 'keyword'},
|
'sources' : { 'type': 'keyword' },
|
||||||
'prefixes' : { 'properties': { 'ipv4' : {'type': 'ip'}, 'ipv6' : {'type': 'ip_range'} } },
|
'prefixes' : {
|
||||||
|
'properties': {
|
||||||
|
'ipv4' : { 'type': 'ip' },
|
||||||
|
'ipv6' : { 'type': 'ip_range' }
|
||||||
|
}
|
||||||
|
},
|
||||||
'url' : { 'type': 'keyword' },
|
'url' : { 'type': 'keyword' },
|
||||||
'region' : { 'type': 'keyword' },
|
'region' : { 'type': 'keyword' },
|
||||||
'country' : { 'type': 'keyword' },
|
'country' : { 'type': 'keyword' },
|
||||||
|
@ -104,7 +104,7 @@ async def process_data(input_path: str):
|
|||||||
'_index' : default_index,
|
'_index' : default_index,
|
||||||
'doc' : {
|
'doc' : {
|
||||||
'ip' : ip,
|
'ip' : ip,
|
||||||
'record' : [record], # Consider using painless script to add to list if it exists (Use 'seen' per-record and 'last_seen' for the IP address)
|
'record' : [record],
|
||||||
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
|
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
|
||||||
},
|
},
|
||||||
'doc_as_upsert' : True # Create the document if it does not exist
|
'doc_as_upsert' : True # Create the document if it does not exist
|
||||||
|
Loading…
Reference in New Issue
Block a user