#!/usr/bin/env python
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
# ingest_firehol.py

import ipaddress
import logging
import os
import re
import time

try:
    import git
except ImportError:
    raise ImportError('Missing required libraries. (pip install gitpython)')


# Set a default Elasticsearch index if one is not provided
default_index = 'eris-firehol'

# Git repository settings
REPO_URL  = 'https://github.com/firehol/blocklist-ipsets.git'
REPO_PATH = os.path.join('data', 'firehol-blocklist') # Local path to store the repo

# File suffixes to ignore (time-windowed variants of the same ipsets)
IGNORES = ('_1d', '_7d', '_30d', '_90d', '_180d', '_365d', '_730d')


def construct_map() -> dict:
    '''Construct the Elasticsearch index mapping for Firehol records.'''

    mapping = {
        'mappings': {
            'properties': {
                'ip'         : { 'type': 'ip_range' },
                'ipsets'     : { 'type': 'keyword'  },
                'categories' : { 'type': 'keyword'  },
                'seen'       : { 'type': 'date'     }
            }
        }
    }

    return mapping


def update_repo():
    '''Clone the repository if it does not exist locally, otherwise pull the latest changes.'''

    if not os.path.exists(REPO_PATH):
        logging.info(f'Cloning repository to {REPO_PATH}...')

        # Create the parent directory if it doesn't exist
        os.makedirs(os.path.dirname(REPO_PATH), exist_ok=True)

        git.Repo.clone_from(REPO_URL, REPO_PATH)
    else:
        repo = git.Repo(REPO_PATH)
        logging.info('Updating repository...')
        repo.remotes.origin.pull()


def stream_ips(file_path: str):
    '''
    Stream IPs from a file, skipping comments and validating each entry.

    :param file_path: Path to the ipset file.
    '''

    try:
        with open(file_path) as f:
            for line in f:
                # Skip comments and empty lines
                line = line.strip()
                if line.startswith('#') or not line:
                    continue

                # Normalize bare addresses to CIDR notation, then validate
                try:
                    if '/' not in line:
                        line = f'{line}/128' if ':' in line else f'{line}/32' # IPv6 host routes are /128, not /32
                    ipaddress.ip_network(line, strict=True)
                except ValueError as e:
                    logging.warning(f'Invalid IP/network in {os.path.basename(file_path)}: {line} ({e})')
                    continue

                # Yield the valid IP/network
                yield line

    except Exception as e:
        logging.error(f'Error streaming IPs from {file_path}: {e}')
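
# For reference, a Firehol ipset/netset file looks roughly like the sample below
# (illustrative only, not copied from the repository). The parsing in this script
# relies solely on '#'-prefixed comment lines, an optional '# Category : <name>'
# header, and one IP address or CIDR network per line:
#
#   # Category : attacks
#   # ...
#   203.0.113.7
#   198.51.100.0/24
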
async def process_data(input_path=None):
    '''
    Process Firehol ipsets and yield records for indexing.

    :param input_path: Unused; kept for signature uniformity with the other ingestion scripts.
    '''

    # Update the local copy of the repository
    update_repo()

    # Collect the ipset/netset files, skipping the time-windowed variants
    files = []
    for filename in os.listdir(REPO_PATH):
        if filename.endswith(('.ipset', '.netset')):
            if any(filename.rsplit('.', 1)[0].endswith(x) for x in IGNORES):
                logging.debug(f'Ignoring {filename} because it matches an ignored suffix')
                continue
            files.append(os.path.join(REPO_PATH, filename))

    logging.info(f'Processing {len(files)} files...')

    # Dictionary of unique IPs and their metadata (the whole dataset is held in memory until the final pass)
    ip_records = {}

    for file_path in files:
        logging.info(f'Processing {os.path.basename(file_path)}...')

        # The ipset name is the filename without its extension
        ipset_name = os.path.splitext(os.path.basename(file_path))[0]

        # Extract the category from the file's comment header, if present
        category = None
        with open(file_path) as f:
            for line in f:
                if match := re.search(r'^#\s*Category\s*:\s*(.+)$', line, re.IGNORECASE):
                    category = match.group(1).strip()
                    break

        # Stream IPs from the file, recording which ipsets and categories each one appears in
        for ip in stream_ips(file_path):
            if ip not in ip_records:
                ip_records[ip] = {'ip': ip, 'ipsets': set(), 'categories': set()}

            ip_records[ip]['ipsets'].add(ipset_name)
            if category:
                ip_records[ip]['categories'].add(category)

    # Yield the unique records
    for ip, record in ip_records.items():
        # Convert sets to lists for JSON serialization
        record['ipsets']     = list(record['ipsets'])
        record['categories'] = list(record['categories'])
        record['seen']       = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

        # Use the IP itself as the document _id, so re-ingesting overwrites stale records in place
        yield {'_index': default_index, '_id': ip, '_source': record}


async def test():
    '''Test the ingestion process.'''

    async for document in process_data():
        print(document)


if __name__ == '__main__':
    import asyncio

    logging.basicConfig(level=logging.INFO)
    asyncio.run(test())
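

# A minimal sketch of wiring this module to an Elasticsearch cluster. ERIS itself
# drives these functions, so this is illustrative only; the host URL and client
# settings are assumptions, while AsyncElasticsearch and async_bulk are the
# standard async client and bulk helper from the elasticsearch-py package:
#
#   import asyncio
#   from elasticsearch import AsyncElasticsearch
#   from elasticsearch.helpers import async_bulk
#
#   async def ingest():
#       client = AsyncElasticsearch('https://localhost:9200')
#       if not await client.indices.exists(index=default_index):
#           await client.indices.create(index=default_index, body=construct_map())
#       await async_bulk(client, process_data()) # process_data() is an async generator of bulk actions
#       await client.close()
#
#   asyncio.run(ingest())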