Updated README, fixed issue using the wrong domain in records for zone file ingestion (woops)

This commit is contained in:
Dionysus 2024-01-20 10:53:55 -05:00
parent 14b6d1c88a
commit 3ff233a991
Signed by: acidvegas
GPG Key ID: EF4B922DB85DC9DE
6 changed files with 75 additions and 20 deletions

View File

@ -1,6 +1,6 @@
ISC License
Copyright (c) 2023, acidvegas <acid.vegas@acid.vegas>
Copyright (c) 2024, acidvegas <acid.vegas@acid.vegas>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above

View File

@ -1,5 +1,5 @@
# Elasticsearch Recon Ingestion Scripts (ERIS)
> A utility for ingesting large scale reconnaissance data into Elast Search
> A utility for ingesting various large scale reconnaissance data logs into Elasticsearch
### Work In Progress
@ -7,20 +7,35 @@
- [python](https://www.python.org/)
- [elasticsearch](https://pypi.org/project/elasticsearch/) *(`pip install elasticsearch`)*
## Usage
```shell
python ingest_XXXX.py [options] <input>
```
**Note:** The `<input>` can be a file or a directory of files, depending on the ingestion script.
###### Options
| Argument | Description |
| --------------- | -------------------------------------------------------------------------------------------- |
| `--dry-run` | Perform a dry run without indexing records to Elasticsearch. |
| `--batch_size` | Number of records to index in a batch *(default 25,000)*. |
###### Elasticsearch Connection Options
| Argument | Description |
| --------------- | -------------------------------------------------------------------------------------------- |
| `--host` | Elasticsearch host *(default 'localhost')*. |
| `--port` | Elasticsearch port *(default 9200)*. |
| `--user` | Elasticsearch username *(default 'elastic')*. |
| `--password` | Elasticsearch password. If not provided, it checks the environment variable **ES_PASSWORD**. |
| `--api-key` | Elasticsearch API Key for authentication. |
| `--index` | Elasticsearch index name *(default 'zone_files')*. |
| `--filter` | Filter out records by type *(comma-separated list)*. |
| `--self-signed` | Allow self-signed certificates. |
###### Elasticsearch Index Options
| Argument | Description |
| --------------- | -------------------------------------------------------------------------------------------- |
| `--index` | Elasticsearch index name *(default 'zone-files')*. |
| `--replicas` | Number of replicas for the index. |
| `--shards` | Number of shards for the index. |
___
###### Mirrors

View File

@ -175,8 +175,19 @@ def main():
if not args.api_key and (not args.user or not args.password):
raise ValueError('Missing required Elasticsearch argument: either user and password or apikey')
if args.shards < 1:
raise ValueError('Number of shards must be greater than 0')
if args.replicas < 1:
raise ValueError('Number of replicas must be greater than 0')
logging.info(f'Connecting to Elasticsearch at {args.host}:{args.port}...')
edx = ElasticIndexer(args.host, args.port, args.user, args.password, args.api_key, args.index, args.dry_run, args.self_signed)
if not args.dry_run:
edx.create_index(args.shards, args.replicas) # Create the index if it does not exist
if os.path.isfile(args.input_path):
logging.info(f'Processing file: {args.input_path}')
edx.process_file(args.input_path, args.batch_size)

View File

@ -205,8 +205,17 @@ def main():
if not args.api_key and (not args.user or not args.password):
raise ValueError('Missing required Elasticsearch argument: either user and password or apikey')
if args.shards < 1:
raise ValueError('Number of shards must be greater than 0')
if args.replicas < 0:
raise ValueError('Number of replicas must be greater than 0')
logging.info(f'Connecting to Elasticsearch at {args.host}:{args.port}...')
edx = ElasticIndexer(args.host, args.port, args.user, args.password, args.api_key, args.index, args.dry_run, args.self_signed)
if not args.dry_run:
edx.create_index(args.shards, args.replicas) # Create the index if it does not exist
if os.path.isfile(args.input_path):

View File

@ -204,8 +204,17 @@ def main():
if not args.api_key and (not args.user or not args.password):
raise ValueError('Missing required Elasticsearch argument: either user and password or apikey')
if args.shards < 1:
raise ValueError('Number of shards must be greater than 0')
if args.replicas < 1:
raise ValueError('Number of replicas must be greater than 0')
logging.info(f'Connecting to Elasticsearch at {args.host}:{args.port}...')
edx = ElasticIndexer(args.host, args.port, args.user, args.password, args.api_key, args.index, args.dry_run, args.self_signed)
if not args.dry_run:
edx.create_index(args.shards, args.replicas) # Create the index if it does not exist
if os.path.isfile(args.input_path):

View File

@ -133,6 +133,7 @@ class ElasticIndexer:
}
'''
count = 0
records = []
domain_records = {}
last_domain = None
@ -173,7 +174,7 @@ class ElasticIndexer:
if domain != last_domain:
if last_domain:
source = {'domain': domain, 'records': domain_records[last_domain], 'seen': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}
source = {'domain': last_domain, 'records': domain_records[last_domain], 'seen': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}
del domain_records[last_domain]
@ -182,9 +183,10 @@ class ElasticIndexer:
else:
struct = {'_index': self.es_index, '_source': source}
records.append(struct)
count += 1
if len(records) >= batch_size:
success, _ = helpers.bulk(self.es, records)
logging.info(f'Successfully indexed {success} records to {self.es_index} from {file_path}')
logging.info(f'Successfully indexed {success:,} ({count:,}) records to {self.es_index} from {file_path}')
records = []
last_domain = domain
@ -198,7 +200,7 @@ class ElasticIndexer:
if records:
success, _ = helpers.bulk(self.es, records)
logging.info(f'Successfully indexed {success} records to {self.es_index} from {file_path}')
logging.info(f'Successfully indexed {success:,} ({count:,}) records to {self.es_index} from {file_path}')
def main():
@ -221,8 +223,8 @@ def main():
# Elasticsearch indexing arguments
parser.add_argument('--index', default='zone-files', help='Elasticsearch index name')
parser.add_argument('--shards', type=int, default=0, help='Number of shards for the index') # This depends on your cluster configuration
parser.add_argument('--replicas', type=int, default=0, help='Number of replicas for the index') # This depends on your cluster configuration
parser.add_argument('--shards', type=int, default=1, help='Number of shards for the index') # This depends on your cluster configuration
parser.add_argument('--replicas', type=int, default=1, help='Number of replicas for the index') # This depends on your cluster configuration
args = parser.parse_args()
@ -239,8 +241,17 @@ def main():
if not args.api_key and (not args.user or not args.password):
raise ValueError('Missing required Elasticsearch argument: either user and password or apikey')
if args.shards < 1:
raise ValueError('Number of shards must be greater than 0')
if args.replicas < 1:
raise ValueError('Number of replicas must be greater than 0')
logging.info(f'Connecting to Elasticsearch at {args.host}:{args.port}...')
edx = ElasticIndexer(args.host, args.port, args.user, args.password, args.api_key, args.index, args.dry_run, args.self_signed)
if not args.dry_run:
edx.create_index(args.shards, args.replicas) # Create the index if it does not exist
if os.path.isfile(args.input_path):