Compare commits

..

2 Commits

6 changed files with 77 additions and 23 deletions

View File

@@ -1,6 +1,6 @@
ISC License
Copyright (c) 2023, acidvegas <acid.vegas@acid.vegas>
Copyright (c) 2024, acidvegas <acid.vegas@acid.vegas>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above

View File

@@ -1,5 +1,5 @@
# Elasticsearch Recon Ingestion Scripts (ERIS)
> A utility for ingesting large scale reconnaissance data into Elast Search
> A utility for ingesting various large scale reconnaissance data logs into Elasticsearch
### Work In Progress
@@ -7,21 +7,35 @@
- [python](https://www.python.org/)
- [elasticsearch](https://pypi.org/project/elasticsearch/) *(`pip install elasticsearch`)*
## Usage
```shell
python ingest_XXXX.py [options] <input>
```
**Note:** The `<input>` can be a file or a directory of files, depending on the ingestion script.
###### Options
| Argument | Description |
| --------------- | -------------------------------------------------------------------------------------------- |
| `--dry-run` | Perform a dry run without indexing records to Elasticsearch. |
| `--batch_size` | Number of records to index in a batch *(default 25,000)*. |
###### Elasticsearch Connection Options
| Argument | Description |
| --------------- | -------------------------------------------------------------------------------------------- |
| `--host` | Elasticsearch host *(default 'localhost')*. |
| `--port` | Elasticsearch port *(default 9200)*. |
| `--user` | Elasticsearch username *(default 'elastic')*. |
| `--password` | Elasticsearch password. If not provided, it checks the environment variable **ES_PASSWORD**. |
| `--api-key` | Elasticsearch API Key for authentication. |
| `--index` | Elasticsearch index name *(default 'zone_files')*. |
| `--filter` | Filter out records by type *(comma-separated list)*. |
| `--self-signed` | Allow self-signed certificates. |
###### Elasticsearch Index Options
| Argument | Description |
| --------------- | -------------------------------------------------------------------------------------------- |
| `--index` | Elasticsearch index name *(default 'zone_files')*. |
| `--replicas` | Number of replicas for the index. |
| `--shards`      | Number of shards for the index.                                                              |
___
###### Mirrors
[acid.vegas](https://git.acid.vegas/eris) • [GitHub](https://github.com/acidvegas/eris) • [GitLab](https://gitlab.com/acidvegas/eris) • [SuperNETs](https://git.supernets.org/acidvegas/eris)
###### Mirrors for this repository: [acid.vegas](https://git.acid.vegas/eris) • [SuperNETs](https://git.supernets.org/acidvegas/eris) • [GitHub](https://github.com/acidvegas/eris) • [GitLab](https://gitlab.com/acidvegas/eris) • [Codeberg](https://codeberg.org/acidvegas/eris)

View File

@@ -175,8 +175,19 @@ def main():
if not args.api_key and (not args.user or not args.password):
raise ValueError('Missing required Elasticsearch argument: either user and password or apikey')
if args.shards < 1:
raise ValueError('Number of shards must be greater than 0')
if args.replicas < 1:
raise ValueError('Number of replicas must be greater than 0')
logging.info(f'Connecting to Elasticsearch at {args.host}:{args.port}...')
edx = ElasticIndexer(args.host, args.port, args.user, args.password, args.api_key, args.index, args.dry_run, args.self_signed)
if not args.dry_run:
edx.create_index(args.shards, args.replicas) # Create the index if it does not exist
if os.path.isfile(args.input_path):
logging.info(f'Processing file: {args.input_path}')
edx.process_file(args.input_path, args.batch_size)

View File

@@ -6,7 +6,7 @@
# This script takes JSON formatted masscan logs with banners and indexes them into Elasticsearch.
#
# Saving my "typical" masscan command here for reference to myself:
# masscan 0.0.0.0/0 -p80,443 --banners --open-only --rate 50000 --shard 1/10 --excludefile exclude.conf -oJ output.json --interactive
# masscan 0.0.0.0/0 -p3559,1900 --banners --open-only --rate 25000 --excludefile exclude.conf -oJ output.json --interactive
import argparse
import json
@@ -205,8 +205,17 @@ def main():
if not args.api_key and (not args.user or not args.password):
raise ValueError('Missing required Elasticsearch argument: either user and password or apikey')
if args.shards < 1:
raise ValueError('Number of shards must be greater than 0')
if args.replicas < 0:
raise ValueError('Number of replicas must be greater than 0')
logging.info(f'Connecting to Elasticsearch at {args.host}:{args.port}...')
edx = ElasticIndexer(args.host, args.port, args.user, args.password, args.api_key, args.index, args.dry_run, args.self_signed)
if not args.dry_run:
edx.create_index(args.shards, args.replicas) # Create the index if it does not exist
if os.path.isfile(args.input_path):

View File

@@ -204,8 +204,17 @@ def main():
if not args.api_key and (not args.user or not args.password):
raise ValueError('Missing required Elasticsearch argument: either user and password or apikey')
if args.shards < 1:
raise ValueError('Number of shards must be greater than 0')
if args.replicas < 1:
raise ValueError('Number of replicas must be greater than 0')
logging.info(f'Connecting to Elasticsearch at {args.host}:{args.port}...')
edx = ElasticIndexer(args.host, args.port, args.user, args.password, args.api_key, args.index, args.dry_run, args.self_signed)
if not args.dry_run:
edx.create_index(args.shards, args.replicas) # Create the index if it does not exist
if os.path.isfile(args.input_path):

View File

@@ -133,6 +133,7 @@ class ElasticIndexer:
}
'''
count = 0
records = []
domain_records = {}
last_domain = None
@@ -173,7 +174,7 @@ class ElasticIndexer:
if domain != last_domain:
if last_domain:
source = {'domain': domain, 'records': domain_records[last_domain], 'seen': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}
source = {'domain': last_domain, 'records': domain_records[last_domain], 'seen': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}
del domain_records[last_domain]
@@ -182,9 +183,10 @@
else:
struct = {'_index': self.es_index, '_source': source}
records.append(struct)
count += 1
if len(records) >= batch_size:
success, _ = helpers.bulk(self.es, records)
logging.info(f'Successfully indexed {success} records to {self.es_index} from {file_path}')
logging.info(f'Successfully indexed {success:,} ({count:,}) records to {self.es_index} from {file_path}')
records = []
last_domain = domain
@@ -198,7 +200,7 @@ class ElasticIndexer:
if records:
success, _ = helpers.bulk(self.es, records)
logging.info(f'Successfully indexed {success} records to {self.es_index} from {file_path}')
logging.info(f'Successfully indexed {success:,} ({count:,}) records to {self.es_index} from {file_path}')
def main():
@@ -221,8 +223,8 @@ def main():
# Elasticsearch indexing arguments
parser.add_argument('--index', default='zone-files', help='Elasticsearch index name')
parser.add_argument('--shards', type=int, default=0, help='Number of shards for the index') # This depends on your cluster configuration
parser.add_argument('--replicas', type=int, default=0, help='Number of replicas for the index') # This depends on your cluster configuration
parser.add_argument('--shards', type=int, default=1, help='Number of shards for the index') # This depends on your cluster configuration
parser.add_argument('--replicas', type=int, default=1, help='Number of replicas for the index') # This depends on your cluster configuration
args = parser.parse_args()
@@ -239,8 +241,17 @@ def main():
if not args.api_key and (not args.user or not args.password):
raise ValueError('Missing required Elasticsearch argument: either user and password or apikey')
if args.shards < 1:
raise ValueError('Number of shards must be greater than 0')
if args.replicas < 1:
raise ValueError('Number of replicas must be greater than 0')
logging.info(f'Connecting to Elasticsearch at {args.host}:{args.port}...')
edx = ElasticIndexer(args.host, args.port, args.user, args.password, args.api_key, args.index, args.dry_run, args.self_signed)
if not args.dry_run:
edx.create_index(args.shards, args.replicas) # Create the index if it does not exist
if os.path.isfile(args.input_path):