Overall code cleanup
This commit is contained in:
parent
681e3bd788
commit
d34aa105f1
@ -64,7 +64,7 @@ async def process_data(file_path: str):
|
|||||||
:param file_path: Path to the HTTPX log file
|
:param file_path: Path to the HTTPX log file
|
||||||
'''
|
'''
|
||||||
|
|
||||||
async with aiofiles.open(file_path, mode='r') as input_file:
|
async with aiofiles.open(file_path) as input_file:
|
||||||
async for line in input_file:
|
async for line in input_file:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|
||||||
|
@ -59,7 +59,7 @@ async def process_data(file_path: str):
|
|||||||
:param file_path: Path to the Masscan log file
|
:param file_path: Path to the Masscan log file
|
||||||
'''
|
'''
|
||||||
|
|
||||||
async with aiofiles.open(file_path, mode='r') as input_file:
|
async with aiofiles.open(file_path) as input_file:
|
||||||
async for line in input_file:
|
async for line in input_file:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ async def process_data(file_path: str):
|
|||||||
:param file_path: Path to the Massdns log file
|
:param file_path: Path to the Massdns log file
|
||||||
'''
|
'''
|
||||||
|
|
||||||
async with aiofiles.open(file_path, mode='r') as input_file:
|
async with aiofiles.open(file_path) as input_file:
|
||||||
|
|
||||||
last = None
|
last = None
|
||||||
|
|
||||||
@ -152,7 +152,7 @@ Output:
|
|||||||
Input:
|
Input:
|
||||||
{
|
{
|
||||||
"_id" : "47.229.6.0",
|
"_id" : "47.229.6.0",
|
||||||
"_index" : "ptr-records",
|
"_index" : "eris-massdns",
|
||||||
"_source" : {
|
"_source" : {
|
||||||
"ip" : "47.229.6.0",
|
"ip" : "47.229.6.0",
|
||||||
"record" : "047-229-006-000.res.spectrum.com", # This will be a list if there is more than one PTR record
|
"record" : "047-229-006-000.res.spectrum.com", # This will be a list if there is more than one PTR record
|
||||||
@ -161,6 +161,6 @@ Input:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Notes:
|
Notes:
|
||||||
- Why do some IP addresses return a CNAME from a PTR request
|
Why do some IP addresses return a CNAME from a PTR request
|
||||||
- What is dns-servfail.net (Frequent CNAME response from PTR requests)
|
What is dns-servfail.net (Frequent CNAME response from PTR requests)
|
||||||
'''
|
'''
|
@ -2,6 +2,7 @@
|
|||||||
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
|
# Elasticsearch Recon Ingestion Scripts (ERIS) - Developed by Acidvegas (https://git.acid.vegas/eris)
|
||||||
# ingest_zone.py
|
# ingest_zone.py
|
||||||
|
|
||||||
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -56,15 +57,15 @@ async def process_data(file_path: str):
|
|||||||
:param file_path: Path to the zone file
|
:param file_path: Path to the zone file
|
||||||
'''
|
'''
|
||||||
|
|
||||||
domain_records = {}
|
async with aiofiles.open(file_path) as input_file:
|
||||||
last_domain = None
|
|
||||||
|
last = None
|
||||||
|
|
||||||
async with aiofiles.open(file_path, mode='r') as input_file:
|
|
||||||
async for line in input_file:
|
async for line in input_file:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
|
||||||
if line == '~eof': # Sentinel value to indicate the end of a process (Used with --watch with FIFO)
|
if line == '~eof': # Sentinel value to indicate the end of a process (Used with --watch with FIFO)
|
||||||
break
|
return last
|
||||||
|
|
||||||
if not line or line.startswith(';'):
|
if not line or line.startswith(';'):
|
||||||
continue
|
continue
|
||||||
@ -72,22 +73,25 @@ async def process_data(file_path: str):
|
|||||||
parts = line.split()
|
parts = line.split()
|
||||||
|
|
||||||
if len(parts) < 5:
|
if len(parts) < 5:
|
||||||
raise ValueError(f'Invalid line: {line}')
|
logging.warning(f'Invalid line: {line}')
|
||||||
|
|
||||||
domain, ttl, record_class, record_type, data = parts[0].rstrip('.').lower(), parts[1], parts[2].lower(), parts[3].lower(), ' '.join(parts[4:])
|
domain, ttl, record_class, record_type, data = parts[0].rstrip('.').lower(), parts[1], parts[2].lower(), parts[3].lower(), ' '.join(parts[4:])
|
||||||
|
|
||||||
if not ttl.isdigit():
|
if not ttl.isdigit():
|
||||||
raise ValueError(f'Invalid TTL: {ttl} with line: {line}')
|
logging.warning(f'Invalid TTL: {ttl} with line: {line}')
|
||||||
|
continue
|
||||||
|
|
||||||
ttl = int(ttl)
|
ttl = int(ttl)
|
||||||
|
|
||||||
# Anomaly...Doubtful any CHAOS/HESIOD records will be found in zone files
|
# Anomaly...Doubtful any CHAOS/HESIOD records will be found in zone files
|
||||||
if record_class != 'in':
|
if record_class != 'in':
|
||||||
raise ValueError(f'Unsupported record class: {record_class} with line: {line}')
|
logging.warning(f'Unsupported record class: {record_class} with line: {line}')
|
||||||
|
continue
|
||||||
|
|
||||||
# We do not want to collide with our current mapping (Again, this is an anomaly)
|
# We do not want to collide with our current mapping (Again, this is an anomaly)
|
||||||
if record_type not in record_types:
|
if record_type not in record_types:
|
||||||
raise ValueError(f'Unsupported record type: {record_type} with line: {line}')
|
logging.warning(f'Unsupported record type: {record_type} with line: {line}')
|
||||||
|
continue
|
||||||
|
|
||||||
# Little tidying up for specific record types (removing trailing dots, etc)
|
# Little tidying up for specific record types (removing trailing dots, etc)
|
||||||
if record_type == 'nsec':
|
if record_type == 'nsec':
|
||||||
@ -97,27 +101,28 @@ async def process_data(file_path: str):
|
|||||||
elif data.endswith('.'):
|
elif data.endswith('.'):
|
||||||
data = data.rstrip('.')
|
data = data.rstrip('.')
|
||||||
|
|
||||||
if domain != last_domain:
|
if last:
|
||||||
if last_domain:
|
if domain == last['domain']:
|
||||||
struct = {
|
if record_type in last['_doc']['records']:
|
||||||
'domain' : last_domain,
|
last['_doc']['records'][record_type].append({'ttl': ttl, 'data': data}) # Do we need to check for duplicate records?
|
||||||
'records' : domain_records[last_domain],
|
else:
|
||||||
|
last['_doc']['records'][record_type] = [{'ttl': ttl, 'data': data}]
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
yield last
|
||||||
|
|
||||||
|
last = {
|
||||||
|
'_op_type' : 'update',
|
||||||
|
'_id' : domain,
|
||||||
|
'_index' : default_index,
|
||||||
|
'_doc' : {
|
||||||
|
'domain' : domain,
|
||||||
|
'records' : {record_type: [{'ttl': ttl, 'data': data}]},
|
||||||
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()) # Zone files do not contain a timestamp, so we use the current time
|
'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()) # Zone files do not contain a timestamp, so we use the current time
|
||||||
|
},
|
||||||
|
'doc_as_upsert' : True # This will create the document if it does not exist
|
||||||
}
|
}
|
||||||
|
|
||||||
del domain_records[last_domain]
|
|
||||||
|
|
||||||
yield {'_id': domain, '_index': default_index, '_source': struct} # Set the ID to the domain name to allow the record to be reindexed if it exists.
|
|
||||||
|
|
||||||
last_domain = domain
|
|
||||||
|
|
||||||
domain_records[domain] = {}
|
|
||||||
|
|
||||||
if record_type not in domain_records[domain]:
|
|
||||||
domain_records[domain][record_type] = []
|
|
||||||
|
|
||||||
domain_records[domain][record_type].append({'ttl': ttl, 'data': data})
|
|
||||||
|
|
||||||
|
|
||||||
async def test(input_path: str):
|
async def test(input_path: str):
|
||||||
'''
|
'''
|
||||||
@ -149,17 +154,17 @@ Output:
|
|||||||
|
|
||||||
Input:
|
Input:
|
||||||
{
|
{
|
||||||
"_id" : "1001.vegas",
|
'_id' : '1001.vegas',
|
||||||
"_index" : "dns-zones",
|
'_index' : 'dns-zones',
|
||||||
"_source" : {
|
'_source' : {
|
||||||
"domain" : "1001.vegas",
|
'domain' : '1001.vegas',
|
||||||
"records" : {
|
'records' : {
|
||||||
"ns": [
|
'ns': [
|
||||||
{"ttl": 3600, "data": "ns11.waterrockdigital.com"},
|
{'ttl': 3600, 'data': 'ns11.waterrockdigital.com'},
|
||||||
{"ttl": 3600, "data": "ns12.waterrockdigital.com"}
|
{'ttl': 3600, 'data': 'ns12.waterrockdigital.com'}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"seen" : "2021-09-01T00:00:00Z"
|
'seen' : '2021-09-01T00:00:00Z'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user