From a53541c32816b101de86b53faeda96f2799ba82e Mon Sep 17 00:00:00 2001 From: acidvegas Date: Sat, 23 Mar 2024 14:31:39 -0400 Subject: [PATCH] Added zone field for better querying on specific TLDs/zones --- ingestors/ingest_zone.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ingestors/ingest_zone.py b/ingestors/ingest_zone.py index 01bb236..028d9ae 100644 --- a/ingestors/ingest_zone.py +++ b/ingestors/ingest_zone.py @@ -29,6 +29,7 @@ def construct_map() -> dict: 'mappings': { 'properties': { 'domain' : keyword_mapping, + 'zone' : { 'type': 'keyword' }, 'records' : { 'type': 'nested', 'properties': {} }, 'source' : { 'type': 'keyword' }, 'seen' : { 'type': 'date' } @@ -62,6 +63,9 @@ async def process_data(file_path: str): # Initialize the cache last = None + # Determine the zone name from the file path (e.g., /path/to/zones/com.eu.txt -> com.eu zone) + zone = '.'.join(file_path.split('/')[-1].split('.')[:-1]) + # Read the input file line by line async for line in input_file: line = line.strip() @@ -129,6 +133,7 @@ async def process_data(file_path: str): '_index' : default_index, '_doc' : { 'domain' : domain, + 'zone' : zone, 'records' : {record_type: [{'data': data, 'ttl': ttl}]}, 'source' : 'czds', 'seen' : time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()) # Zone files do not contain a timestamp, so we use the current time