From 68b23feee79687cd104eb83056cb0b2934c92f3a Mon Sep 17 00:00:00 2001 From: acidvegas Date: Tue, 26 Nov 2024 15:57:28 -0500 Subject: [PATCH] Initial commit --- .gitignore | 36 ++++++++++++ LICENSE | 15 +++++ README.md | 142 ++++++++++++++++++++++++++++++++++++++++++++++ pylcg/__init__.py | 5 ++ pylcg/cli.py | 26 +++++++++ pylcg/core.py | 79 ++++++++++++++++++++++++++ pyproject.toml | 3 + setup.py | 43 ++++++++++++++ unit_test.py | 135 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 484 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 pylcg/__init__.py create mode 100644 pylcg/cli.py create mode 100644 pylcg/core.py create mode 100644 pyproject.toml create mode 100644 setup.py create mode 100644 unit_test.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2be639f --- /dev/null +++ b/.gitignore @@ -0,0 +1,36 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +venv/ +ENV/ +env/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9f32a49 --- /dev/null +++ b/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2025, acidvegas + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..7ccd28f --- /dev/null +++ b/README.md @@ -0,0 +1,142 @@ +# PyLCG +> Ultra-fast Linear Congruential Generator for IP Sharding + +PyLCG is a high-performance Python implementation of a memory-efficient IP address sharding system using Linear Congruential Generators (LCG) for deterministic random number generation. This tool enables distributed scanning & network reconnaissance by efficiently dividing IP ranges across multiple machines while maintaining pseudo-random ordering. + +## Features + +- Memory-efficient IP range processing +- Deterministic pseudo-random IP generation +- High-performance LCG implementation +- Support for sharding across multiple machines +- Zero dependencies beyond Python standard library +- Simple command-line interface + +## Installation + +### From PyPI +```bash +pip install pylcg +``` + +### From Source +```bash +git clone https://github.com/acidvegas/pylcg +cd pylcg +chmod +x pylcg.py +``` + +## Usage + +### Command Line + +```bash +./pylcg.py 192.168.0.0/16 --shard-num 1 --total-shards 4 --seed 12345 +``` + +### As a Library + +```python +from pylcg import ip_stream + +# Generate IPs for the first shard of 4 total shards +for ip in ip_stream('192.168.0.0/16', shard_num=1, total_shards=4, seed=12345): + print(ip) +``` + +## How It Works + +### Linear Congruential Generator + +PyLCG uses an optimized LCG implementation with carefully chosen parameters: +| Name | Variable | Value | +|------------|----------|--------------| +| Multiplier | `a` | `1664525` | +| Increment | `c` | `1013904223` | +| Modulus | `m` | `2^32` | + +This generates a deterministic sequence of pseudo-random numbers using the formula: +``` +next = (a * current + c) mod m +``` + +### Memory-Efficient IP Processing + +Instead of loading entire IP ranges into memory, PyLCG: +1. Converts CIDR ranges to start/end integers +2. Uses generator functions for lazy evaluation +3. Calculates IPs on-demand using index mapping +4. Maintains constant memory usage regardless of range size + +### Sharding Algorithm + +The sharding system uses an interleaved approach: +1. Each shard is assigned a subset of indices based on modulo arithmetic +2. The LCG randomizes the order within each shard +3. Work is distributed evenly across shards +4. No sequential scanning patterns + +## Performance + +PyLCG is designed for maximum performance: +- Generates millions of IPs per second +- Constant memory usage (~100KB) +- Minimal CPU overhead +- No disk I/O required + +Benchmark results on a typical system: +- IP Generation: ~5-10 million IPs/second +- Memory Usage: < 1MB for any range size +- LCG Operations: < 1 microsecond per number + +## Contributing + +### Performance Optimization + +We welcome contributions that improve PyLCG's performance. When submitting optimizations: + +1. Run the included benchmark suite: +```bash +python3 unit_test.py +``` + +2. Include before/after benchmark results for: +- IP generation speed +- Memory usage +- LCG sequence generation +- Shard distribution metrics + +3. Consider optimizing: +- Number generation algorithms +- Memory access patterns +- CPU cache utilization +- Python-specific optimizations + +4. Document any tradeoffs between: +- Speed vs memory usage +- Randomness vs performance +- Complexity vs maintainability + +### Benchmark Guidelines + +When running benchmarks: +1. Use consistent hardware/environment +2. Run multiple iterations +3. Test with various CIDR ranges +4. Measure both average and worst-case performance +5. Profile memory usage patterns +6. Test shard distribution uniformity + +## Roadmap + +- [ ] IPv6 support +- [ ] Custom LCG parameters +- [ ] Configurable chunk sizes +- [ ] State persistence +- [ ] Resume capability +- [ ] S3/URL input support +- [ ] Extended benchmark suite + +--- + +###### Mirrors: [acid.vegas](https://git.acid.vegas/pylcg) • [SuperNETs](https://git.supernets.org/acidvegas/pylcg) • [GitHub](https://github.com/acidvegas/pylcg) • [GitLab](https://gitlab.com/acidvegas/pylcg) • [Codeberg](https://codeberg.org/acidvegas/pylcg) diff --git a/pylcg/__init__.py b/pylcg/__init__.py new file mode 100644 index 0000000..99eee9c --- /dev/null +++ b/pylcg/__init__.py @@ -0,0 +1,5 @@ +from .core import LCG, IPRange, ip_stream + +__version__ = "1.0.0" +__author__ = "acidvegas" +__all__ = ["LCG", "IPRange", "ip_stream"] \ No newline at end of file diff --git a/pylcg/cli.py b/pylcg/cli.py new file mode 100644 index 0000000..5ab8f7a --- /dev/null +++ b/pylcg/cli.py @@ -0,0 +1,26 @@ +import argparse +from .core import ip_stream + +def main(): + parser = argparse.ArgumentParser(description='Ultra-fast random IP address generator with optional sharding') + parser.add_argument('cidr', help='Target IP range in CIDR format') + parser.add_argument('--shard-num', type=int, default=1, help='Shard number (1-based)') + parser.add_argument('--total-shards', type=int, default=1, help='Total number of shards (default: 1, no sharding)') + parser.add_argument('--seed', type=int, default=0, help='Random seed for LCG') + + args = parser.parse_args() + + if args.total_shards < 1: + raise ValueError('Total shards must be at least 1') + + if args.shard_num > args.total_shards: + raise ValueError('Shard number must be less than or equal to total shards') + + if args.shard_num < 1: + raise ValueError('Shard number must be at least 1') + + for ip in ip_stream(args.cidr, args.shard_num, args.total_shards, args.seed): + print(ip) + +if __name__ == '__main__': + main() diff --git a/pylcg/core.py b/pylcg/core.py new file mode 100644 index 0000000..93d3ac5 --- /dev/null +++ b/pylcg/core.py @@ -0,0 +1,79 @@ +import ipaddress +import random + +class LCG: + '''Linear Congruential Generator for deterministic random number generation''' + + def __init__(self, seed: int, m: int = 2**32): + self.m = m + self.a = 1664525 + self.c = 1013904223 + self.current = seed + + def next(self) -> int: + '''Generate next random number''' + + self.current = (self.a * self.current + self.c) % self.m + return self.current + + +class IPRange: + '''Memory-efficient IP range iterator''' + + def __init__(self, cidr: str): + network = ipaddress.ip_network(cidr) + self.start = int(network.network_address) + self.total = int(network.broadcast_address) - self.start + 1 + + def get_ip_at_index(self, index: int) -> str: + ''' + Get IP at specific index without generating previous IPs + + :param index: The index of the IP to get + ''' + + if not 0 <= index < self.total: + raise IndexError('IP index out of range') + + return str(ipaddress.ip_address(self.start + index)) + + +def ip_stream(cidr: str, shard_num: int = 1, total_shards: int = 1, seed: int = 0): + ''' + Stream random IPs from the CIDR range. Optionally supports sharding. + Each IP in the range will be yielded exactly once in a pseudo-random order. + + :param cidr: Target IP range in CIDR format + :param shard_num: Shard number (1-based), defaults to 1 + :param total_shards: Total number of shards, defaults to 1 (no sharding) + :param seed: Random seed for LCG (default: random) + ''' + + # Convert to 0-based indexing internally + shard_index = shard_num - 1 + + # Initialize IP range and LCG + ip_range = IPRange(cidr) + + # Use random seed if none provided + if not seed: + seed = random.randint(0, 2**32-1) + + # Initialize LCG + lcg = LCG(seed + shard_index) + + # Calculate how many IPs this shard should generate + shard_size = ip_range.total // total_shards + + # Distribute remainder + if shard_index < (ip_range.total % total_shards): + shard_size += 1 + + # Remaining IPs to yield + remaining = shard_size + + while remaining > 0: + index = lcg.next() % ip_range.total + if total_shards == 1 or index % total_shards == shard_index: + yield ip_range.get_ip_at_index(index) + remaining -= 1 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..16bd754 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..339699a --- /dev/null +++ b/setup.py @@ -0,0 +1,43 @@ +from setuptools import setup, find_packages + +with open("README.md", "r", encoding="utf-8") as fh: + long_description = fh.read() + +setup( + name="pylcg", + version="1.0.2", + author="acidvegas", + author_email="acid.vegas@acid.vegas", + description="Linear Congruential Generator for IP Sharding", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/acidvegas/pylcg", + project_urls={ + "Bug Tracker": "https://github.com/acidvegas/pylcg/issues", + "Documentation": "https://github.com/acidvegas/pylcg#readme", + "Source Code": "https://github.com/acidvegas/pylcg", + }, + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: ISC License (ISCL)", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Internet", + "Topic :: Security", + "Topic :: Software Development :: Libraries :: Python Modules", + ], + packages=find_packages(), + python_requires=">=3.6", + entry_points={ + 'console_scripts': [ + 'pylcg=pylcg.cli:main', + ], + }, +) diff --git a/unit_test.py b/unit_test.py new file mode 100644 index 0000000..69b70d6 --- /dev/null +++ b/unit_test.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +import unittest +import ipaddress +import time +from pylcg import IPRange, ip_stream, LCG + +class Colors: + BLUE = '\033[94m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + CYAN = '\033[96m' + RED = '\033[91m' + ENDC = '\033[0m' + +def print_header(message: str) -> None: + print(f'\n\n{Colors.BLUE}{"="*80}') + print(f'TEST: {message}') + print(f'{"="*80}{Colors.ENDC}\n') + +def print_success(message: str) -> None: + print(f'{Colors.GREEN}✓ {message}{Colors.ENDC}') + +def print_info(message: str) -> None: + print(f"{Colors.CYAN}ℹ {message}{Colors.ENDC}") + +def print_warning(message: str) -> None: + print(f"{Colors.YELLOW}! {message}{Colors.ENDC}") + +class TestIPSharder(unittest.TestCase): + @classmethod + def setUpClass(cls): + print_header('Setting up test environment') + cls.test_cidr = '192.0.0.0/16' # 65,536 IPs + cls.test_seed = 12345 + cls.total_shards = 4 + + # Calculate expected IPs + network = ipaddress.ip_network(cls.test_cidr) + cls.all_ips = {str(ip) for ip in network} + print_success(f"Initialized test environment with {len(cls.all_ips):,} IPs") + + def test_ip_range_initialization(self): + print_header('Testing IPRange initialization') + start_time = time.perf_counter() + + ip_range = IPRange(self.test_cidr) + self.assertEqual(ip_range.total, 65536) + + first_ip = ip_range.get_ip_at_index(0) + last_ip = ip_range.get_ip_at_index(ip_range.total - 1) + + elapsed = time.perf_counter() - start_time + print_success(f'IP range initialization completed in {elapsed:.6f}s') + print_info(f'IP range spans from {first_ip} to {last_ip}') + print_info(f'Total IPs in range: {ip_range.total:,}') + + def test_lcg_sequence(self): + print_header('Testing LCG sequence generation') + + # Test sequence generation speed + lcg = LCG(seed=self.test_seed) + iterations = 1_000_000 + + start_time = time.perf_counter() + for _ in range(iterations): + lcg.next() + elapsed = time.perf_counter() - start_time + + print_success(f'Generated {iterations:,} random numbers in {elapsed:.6f}s') + print_info(f'Average time per number: {(elapsed/iterations)*1000000:.2f} microseconds') + + # Test deterministic behavior + lcg1 = LCG(seed=self.test_seed) + lcg2 = LCG(seed=self.test_seed) + + start_time = time.perf_counter() + for _ in range(1000): + self.assertEqual(lcg1.next(), lcg2.next()) + elapsed = time.perf_counter() - start_time + + print_success(f'Verified LCG determinism in {elapsed:.6f}s') + + def test_shard_distribution(self): + print_header('Testing shard distribution and randomness') + + # Test distribution across shards + sample_size = 65_536 # Full size for /16 + shard_counts = {i: 0 for i in range(1, self.total_shards + 1)} # 1-based sharding + unique_ips = set() + duplicate_count = 0 + + start_time = time.perf_counter() + + # Collect IPs from each shard + for shard in range(1, self.total_shards + 1): # 1-based sharding + ip_gen = ip_stream(self.test_cidr, shard, self.total_shards, self.test_seed) + shard_unique = set() + + # Get all IPs from this shard + for ip in ip_gen: + if ip in unique_ips: + duplicate_count += 1 + else: + unique_ips.add(ip) + shard_unique.add(ip) + + shard_counts[shard] = len(shard_unique) + + elapsed = time.perf_counter() - start_time + + # Print distribution statistics + print_success(f'Generated {len(unique_ips):,} IPs in {elapsed:.6f}s') + print_info(f'Average time per IP: {(elapsed/len(unique_ips))*1000000:.2f} microseconds') + print_info(f'Unique IPs generated: {len(unique_ips):,}') + + if duplicate_count > 0: + print_warning(f'Duplicates found: {duplicate_count:,} ({(duplicate_count/len(unique_ips))*100:.2f}%)') + + expected_per_shard = sample_size // self.total_shards + for shard, count in shard_counts.items(): + deviation = abs(count - expected_per_shard) / expected_per_shard * 100 + print_info(f'Shard {shard}: {count:,} unique IPs ({deviation:.2f}% deviation from expected)') + + # Test randomness by checking sequential patterns + ips_list = sorted([int(ipaddress.ip_address(ip)) for ip in list(unique_ips)[:1000]]) + sequential_count = sum(1 for i in range(len(ips_list)-1) if ips_list[i] + 1 == ips_list[i+1]) + sequential_percentage = (sequential_count / (len(ips_list)-1)) * 100 + + print_info(f'Sequential IP pairs in first 1000: {sequential_percentage:.2f}% (lower is more random)') + +if __name__ == '__main__': + print(f"\n{Colors.CYAN}{'='*80}") + print(f"Starting IP Sharder Tests - Testing with 65,536 IPs (/16 network)") + print(f"{'='*80}{Colors.ENDC}\n") + unittest.main(verbosity=2)