commit f4eb446fef0bd54cec421ae4fa892c5ce7dfea3e Author: acidvegas Date: Fri Dec 6 23:18:44 2024 -0500 Initial commit diff --git a/.screens/shardz.jpg b/.screens/shardz.jpg new file mode 100644 index 0000000..2a791eb Binary files /dev/null and b/.screens/shardz.jpg differ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9f32a49 --- /dev/null +++ b/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2025, acidvegas + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..7c100b4 --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +

Shardz

+

+ +

+ +Shardz is a lightweight C utility that shards *(splits)* the output of any process for distributed processing. It allows you to easily distribute workloads across multiple processes or machines by splitting input streams into evenly distributed chunks. + +## Use Cases +- Distributing large datasets across multiple workers +- Parallel processing of log files +- Load balancing input streams +- Splitting any line-based input for distributed processing + +## Building +```bash +gcc -o shardz shardz.c +``` + +## Usage +```bash +some_command | shardz INDEX/TOTAL +``` + +Where: +- `INDEX` is the shard number (starting from 1) +- `TOTAL` is the total number of shards + +### Examples +- Machine number 1 would run: +```bash +curl https://example.com/large_file.txt | shardz 1/3 +``` + +- Machine number 2 would run: +```bash +curl https://example.com/large_file.txt | shardz 2/3 +``` + +- Machine number 3 would run: +```bash +curl https://example.com/large_file.txt | shardz 3/3 +``` + +## How It Works + +Shardz uses a modulo operation to determine which lines should be processed by each shard. For example, with `3` total shards: +- Shard 1 processes lines 1, 4, 7, 10, ... +- Shard 2 processes lines 2, 5, 8, 11, ... +- Shard 3 processes lines 3, 6, 9, 12, ... + +This ensures an even distribution of the workload across all shards. 
/*
 * This file was recovered from a patch whose lines had been collapsed
 * together. The non-code text that shared the line with this source is kept
 * here verbatim so nothing is lost.
 *
 * README.md, closing lines:
 *
 *   ---
 *
 *   ###### Mirrors: [acid.vegas](https://git.acid.vegas/shardz) • [SuperNETs](https://git.supernets.org/acidvegas/shardz) • [GitHub](https://github.com/acidvegas/shardz) • [GitLab](https://gitlab.com/acidvegas/shardz) • [Codeberg](https://codeberg.org/acidvegas/shardz)
 *
 * Patch header of the original shardz.c:
 *
 *   diff --git a/shardz.c b/shardz.c
 *   new file mode 100644
 *   index 0000000..1564119
 *   --- /dev/null
 *   +++ b/shardz.c
 *   @@ -0,0 +1,57 @@
 */

// SHARDZ - Shard the output of any process for distributed processing - Developed by acidvegas in C (https://github.com/acidvegas/shardz)
// shardz.c

// getline() and ssize_t are POSIX rather than ISO C; request them explicitly
// so the file also builds with strict -std=c99 / -std=c11.
#define _POSIX_C_SOURCE 200809L

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/*
 * Print the usage line to stderr and terminate the process with status 1.
 * This function never returns.
 *
 * program_name: name shown in the usage line (normally argv[0]). A NULL
 *               pointer is replaced by "shardz", because argv[0] may be NULL
 *               when the program is executed with an empty argument vector.
 */
void print_usage(const char* program_name) {
    fprintf(stderr, "Usage: %s INDEX/TOTAL\n", program_name ? program_name : "shardz");
    exit(1);
}

/*
 * Parse `text` as a complete base-10 integer that is >= 1.
 *
 * Returns 0 and stores the value in *out on success. Returns -1 when the
 * string is empty, has trailing characters, is below 1, or does not fit in a
 * long (strtol reports that case through errno == ERANGE).
 */
static int parse_positive(const char *text, long *out) {
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || value < 1) {
        return -1;
    }

    *out = value;
    return 0;
}

/*
 * Copy every line of `in` that belongs to shard `shard_index` of
 * `shard_total` to `out`. Line k (1-based) belongs to the shard when
 * k is congruent to shard_index modulo shard_total.
 *
 * Requires 1 <= shard_index <= shard_total.
 * Returns 0 on success, -1 if reading or writing failed.
 */
static int shard_lines(FILE *in, FILE *out, long shard_index, long shard_total) {
    char *line = NULL;
    size_t capacity = 0;
    ssize_t nread;
    int rc = 0;

    // Position of the current line inside its group of `shard_total` lines.
    // It cycles through 1..shard_total, so unlike an ever-growing line
    // counter it can never overflow, however long the input is.
    long slot = 1;

    while ((nread = getline(&line, &capacity, in)) != -1) {
        if (slot == shard_index) {
            // Write exactly the bytes getline() returned: "%s" would stop at
            // the first NUL byte and silently truncate such lines.
            size_t want = (size_t)nread;
            if (fwrite(line, 1, want, out) != want) {
                rc = -1;
                break;
            }
        }
        slot = (slot == shard_total) ? 1 : slot + 1;
    }

    // getline() returns -1 both at end of input and on failure; the stream's
    // error indicator tells the two apart.
    if (ferror(in)) {
        rc = -1;
    }
    if (fflush(out) == EOF) {
        rc = -1;
    }

    free(line);
    return rc;
}

/*
 * Entry point: `shardz INDEX/TOTAL`.
 *
 * Reads lines from stdin and writes to stdout only those belonging to shard
 * INDEX out of TOTAL (both >= 1, INDEX <= TOTAL).
 *
 * Exit status: 0 on success, 1 on a usage error or an I/O failure.
 */
int main(int argc, char *argv[]) {
    if (argc != 2) {
        print_usage(argc > 0 ? argv[0] : NULL);
    }

    char *slash = strchr(argv[1], '/');
    if (!slash) {
        print_usage(argv[0]);
    }

    // Split "INDEX/TOTAL" in place into two NUL-terminated strings.
    *slash = '\0';

    long shard_index = 0;
    long shard_total = 0;
    if (parse_positive(argv[1], &shard_index) != 0 ||
        parse_positive(slash + 1, &shard_total) != 0) {
        print_usage(argv[0]);
    }

    if (shard_index > shard_total) {
        fprintf(stderr, "Error: INDEX cannot be greater than TOTAL\n");
        exit(1);
    }

    if (shard_lines(stdin, stdout, shard_index, shard_total) != 0) {
        fprintf(stderr, "Error: I/O failure while sharding input\n");
        return 1;
    }

    return 0;
}