61 lines
1.2 KiB
Bash
Executable File
61 lines
1.2 KiB
Bash
Executable File
#!/usr/bin/zsh
#
# Build a shuffled, de-duplicated list of strings of the form
#
#     <word><word><NNN>
#
# where both words are drawn at random from a dictionary file (one word per
# line) and NNN is every zero-padded number from 000 to 999. The list is
# written to <outfile-root>.txt and then split into chunks of 100000 lines
# named <outfile-root>-000, <outfile-root>-001, ...
#
# Usage: <script> <outfile-root> <num_words0> <num_words1> <dictfile>
#
#   outfile-root  prefix for every file this script creates (required)
#   num_words0    how many first words to draw  (an empty value means 100)
#   num_words1    how many second words to draw per first word
#                 (an empty value means 100)
#   dictfile      word list, one word per line, e.g. "lowercase-english"
#                 (required)
#
# Needs shuf, grep, awk, sort (with -R), wc and split on PATH.
# Written so that it runs under zsh (the shebang) and also under bash.

# Captured at top level: inside a zsh function $0 is the function name.
SCRIPT_NAME="$0"

OUTFILE_ROOT="${1:-}"
COUNT0="${2:-100}"
COUNT1="${3:-100}"
DICTFILE="${4:-}"
TMPFILE="${OUTFILE_ROOT}.tmp.txt"

if [ -z "$OUTFILE_ROOT" ] || [ -z "$DICTFILE" ]; then
  echo "Usage: $SCRIPT_NAME <outfile-root> <num_words0> <num_words1> <dictfile>" >&2
  exit 1
fi

if [ ! -f "$DICTFILE" ]; then
  echo "Error: $DICTFILE not found" >&2
  exit 1
fi

# Reject anything that is not a plain non-negative integer before it reaches
# shuf, which would otherwise fail once per loop iteration.
for count in "$COUNT0" "$COUNT1"; do
  case "$count" in
    *[!0-9]*)
      echo "Error: word counts must be non-negative integers (got '$count')" >&2
      exit 1
      ;;
  esac
done

# A glob without matches must expand to nothing: zsh would otherwise abort the
# rm below with "no matches found", and bash would pass the pattern literally.
if [ -n "${ZSH_VERSION:-}" ]; then
  setopt null_glob
elif [ -n "${BASH_VERSION:-}" ]; then
  shopt -s nullglob
fi

# Remove the results of a previous run. -f keeps a first run quiet, and the
# glob only matches the three-digit suffixes that split creates further down,
# so unrelated files sharing the prefix are left alone.
rm -f -v -- "${OUTFILE_ROOT}.txt" "$TMPFILE" "$OUTFILE_ROOT"-[0-9][0-9][0-9]

# From here on the intermediate file may exist; make sure it never outlives
# the script, even when one of the steps below bails out early.
trap 'rm -f -- "$TMPFILE"' EXIT

echo "Generating $TMPFILE"

# Pair every first word with a fresh random sample of second words.
# grep -v -F drops second words that contain the first word as a literal
# substring (which also prevents pairing a word with itself); -F and "--"
# keep dictionary entries from being read as a regex or as an option.
shuf -n "$COUNT0" -- "$DICTFILE" | while IFS= read -r first; do
  [ -n "$first" ] || continue
  shuf -n "$COUNT1" -- "$DICTFILE" \
    | grep -v -F -- "$first" \
    | while IFS= read -r second; do
        [ -n "$second" ] || continue
        printf '%s%s\n' "$first" "$second"
      done
done > "$TMPFILE"

echo "Generating ${OUTFILE_ROOT}.txt"

# Append 000..999 to every pair in a single pass. The input is redirected
# rather than named so awk cannot mistake an odd file name for an option or a
# var=value assignment.
awk 'length($0) > 0 { for (n = 0; n < 1000; n++) printf "%s%03d\n", $0, n }' \
  < "$TMPFILE" > "${OUTFILE_ROOT}.txt" || {
  echo "Error: could not write ${OUTFILE_ROOT}.txt" >&2
  exit 1
}

rm -v -- "$TMPFILE"

echo "Shuffling ${OUTFILE_ROOT}.txt"
# -R orders lines by a random hash and -u drops duplicates; -o allows sorting
# the file onto itself.
sort -u -R -o "${OUTFILE_ROOT}.txt" -- "${OUTFILE_ROOT}.txt" || {
  echo "Error: could not shuffle ${OUTFILE_ROOT}.txt" >&2
  exit 1
}

wc -l "${OUTFILE_ROOT}.txt"

echo "Splitting ${OUTFILE_ROOT}.txt"
# 100000 lines per chunk, numeric three-digit suffixes (-000, -001, ...).
split -l 100000 -d -a 3 -- "${OUTFILE_ROOT}.txt" "${OUTFILE_ROOT}-" || {
  echo "Error: could not split ${OUTFILE_ROOT}.txt" >&2
  exit 1
}

echo "Done"
echo "Split files are ${OUTFILE_ROOT}-000 to ${OUTFILE_ROOT}-xxx"