set -euo pipefail

. recognizers/functions.bash

# Print the command-line help for this script on stdout.
# The script path ($0) is interpolated into the first line. Each printf
# argument below becomes one output line; the final empty argument yields the
# trailing blank line.
usage() {
  printf '%s\n' \
    "Usage: $0 <base-directory> <language> <string-length> " \
    '' \
    'Prepare the datasets for a hand-coded language.' \
    '' \
    '  <base-directory>' \
    '    Directory under which all datasets and models are stored.' \
    '  <language>' \
    '    Name of the language to prepare. Corresponds to the name of a directory' \
    '    under <base-directory>/languages/.' \
    '  <string-length> ' \
    '    Length of the strings to be sampled.' \
    ''
}

# Exactly three leading positional arguments are required. With fewer, print
# the help text on stderr and exit with status 1.
if (( $# < 3 )); then
  usage >&2
  exit 1
fi

base_dir=$1
language=$2
string_len=$3
# Drop the three consumed arguments from the positional list.
shift 3

# Split a language identifier into the pieces used for sampling.
#
# Recognised forms:
#   k-sparse-parity-<k>-<trial>
#   k-sparse-majority-<k>-<trial>
#   random-language-<trial>
#   anything else: taken verbatim as the language name, with k=0 and trial=0
#
# Arguments:
#   $1 - the language identifier
# Globals written:
#   language_name - identifier with the numeric suffixes removed
#   k             - the <k> digits exactly as written, or 0 if the form has none
#   trial         - the <trial> number in plain decimal, or 0 if the form has none
parse_language_argument() {
  local spec=$1
  if [[ $spec =~ ^(k-sparse-parity|k-sparse-majority)-([0-9]+)-([0-9]+)$ ]]; then
    language_name=${BASH_REMATCH[1]}
    k=${BASH_REMATCH[2]}
    # trial is used in shell arithmetic to derive the random seed. Force
    # base 10 so a leading zero is not read as octal: "08" would abort the
    # script and "010" would silently become 8.
    trial=$(( 10#${BASH_REMATCH[3]} ))
  elif [[ $spec =~ ^(random-language)-([0-9]+)$ ]]; then
    language_name=${BASH_REMATCH[1]}
    k=0
    trial=$(( 10#${BASH_REMATCH[2]} ))
  else
    language_name=$spec
    k=0
    trial=0
  fi
}

parse_language_argument "$language"

# Directory for this language and string length, as reported by the
# get_language_dir helper from the sourced functions file.
language_dir=$(get_language_dir "$base_dir" "$language" "$string_len")

# Paths inside the language directory. Nothing in the visible part of this
# script reads them.
automaton=$language_dir/automaton.pt
sampler=$language_dir/sampler.pt

# Each trial gets its own seed: a fixed offset plus the trial number.
seed=$(( trial + 12345 ))

# Make sure the output directory exists before the sampler runs.
mkdir -p "$language_dir"

# Arguments for the dataset sampler, one option per line.
sample_args=(
  --output "$language_dir"
  --random-seed "$seed"
  --language "$language_name"
  --string-length "$string_len"
  --k "$k"
  --skip-test-edit-distance
)
python recognizers/string_sampling/sample_dataset.py "${sample_args[@]}"

# Hand off to the neural-network preparation script with the original
# (unsplit) language identifier.
bash recognizers/neural_networks/prepare_language.bash "$base_dir" "$language" "$string_len"
