set -euo pipefail

. recognizers/functions.bash

# Print the command-line help text for this script to stdout.
# Callers that report an argument error redirect the output to stderr.
usage() {
  cat <<EOF
Usage: $0 <base-directory> <language> <string-length>

Prepare the datasets for a language.

  <base-directory>
    Directory under which all datasets and models are stored.
  <language>
    Name of the language to prepare. Corresponds to the name of a directory
    under <base-directory>/languages/.
  <string-length>
    Length of strings.

EOF
}

# All three positional arguments are mandatory; print the help text to stderr
# and fail when any of them is missing.
if (( $# < 3 )); then
  usage >&2
  exit 1
fi

# Capture the arguments, then drop them from the positional parameters.
base_dir=$1
language=$2
string_len=$3
shift 3

# Resolve the directory for this language and string length.
# NOTE(review): get_language_dir is presumably provided by
# recognizers/functions.bash, which is sourced at the top of this script.
language_dir=$(get_language_dir "$base_dir" "$language" "$string_len")

# Extra arguments passed to prepare_data.py. The array must always be defined,
# because it is expanded in the command below while `set -u` is in effect.
flags=()

# Optional datasets (currently disabled).
# for dataset in test-short-held-out; do
#   if [[ -e $language_dir/datasets/$dataset ]]; then
#     flags+=(--more-data "$dataset")
#   fi
# done

# ${flags[@]+"${flags[@]}"} expands to nothing when the array is empty. A plain
# "${flags[@]}" on an empty or unset array aborts with "unbound variable"
# under `set -u` on bash versions older than 4.4.
python recognizers/neural_networks/prepare_data.py \
  --training-data "$language_dir" \
  --more-data validation \
  --more-data test \
  ${flags[@]+"${flags[@]}"} \
  --never-allow-unk
