set -euo pipefail

. intervention_sampling/functions.bash

# Print the command-line synopsis to stdout, followed by one blank line.
# Callers redirect to stderr themselves when reporting an error.
usage() {
  printf 'Usage: %s <base-directory> <automaton-topology> <weight-setting>\n\n' "$0"
}

# Three positional arguments are required. Bail out with the usage text on
# stderr when any are missing; otherwise capture them and drop them from "$@".
if (( $# < 3 )); then
  usage >&2
  exit 1
fi
base_dir=$1
automaton_topology=$2
weight_setting=$3
shift 3

# Validate the weight setting before it reaches any arithmetic context. Bash
# evaluates non-numeric operands of $(( )) and -ne as arithmetic expressions,
# so an unchecked command-line value can trigger confusing "unbound variable"
# errors under `set -u` or even run embedded command substitutions.
if [[ ! $weight_setting =~ ^[0-9]+$ ]]; then
  echo "error: weight setting must be a non-negative integer: $weight_setting" >&2
  exit 1
fi
# Force base 10 so values with leading zeros (e.g. 08) are not read as octal.
weight_value=$((10#$weight_setting))

# Pick the random seed and the extra sample_data.py arguments for the
# requested automaton topology:
#   example     - seed is 123 plus the weight setting; a named automaton
#                 (canonical_parity) is passed explicitly.
#   random-<N>  - seed is fixed at 123 and no extra arguments are passed.
#                 The numeric suffix is matched but not used in this branch.
if [[ $automaton_topology = example ]]; then
  random_seed=$((123 + weight_value))
  automaton_args=(--automaton_name canonical_parity)
elif [[ $automaton_topology =~ ^random-([0-9]+)$ ]]; then
  # TODO Temporary hack because random seed randomizes both topology and weights.
  if (( weight_value != 1 )); then
    echo "error: weight setting must be 1, not $weight_setting" >&2
    exit 1
  fi
  random_seed=123
  automaton_args=()
else
  echo "error: invalid automaton topology: $automaton_topology" >&2
  exit 1
fi

# Derive the dataset name and resolve its directory. get_dataset_dir is not
# defined in this script (presumably it comes from the sourced functions.bash).
dataset_name="${automaton_topology}/${weight_setting}"
dataset_dir="$(get_dataset_dir "${base_dir}" "${dataset_name}")"
# Locations of the "original" training set and the test set under that directory.
original_dir="${dataset_dir}/train/original"
test_dir="${dataset_dir}/test"

# Run the sampler, writing the training, validation, and test token files to
# the paths computed above.
#
# ${automaton_args[@]+"${automaton_args[@]}"} expands to the array's elements
# (each as its own word) when it has any, and to nothing when it is empty.
# A plain "${automaton_args[@]}" aborts with "unbound variable" under `set -u`
# on bash older than 4.4, and the random-<N> topology always leaves the array
# empty.
python intervention_sampling/sample_data.py \
  --dataset_size 10000 \
  --num_val 1000 \
  --num_test 1000 \
  ${automaton_args[@]+"${automaton_args[@]}"} \
  --accept_prob 0.05 \
  --seed "$random_seed" \
  --intervention_type vanilla \
  --training_output "$original_dir"/main.tok \
  --validation_output "$original_dir"/datasets/validation/main.tok \
  --test_output "$test_dir"/main.tok
# Run the data preparation script. Each --more-data-files option is given a
# .tok path followed by the matching .prepared path, for the original training
# set and for its validation set.
# NOTE(review): the test directory is what gets passed as --training-data;
# presumably intentional, but confirm against prepare_data.py.
prepare_args=(
  --training-data "$test_dir"
  --more-data-files
  "$original_dir/main.tok"
  "$original_dir/main.prepared"
  --more-data-files
  "$original_dir/datasets/validation/main.tok"
  "$original_dir/datasets/validation/main.prepared"
  --never-allow-unk
)
python rau/tasks/language_modeling/prepare_data.py "${prepare_args[@]}"
