# Abort on any failing command, on expansion of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Load shared helper functions; the path is relative to the current working
# directory.
# NOTE(review): `usage` (called below) is not defined in this file and is
# presumably provided by this sourced file -- confirm.
source intervention_sampling/functions.bash

# The first positional argument is the base directory and is required: when it
# is missing, print the usage text to stderr and exit with status 1.
base_dir=${1-}
if (( $# < 1 )); then
  usage >&2
  exit 1
fi
shift

#######################################
# Print randomly generated examples to stdout, one example per line.
#
# Each line is the token `1` followed by 0 to 19 additional tokens, each
# either `0` or `1`, separated by single spaces. Both the number of extra
# tokens and their values are drawn from $RANDOM.
#
# Arguments: $1 - number of examples (lines) to generate
# Outputs:   the generated examples on stdout
#######################################
generate_data() {
  local num_examples=$1
  # Keep the loop counters and the per-line length local so that calling this
  # function does not overwrite same-named variables in the caller.
  local i j length
  for ((i = 0; i < num_examples; i++)); do
    length=$((RANDOM % 20))
    # Every example starts with the token 1.
    printf '1'
    for ((j = 0; j < length; j++)); do
      printf ' %d' "$((RANDOM % 2))"
    done
    printf '\n'
  done
}

# Dataset names; get_dataset_dir maps each one, together with the base
# directory, to the directory used below.
training_data=random-1/train/state-1/200
test_data=random-1/test
training_data_dir=$(get_dataset_dir "$base_dir" "$training_data")
test_data_dir=$(get_dataset_dir "$base_dir" "$test_data")
validation_dir=$test_data_dir/datasets/validation

# Make sure every output directory exists before writing into it.
mkdir -p "$training_data_dir" "$validation_dir" "$test_data_dir"

# Write 1000 random examples to the training directory and 100 each to the
# validation and test locations.
generate_data 1000 > "$training_data_dir"/main.tok
generate_data 100 > "$validation_dir"/main.tok
generate_data 100 > "$test_data_dir"/main.tok

# NOTE(review): the test directory is what is passed as --training-data, while
# the training file is handed over through --more-data-files (main.tok followed
# by main.prepared). This looks deliberate, but confirm against
# prepare_data.py.
prepare_args=(
  --training-data "$test_data_dir"
  --more-data validation
  --more-data-files
  "$training_data_dir"/main.tok
  "$training_data_dir"/main.prepared
  --never-allow-unk
)
python rau/tasks/language_modeling/prepare_data.py "${prepare_args[@]}"
