#!/usr/bin/env bash
# Strict mode: abort on the first failing command (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# 1) Load conda and activate your env
#    Adjust the path to `conda.sh` if your install is elsewhere
conda_sh="$HOME/miniconda3/etc/profile.d/conda.sh"
if [[ ! -f "${conda_sh}" ]]; then
  echo "error: conda.sh not found at ${conda_sh}" >&2
  exit 1
fi

# NOTE(review): conda's shell hook and any activate.d scripts shipped with the
# environment may expand variables that are not set, which would abort the
# script under `set -u`. nounset is therefore suspended for the activation
# only; `set -e` stays in force, so a failed activation still stops the script.
set +u
# shellcheck source=/dev/null
source "${conda_sh}"
conda activate env_isaaclab
set -u

# TS loop
#
# Sweep grid: every combination of seed x beta1 x beta2 is run three times,
# once per flag variant (plain, "--lazy --opt_design", and "--lazy").
SEEDS=({1..10})
BETA1S=(1e-2)
BETA2S=(1e2)

#######################################
# Launch one train_rlhf.py run, then pause before the next one.
# Arguments:
#   $1    - label printed in the banner (e.g. "ts lazy")
#   $2    - value passed as --base_seed
#   $3    - value passed as --beta1
#   $4    - value passed as --beta2
#   $5... - extra flags, inserted just before --pure_exploration
# Outputs:
#   Banner and sleep notice on stdout, plus whatever python prints.
# Returns:
#   Non-zero if python or sleep fails; under `set -e` that aborts the sweep.
#######################################
run_ts() {
  local label=$1 seed=$2 beta1=$3 beta2=$4
  shift 4

  echo "=== Running ${label} seed=${seed}, beta1=${beta1}, beta2=${beta2} ==="
  python scripts/rlhf/train_rlhf.py \
    --base_seed "${seed}" \
    --beta1 "${beta1}" \
    --beta2 "${beta2}" \
    --rlhf_algorithm ts_last \
    "$@" \
    --pure_exploration

  # sleep a bit before next run
  echo ">>> Sleeping for 20s before next run..."
  sleep 20
}

for seed in "${SEEDS[@]}"; do
  for beta1 in "${BETA1S[@]}"; do
    for beta2 in "${BETA2S[@]}"; do
      run_ts "ts" "${seed}" "${beta1}" "${beta2}"
      run_ts "ts opt design" "${seed}" "${beta1}" "${beta2}" --lazy --opt_design
      run_ts "ts lazy" "${seed}" "${beta1}" "${beta2}" --lazy
    done
  done
done


echo "All runs completed."