#!/bin/bash
#
# Launch rlhf/train_agent.py with the PPO algorithm once for every
# (seed, environment, feedback type) combination.
#
# Each run is started in the background. By default nothing waits for the
# runs, so every combination is spawned right away and the script returns
# while the training processes are still running.
#
# Optional environment:
#   WAIT_PER_SEED  Set to 1 to block after each seed until all runs started
#                  so far have exited, which caps the number of concurrent
#                  runs at (#envs * #feedback_types). Default: 0 (no waiting).

# Fail loudly on a misspelled variable name instead of expanding it to "".
set -u

# Set the experiment parameters
envs=("Swimmer-v5" "HalfCheetah-v5" "Walker2d-v5")
#envs=("Ant-v5")
cuda_devices=(0 1 2 3 4 0 1 2 3 4 0 1 2 3 4)
seeds=(1789 1687123 12 912391 330)
feedback_types=("evaluative" "comparative" "demonstrative" "corrective" "descriptive" "descriptive_preference")

num_devices=${#cuda_devices[@]}
if (( num_devices == 0 )); then
  echo "cuda_devices must contain at least one entry" >&2
  exit 1
fi

# Loop over the seeds, environments and feedback types
for seed in "${seeds[@]}"; do
  for env_name in "${envs[@]}"; do
    for j in "${!feedback_types[@]}"; do
      feedback_type=${feedback_types[j]}
      # The device is chosen by feedback-type index. Wrapping with a modulo
      # keeps the lookup valid when there are more feedback types than
      # device entries, instead of exporting an empty value.
      device=${cuda_devices[j % num_devices]}
      export CUDA_VISIBLE_DEVICES="${device}"
      echo "Train Agent for ${env_name} and FB Type ${feedback_type} with CUDA_VISIBLE_DEVICES=${device}"
      # Backgrounded: the child inherits the environment exported just above.
      python rlhf/train_agent.py --algorithm ppo --environment "${env_name}" --feedback-type "${feedback_type}" --seed "${seed}" &
    done
  done
  # Optionally wait for all training processes of this seed to finish
  if [[ "${WAIT_PER_SEED:-0}" == "1" ]]; then
    wait
  fi
done

echo "Training for all environments."
