#!/usr/bin/env bash

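# Launch AntMazeSparse training for a fixed set of seeds, one run at a time.
# Any command-line arguments are forwarded unchanged to main.py.
# Optional environment variables:
#   GPUS="0 1"            # whitespace-separated GPU ids, assigned to seeds round-robin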
#   MAN_REW=0.45          # manager intrinsic reward scaling
#   MAN_PROPOSE=10        # manager proposal frequency
#   TRAIN_MANAGER=10      # manager training frequency
#   BALANCE_START=0.05    # shaping mix at warm start
#   BALANCE_END=0.35      # shaping mix at full schedule
#   BALANCE_STEPS=600000  # steps to reach BALANCE_END
#   REACH_WARMUP=4000     # reach buffer warmup samples
#   REACH_ROUNDS=1        # reach-net optimisation rounds during warmup

# Strict mode: stop on the first failing command, on use of an unset variable,
# and on a failure in any stage of a pipeline.
set -euo pipefail

# Seeds to train, in launch order.
SEEDS=(7 19 23 42 73)

# GPU ids come from $GPUS (whitespace-separated); an unset or empty value
# falls back to the single id "0".
GPU_LIST=${GPUS:-0}
read -r -a GPU_IDS <<< "$GPU_LIST"
NUM_GPUS=${#GPU_IDS[@]}

# A whitespace-only GPUS value slips past the ":-0" default and splits into an
# empty array. Reject it here with a clear message rather than letting the
# later "job % NUM_GPUS" fail with an opaque division-by-zero error.
if (( NUM_GPUS == 0 )); then
  printf 'error: GPUS must list at least one GPU id (got "%s")\n' "$GPU_LIST" >&2
  exit 2
fi

# Hyperparameters passed to main.py; each one can be overridden from the
# environment, otherwise the default shown here is used.
MAN_REW=${MAN_REW:-0.45}
MAN_PROPOSE=${MAN_PROPOSE:-10}
TRAIN_MANAGER=${TRAIN_MANAGER:-10}
BALANCE_START=${BALANCE_START:-0.05}
BALANCE_END=${BALANCE_END:-0.35}
BALANCE_STEPS=${BALANCE_STEPS:-600000}
REACH_WARMUP=${REACH_WARMUP:-4000}
REACH_ROUNDS=${REACH_ROUNDS:-1}

# Train each seed in turn, one process at a time. The seed at position `job`
# is pinned to GPU_IDS[job % NUM_GPUS], so seeds cycle through the GPU list.
# Under `set -e`, a failing training run stops the remaining seeds.
job=0
for seed in "${SEEDS[@]}"; do
  gpu=${GPU_IDS[$((job % NUM_GPUS))]}
  algo="HAWK_AntMazeSparse_s${seed}"
  echo "[GPU ${gpu}] Running AntMazeSparse seed=${seed} (algo=${algo})"

  # Expose only the selected GPU to this run; trailing "$@" forwards any extra
  # command-line arguments of this script to main.py.
  CUDA_VISIBLE_DEVICES="$gpu" \
  python main.py \
    --env_name AntMazeSparse \
    --algo "$algo" \
    --seed "$seed" \
    --man_rew_scale "$MAN_REW" \
    --manager_propose_freq "$MAN_PROPOSE" \
    --train_manager_freq "$TRAIN_MANAGER" \
    --man_ctrl_rew_balance_start "$BALANCE_START" \
    --man_ctrl_rew_balance_end "$BALANCE_END" \
    --man_ctrl_rew_balance_steps "$BALANCE_STEPS" \
    --reach_warmup_samples "$REACH_WARMUP" \
    --reach_warmup_rounds "$REACH_ROUNDS" \
    "$@"

  # Use a plain assignment rather than ((job++)): the post-increment expression
  # evaluates to the old value, so with job=0 the (( )) command returns status
  # 1 and `set -e` would terminate the script after the first seed.
  job=$((job + 1))
done

echo "All AntMazeSparse runs finished."
