#!/usr/bin/env bash

# Launcher for AntMazeSparse tuned so sparse env reward remains dominant.

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Run identity, overridable from the environment:
#   SEED - forwarded as --seed and embedded in the --algo run name.
#   GPU  - value exported as CUDA_VISIBLE_DEVICES for the training command.
: "${SEED:=42}" "${GPU:=0}"

# Reward-shaping knobs, each overridable from the environment. The defaults
# are intended to keep the sparse env reward (0/1) as the primary signal.
: "${MAN_REW:=0.2}"            # --man_rew_scale
: "${BALANCE_START:=0.02}"     # --man_ctrl_rew_balance_start
: "${BALANCE_END:=0.15}"       # --man_ctrl_rew_balance_end
: "${BALANCE_STEPS:=800000}"   # --man_ctrl_rew_balance_steps
: "${REACH_WARMUP:=4000}"      # --reach_warmup_samples
: "${REACH_ROUNDS:=1}"         # --reach_warmup_rounds

printf '%s\n' "[GPU ${GPU}] Running AntMazeSparse (seed=${SEED}) with man_rew_scale=${MAN_REW}"

# Assemble the training invocation as an array so each value is passed as
# exactly one argument, whatever it contains.
CMD=(python main.py --env_name AntMazeSparse)
CMD+=(--algo "S3_AntMaze_Sparse_s${SEED}" --seed "${SEED}")
CMD+=(--man_rew_scale "${MAN_REW}")
CMD+=(--manager_propose_freq 10 --train_manager_freq 10)
CMD+=(
  --man_ctrl_rew_balance_start "${BALANCE_START}"
  --man_ctrl_rew_balance_end "${BALANCE_END}"
  --man_ctrl_rew_balance_steps "${BALANCE_STEPS}"
)
CMD+=(
  --reach_warmup_samples "${REACH_WARMUP}"
  --reach_warmup_rounds "${REACH_ROUNDS}"
)

# Arguments given to this script are forwarded verbatim, after the built-in
# flags above.
CMD+=("$@")

# Only the seed-7 run additionally passes --save_models.
case "${SEED}" in
  7) CMD+=(--save_models) ;;
esac

# Set CUDA_VISIBLE_DEVICES for this one command only. With errexit active a
# non-zero exit from the run stops the script here, so the message below is
# printed only after a successful run.
CUDA_VISIBLE_DEVICES="${GPU}" "${CMD[@]}"

printf '%s\n' "Run finished."
