#!/usr/bin/env bash

# Launcher for AntPush with tuned intrinsic weighting for dense rewards.
# Starts one run per seed in parallel (default: the single seed 73; override with
# SEEDS="s1 s2 ...") to spread the CPU cores across runs while sharing one GPU.

set -euo pipefail

# ---------------------------------------------------------------------------
# Tunables. Every value below can be overridden from the environment.
#   GPU               -> exported as CUDA_VISIBLE_DEVICES for each run
#   MAN_REW           -> --man_rew_scale
#   MAN_NOISE[_FINAL] -> --man_noise_sigma / --man_noise_sigma_final
#   CTRL_NOISE[_FINAL]-> --ctrl_noise_sigma / --ctrl_noise_sigma_final
#   USE_ADJ           -> 0 adds --disable_adj_net, anything else leaves it on
#   SEEDS             -> space-separated seed list (defaults to DEFAULT_SEEDS)
# ---------------------------------------------------------------------------
GPU=${GPU:-0}
MAN_REW=${MAN_REW:-0.32}
MAN_NOISE=${MAN_NOISE:-0.52}
MAN_NOISE_FINAL=${MAN_NOISE_FINAL:-0.38}
CTRL_NOISE=${CTRL_NOISE:-0.85}
CTRL_NOISE_FINAL=${CTRL_NOISE_FINAL:-0.5}
USE_ADJ=${USE_ADJ:-1}
DEFAULT_SEEDS=(73)

if [[ -n "${SEEDS:-}" ]]; then
  # Respect user-provided seeds (space separated).
  read -r -a SEED_LIST <<<"${SEEDS}"
else
  # Quoted so that each default seed stays exactly one array element.
  SEED_LIST=("${DEFAULT_SEEDS[@]}")
fi

# A SEEDS value made only of whitespace yields an empty list; refuse to go on.
if ((${#SEED_LIST[@]} == 0)); then
  echo "[ERROR] No seeds configured. Provide SEEDS=\"s1 s2 ...\"." >&2
  exit 1
fi

NUM_JOBS=${#SEED_LIST[@]}

# NOTE(review): `nproc --all` counts installed CPUs, which can exceed the CPUs
# this process is actually allowed to use (cpuset / inherited affinity). The
# taskset ranges built later assume CPUs 0..TOTAL_CORES-1 are usable — confirm
# for containerized runs.
TOTAL_CORES=$(nproc --all 2>/dev/null || echo 1)
if (( TOTAL_CORES <= 0 )); then
  TOTAL_CORES=1
fi

# Even split of cores across runs, never less than one core per run.
CORES_PER_JOB=$(( TOTAL_CORES / NUM_JOBS ))
if (( CORES_PER_JOB < 1 )); then
  CORES_PER_JOB=1
fi

# Default OpenMP thread-binding settings; values already present in the
# environment win. (Per-job thread counts are overridden separately through
# OMP_NUM_THREADS_OVERRIDE / MKL_NUM_THREADS_OVERRIDE.)
export OMP_PROC_BIND=${OMP_PROC_BIND:-TRUE}
export OMP_PLACES=${OMP_PLACES:-cores}

echo "[INFO] Launching ${NUM_JOBS} AntPush runs on GPU ${GPU}."
echo "[INFO] System reports ${TOTAL_CORES} CPU cores; assigning ≈${CORES_PER_JOB} per job."

# Pin runs to disjoint core ranges only when taskset exists and there is at
# least one core per run; otherwise runs are left unpinned.
use_taskset=0
if command -v taskset >/dev/null 2>&1 && (( TOTAL_CORES >= NUM_JOBS )); then
  use_taskset=1
fi

pids=()

cleanup() {
  if ((${#pids[@]})); then
    kill "${pids[@]}" 2>/dev/null || true
  fi
}

trap cleanup INT TERM

# ---------------------------------------------------------------------------
# Launch one background training run per seed.
#
# Each run gets a contiguous core range [start, end] (used only when
# use_taskset is 1); the last run also absorbs any cores left over by the
# integer division. The PID of every run is appended to `pids`.
# ---------------------------------------------------------------------------

# Loop-invariant values, resolved once.
# Effective per-run thread counts: an explicit *_OVERRIDE wins over the
# computed per-job core share. The same values are logged and exported, so the
# log line always reflects what the run really receives.
omp_threads=${OMP_NUM_THREADS_OVERRIDE:-$CORES_PER_JOB}
mkl_threads=${MKL_NUM_THREADS_OVERRIDE:-$CORES_PER_JOB}

adj_state="on"
if [[ "$USE_ADJ" -eq 0 ]]; then
  adj_state="off"
fi

next_core=0
for idx in "${!SEED_LIST[@]}"; do
  seed=${SEED_LIST[$idx]}
  run_algo="S3_AntPush_s${seed}_strong_parallel_3"

  # Core range for this run, clamped to the last available core index.
  start=$next_core
  end=$(( start + CORES_PER_JOB - 1 ))
  if (( idx == NUM_JOBS - 1 )); then
    # Last run takes whatever cores remain.
    end=$(( TOTAL_CORES - 1 ))
  fi
  if (( end >= TOTAL_CORES )); then
    end=$(( TOTAL_CORES - 1 ))
  fi
  if (( end < start )); then
    end=$start
  fi
  next_core=$(( end + 1 ))

  # Empty affinity means "do not pin"; it is shown as "all" in the log line.
  affinity=""
  if (( use_taskset == 1 )); then
    affinity="${start}-${end}"
  fi

  echo "[GPU ${GPU}] Seed ${seed} → man_rew_scale=${MAN_REW} (adj_net=${adj_state}) cores=${affinity:-all}" \
       "OMP=${omp_threads}"

  run_cmd=(
    python main.py
      --env_name AntPush
      --algo "$run_algo"
      --seed "$seed"
      --man_rew_scale "$MAN_REW"
      --manager_propose_freq 10
      --train_manager_freq 11
      --man_ctrl_rew_balance_start 0.12
      --man_ctrl_rew_balance_end 0.42
      --man_ctrl_rew_balance_steps 520000
      --candidate_goals 8
      --man_noise_sigma "$MAN_NOISE"
      --man_noise_sigma_final "$MAN_NOISE_FINAL"
      --ctrl_noise_sigma "$CTRL_NOISE"
      --ctrl_noise_sigma_final "$CTRL_NOISE_FINAL"
      --man_act_lr 6e-5
      --man_crit_lr 6e-4
      --man_soft_sync_rate 0.003
      --ctrl_soft_sync_rate 0.0035
      --man_buffer_size 180000
      --ctrl_buffer_size 220000
      --noise_anneal_start 1.0e6
      --noise_anneal_steps 1.6e6
      --reach_warmup_samples 2200
      --reach_warmup_rounds 1
      --fast_mode
  )

  if [[ "$adj_state" == "off" ]]; then
    run_cmd+=(--disable_adj_net)
  fi

  (
    set -euo pipefail
    export OMP_NUM_THREADS=$omp_threads
    export MKL_NUM_THREADS=$mkl_threads
    export CUDA_VISIBLE_DEVICES=$GPU
    # `exec` replaces this wrapper subshell with the training command, so the
    # PID recorded via $! below is the training process itself. Without it a
    # `kill` on the recorded PID only terminates the wrapper shell and can
    # leave the child process running.
    if (( use_taskset == 1 )); then
      exec taskset -c "$affinity" "${run_cmd[@]}"
    else
      exec "${run_cmd[@]}"
    fi
  ) &

  pids+=("$!")
done

# Reap every background run, remembering whether any of them ended with a
# non-zero status. All runs are waited for even after a failure is seen.
fail=0
for job_pid in "${pids[@]}"; do
  wait "$job_pid" || fail=1
done

# Every run has been reaped; restore default INT/TERM handling.
trap - INT TERM

# Report the aggregate outcome; a single failed run fails the whole launcher.
if (( fail != 0 )); then
  echo "One or more AntPush runs failed." >&2
  exit 1
fi
echo "All AntPush runs finished successfully."
