#!/usr/bin/env bash
# Strict mode: abort on unhandled command failure (-e) and on use of an
# unset variable (-u). The flags are given without a trailing bare `-o`,
# because `set -o` with no option name prints the full option table to stdout.
set -eu

# Turn on pipefail (a pipeline fails if any stage fails) only when running
# under bash major version 4 or newer.
if [[ -n "${BASH_VERSINFO:-}" ]] && (( BASH_VERSINFO[0] >= 4 )); then
    set -o pipefail
fi

#######################################
# Pick a random port in 49152-65535 that netstat does not report as a
# local address.
# Arguments: none
# Outputs:   the chosen port number on stdout
# Returns:   0 on success, 1 if shuf fails to produce a candidate
#######################################
pick_port() {
    local port
    while :; do
        # Bail out rather than loop on an empty candidate if shuf fails.
        port=$(shuf -n 1 -i 49152-65535) || return 1

        # Compare only against the port suffix of the local-address column
        # (assumes the net-tools layout where column 4 is "addr:port" or
        # "addr.port"), so the number appearing in a queue size, a foreign
        # address, or as part of a longer number does not count as "in use".
        # awk consumes all of netstat's output, so netstat is never killed by
        # SIGPIPE; under pipefail that would make the pipeline look like
        # "not found" for a port that is actually taken.
        if ! netstat -atun \
            | awk -v p="$port" '$4 ~ ("[:.]" p "$") { found = 1 } END { exit !found }'; then
            break
        fi
    done
    printf '%s\n' "$port"
}

# Absolute locations of the training and evaluation configs. The relative
# paths are resolved by realpath against the current working directory.
TRAIN_CFG=$(realpath configs/train.json)
EVAL_CFG=$(realpath configs/eval.json)

# Arguments collected once so they can be expanded into a launch command.
COMMON_ARGS=(--train-config "$TRAIN_CFG")

# Select the launch mode from the environment:
#   - SLURM_JOB_ID set  -> prepare the distributed-run environment
#   - macOS (darwin*)   -> run emulator-train locally with evaluation enabled
#   - anything else     -> report the OS as unsupported and exit 1
if [[ -n "${SLURM_JOB_ID:-}" ]]; then
    echo "Running on SLURM (job ${SLURM_JOB_ID})"

    # NCCL / CUDA / torch.distributed settings exported for child processes.
    export NCCL_DEBUG=INFO
    export NCCL_P2P_DISABLE=1
    export CUDA_LAUNCH_BLOCKING=1
    export TORCH_DISTRIBUTED_DEBUG=DETAIL

    # The first hostname in the job's node list becomes the master address.
    # Assignment and export are separate so a failing pipeline is not masked.
    MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n1)
    export MASTER_ADDR
    # PORT is assigned but not exported in this branch.
    PORT=$(pick_port)
    echo "MASTER_ADDR=$MASTER_ADDR  PORT=$PORT"
    # NOTE(review): no training command is launched in this branch within the
    # visible code; confirm the launch happens elsewhere.

elif [[ "$OSTYPE" == "darwin"* ]]; then
    echo "Running locally on macOS (single GPU/CPU)"
    emulator-train \
        "${COMMON_ARGS[@]}" \
        --eval-config "$EVAL_CFG" --run-eval
else
    # Fatal diagnostic goes to stderr so it is not mixed into stdout.
    echo "Unsupported OS: $OSTYPE" >&2
    exit 1
fi