#!/usr/bin/env bash

set -euo pipefail

# Child PIDs tracked for cleanup
declare -a CHILD_PIDS=()

#######################################
# EXIT handler: stop every tracked background job that is still running.
# Sends SIGTERM, gives the jobs up to about one second to disappear, then
# sends SIGKILL to whatever is left. Finally reports a non-zero exit status.
# Globals:   CHILD_PIDS (read) - PIDs of background jobs started by this script
# Outputs:   status messages on stderr
#######################################
cleanup() {
  local ec=$? # must stay first: the status the script is exiting with
  local pid tries=0 survivors
  local -a alive=()

  # Run at most once, and do not let a second Ctrl-C interrupt the cleanup.
  trap - EXIT
  trap '' INT TERM

  # The ${arr[@]+...} form avoids an "unbound variable" error for an empty
  # array under `set -u` on bash < 4.4.
  for pid in ${CHILD_PIDS[@]+"${CHILD_PIDS[@]}"}; do
    if kill -0 "$pid" 2>/dev/null; then
      alive+=("$pid")
    fi
  done

  if ((${#alive[@]})); then
    echo "[orchestrator] Cleaning up ${#alive[@]} child process(es)..." >&2
    kill "${alive[@]}" 2>/dev/null || true

    # Poll in short steps instead of sleeping a fixed second, so the script
    # exits as soon as the jobs are gone.
    while ((tries < 10)); do
      survivors=0
      for pid in "${alive[@]}"; do
        if kill -0 "$pid" 2>/dev/null; then
          survivors=1
          break
        fi
      done
      if ((survivors == 0)); then
        break
      fi
      # Fractional sleep is not POSIX; fall back to a whole second.
      sleep 0.1 2>/dev/null || sleep 1
      tries=$((tries + 1))
    done

    # Second pass: force kill anything that ignored SIGTERM.
    for pid in "${alive[@]}"; do
      if kill -0 "$pid" 2>/dev/null; then
        kill -9 "$pid" 2>/dev/null || true
      fi
    done
  fi

  if [[ $ec -ne 0 ]]; then
    echo "[orchestrator] Exit with code $ec" >&2
  fi
}
trap cleanup EXIT
# A signal handler that returns lets the script carry on and launch further
# jobs. Turn INT/TERM into a real exit (128 + signal number) so the EXIT trap
# performs the cleanup exactly once and the orchestrator actually stops.
trap 'exit 130' INT
trap 'exit 143' TERM

# ==============================================================================
#                             CONFIGURATION
# ==============================================================================
# Upper bound on how many scripts run at the same time. A value already present
# in the environment wins; otherwise it defaults to 1.
# Tune it to the number of available GPUs and their memory capacity.
: "${MAX_PARALLEL_JOBS:=1}"

# CUDA device IDs to use for the experiments; jobs are assigned to them in
# rotation, one ID per job.
# Example for 4 GPUs: CUDA_DEVICES=(0 1 2 3)
# Example for 2 GPUs: CUDA_DEVICES=(0 1)
declare -a CUDA_DEVICES=(7)

# Manually specify the scripts you want to run in this array.
# Example:
# SCRIPTS_TO_RUN=(
#   "run_script_A.sh"
#   "run_script_B.sh"
# )

# Scripts to launch, in this order. The list is assembled group by group.
# Author's note: around 108 models per script, except for the off-the-shelf ones.
SCRIPTS_TO_RUN=()

# Best move
# (not listed: "run_grpo_inference_multiple_best_move.sh" -- this is finished already)
SCRIPTS_TO_RUN+=(
  "run_grpo_inference_multiple_best_move_from_legal_random_xy.sh"
  "run_grpo_inference_multiple_best_move_from_legal.sh"
  "run_grpo_inference_multiple_best_move_random_xy.sh"
)

# Legal moves for trained models on legal move
SCRIPTS_TO_RUN+=(
  "run_grpo_inference_multiple_random_xy.sh"
  "run_grpo_inference_multiple.sh"
)

# ASCII boards: best move
SCRIPTS_TO_RUN+=(
  "run_grpo_inference_multiple_best_move_from_legal_random_xy_ascii_board.sh"
  "run_grpo_inference_multiple_best_move_from_legal_ascii_board.sh"
  "run_grpo_inference_multiple_best_move_random_xy_ascii_board.sh"
  "run_grpo_inference_multiple_best_move_ascii_board.sh"
)

# ASCII boards: legal move
SCRIPTS_TO_RUN+=(
  "run_grpo_inference_multiple_random_xy_ascii_board.sh"
  "run_grpo_inference_multiple_ascii_board.sh"
)

# Off the shelf: best move
SCRIPTS_TO_RUN+=(
  "run_grpo_inference_multiple_best_move_off_the-shelf_canonical.sh"
  "run_grpo_inference_multiple_best_move_off_the-shelf_ascii_board_canonical.sh"
  "run_grpo_inference_multiple_best_move_off_the-shelf_ascii_board.sh"
  "run_grpo_inference_multiple_best_move_off_the-shelf.sh"
)

# Off the shelf: legal move
SCRIPTS_TO_RUN+=(
  "run_grpo_inference_multiple_legal_move_off_the-shelf_canonical.sh"
  "run_grpo_inference_multiple_legal_move_off_the-shelf_ascii_board_canonical.sh"
  "run_grpo_inference_multiple_legal_move_off_the-shelf_ascii_board.sh"
  "run_grpo_inference_multiple_legal_move_off_the-shelf.sh"
)

# Directory that holds the `run_*.sh` scripts.
# Deriving it from the orchestrator's own location would look like this
# (currently disabled):
# SCRIPT_DIR=$(dirname "$0")

# We need to run the script from within the folder
SCRIPT_DIR='/home/data/stlm-game-logic/scripts/bash'

# Log files of each run go into a fresh, timestamped directory below SCRIPT_DIR.
LOG_DIR=$SCRIPT_DIR/run_logs_$(date +%Y%m%d_%H%M%S)
# ==============================================================================

# Optional pre-clean switch. It is off by default; set PRE_CLEAN=1 to enable it.
PRE_CLEAN=${PRE_CLEAN:-0}

#######################################
# Terminate grpo_inference.py processes left over from earlier runs.
# Does nothing unless PRE_CLEAN is exactly "1". Otherwise sends SIGTERM to
# every process of the current user whose command line matches
# "grpo_inference.py", waits 4 seconds, and sends SIGKILL to those of the
# same PIDs that are still present.
# Globals:   PRE_CLEAN (read), USER (read, optional)
# Outputs:   progress messages on stdout
# Returns:   0
#######################################
preclean_inference() {
  # String comparison on purpose: an arithmetic test would abort under
  # `set -u` for values such as "off" or "yes".
  if [[ "${PRE_CLEAN:-0}" != "1" ]]; then
    echo "[pre-clean] Disabled (PRE_CLEAN=0)"
    return 0
  fi

  local uid user_name pid
  local -a existing=() remaining=()
  uid=$(id -u)
  # USER is not guaranteed to be set (cron, containers); fall back to id(1).
  user_name=${USER:-$(id -un)}

  echo "[pre-clean] Scanning for existing grpo_inference.py processes for user $user_name..."
  # Collect PIDs for this user only. pgrep exits non-zero when nothing matches;
  # inside a process substitution that status is not propagated.
  mapfile -t existing < <(pgrep -u "$uid" -f 'grpo_inference\.py') || true
  if ((${#existing[@]} == 0)); then
    echo "[pre-clean] None found."
    return 0
  fi

  echo "[pre-clean] Found ${#existing[@]} process(es): ${existing[*]} (sending SIGTERM)"
  kill -TERM "${existing[@]}" 2>/dev/null || true
  sleep 4

  # Escalate only for the PIDs that were already asked to terminate, so a
  # process started in the meantime is never SIGKILLed without warning.
  for pid in "${existing[@]}"; do
    if kill -0 "$pid" 2>/dev/null; then
      remaining+=("$pid")
    fi
  done

  if ((${#remaining[@]})); then
    echo "[pre-clean] Forcing ${#remaining[@]} remaining process(es) with SIGKILL: ${remaining[*]}"
    kill -KILL "${remaining[@]}" 2>/dev/null || true
  else
    echo "[pre-clean] All cleared."
  fi
}

# Create the log directory if it doesn't exist
mkdir -p "$LOG_DIR"

# Change to the script directory to ensure relative paths within the scripts work correctly
# cd "$SCRIPT_DIR" || exit

# --- Script Execution Logic ---

#######################################
# Forget background jobs that have exited.
# Rebuilds CHILD_PIDS from the PIDs that still answer `kill -0` and sets
# job_count to the number of PIDs kept.
# Globals:   CHILD_PIDS (read, written), job_count (written)
#######################################
reap_finished_jobs() {
  local tracked_pid
  local -a still_running=()
  # The ${arr[@]+...} form avoids an "unbound variable" error for an empty
  # array under `set -u` on bash < 4.4.
  for tracked_pid in ${CHILD_PIDS[@]+"${CHILD_PIDS[@]}"}; do
    if kill -0 "$tracked_pid" 2>/dev/null; then
      still_running+=("$tracked_pid")
    fi
  done
  CHILD_PIDS=(${still_running[@]+"${still_running[@]}"})
  job_count=${#CHILD_PIDS[@]}
}

# Initialize counters
job_count=0
gpu_index=0
num_gpus=${#CUDA_DEVICES[@]}

echo "Starting orchestrator..."
echo "Maximum parallel jobs: $MAX_PARALLEL_JOBS"
echo "Using GPUs: ${CUDA_DEVICES[*]}"
echo "Logging to directory: $LOG_DIR"
echo "----------------------------------------------------"

# Run pre-clean before starting new jobs
preclean_inference

# `wait -n` exists since bash 4.3. Decide by version: calling `wait -n` as a
# probe does not work, because with no background jobs it returns 127 even
# where it is supported.
supports_wait_n=0
if ((BASH_VERSINFO[0] > 4 || (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 3))); then
  supports_wait_n=1
fi

for base_script in ${SCRIPTS_TO_RUN[@]+"${SCRIPTS_TO_RUN[@]}"}; do
  script="$SCRIPT_DIR/$base_script"
  if [[ ! -f "$script" ]]; then
    echo "Warning: Script '$script' not found. Skipping."
    continue
  fi
  if [[ ! -x "$script" ]]; then
    chmod +x "$script" || true
  fi

  # Round-robin GPU assignment; one log file per script, named after it.
  gpu_id=${CUDA_DEVICES[$gpu_index]}
  gpu_index=$(((gpu_index + 1) % num_gpus))
  log_file="$LOG_DIR/$(basename "$script" .sh).log"

  if ((MAX_PARALLEL_JOBS <= 1)); then
    # Sequential mode: run in the foreground and wait for completion.
    echo "($(date +'%Y-%m-%d %H:%M:%S')) (SEQ) Running '$script' on GPU $gpu_id -> $log_file"
    export CUDA_VISIBLE_DEVICES=$gpu_id
    script_rc=0
    "$script" >"$log_file" 2>&1 || script_rc=$?
    if ((script_rc != 0)); then
      # Same abort `set -e` would perform, but naming the culprit.
      echo "Error: '$script' exited with status $script_rc (see $log_file). Aborting." >&2
      exit "$script_rc"
    fi
    continue
  fi

  # Concurrency control: block until fewer than MAX_PARALLEL_JOBS jobs run.
  while :; do
    reap_finished_jobs
    if ((job_count < MAX_PARALLEL_JOBS)); then
      break
    fi
    if ((supports_wait_n)); then
      # A job frees its slot whether it succeeded or failed, so the status of
      # `wait -n` is only inspected for 127.
      wait_rc=0
      wait -n || wait_rc=$?
      if ((wait_rc == 127)); then
        # bash may report no child left to wait for while a tracked PID still
        # answers kill -0; pause instead of spinning.
        sleep 1
      fi
    else
      # Fallback: poll children once per second.
      sleep 1
    fi
  done

  echo "($(date +'%Y-%m-%d %H:%M:%S')) Starting '$script' on GPU $gpu_id (parallel) -> $log_file"
  # Launch the script directly (no wrapper subshell) so that $! is the PID of
  # the script itself, which is what cleanup signals.
  CUDA_VISIBLE_DEVICES=$gpu_id "$script" >"$log_file" 2>&1 &
  pid=$!
  CHILD_PIDS+=("$pid")
  job_count=${#CHILD_PIDS[@]}
  echo "  -> PID $pid"
done

# --- Final Wait ---

#######################################
# Wait for every job still listed in CHILD_PIDS before the orchestrator exits.
# Only relevant in parallel mode (MAX_PARALLEL_JOBS > 1); sequential runs have
# no background jobs. A job that ends with a non-zero status is reported on
# stderr; the orchestrator's own exit status is not affected.
# Globals:   MAX_PARALLEL_JOBS (read), CHILD_PIDS (read)
# Outputs:   progress on stdout, warnings on stderr
# Returns:   0
#######################################
wait_for_remaining_jobs() {
  local job_pid job_rc
  ((MAX_PARALLEL_JOBS > 1)) || return 0

  echo "Waiting for remaining ${#CHILD_PIDS[@]} job(s) to finish..."
  # The ${arr[@]+...} form avoids an "unbound variable" error for an empty
  # array under `set -u` on bash < 4.4.
  for job_pid in ${CHILD_PIDS[@]+"${CHILD_PIDS[@]}"}; do
    job_rc=0
    wait "$job_pid" 2>/dev/null || job_rc=$?
    if ((job_rc != 0)); then
      # NOTE: `wait` also yields 127 for a PID bash no longer tracks.
      echo "Warning: job PID $job_pid exited with status $job_rc" >&2
    fi
  done
  return 0
}

echo "----------------------------------------------------"
wait_for_remaining_jobs
echo "All jobs completed. Orchestrator finished."

