#!/bin/bash

#SBATCH --job-name=umfavi-transfer
# Log files: %A is the job array's master job ID and %a the array task index,
# so every task of one submission shares the same file-name prefix.
# (%j would expand to each task's own job ID instead.)
# Slurm does not create missing directories for these paths, so logs/slurm
# must already exist when `sbatch` is invoked (mkdir -p logs/slurm).
#SBATCH --output=logs/slurm/umfavi_transfer_%A_%a.out
#SBATCH --error=logs/slurm/umfavi_transfer_%A_%a.err
# Per-task wall-clock limit
#SBATCH --time=4:00:00
# Resources for each array task: 2 CPUs with 2G of memory per CPU
#SBATCH --mem-per-cpu=2G
#SBATCH --cpus-per-task=2
# One worker per array index: 0-32 starts 33 workers
#SBATCH --array=0-32
# Propagate the submitting shell's environment (e.g. QUEUE_DIR) to the job
#SBATCH --export=ALL

# =============================================================================
# UMFAVI Transfer Experiment Runner - SLURM Submission Script
# =============================================================================
#
# This script submits a job array to a SLURM cluster where each task runs
# as an independent worker processing transfer experiments from the file-based queue.
#
# Transfer experiments evaluate reward models or imitation policies under
# perturbed environments (e.g., different wind_power in LunarLander).
#
# Usage:
#   1. First, populate the transfer queue with experiments:
#      python -m umfavi.experiments.cli --queue-dir tasks_transfer add-grid transfer_lander --seeds 5
#
#   2. Check the queue status:
#      python -m umfavi.experiments.cli --queue-dir tasks_transfer status
#
#   3. Submit to cluster:
#      sbatch scripts/submit_slurm_transfer.sh
#
#   4. To run fewer workers than the default 33 (--array=0-32), override the
#      array range on the command line, e.g. for 32 workers:
#      sbatch --array=0-31 scripts/submit_slurm_transfer.sh
#
#   5. Override queue directory:
#      QUEUE_DIR=tasks_transfer_v2 sbatch scripts/submit_slurm_transfer.sh
#
# Configuration:
#   - Adjust --time based on expected experiment duration
#   - Adjust --mem-per-cpu based on model size
#   - Adjust --cpus-per-task for environments that benefit from parallelism
#   - The array range determines the number of parallel workers
#
# Environment Variables:
#   - QUEUE_DIR: Path to the transfer task queue directory (default: tasks_transfer)
#
# =============================================================================

# Abort the script as soon as any command fails
set -o errexit

# Job banner: records which job/array task this is, plus the node and the
# start time, at the top of the log
banner_rule="=============================================="
printf '%s\n' \
    "$banner_rule" \
    "UMFAVI Transfer Experiment Worker" \
    "$banner_rule" \
    "Job ID: $SLURM_JOB_ID" \
    "Array Task ID: $SLURM_ARRAY_TASK_ID" \
    "Node: $(hostname)" \
    "Date: $(date)" \
    "Working Directory: $SLURM_SUBMIT_DIR" \
    "$banner_rule"

# Change to the project directory (the directory the job was submitted from).
# The expansion is quoted so paths containing spaces survive. When
# SLURM_SUBMIT_DIR is unset or empty (e.g. the script is run directly rather
# than through sbatch) stay in the current directory; a bare `cd` would
# silently jump to $HOME instead.
cd "${SLURM_SUBMIT_DIR:-$PWD}" || {
    echo "Error: cannot change to project directory '${SLURM_SUBMIT_DIR:-$PWD}'" >&2
    exit 1
}

# Load required modules (adjust based on your cluster setup)
# module load python/3.11.6
# module load cuda/12.1.1  # If using GPU

# Activate virtual environment (adjust path as needed).
# A venv inside the current directory takes precedence over one in the parent
# directory; if neither exists the script continues with whatever `python`
# is on PATH.
if [ -f "venv/bin/activate" ]; then
    source venv/bin/activate
elif [ -f "../venv/bin/activate" ]; then
    source ../venv/bin/activate
else
    echo "Warning: No virtual environment found. Using system Python."
fi

# Print Python info.
# `command -v` is a shell builtin, so unlike the external `which` it is always
# available, and it reports the `python` that the shell will actually run.
echo "Python: $(command -v python)"
echo "Python version: $(python --version)"

# Queue directory - use a shared location accessible to all nodes.
# A QUEUE_DIR already set (and non-empty) in the environment wins; otherwise
# the default "tasks_transfer" is used.
# The file-based queue uses atomic rename operations which are NFS-safe.
: "${QUEUE_DIR:=tasks_transfer}"

# Make sure the log directory is present.
# NOTE(review): Slurm presumably opens the --output/--error files before this
# script starts, so this only helps later submissions - confirm.
mkdir -p logs/slurm

printf '%s\n' \
    "Queue directory: $QUEUE_DIR" \
    "=============================================="

# Build worker arguments.
# Kept in an array (not a flat string) so each argument stays a single word
# even when QUEUE_DIR contains spaces or glob characters.
WORKER_ARGS=(--queue-dir "$QUEUE_DIR")

# Run the transfer worker
# The worker will:
# 1. Claim pending transfer experiments from the queue (via atomic file rename)
# 2. Run each transfer experiment (reward model or imitation policy evaluation)
# 3. Save results (regret, mean_reward) to the queue
# 4. Exit when no more pending experiments
python -m umfavi.experiments.transfer_worker "${WORKER_ARGS[@]}"

# With `set -e` active, this footer is only reached when the worker exits
# successfully.
echo "=============================================="
echo "Transfer worker completed at $(date)"
echo "=============================================="
