#!/bin/bash

# Script to submit SLURM evaluation jobs for all tasks with configurable evaluation type
# Based on command_llama.txt template and submit_all_tasks.sh structure

# Usage: ./submit_evaluations.sh [eval_type]
# eval_type options: openai, grpo, direct, drgrpo

# Evaluation type is taken from the first CLI argument; defaults to "openai"
# when no argument is provided.
EVAL_TYPE=${1:-"openai"}

# Validate eval_type parameter. On an unsupported value the usage text is
# written to stderr (so it is not mixed into captured stdout) and the script
# exits with status 1.
case "$EVAL_TYPE" in
    openai|grpo|direct|drgrpo)
        echo "✅ Using eval_type: $EVAL_TYPE"
        ;;
    *)
        {
            echo "❌ Invalid eval_type: $EVAL_TYPE"
            echo "Usage: $0 [openai|grpo|direct|drgrpo]"
            echo "Supported eval types:"
            echo "  - openai: Uses OpenAI API configs (taskname_openai.yaml)"
            echo "  - grpo: Uses GRPO configs (taskname_grpo.yaml)"
            echo "  - direct: Uses direct configs (taskname_direct.yaml)"
            echo "  - drgrpo: Uses GRPO configs (taskname_grpo.yaml) with drgrpo checkpoints"
        } >&2
        exit 1
        ;;
esac

echo "Submitting SLURM evaluation jobs for reasoning tasks with eval_type: $EVAL_TYPE"

#######################################
# Create a SLURM batch script for one task/model pair and submit it with sbatch.
#
# Globals:
#   EVAL_TYPE             (read)     openai | grpo | direct | drgrpo
#   checkpoint_base_dir   (read)     only by the "direct" branch (see note there)
#   RL_EXPLANATIONS_ROOT  (read)     optional override of the project root;
#                                    defaults to /nlp/scr/qinanyu/rl-explanations
#   TMPDIR                (exported) set to <project root>/temp
# Arguments:
#   $1 task_name       task identifier; selects <task>_<eval_type>.yaml
#   $2 model_path      base model path (evaluation.base_model_path)
#   $3 model_name      short model label used in job and output names
#   $4 checkpoint_dir  checkpoint directory for grpo / drgrpo evaluation
#   $5 mem_size        SLURM --mem value (default: 80G)
# Returns:
#   1 for an unknown EVAL_TYPE or if a working directory cannot be created;
#   otherwise the exit status of sbatch.
#######################################
submit_evaluation() {
    local task_name=$1
    local model_path=$2
    local model_name=$3
    local checkpoint_dir=$4
    local mem_size=${5:-80G}

    local project_root="${RL_EXPLANATIONS_ROOT:-/nlp/scr/qinanyu/rl-explanations}"
    local expert_results_dir="${project_root}/evaluate/results"
    local log_dir="${project_root}/bash_output"

    local job_name="generate-${task_name}-${model_name}-${EVAL_TYPE}"
    local output_file="${log_dir}/${job_name}.out"
    local config_name="${task_name}_${EVAL_TYPE}.yaml"
    local output_dir="generate/results/${EVAL_TYPE}_${model_name}/${task_name}"
    local script_file="${project_root}/slurm_${job_name}.sh"

    echo "=== Submitting ${task_name} (${EVAL_TYPE}) - ${model_name} ==="
    echo "Output file: ${output_file}"
    echo "Config: ${config_name}"
    echo "Output dir: ${output_dir}"

    # Point temporary files at the project's temp directory to avoid
    # "No space left on device" errors. The variable is exported, so it is
    # part of the environment sbatch is invoked with.
    export TMPDIR="${project_root}/temp"
    mkdir -p "$TMPDIR" || return 1

    # The directory for the #SBATCH --output file must exist before the job starts.
    mkdir -p "$log_dir" || return 1

    # Build the python command for the selected eval type. Each "\\" + newline
    # inside the double quotes yields a literal backslash-newline, so the
    # command is written into the batch script as one continued command.
    local python_cmd=""
    if [ "$EVAL_TYPE" = "openai" ]; then
        # OpenAI evaluation - no checkpoint directory needed
        python_cmd="python generate/generate.py \\
--config-name ${config_name} \\
evaluation.teacher_dataset.size=22000 \\
evaluation.teacher_dataset.val_start=20000 \\
evaluation.student_dataset.size=20110 \\
evaluation.student_dataset.val_start=20100 \\
evaluation.metrics='[answer_removed_explanation_only, teacher_accuracy]' \\
evaluation.teacher_model.openai_model_name=o3-mini \\
evaluation.student_model.model_path=gpt-4.1-mini \\
evaluation.teacher_model.developer_prompt=default \\
+evaluation.postpend=orig "
    elif [ "$EVAL_TYPE" = "direct" ]; then
        # Direct evaluation (without thinking)
        # NOTE(review): this branch still derives the checkpoint path from the
        # global checkpoint_base_dir set by the caller's loop, because callers
        # pass model_path as $4 for this eval type - confirm which is intended.
        python_cmd="python evaluate/run_teacher_evaluation.py \\
--config-name ${config_name} \\
evaluation.checkpoint_dir=${checkpoint_base_dir}/${task_name} \\
evaluation.base_model_path=${model_path} \\
evaluation.output_dir=${output_dir} \\
evaluation.expert_thinking_dir=${expert_results_dir}/openai/${task_name}/teacher/step_openai_api/teacher_responses_step_openai_api.json"
    elif [ "$EVAL_TYPE" = "grpo" ]; then
        # GRPO evaluation (with thinking)
        python_cmd="python evaluate/run_teacher_evaluation.py \\
--config-name ${config_name} \\
evaluation.checkpoint_dir=${checkpoint_dir} \\
evaluation.base_model_path=${model_path} \\
evaluation.metrics='[answer_removed_explanation_only]' \\
evaluation.teacher_dataset.size=20100 \\
evaluation.teacher_dataset.val_start=20000 \\
evaluation.student_dataset.size=20110 \\
evaluation.student_dataset.val_start=20100 \\
evaluation.student_model.model_path=gpt-4.1-mini \\
evaluation.output_dir=${output_dir}_inform \\
evaluation.expert_thinking_dir=${expert_results_dir}/o3-mini_gpt-4.1-mini/${task_name}/teacher/step_0/teacher_responses_step_0.json"
    elif [ "$EVAL_TYPE" = "drgrpo" ]; then
        # DRGRPO evaluation: same config file as GRPO, but the checkpoint
        # directory passed by the caller ($4) points at the drgrpo checkpoints.
        local grpo_config_name="${task_name}_grpo.yaml"
        python_cmd="python evaluate/run_teacher_evaluation.py \\
--config-name ${grpo_config_name} \\
evaluation.checkpoint_dir=${checkpoint_dir} \\
evaluation.base_model_path=${model_path} \\
evaluation.metrics='[expert_thinking]' \\
evaluation.teacher_dataset.size=20100 \\
evaluation.teacher_dataset.val_start=20000 \\
evaluation.student_dataset.size=20110 \\
evaluation.student_dataset.val_start=20100 \\
evaluation.student_model.model_path=gpt-4.1-mini \\
evaluation.output_dir=${output_dir}_inform \\
evaluation.expert_thinking_dir=${expert_results_dir}/o3-mini_gpt-4.1-mini/${task_name}/teacher/step_0/teacher_responses_step_0.json"
    else
        echo "❌ Unknown eval_type: ${EVAL_TYPE}"
        return 1
    fi

    # Unquoted heredoc: ${...} is expanded now (at submission time), while
    # \$-escaped variables are left for the job to expand at run time.
    # The python command is deliberately the last command so the job's exit
    # status reflects whether the evaluation succeeded.
    cat > "$script_file" << EOF
#!/bin/bash
#SBATCH --partition=jag-standard --qos=normal
#SBATCH --account=nlp
#SBATCH --cpus-per-task=1
#SBATCH --exclude=jagupard[31]
#SBATCH --gres=gpu:1
#SBATCH --job-name=${job_name}
#SBATCH --mem=${mem_size}
#SBATCH --open-mode=append
#SBATCH --output=${output_file}
#SBATCH --time=14-0

# Unique per job
unset ROCR_VISIBLE_DEVICES
export HYDRA_FULL_ERROR=1

export JOB_TAG="\${SLURM_JOB_ID:-\${LSB_JOBID:-jid.\$(id -u)-\$\$-\$(date +%s)}}"

# Per-job dirs
#export BASE="/dev/shm/\$USER/vllm.\$JOB_TAG"
#export TORCH_EXTENSIONS_DIR="\${BASE}/torch_ext"
#export FLASHINFER_JIT_DIR="\${BASE}/flashinfer_jit"
#export CUDA_CACHE_PATH="\${BASE}/cuda_cache"
#export TMPDIR="\${BASE}/nlp/scr/qinanyu/rl-explanations/temp"
#
## Create them
#mkdir -p "\$TORCH_EXTENSIONS_DIR" "\$FLASHINFER_JIT_DIR" "\$CUDA_CACHE_PATH" "\$TMPDIR"
#chmod 700 "\$BASE" "\$TMPDIR"
#
## Optional
#export RAY_TMPDIR="/nlp/scr/qinanyu/ray_st"; mkdir -p "\$RAY_TMPDIR"
#
## ------- Pick arch for the current node -------
#export TORCH_CUDA_ARCH_LIST="8.0;9.0a"
#
## If nvcc isn't present, avoid dead CUDA_HOME
#command -v nvcc >/dev/null 2>&1 || unset CUDA_HOME
#
## Clean up at exit
## trap 'rm -rf "\$BASE"' EXIT
#
## Debug (optional)
#echo "BASE=\$BASE"
#echo "TMPDIR=\$TMPDIR"
#echo "TORCH_EXTENSIONS_DIR=\$TORCH_EXTENSIONS_DIR"
#echo "FLASHINFER_JIT_DIR=\$FLASHINFER_JIT_DIR"
#echo "CUDA_CACHE_PATH=\$CUDA_CACHE_PATH"
#echo "TORCH_CUDA_ARCH_LIST=\$TORCH_CUDA_ARCH_LIST"
#
cd "${project_root}" || exit 1

${python_cmd}
EOF

    echo "Submitting evaluation job for ${task_name} (${model_name}, ${EVAL_TYPE})..."
    local submit_status=0
    sbatch "$script_file" || submit_status=$?

    # Clean up the temporary script regardless of the sbatch outcome, then
    # report sbatch's status to the caller.
    rm -f -- "$script_file"
    return "$submit_status"
}

# =====================================================================
# CONFIGURATION: Edit these arrays to control which tasks and models to evaluate
# =====================================================================

# Tasks to evaluate (comment out or remove tasks you don't want to evaluate).
# One job is submitted per (model configuration, task) pair; the task name is
# also used to pick the config file "<task>_<eval_type>.yaml".
EVAL_TASKS=(
    #"mini_sudoku"
    "spiral_matrix"
    #"family_relationships"
    #"simple_equations"
    #"futoshiki"
)

# Model configurations: each entry is a single string of three '|'-separated
# fields, "model_path|model_name|checkpoint_base_dir":
#   model_path          - base model path passed to the evaluation
#   model_name          - short label used in job names and output directories
#   checkpoint_base_dir - GRPO checkpoint root; the task name is appended to it.
#                         For eval_type "drgrpo" the first occurrence of "grpo"
#                         in this path is replaced with "drgrpo".
# Fields must not contain '|' themselves, since entries are split on it.
MODEL_CONFIGS=(
    # LLaMA 3.2 3B configuration
    #"/nlp/scr/qinanyu/models/meta-llama-3.2-3B-Instruct|llama|/nlp/scr/qinanyu/rl-explanations/checkpoints/grpo_meta-llama-3.2-3B-Instruct"
    
    # Qwen configurations (uncomment to use)
    "/nlp/scr/qinanyu/models/qwen2.5-7b-instruct|qwen-7b-instruct|/nlp/scr/qinanyu/rl-explanations/checkpoints/grpo_q7b"
    #"/nlp/scr/qinanyu/models/qwen2.5-3b-instruct|qwen-3b-instruct|/nlp/scr/qinanyu/rl-explanations/checkpoints/grpo_q3b"
    #"/nlp/scr/qinanyu/models/qwen2.5-1.5b-instruct|qwen-1.5b-instruct|/nlp/scr/qinanyu/rl-explanations/checkpoints/grpo_q1.5b"
)

# =====================================================================
# Submit jobs based on configuration
# =====================================================================

echo "Submitting evaluation jobs for ${#EVAL_TASKS[@]} tasks with eval_type '$EVAL_TYPE' and ${#MODEL_CONFIGS[@]} model configurations..."

# total_jobs counts only submissions for which submit_evaluation returned
# success, so the summary does not over-report when a submission fails.
total_jobs=0
failed_jobs=0

for model_config in "${MODEL_CONFIGS[@]}"; do
    # Split "model_path|model_name|checkpoint_base_dir" on '|'.
    # These variables are intentionally global: submit_evaluation reads
    # checkpoint_base_dir directly.
    IFS='|' read -r model_path model_name checkpoint_base_dir <<< "$model_config"

    echo ""
    echo "Processing model: $model_name"
    echo "Model path: $model_path"
    echo "Checkpoint base: $checkpoint_base_dir"

    for task in "${EVAL_TASKS[@]}"; do
        # Construct checkpoint directory based on eval_type
        case "$EVAL_TYPE" in
            grpo)
                checkpoint_dir="${checkpoint_base_dir}/${task}"
                ;;
            drgrpo)
                # For drgrpo, replace the first "grpo" in the checkpoint path
                # with "drgrpo" (e.g. .../grpo_q7b -> .../drgrpo_q7b).
                drgrpo_checkpoint_base_dir="${checkpoint_base_dir/grpo/drgrpo}"
                checkpoint_dir="${drgrpo_checkpoint_base_dir}/${task}"
                ;;
            *)
                # No trained checkpoint for the other eval types: pass the
                # base model path instead.
                checkpoint_dir="${model_path}"
                ;;
        esac

        if submit_evaluation "$task" "$model_path" "$model_name" "$checkpoint_dir"; then
            total_jobs=$((total_jobs + 1))
        else
            failed_jobs=$((failed_jobs + 1))
            echo "⚠️  Failed to submit ${task} (${model_name}, ${EVAL_TYPE})" >&2
        fi
    done
done

if [ "$failed_jobs" -gt 0 ]; then
    echo "⚠️  ${failed_jobs} submission(s) failed" >&2
fi

# Print the model_name (second '|'-separated field) of every MODEL_CONFIGS
# entry on one line, separated by single spaces. Each entry is parsed on its
# own so that all models are listed, not just the first one.
list_model_names() {
    local entry _path name _rest
    local names=()
    for entry in "${MODEL_CONFIGS[@]}"; do
        IFS='|' read -r _path name _rest <<< "$entry"
        names+=("$name")
    done
    echo "${names[*]}"
}

echo ""
echo "=================================="
echo "EVALUATION SUBMISSION SUMMARY"
echo "=================================="
echo "Total jobs submitted: $total_jobs"
echo "Evaluation type: $EVAL_TYPE"
echo "Tasks: ${EVAL_TASKS[*]}"
echo "Models: $(list_model_names)"
echo ""
echo "Config pattern: taskname_${EVAL_TYPE}.yaml"
echo "Check job status with: squeue -u \$USER"
echo "Monitor outputs in: /nlp/scr/qinanyu/rl-explanations/bash_output/"
# Matches the output_dir prefix that submit_evaluation builds
# (generate/results/<eval_type>_<model_name>/<task>).
echo "Results will be saved in: generate/results/${EVAL_TYPE}_*/"