#!/bin/bash
#SBATCH --job-name=threshold_analysis_MMLU_Pro_s63_PRM72B_with_preds
#SBATCH --output=./slurm_output/%j_threshold_analysis_MMLU_Pro_s63_PRM72B_with_preds.out
#SBATCH --error=./slurm_output/%j_threshold_analysis_MMLU_Pro_s63_PRM72B_with_preds.err
#SBATCH --partition=cpu
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=10GB
#SBATCH --time=04:00:00

# Disabled argument, kept for reference only (it is NOT passed to the python command below):
#   --model_path /datasets/ai/qwen/hub/models--Qwen--Qwen2.5-Math-PRM-7B/snapshots/0610740060112df12585d00a1c5f4624d2f59051/

# Print an error message to stderr and abort the job with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Load CUDA module.
# Best-effort: this job is submitted to the cpu partition, so a missing module
# is reported but does not abort the run (same outcome as before, plus a warning).
module load cuda/11.8 \
  || printf 'warning: could not load module cuda/11.8; continuing\n' >&2

# Activate virtual environment.
# Fatal on failure: otherwise the analysis would silently run under whatever
# `python` happens to be on PATH.
# shellcheck source=/dev/null
source ./venv/bin/activate || die "could not activate ./venv/bin/activate"

# Create output directory.
# NOTE: SLURM opens the --output/--error files before this script starts, so
# ./slurm_output must already exist at submission time; this mkdir only
# guarantees the directory for later submissions.
mkdir -p ./slurm_output

# Input CSV. Single-quoted because "[Qwen]" is a glob bracket expression:
# unquoted, the shell could expand it to a different file name, or fail
# outright under nullglob/failglob.
input_csv='./Results/skills_3/MMLU_Pro/self_con_seed63_budget16_acc61.38_models[Qwen]_v2.csv'
[[ -f "$input_csv" ]] || die "input CSV not found: $input_csv"

# Arguments for src/threshold_analysis.py, kept in an array so every element
# reaches the program as exactly one word.
# NOTE(review): the boolean options are passed as the literal strings
# "True"/"False"; confirm threshold_analysis.py parses them as intended.
analysis_args=(
  "$input_csv"
  --llm_call_limit_list 4 8 16
  --thresholds 0.8 0.85 0.9 0.95 0.99 0.999 0.9999
  --is_mc_question False
  --existing_rewards True
  --output_file ./Results/skills_3/MMLU_Pro/threshold_analysis_MMLU_Pro_s63_PRM72B.csv
)

# Run the script; as the last command, its exit status becomes the job's.
python src/threshold_analysis.py "${analysis_args[@]}"
