#!/bin/bash
#SBATCH --job-name=dpo_qwen25_1_5B
#SBATCH --account=<your-account-name>
#SBATCH --partition=<your-gpu-partition>
#SBATCH --gres=gpu:a100:1
#SBATCH --time=100:00:00
#SBATCH --mem=160G
#SBATCH --output=dpo_qwen25_1_5B_%j.out
#SBATCH --error=dpo_qwen25_1_5B_%j.err
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=<your-email@domain.com>

# Log the value of SLURM_JOB_ID, the output of `hostname`, and the current
# date as the job's start time.
printf 'Job ID: %s\n' "$SLURM_JOB_ID"
printf 'Hostname: %s\n' "$(hostname)"
printf 'Start time: %s\n' "$(date)"

# Initialize conda
source /path/to/your/miniconda3/etc/profile.d/conda.sh

eval "$(conda shell.bash hook)"

# Activate conda environment
conda activate <your-conda-env>

export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"

# Run evaluate.py on the checkpoint passed via --model_name.
# The exit status is captured instead of being discarded: the end-time line is
# still printed, and the script then exits with python's status. Without this,
# the final echo would make the batch script exit 0 and SLURM would record a
# failed run as successful (so --mail-type=FAIL would never trigger).
eval_status=0
python3 evaluate.py --model_name /path/to/your/checkpoint-1154 \
    --num_generations 8 \
    --output_dir Qwen_1.5B_dpo_1154 \
    --run_name Qwen25_1.5B_DPO_gsm8k_reasoner \
    --max_completion_length 500 \
    --data_directory /path/to/data/gsm8k_fewshot_qwen25 || eval_status=$?

# Print completion time
echo "End time: $(date)"

# Report a failure on stderr and hand the status back to SLURM.
if (( eval_status != 0 )); then
    echo "evaluate.py exited with status ${eval_status}" >&2
fi
exit "$eval_status"