#!/bin/bash
# SLURM batch script: launches main.py with --trainer_type "mle" on a
# Qwen2.5-0.5B-Base checkpoint. Submit with `sbatch <this-file>`.
# Replace every <...> placeholder below before submitting.
#
# NOTE: all #SBATCH directives must stay above the first executable command;
# sbatch stops parsing directives once it reaches one.

# Name shown for this job in the scheduler queue.
#SBATCH --job-name=awdpo_mle_qwen25_05B
# Account to charge and partition to run on (site-specific placeholders).
#SBATCH --account=<your-account-name>
#SBATCH --partition=<your-gpu-partition>
# Request one GPU of type "a100".
#SBATCH --gres=gpu:a100:1
# Wall-clock limit of 100 hours (HH:MM:SS).
#SBATCH --time=100:00:00
# Memory requested for the job on its node.
#SBATCH --mem=160G
# stdout / stderr log files; %j is replaced by the job ID.
#SBATCH --output=awdpo_mle_qwen25_05B_%j.out
#SBATCH --error=awdpo_mle_qwen25_05B_%j.err
# Send e-mail when the job begins, ends, or fails.
#SBATCH --mail-type=BEGIN,END,FAIL
#SBATCH --mail-user=<your-email@domain.com>

# Log basic job context so the output file records where and when the job ran.
echo "Job ID: $SLURM_JOB_ID"
echo "Hostname: $(hostname)"
echo "Start time: $(date)"

# Make the `conda` command available in this non-interactive shell.
# Replace the path below with the location of your own conda installation.
# Abort early on failure: continuing would run the training with whatever
# python3 happens to be on PATH.
source /path/to/your/miniconda3/etc/profile.d/conda.sh || {
    echo "ERROR: failed to source conda.sh; check the conda install path" >&2
    exit 1
}

eval "$(conda shell.bash hook)"

# Activate the conda environment that provides the training dependencies.
# The placeholder is quoted so it is passed as an argument rather than being
# parsed as a shell redirection.
conda activate "<your-conda-env>" || {
    echo "ERROR: failed to activate the conda environment" >&2
    exit 1
}

# PyTorch CUDA caching-allocator setting (see the PyTorch CUDA memory
# management notes); presumably enabled here to limit memory fragmentation.
export PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True"

# Launch training. Replace the model path, output directory and run name
# with your own values. Placeholders are quoted so that an unfilled template
# (or a value containing spaces) is still passed as a single argument.
python3 main.py --model_name /path/to/models/Qwen2.5-0.5B-Base \
    --num_generations 6 \
    --save_steps 250 \
    --output_dir "<your_output_dir>" \
    --num_train_epochs 1 \
    --run_name "<your_run_name>" \
    --learning_rate 1e-5 \
    --weight_decay 0.01 \
    --warmup_steps 100 \
    --max_prompt_length 2000 \
    --max_completion_length 500 \
    --gradient_accumulation_steps 1 \
    --per_device_train_batch_size 1 \
    --training_data_directory data/gsm8k_fewshot_qwen25 \
    --max_steps 1000 \
    --use_vllm \
    --trainer_type "mle" \
    --use_lora \
    --lora_rank 64 \
    --lora_alpha 64
# Capture the training exit status immediately, before any other command
# overwrites $?.
train_status=$?

# Print completion time
echo "End time: $(date)"

# Exit with the training status so the scheduler reports a failed run as
# failed instead of inheriting the success of the final echo.
if [ "$train_status" -ne 0 ]; then
    echo "ERROR: main.py exited with status $train_status" >&2
fi
exit "$train_status"