#!/usr/bin/env bash
#
# Launches a series of `accelerate` runs from the ~/dpo checkout:
#   1. scripts/run_rrpo.py twelve times in a row, then
#   2. scripts/run_simpo.py once,
# all with the training config selected by RUN_METHOD, on GPU 0.
#
# Every path passed to `accelerate launch` is relative, so the working
# directory must be ~/dpo before anything is launched.

# Abort if the project directory is missing; otherwise the relative paths
# below would resolve against whatever directory the caller happened to be in.
# `return` covers the case where this file is sourced (it must not kill the
# caller's shell); `exit` covers direct execution.
cd ~/dpo || {
    echo "error: cannot cd to ~/dpo" >&2
    return 1 2>/dev/null || exit 1
}

# NOTE(review): `conda activate` needs conda's shell integration to be loaded
# in the current shell; when run non-interactively (e.g. `bash thisfile.sh`)
# that may not be the case -- confirm how this script is invoked.
conda activate dpo

# Basename (without .yaml) of the config under
# training_configs/Llama-3.2-3B-Instruct/.
RUN_METHOD=kto_math
CONFIG_PATH="training_configs/Llama-3.2-3B-Instruct/${RUN_METHOD}.yaml"

# Twelve back-to-back launches of the same script with the same config.
# A failing run does not stop the remaining iterations.
for i in {1..12}
do
    echo "the number $i "
    CUDA_VISIBLE_DEVICES=0 ACCELERATE_LOG_LEVEL=info accelerate launch \
        --config_file accelerate_configs/deepspeed_zero3.yaml \
        scripts/run_rrpo.py "$CONFIG_PATH"
done

# NOTE(review): this final launch uses run_simpo.py with the same
# RUN_METHOD config as the loop above -- confirm that is intended.
CUDA_VISIBLE_DEVICES=0 ACCELERATE_LOG_LEVEL=info accelerate launch \
    --config_file accelerate_configs/deepspeed_zero3.yaml \
    scripts/run_simpo.py "$CONFIG_PATH"
