#!/usr/bin/env bash
#
# Launches repeated training runs for the configuration named by RUN_METHOD:
#   1. 20 sequential launches of scripts/run_rrpo.py, then
#   2. one launch of scripts/run_simpo.py,
# all on GPU 0 with accelerate_configs/deepspeed_zero3.yaml and
# training_configs/qwen2.5-3b/<RUN_METHOD>.yaml.
#
# Works both when sourced and when executed: on a fatal setup error it
# returns (sourced) or exits (executed) with status 1.

# All paths below are relative to ~/dpo, so a failed cd must stop the script
# rather than let the launches run from the wrong directory.
cd ~/dpo || {
    echo "error: cannot change directory to ~/dpo" >&2
    return 1 2>/dev/null || exit 1
}

# Without the right environment the launches would use whatever Python is
# active, so treat a failed activation as fatal.
conda activate dpo || {
    echo "error: 'conda activate dpo' failed (in a non-interactive shell, run: eval \"\$(conda shell.bash hook)\" first)" >&2
    return 1 2>/dev/null || exit 1
}

RUN_METHOD=ipo_math

for i in {1..20}
do
    echo "the number $i "
    # Best-effort: a failed run is reported but does not stop later runs.
    CUDA_VISIBLE_DEVICES=0 ACCELERATE_LOG_LEVEL=info accelerate launch \
        --main_process_port 29505 \
        --config_file accelerate_configs/deepspeed_zero3.yaml \
        scripts/run_rrpo.py \
        "training_configs/qwen2.5-3b/${RUN_METHOD}.yaml" \
        || echo "warning: run_rrpo.py iteration $i exited with status $?" >&2
done

# NOTE(review): this final launch uses run_simpo.py with the same config file
# and no explicit --main_process_port -- confirm both are intended.
CUDA_VISIBLE_DEVICES=0 ACCELERATE_LOG_LEVEL=info accelerate launch \
    --config_file accelerate_configs/deepspeed_zero3.yaml \
    scripts/run_simpo.py \
    "training_configs/qwen2.5-3b/${RUN_METHOD}.yaml"
