# Enter the project directory: the relative paths used by the launch command
# further down (accelerate_configs/, scripts/, training_configs/) are resolved
# from here. Abort if the directory cannot be entered so that nothing is
# launched from the wrong working directory.
# `return` handles the case where this file is sourced; it fails when the file
# is executed directly, in which case `exit` is used instead.
cd ~/dpo || {
    echo "error: cannot cd to ~/dpo" >&2
    return 1 2>/dev/null || exit 1
}
# Activate the conda environment named "dpo".
conda activate dpo
# RUN_METHOD=dpo_math


# for i in {1..24}
# do
#     echo "the number $i "
#     CUDA_VISIBLE_DEVICES=0 ACCELERATE_LOG_LEVEL=info accelerate launch --config_file accelerate_configs/deepspeed_zero3.yaml scripts/run_rrpo_iter_dpo.py training_configs/qwen2.5-7b/$RUN_METHOD.yaml
# done
    
# Base name (without the .yaml extension) of the training config to use from
# training_configs/qwen2.5-7b/.
RUN_METHOD=dpo_math_ourdata

# Run the same launch command 24 times, one after another, with only CUDA
# device 0 visible to the process. The exit status of each launch is not
# checked, so a failed iteration does not stop the remaining ones.
for i in {1..24}; do
    echo "the number $i "
    CUDA_VISIBLE_DEVICES=0 ACCELERATE_LOG_LEVEL=info accelerate launch \
        --config_file accelerate_configs/deepspeed_zero3.yaml \
        scripts/run_rrpo_iter_dpo.py \
        "training_configs/qwen2.5-7b/${RUN_METHOD}.yaml"
done
    