# python openrlhf/main/dpo_training.py --pretrain Qwen/Qwen2.5-1.5B-Instruct --save_path ./checkpoint/qwen2.5-1.5b-dpo --result_path ./results/qwen2.5-1.5b-dpo.csv
# python openrlhf/main/dpo_training.py --pretrain Qwen/Qwen2.5-1.5B-Instruct --save_path ./checkpoint/qwen2.5-1.5b-cdpo --result_path ./results/qwen2.5-1.5b-cdpo.csv --label_smoothing 0.1
# python openrlhf/main/dpo_training.py --pretrain Qwen/Qwen2.5-1.5B-Instruct --save_path ./checkpoint/qwen2.5-1.5b-ipo --result_path ./results/qwen2.5-1.5b-ipo.csv --ipo
# python openrlhf/main/pmo_training.py --pretrain Qwen/Qwen2.5-1.5B-Instruct --save_path ./checkpoint/qwen2.5-1.5b-pmo --result_path ./results/qwen2.5-1.5b-pmo.csv
# Launch one preference-optimization training run against the shared base
# model (Qwen/Qwen2.5-1.5B-Instruct) and the shared preference dataset.
#
# Arguments:
#   $1  - training entry point under openrlhf/main/ (e.g. dpo_training.py)
#   $2  - run name; the checkpoint goes to /data/checkpoint/standard/<run name>
#   $3+ - optional extra flags, forwarded verbatim between --save_path and
#         --dataset so the resulting argument order matches a hand-written call
# Returns:
#   the exit status of the python process
run_preference_training() {
  local entry_point=$1
  local run_name=$2
  shift 2

  python "openrlhf/main/${entry_point}" \
    --pretrain Qwen/Qwen2.5-1.5B-Instruct \
    --save_path "/data/checkpoint/standard/${run_name}" \
    "$@" \
    --dataset argilla/ultrafeedback-binarized-preferences-cleaned \
    --chosen_key chosen \
    --rejected_key rejected
}

# The runs execute strictly one after another. This script does not enable
# errexit, so a failing run does not prevent the following ones from starting.
run_preference_training dpo_training.py qwen2.5-1.5b-dpo
run_preference_training dpo_training.py qwen2.5-1.5b-cdpo --label_smoothing 0.1
run_preference_training dpo_training.py qwen2.5-1.5b-ipo --ipo
run_preference_training pmo_training.py qwen2.5-1.5b-pmo
