# Launch four preference-training runs back to back. Every run starts from the
# same pretrained model and reads the same preference dataset; each one writes
# to its own directory under CKPT_ROOT.
#
# The trainer scripts are addressed by relative path, so invoke this from the
# directory that contains `openrlhf/`.
# There is no `set -e`: a failing run does not prevent the later ones from
# starting, and the script's exit status is that of the last run.

SFT_MODEL=FuseAI/FuseChat-Llama-3.2-1B-SFT
PREF_DATASET=argilla/ultrafeedback-binarized-preferences-cleaned
CKPT_ROOT=/data/checkpoint/standard

# launch_run TRAINER RUN_NAME [EXTRA_ARGS...]
#   TRAINER     file name of the trainer under openrlhf/main/
#   RUN_NAME    directory name (under CKPT_ROOT) passed as --save_path
#   EXTRA_ARGS  forwarded verbatim, placed right after --save_path
# Returns the exit status of the python process.
launch_run() {
  _lr_trainer=$1
  _lr_run_name=$2
  shift 2
  python "openrlhf/main/${_lr_trainer}" \
    --pretrain "${SFT_MODEL}" \
    --save_path "${CKPT_ROOT}/${_lr_run_name}" \
    "$@" \
    --dataset "${PREF_DATASET}" \
    --chosen_key chosen \
    --rejected_key rejected
}

# dpo_training.py with no extra flags.
launch_run dpo_training.py llama3-1b-dpo
# dpo_training.py with label smoothing set to 0.1 (saved as "cdpo").
launch_run dpo_training.py llama3-1b-cdpo --label_smoothing 0.1
# dpo_training.py with the --ipo switch.
launch_run dpo_training.py llama3-1b-ipo --ipo
# Separate trainer script, pmo_training.py, with no extra flags.
launch_run pmo_training.py llama3-1b-pmo