# python openrlhf/main/dpo_training.py --pretrain google/gemma-3-1b-pt --save_path /data/checkpoint/gemma-3-1b-dpo --result_path ./results/gemma-3-1b-dpo.csv
# python openrlhf/main/dpo_training.py --pretrain google/gemma-3-1b-pt --save_path /data/checkpoint/gemma-3-1b-cdpo --result_path ./results/gemma-3-1b-cdpo.csv --label_smoothing 0.1
# python openrlhf/main/dpo_training.py --pretrain google/gemma-3-1b-pt --save_path /data/checkpoint/gemma-3-1b-ipo --result_path ./results/gemma-3-1b-ipo.csv --ipo
# python openrlhf/main/pmo_training.py --pretrain google/gemma-3-1b-pt --save_path /data/checkpoint/gemma-3-1b-pmo --result_path ./results/gemma-3-1b-pmo.csv
# Launch openrlhf/main/dpo_training.py once per variant. Every run shares the
# same pretrained model, dataset and chosen/rejected keys; only the checkpoint
# directory suffix and the variant-specific flags differ.
#
# Arguments:
#   $1   - variant suffix appended to the checkpoint directory name
#   $2.. - extra flags for this variant (may be empty), passed through verbatim
# Returns: the exit status of the python process.
run_dpo_variant() {
  local variant=$1
  shift
  # "$@" sits between --save_path and --dataset so the resulting argument
  # order is the same as when each command is written out by hand.
  python openrlhf/main/dpo_training.py \
    --pretrain google/gemma-3-1b-pt \
    --save_path "/data/checkpoint/standard/gemma-3-1b-${variant}" \
    "$@" \
    --dataset argilla/ultrafeedback-binarized-preferences-cleaned \
    --chosen_key chosen \
    --rejected_key rejected
}

# Runs execute one after another; a failing run does not prevent the next.
run_dpo_variant dpo
run_dpo_variant cdpo --label_smoothing 0.1
run_dpo_variant ipo --ipo
