# python openrlhf/main/pmo_training.py --pretrain google/gemma-3-1b-pt --save_path /data/checkpoint/standard/beta0/gemma3-1b-pm-0.5 --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 0.5 --beta 0
# python openrlhf/main/pmo_training.py --pretrain FuseAI/FuseChat-Llama-3.2-1B-SFT --save_path /data/checkpoint/standard/beta0/llama3-1b-pmo-0.5  --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 0.5 --beta 0
# python openrlhf/main/pmo_training.py --pretrain Qwen/Qwen2.5-1.5B-Instruct --save_path /data/checkpoint/standard/beta0/qwen2.5-1.5b-pmo-0.5  --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 0.5 --beta 0
# python openrlhf/main/pmo_training.py --pretrain google/gemma-3-1b-pt --save_path /data/checkpoint/standard/beta0/gemma3-1b-pm-1 --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 1 --beta 0
# python openrlhf/main/pmo_training.py --pretrain FuseAI/FuseChat-Llama-3.2-1B-SFT --save_path /data/checkpoint/standard/beta0/llama3-1b-pmo-1  --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 1 --beta 0
# python openrlhf/main/pmo_training.py --pretrain Qwen/Qwen2.5-1.5B-Instruct --save_path /data/checkpoint/standard/beta0/qwen2.5-1.5b-pmo-1  --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 1 --beta 0
# python openrlhf/main/pmo_training.py --pretrain google/gemma-3-1b-pt --save_path /data/checkpoint/standard/sum0.1/gemma3-1b-pm --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 0.05 --beta 0.05
# python openrlhf/main/pmo_training.py --pretrain FuseAI/FuseChat-Llama-3.2-1B-SFT --save_path /data/checkpoint/standard/sum0.1/llama3-1b-pmo  --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 0.05 --beta 0.05
# python openrlhf/main/pmo_training.py --pretrain Qwen/Qwen2.5-1.5B-Instruct --save_path /data/checkpoint/standard/sum0.1/qwen2.5-1.5b-pmo  --dataset argilla/ultrafeedback-binarized-preferences-cleaned --chosen_key chosen --rejected_key rejected --alpha 0.05 --beta 0.05
# "sum1" sweep: every model below is trained once per (alpha, beta) setting,
# and each run saves under /data/checkpoint/standard/sum1/<name>-<beta>.
# Runs are sequential; a failing run does not stop the ones after it.

# Launch a single pmo_training.py run.
#   $1  value passed to --pretrain
#   $2  checkpoint directory name (the "-<beta>" suffix is appended here)
#   $3  value passed to --alpha
#   $4  value passed to --beta
# Returns the exit status of the python process.
pmo_run() {
  python openrlhf/main/pmo_training.py \
    --pretrain "$1" \
    --save_path "/data/checkpoint/standard/sum1/$2-$4" \
    --dataset argilla/ultrafeedback-binarized-preferences-cleaned \
    --chosen_key chosen \
    --rejected_key rejected \
    --alpha "$3" \
    --beta "$4"
}

# Train all three models with one (alpha, beta) setting.
#   $1  alpha
#   $2  beta
# Returns the exit status of the last run only.
pmo_sweep_sum1() {
  # NOTE(review): the gemma directory is spelled "pm" while the others use
  # "pmo"; kept byte-for-byte because it is an on-disk path -- confirm intent.
  pmo_run google/gemma-3-1b-pt gemma3-1b-pm "$1" "$2"
  pmo_run FuseAI/FuseChat-Llama-3.2-1B-SFT llama3-1b-pmo "$1" "$2"
  pmo_run Qwen/Qwen2.5-1.5B-Instruct qwen2.5-1.5b-pmo "$1" "$2"
}

# Both settings have alpha + beta = 1 (presumably what "sum1" refers to).
pmo_sweep_sum1 0.95 0.05
pmo_sweep_sum1 0.9 0.1