#!/usr/bin/env bash
#
# Pipeline driver: dataset preprocessing, baseline DPO training, MOPO sample
# generation/selection, adapter merging, re-alignment, then test-set
# generation and scoring.
#
# Usage: run from the repository root; every command below uses
# repository-relative paths and PYTHONPATH=. .
#
# Stop at the first failing command (-e) and on any unset variable (-u).
# Each stage reads files written by an earlier stage, so carrying on after a
# failure would only run later stages against missing or stale inputs.
# pipefail is intentionally not set: the script contains no pipelines, and
# leaving it out keeps the script usable under a plain POSIX sh.
set -eu

# Stage 1: dataset preprocessing.
# NOTE(review): presumably this produces the "-safer" / "-better" dataset
# variants referenced by the later stages -- confirm against the script.
python ./src/data/preprocess/PKUSafeRLHF.py
# Baseline DPO training: one run per preference dataset ("safer", then
# "better"). Both runs use the same base model, prompt template and LoRA
# settings; only the dataset name, and the output directory and run name
# derived from it, change.
for objective in safer better; do
  dataset="PKU-Alignment/PKU-SafeRLHF-10K-${objective}"

  CUDA_VISIBLE_DEVICES=0 PYTHONPATH=. python scripts/baselines/dpo.py \
    --sft_model_name "PKU-Alignment/alpaca-7b-reproduced" \
    --prompt_template "BEGINNING OF CONVERSATION: USER: {raw_prompt} ASSISTANT:" \
    --dataset_name "${dataset}" \
    --max_length 512 \
    --training_args.output_dir "./output/${dataset}" \
    --training_args.run_name "${dataset}" \
    --training_args.per_device_train_batch_size 1 \
    --training_args.per_device_eval_batch_size 6 \
    --training_args.gradient_accumulation_steps 2 \
    --training_args.learning_rate 5e-4 \
    --peft_config.r 64 \
    --peft_config.target_modules q_proj k_proj v_proj o_proj \
    --peft_config.lora_alpha 1 \
    --peft_config.lora_dropout 0
done
# MOPO sample construction. Three commands share the values below:
#   1. mod.py            writes generations to ${mopo_dir}/gen_sample
#   2. select_sample.py  reads gen_sample and writes ${mopo_dir}/select_sample
#   3. cache_conflict.py is run on the "safer" dataset
base_model="PKU-Alignment/alpaca-7b-reproduced"
prompt_template="BEGINNING OF CONVERSATION: USER: {raw_prompt} ASSISTANT:"
safer_dataset="PKU-Alignment/PKU-SafeRLHF-10K-safer"
safer_adapter="./output/PKU-Alignment/PKU-SafeRLHF-10K-safer/best_checkpoint"
better_adapter="./output/PKU-Alignment/PKU-SafeRLHF-10K-better/best_checkpoint"
mopo_dir="./output/PKU-Alignment/PKU-SafeRLHF-10K/MOPO"

# 1. Generate samples from the two baseline adapters for each soup weight.
PYTHONPATH=. accelerate launch scripts/baselines/mod.py \
  --soup_weights 0.2 0.4 0.6 0.8 \
  --sft_model_name "${base_model}" \
  --dpo_model_1_name "${safer_adapter}" \
  --dpo_model_2_name "${better_adapter}" \
  --prompt_template "${prompt_template}" \
  --dataset_name "${safer_dataset}" \
  --output_dir "${mopo_dir}/gen_sample" \
  --max_length 512 \
  --eval_size 2500 \
  --split "train_conflict"

# 2. Select samples from the generations above.
# The adapter list is passed as ONE argument with a " ,  " separator; the
# spacing is kept exactly as the script has always passed it.
# NOTE(review): presumably select_sample.py splits on the comma and strips
# whitespace -- confirm before normalising the separator.
CUDA_VISIBLE_DEVICES=0 PYTHONPATH=. python3 scripts/MOPO/select_sample.py \
  --soup_weights 0.2 0.4 0.6 0.8 \
  --sft_model_name "${base_model}" \
  --adapter_model_name "${safer_adapter} ,  ${better_adapter}" \
  --prompt_template "${prompt_template}" \
  --dataset_name "${safer_dataset}" \
  --input_dir "${mopo_dir}/gen_sample" \
  --output_dir "${mopo_dir}/select_sample" \
  --replication 4 \
  --eval_size 2500 \
  --peft_config.r 64 \
  --peft_config.target_modules q_proj k_proj v_proj o_proj \
  --peft_config.lora_alpha 1 \
  --peft_config.lora_dropout 0

# 3. Cache the conflict data for the "safer" dataset.
PYTHONPATH=. python scripts/MOPO/cache_conflict.py \
  --prompt_template "${prompt_template}" \
  --dataset_name "${safer_dataset}" \
  --eval_size 2500
# Merge each baseline adapter into the base model and store the merged
# model (bf16) under /model/SafeRLHF_<objective>_DPO_model.
# NOTE(review): the adapter path here is the training output directory
# itself, while the MOPO sampling commands in this script load
# ".../best_checkpoint" beneath it -- confirm which one the merge tool
# expects.
for objective in safer better; do
  PYTHONPATH=. python src/tools/merge_peft_adapter.py \
    --adapter_model_name "./output/PKU-Alignment/PKU-SafeRLHF-10K-${objective}" \
    --base_model_name "PKU-Alignment/alpaca-7b-reproduced" \
    --output_name "/model/SafeRLHF_${objective}_DPO_model" \
    --dtype bf16
done
# Re-alignment: run Iterative_DPO.py once per objective, starting from the
# merged model of that objective and training on the selected MOPO samples.
# Both runs read the cache directory of the "safer" dataset; only the start
# model, the original dataset name and the output/run names differ.
for objective in safer better; do
  run_name="PKU-Alignment/PKU-SafeRLHF-10K/MOPO/re-alignment/${objective}"

  CUDA_VISIBLE_DEVICES=0 PYTHONPATH=. python scripts/MOPO/Iterative_DPO.py \
    --sft_model_name "/model/SafeRLHF_${objective}_DPO_model" \
    --prompt_template "BEGINNING OF CONVERSATION: USER: {raw_prompt} ASSISTANT:" \
    --cached_data_dir "./output/cached_datasets/PKU-Alignment/PKU-SafeRLHF-10K-safer" \
    --dataset_name "./output/PKU-Alignment/PKU-SafeRLHF-10K/MOPO/select_sample" \
    --original_dataset_name "PKU-Alignment/PKU-SafeRLHF-10K-${objective}" \
    --max_length 512 \
    --training_args.output_dir "./output/${run_name}" \
    --training_args.run_name "${run_name}" \
    --training_args.per_device_train_batch_size 1 \
    --training_args.per_device_eval_batch_size 6 \
    --training_args.gradient_accumulation_steps 2 \
    --training_args.learning_rate 5e-6 \
    --peft_config.r 64 \
    --peft_config.target_modules q_proj k_proj v_proj o_proj \
    --peft_config.lora_alpha 1 \
    --peft_config.lora_dropout 0 \
    --alpha 0.1
done
# Evaluation: for each soup weight, generate test-split outputs from the two
# merged models plus their re-alignment adapters, then score them.
#
# Per weight W the results land in:
#   ${realign_root}/W/gen_test   (generations, input to score.py)
#   ${realign_root}/W/score      (scores)
#
# Scoring only makes sense on fresh generations, so the loop aborts as soon
# as a generation or scoring step fails instead of scoring a missing or
# stale gen_test directory.
realign_root="./output/PKU-Alignment/PKU-SafeRLHF-10K/MOPO/re-alignment"

for soup_weight in 0.2 0.4 0.6 0.8; do
  weight_dir="${realign_root}/${soup_weight}"

  if ! CUDA_VISIBLE_DEVICES=0 PYTHONPATH=. python3 scripts/utils/mod_general.py \
    --soup_weights "${soup_weight}" \
    --sft_model_1_name "/model/SafeRLHF_safer_DPO_model" \
    --sft_model_2_name "/model/SafeRLHF_better_DPO_model" \
    --dpo_model_1_name "${realign_root}/safer/best_checkpoint" \
    --dpo_model_2_name "${realign_root}/better/best_checkpoint" \
    --prompt_template "BEGINNING OF CONVERSATION: USER: {raw_prompt} ASSISTANT:" \
    --dataset_name "PKU-Alignment/PKU-SafeRLHF-10K-safer" \
    --output_dir "${weight_dir}/gen_test" \
    --max_length 512 \
    --eval_size -1 \
    --split "test"; then
    printf 'error: test generation failed for soup weight %s\n' "${soup_weight}" >&2
    exit 1
  fi

  if ! CUDA_VISIBLE_DEVICES=0 PYTHONPATH=. python3 scripts/utils/score.py \
    --input_dir "${weight_dir}/gen_test" \
    --output_dir "${weight_dir}/score"; then
    printf 'error: scoring failed for soup weight %s\n' "${soup_weight}" >&2
    exit 1
  fi
done