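# NOTE: the commented-out Slurm directives and conda/CUDA setup that follow
# are disabled and kept for reference only. While they stay commented out,
# this script runs in whatever Python environment and with whatever GPU
# visibility the invoking shell provides.
# # !/bin/sh
# # SBATCH -o ./our_method__14_1_gpu-job-%j.output
# # SBATCH -p HPCq
# # SBATCH -w node14
# # SBATCH -n 1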

# module load slurm cuda12.2/toolkit/12.2.2

# export CUDA_VISIBLE_DEVICES=1

# __conda_setup="$('/export/home2/lyw/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
# if [ $? -eq 0 ]; then
#     eval "$__conda_setup"
# else
#     if [ -f "/export/home2/lyw/anaconda3/etc/profile.d/conda.sh" ]; then
#         . "/export/home2/lyw/anaconda3/etc/profile.d/conda.sh"
#     else
#         export PATH="/export/home2/lyw/anaconda3/bin:$PATH"
#         export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/export/home2/lyw/anaconda3/lib
#     fi
# fi
# unset __conda_setup
# # <<< conda initialize <<<


# conda activate yw_asa

# CUDA_VISIBLE_DEVICES=1

# Run collect_model_outs.py on the "test" split of one dataset.
#
# Arguments:
#   $1 - dataset identifier forwarded to --dataset
#        (e.g. Dahoas/full-hh-rlhf)
#
# Every other option is shared by all runs: the same reward model, the same
# LLM for both decoding and prompting, the same length and sampling limits,
# and all three models placed on cuda:0.
#
# NOTE(review): every run receives the same --out_file
# (./Experiment/LLama3_DPO). Whether collect_model_outs.py keeps results
# separate per dataset is not visible from this script -- confirm that a
# later run cannot overwrite the output of an earlier one.
#
# Returns: the exit status of the python process.
run_collect() {
  # The last option deliberately has no trailing backslash, so the command
  # ends here regardless of what follows it in the file.
  python ./collect_model_outs.py \
    --dataset "$1" \
    --split test \
    --Reward_model RLHFlow/RewardModel-Mistral-7B-for-DPA-v1 \
    --LLM_Decoding RLHFlow/LLaMA3-iterative-DPO-final \
    --LLM_Prompt RLHFlow/LLaMA3-iterative-DPO-final \
    --Max_Prompt_length 500 \
    --Max_Node_length 200 \
    --Max_Response_length 2000 \
    --sample_num 300 \
    --Sample_Prompt_num 2 \
    --Sample_Node_num 1 \
    --Sample_Original_Prompt 1 \
    --LLM_Decoding_GPU cuda:0 \
    --LLM_Prompt_GPU cuda:0 \
    --Reward_model_GPU cuda:0 \
    --config overall_score \
    --out_file ./Experiment/LLama3_DPO \
    --method our_method \
    --backbone LLama3_DPO
}

# Runs are sequential and independent: the second one starts even if the
# first fails, and the script's exit status is that of the last run.
run_collect Dahoas/full-hh-rlhf
run_collect stingning/ultrachat
