#!/bin/sh
#SBATCH -o ./beam_search__13_gpu-job-%j.output
#SBATCH -p NA100q
#SBATCH -w node13
#SBATCH -n 1

# module load slurm cuda12.2/toolkit/12.2.2

# export CUDA_VISIBLE_DEVICES=2

# __conda_setup="$('/export/home2/lyw/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
# if [ $? -eq 0 ]; then
#     eval "$__conda_setup"
# else
#     if [ -f "/export/home2/lyw/anaconda3/etc/profile.d/conda.sh" ]; then
#         . "/export/home2/lyw/anaconda3/etc/profile.d/conda.sh"
#     else
#         export PATH="/export/home2/lyw/anaconda3/bin:$PATH"
#         export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/export/home2/lyw/anaconda3/lib
#     fi
# fi
# unset __conda_setup
# # <<< conda initialize <<<


# conda activate yw_asa

# CUDA_VISIBLE_DEVICES=2

# Invoke collect_model_outs.py for the "test" split of Dahoas/full-hh-rlhf
# with --method beam_search_w_RM and --config num_beam_5. The same checkpoint
# (RLHFlow/LLaMA3-iterative-DPO-final) is passed as both --LLM_Decoding and
# --LLM_Prompt, and all three *_GPU flags are given cuda:0.
#
# NOTE: every continuation backslash must be preceded by whitespace. The shell
# deletes a backslash-newline pair outright, so "150\" directly followed by a
# line starting with "--Sample_Prompt_num" would fuse into the single word
# "150--Sample_Prompt_num". The last argument deliberately has no trailing
# backslash so the command never depends on a blank line following it.
python ./collect_model_outs.py \
  --dataset Dahoas/full-hh-rlhf \
  --split test \
  --Reward_model RLHFlow/RewardModel-Mistral-7B-for-DPA-v1 \
  --LLM_Decoding RLHFlow/LLaMA3-iterative-DPO-final \
  --LLM_Prompt RLHFlow/LLaMA3-iterative-DPO-final \
  --Max_Prompt_length 500 \
  --Max_Node_length 200 \
  --Max_Response_length 2000 \
  --sample_num 150 \
  --Sample_Prompt_num 2 \
  --Sample_Node_num 1 \
  --Sample_Original_Prompt 1 \
  --LLM_Decoding_GPU cuda:0 \
  --LLM_Prompt_GPU cuda:0 \
  --Reward_model_GPU cuda:0 \
  --config num_beam_5 \
  --out_file ./Experiment/LLama3_DPO \
  --method beam_search_w_RM \
  --backbone LLama3_DPO \
  --batch 8 \
  --topk 4


# Invoke collect_model_outs.py for the "test" split of stingning/ultrachat
# with --method beam_search_w_RM and --config num_beam_5. The same checkpoint
# (RLHFlow/LLaMA3-iterative-DPO-final) is passed as both --LLM_Decoding and
# --LLM_Prompt, and all three *_GPU flags are given cuda:0.
#
# NOTE: every continuation backslash must be preceded by whitespace. The shell
# deletes a backslash-newline pair outright, so "150\" directly followed by a
# line starting with "--Sample_Prompt_num" would fuse into the single word
# "150--Sample_Prompt_num". The last argument deliberately has no trailing
# backslash so the command never depends on what follows it in the file.
python ./collect_model_outs.py \
  --dataset stingning/ultrachat \
  --split test \
  --Reward_model RLHFlow/RewardModel-Mistral-7B-for-DPA-v1 \
  --LLM_Decoding RLHFlow/LLaMA3-iterative-DPO-final \
  --LLM_Prompt RLHFlow/LLaMA3-iterative-DPO-final \
  --Max_Prompt_length 500 \
  --Max_Node_length 200 \
  --Max_Response_length 2000 \
  --sample_num 150 \
  --Sample_Prompt_num 2 \
  --Sample_Node_num 1 \
  --Sample_Original_Prompt 1 \
  --LLM_Decoding_GPU cuda:0 \
  --LLM_Prompt_GPU cuda:0 \
  --Reward_model_GPU cuda:0 \
  --config num_beam_5 \
  --out_file ./Experiment/LLama3_DPO \
  --method beam_search_w_RM \
  --backbone LLama3_DPO \
  --batch 8 \
  --topk 4
