#!/bin/sh
#SBATCH -o ./our_method__14_PKU_1_gpu-job-%j.output
#SBATCH -p HPCq
#SBATCH -w node14
#SBATCH -n 1

# module load slurm cuda12.2/toolkit/12.2.2

# export CUDA_VISIBLE_DEVICES=0

# __conda_setup="$('/export/home2/lyw/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
# if [ $? -eq 0 ]; then
#     eval "$__conda_setup"
# else
#     if [ -f "/export/home2/lyw/anaconda3/etc/profile.d/conda.sh" ]; then
#         . "/export/home2/lyw/anaconda3/etc/profile.d/conda.sh"
#     else
#         export PATH="/export/home2/lyw/anaconda3/bin:$PATH"
#         export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/export/home2/lyw/anaconda3/lib
#     fi
# fi
# unset __conda_setup
# # <<< conda initialize <<<


# conda activate yw_asa

# CUDA_VISIBLE_DEVICES=0

# Run ./example.py once per dataset with an otherwise identical configuration.
# Only --dataset differs between the runs, so the shared flags live in a single
# place and cannot drift apart when a hyper-parameter is tuned.

# run_experiment DATASET
#   Invoke ./example.py on the "test" split of DATASET.
#   Arguments: $1 - dataset identifier, passed verbatim to --dataset.
#   Returns:   the exit status of the python process.
#   The three *_GPU flags are all set to cuda:0.
#   NOTE(review): every run receives the same --out_file value; confirm that
#   example.py keeps the results of different datasets apart instead of
#   overwriting them.
run_experiment() {
  python ./example.py \
    --dataset "$1" \
    --split test \
    --Reward_model RLHFlow/RewardModel-Mistral-7B-for-DPA-v1 \
    --LLM_Decoding RLHFlow/LLaMA3-iterative-DPO-final \
    --max_response_length 1000 \
    --sample_num 300 \
    --reward_threshold 8.5 \
    --entropy_threshold 2.0 \
    --topk 10 \
    --beta 0.7 \
    --alpha 0.5 \
    --LLM_Decoding_GPU cuda:0 \
    --LLM_Prompt_GPU cuda:0 \
    --Reward_model_GPU cuda:0 \
    --out_file ./Experiment/LLama3_DPO \
    --method ARGS \
    --backbone LLama3_DPO
}

# run_all_experiments
#   Run every dataset in order. A failing run does not prevent the following
#   ones from starting (the runs are independent), but it is remembered.
#   Returns: 0 if every run succeeded, 1 if at least one run failed.
#   NOTE(review): confirm that stingning/ultrachat actually provides a "test"
#   split; run_experiment always requests --split test.
run_all_experiments() {
  rc=0
  for dataset in Dahoas/full-hh-rlhf stingning/ultrachat; do
    run_experiment "$dataset" || rc=1
  done
  return "$rc"
}

# Keep this as the last command of the script: its status becomes the exit
# status of the batch job, so a failed run is visible to the scheduler.
run_all_experiments

