#!/usr/bin/env bash
# Launches the BBH benchmark evaluation (bbh_eval.py) for the selected model.
#
# All paths are built from project_root_path ("../"), i.e. they are resolved
# against the current working directory. Run the script from a directory that
# sits directly below the project root.
#
# Side effects:
#   - creates ${output_path} if it does not exist
#   - copies this script into the parent directory of ${output_path}
#   - runs the evaluation with CUDA_VISIBLE_DEVICES set to ${cuda_devices}
#
# The script exits with a non-zero status as soon as any step fails
# (including the python run itself).
set -euo pipefail

# Timestamp (YYYYMMDD_HH:MM:SS); only used by the optional time-stamped
# output_path variant further down.
TS=$(date "+%Y%m%d_%T")

project_root_path="../"
cli_path="${project_root_path}/src/benchmark_evaluation/bbh_eval.py"
data_path="${project_root_path}/data/bbh"

### Experiments with Llama2-7B
# model_name="/mnt/llms/model/meta-llama/Llama-2-7b-chat-hf"
model_name="/mnt/llms/model/meta-llama/Llama-2-7b-hf"
# model_name="/mnt/llms/model/google-gemma/gemma-2b"
# amateur_model_name="HillZhang/untruthful_llama2_7b"

# ### Experiments with Baichuan2
# model_name="baichuan-inc/Baichuan2-7B-Chat"
# amateur_model_name="HillZhang/untruthful_baichuan2_7b"

# ### Experiments with Mistral
# model_name="mistralai/Mistral-7B-Instruct-v0.1"
# amateur_model_name="HillZhang/untruthful_mistral_7b"

### Baseline
output_path="${project_root_path}/exp_results/bbh/baseline_llama2_7b"
# output_path="${project_root_path}/exp_results/bbh/${TS}/baseline_llama2_7b_chat"
mkdir -p -- "${output_path}"
# Keep a copy of the launcher next to the results (in the parent directory of
# output_path) so the exact settings of the run can be looked up later.
cp -- "$0" "$(dirname -- "${output_path}")"

# NOTE(review): generation_args is not referenced by the command below --
# confirm whether it should be passed to bbh_eval.py or removed.
generation_args="
    --relative_top 0.0
"

# GPU made visible to the evaluation process.
cuda_devices=3

# The command is kept as an argument array and executed directly. This avoids
# `eval` on a string and keeps every argument intact regardless of spaces or
# glob characters in the paths.
cmd=(
  python "${cli_path}"
  --model-name "${model_name}"
  --num-gpus 1
  --bias
  --mode cot-enhance
  --data-path "${data_path}"
  --output-path "${output_path}"
  # --is-chat
)

echo "### BBH EVALUATE ### "
# Log the command line before running it.
printf '%s\n' "CUDA_VISIBLE_DEVICES=${cuda_devices} ${cmd[*]}"
CUDA_VISIBLE_DEVICES="${cuda_devices}" "${cmd[@]}"
