#!/usr/bin/env bash
# GSM8K evaluation launcher: selects the model and prompting mode, then
# prepares a timestamped output directory for the run.
#
# NOTE: every path below is relative to the current working directory, so the
# script must be started from a directory located directly under the project
# root.

# Run timestamp, formatted YYYYMMDD_HH:MM:SS. %m and %d are zero-padded by
# default, so the GNU-only "0" padding flag is not needed.
TS=$(date "+%Y%m%d_%T")

# No trailing slash here, so the joined paths below do not contain "//".
project_root_path=".."
cli_path="${project_root_path}/src/benchmark_evaluation/gsm8k_eval.py"
data_path="${project_root_path}/data/gsm8k"

### Model selection: keep exactly one model_name line uncommented.
# model_name="/mnt/llms/model/meta-llama/Llama-2-7b-chat-hf"
# model_name="/mnt/llms/model/meta-llama/Llama-2-7b-hf"
model_name="/mnt/llms/model/google-gemma/gemma-2b"
# amateur_model_name="HillZhang/untruthful_llama2_7b"

### Mode selection: keep exactly one MODE line uncommented.
# NOTE(review): the alternative below is spelled "basline" as in the original
# script; confirm which spelling gsm8k_eval.py accepts before enabling it.
# MODE="basline"
MODE="cot-enhance"

# ### For experiments using Baichuan2
# model_name="baichuan-inc/Baichuan2-7B-Chat"
# amateur_model_name="HillZhang/untruthful_baichuan2_7b"

# ### For experiments using Mistral
# model_name="mistralai/Mistral-7B-Instruct-v0.1"
# amateur_model_name="HillZhang/untruthful_mistral_7b"

### Output directory
# NOTE(review): the leaf directory is still named "baseline_llama2_7b" although
# the active configuration is gemma-2b with MODE=cot-enhance; rename it if
# results are grouped by directory name.
output_path="${project_root_path}/exp_results/gsm8k/${TS}/baseline_llama2_7b"

# Without the output directory there is nowhere to store results: stop early.
mkdir -p "$output_path" || exit 1

# Keep a copy of this script next to the results so the run can be reproduced.
# The snapshot is best-effort: a failure is reported but does not stop the run.
cp -- "$0" "$(dirname "$output_path")" \
  || printf 'warning: could not copy %s into the results directory\n' "$0" >&2

# Extra generation flags.
# NOTE(review): this variable is defined but never passed to the CLI below;
# confirm whether it is meant to be appended to the command.
# shellcheck disable=SC2034
generation_args="
    --relative_top 0.0
"

echo "### GSM8K EVALUATE ### "

# Build the command as a real argument vector instead of a string fed to
# `eval`: each value stays a single argument even if it contains spaces or
# glob characters, and nothing is re-parsed by the shell. The positional
# parameters are used as a POSIX-compatible substitute for an array (this
# script takes no arguments of its own).
set -- python "${cli_path}" \
  --model-name "${model_name}" \
  --num-gpus 1 \
  --num-samples 80 \
  --mode "${MODE}" \
  --data-path "${data_path}" \
  --output-path "${output_path}"
# Optional flag, currently disabled: --bias

# Log the exact command, then run it with GPU 3 as the only visible device.
echo "CUDA_VISIBLE_DEVICES=3 $*"

# Nothing runs in the background, so no `wait` is needed; leaving the CLI as
# the last command makes the script exit with the evaluation's exit status.
CUDA_VISIBLE_DEVICES=3 "$@"
