#!/bin/bash

# Supported models: UltraCM-13B, Auto-J-13B
# promethus_path=/cpfs01/user/lantian/kaist-ai/prometheus-13b-v1.0
# Checkpoint locations on the shared /cpfs01 filesystem. Each variable holds
# the directory of one model; they are meant to be passed to evaluate.py.

# Chat models stored as hub snapshots.
llama2_13b_chat_path='/cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub/models--meta-llama--Llama-2-13b-chat-hf/snapshots/c2f3ec81aac798ae26dcc57799a994dfbf521496'
internlm2_20b_path='/cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub/models--internlm--internlm2-chat-20b/snapshots/3f710f76f56f8c40dc5dd800dbe66f3341cb2c87'
# The hub snapshot of InternLM2-chat-7B is kept for reference but disabled;
# the active value points at a locally stored checkpoint instead.
# internlm2_7b_path=/cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub/models--internlm--internlm2-chat-7b/snapshots/baba19a1ae271df6fb4d1d091e95a0ff5b62fc18
internlm2_7b_path='/cpfs01/shared/public/public_hdd/lantian/ckpt/aliyun_Ampere_v1_1_FT_v1_0_0_s1_rc48_1660_hf_ckpt'

# Critique-tuned models.
ultracm_13b_path='/cpfs01/user/lantian/openbmb/UltraCM-13b'
autoj_13b_path='/cpfs01/user/lantian/GAIR/autoj-13b'
tigerscore_path='/cpfs01/user/lantian/TIGER-Lab/TIGERScore-13B'

# 7B checkpoints from the sft_7b_critique_* experiment directories
# (presumably InternLM2-7B fine-tuned on each critique dataset -- confirm
# against the training configs).
internlm2_7b_chat_nips2024_v1='/cpfs01/shared/public/public_hdd/lantian/ckpt/exps/sft_7b_critique_nips2024_v1/aliyun_Ampere_7B_FT_critic_nips2024_v1/84_hf_ckpt'
internlm2_7b_chat_nips2024_v2='/cpfs01/shared/public/public_hdd/lantian/ckpt/exps/sft_7b_critique_nips2024_v2/aliyun_Ampere_7B_FT_critic_nips2024_v2/85_hf_ckpt'
internlm2_7b_chat_nips2024_v3='/cpfs01/shared/public/public_hdd/lantian/ckpt/exps/sft_7b_critique_nips2024_v3/aliyun_Ampere_7B_FT_critic_nips2024_v3/85_hf_ckpt'
internlm2_7b_chat_nips2024_v4='/cpfs01/shared/public/public_hdd/lantian/ckpt/exps/sft_7b_critique_nips2024_v4/aliyun_Ampere_7B_FT_critic_nips2024_v4/234_hf_ckpt'
internlm2_7b_chat_autoj='/cpfs01/shared/public/public_hdd/lantian/ckpt/exps/sft_7b_critique_autoj/aliyun_Ampere_7B_FT_critic_autoj/48_hf_ckpt'
internlm2_7b_chat_ultracm='/cpfs01/shared/public/public_hdd/lantian/ckpt/exps/sft_7b_critique_ultracm/aliyun_Ampere_7B_FT_critic_ultracm/2160_hf_ckpt'
internlm2_7b_chat_tigerscore='/cpfs01/shared/public/public_hdd/lantian/ckpt/exps/sft_7b_critique_tigerscore/aliyun_Ampere_7B_FT_critic_tigerscore/236_hf_ckpt'

# prompt_type: fs, zs-crit-cot, zs-crit-ao-1
# CUDA_VISIBLE_DEVICES=0 python evaluate.py  \
#     --available_gpus 0 \
#     --tasks Q \
#     --hf_model $internlm2_7b_path \
#     --prompt_type zs-crit-cot \
#     --enable_code_execution
# echo "[!] inference InternLM2 7B over"
# exit

# critique_tuned_pathes=($ultracm_13b_path $autoj_13b_path ${tigerscore_path})
# critique_tuned_pathes=(${internlm2_7b_chat_nips2024_v1} ${internlm2_7b_chat_nips2024_v2} ${internlm2_7b_chat_nips2024_v3} ${internlm2_7b_chat_nips2024_v4})
# critique_tuned_pathes=(${internlm2_7b_chat_autoj} ${internlm2_7b_chat_ultracm} ${internlm2_7b_chat_tigerscore})
# critique_tuned_pathes=(${internlm2_7b_chat_tigerscore} ${internlm2_7b_chat_ultracm} ${internlm2_7b_chat_autoj})

# Checkpoints to evaluate in this run. Add further directories as extra array
# elements (one per line) to evaluate several models back to back.
critique_tuned_pathes=(
    /cpfs01/shared/public/public_hdd/lantian/ckpt/exps/20240428_sft_7b_critique_nips2024_v4_mt/aliyun_Ampere_7B_FT_critic_nips2024_v4/195_hf_ckpt
)

# Run evaluate.py once per checkpoint on GPU 0, passing the checkpoint as the
# critic model. The array expansion and every use of $path are quoted so that a
# path containing whitespace or glob characters reaches evaluate.py as a single
# argument.
for path in "${critique_tuned_pathes[@]}"
do
    # prompt_type: fs, zs-crit-cot, zs-crit-ao-1
    echo "Evaluate for model: " "$path"
    if CUDA_VISIBLE_DEVICES=0 python evaluate.py  \
        --available_gpus 0 \
        --tasks Q \
        --hf_critic_model "$path" \
        --prompt_type zs-crit-cot \
        --enable_code_execution
    then
        echo "[!] inference ${path} over"
    else
        # Report the failure instead of claiming completion, but keep going so
        # the remaining checkpoints are still evaluated.
        status=$?
        echo "[!] inference ${path} failed with exit status ${status}" >&2
    fi
done
