source scripts/common_setting.sh

# Sub-directory of results/ that the evaluation outputs are written under.
tune_ckpt_path="untune"

# Models to evaluate. base_models[i] and model_names[i] describe the same
# model, so every model is switched on or off by (un)commenting its single
# line below, which keeps the two arrays aligned.
# The $llama*/$qwen7b variables are not defined in this file; presumably they
# come from scripts/common_setting.sh -- confirm.
base_models=()
model_names=()
# base_models+=("$llama1");   model_names+=("llama1")
# base_models+=("$llama2");   model_names+=("llama2")
# base_models+=("$llama3");   model_names+=("llama3")
# base_models+=("$llama1b");  model_names+=("llama1b")
# base_models+=("$llama13b"); model_names+=("llama13b")
base_models+=("$qwen7b")
model_names+=("qwen7b")

# GPU ids, matched to the enabled models by position: the model at index i is
# evaluated on gpu_ids[i].
# NOTE(review): GPU 7 is listed twice, so the 1st and 5th enabled models would
# share one device -- confirm this is intended.
gpu_ids=(7 2 3 6 7)

#######################################
# Run ../Deputy/eval_metric.py for one model with one GPU made visible.
# Globals:
#   tune_ckpt_path (read) - sub-directory of results/ used for the output path
# Arguments:
#   $1 - GPU id, passed to the python process as CUDA_VISIBLE_DEVICES
#   $2 - base model, passed as --base_model
#   $3 - model name, used as the per-model output sub-directory
# Outputs:
#   Progress lines (model, start and end timestamps) on stdout; a failure
#   notice on stderr.
# Returns:
#   The exit status of the python evaluation command.
#######################################
run_evaluation() {
  local gpu_id=$1
  local base_model=$2
  local model_name=$3
  local current_time
  local status=0

  # ---------------- eval acc ------------------
  # Disabled alternative: evaluation through lm_eval instead of eval_metric.py.
#  current_time=$(date "+%Y-%m-%d %H:%M:%S")
#  echo "Start evaluation on gpu: $gpu_id, $current_time"
#  CUDA_VISIBLE_DEVICES=$gpu_id lm_eval --model hf \
#      --model_args pretrained=$base_model \
#      --tasks mmlu,gsm8k,medqa_4options,openbookqa,piqa,boolq,social_iqa,hellaswag,arc_easy,winogrande,arc_challenge \
#      --device cuda:0 \
#      --trust_remote_code \
#      --output_path results/${tune_ckpt_path}/${model_name}
#
#  current_time=$(date "+%Y-%m-%d %H:%M:%S")
#  echo "End evaluation on gpu: $gpu_id, $current_time"

  # ---------------- eval flops ------------------
  echo "base_model: ${base_model}"
  current_time=$(date "+%Y-%m-%d %H:%M:%S")
  echo "Start evaluation on gpu: $gpu_id, $current_time"

  # Expansions are quoted so a model path or name containing spaces or glob
  # characters reaches python as a single argument. The status is captured
  # (rather than left to the trailing echo) so callers can detect a failure.
  CUDA_VISIBLE_DEVICES="$gpu_id" python ../Deputy/eval_metric.py \
    --base_model="$base_model" \
    --name wo \
    --cutoff_len 256 \
    --tasks ptb,wikitext2,mmlu,gsm8k,medqa_4options,openbookqa,piqa,boolq,social_iqa,hellaswag,arc_c,arc_e,winogrande \
    --output_path "results/${tune_ckpt_path}/${model_name}/metric/" \
    || status=$?

  current_time=$(date "+%Y-%m-%d %H:%M:%S")
  echo "End evaluation on gpu: $gpu_id, $current_time"

  if ((status != 0)); then
    echo "Evaluation of ${model_name} on gpu ${gpu_id} failed with exit status ${status}" >&2
  fi
  return "$status"
}

#######################################
# Launch run_evaluation for every configured model in parallel, then wait for
# all of them to finish.
# Globals:
#   base_models (read) - models to evaluate
#   model_names (read) - output names, parallel to base_models
#   gpu_ids     (read) - GPU ids, matched to base_models by position
# Outputs:
#   Configuration errors and per-model failure notices on stderr.
# Returns:
#   0 if every evaluation succeeded (or no model is configured); 1 if the
#   arrays are inconsistent or at least one evaluation failed.
#######################################
run_all_evaluations() {
  local model_count=${#base_models[@]}
  local i
  local failures=0
  local -a pids=()

  # The arrays are paired by index; a mismatch would write results under the
  # wrong model name, so refuse to start.
  if ((${#model_names[@]} != model_count)); then
    echo "model_names has ${#model_names[@]} entries but base_models has ${model_count}" >&2
    return 1
  fi
  # A missing GPU entry would launch the job with an empty CUDA_VISIBLE_DEVICES.
  if ((${#gpu_ids[@]} < model_count)); then
    echo "gpu_ids has ${#gpu_ids[@]} entries but ${model_count} models are enabled" >&2
    return 1
  fi
  if ((model_count == 0)); then
    return 0
  fi

  for ((i = 0; i < model_count; i += 1)); do
    run_evaluation "${gpu_ids[i]}" "${base_models[i]}" "${model_names[i]}" &
    pids+=("$!")
  done

  # Wait on each job by PID: a bare `wait` always returns 0 and would hide
  # failed evaluations.
  for i in "${!pids[@]}"; do
    if ! wait "${pids[i]}"; then
      echo "Evaluation of ${model_names[i]} on gpu ${gpu_ids[i]} failed" >&2
      failures=$((failures + 1))
    fi
  done

  if ((failures > 0)); then
    return 1
  fi
  return 0
}

run_all_evaluations