#!/bin/bash
# This is a script for evaluating the model for code generation.
##################################################################################################
### **Default parameters**
# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Evaluation modes to run. Enable a mode by uncommenting its line.
MODES=()
# MODES+=('code_refinement_with_instructions_fc_cs')
# MODES+=('code_refinement_with_instructions_fc_ct')
# MODES+=('code_generation_cs')
# MODES+=('code_generation_ct')
# MODES+=('code_generation_total_cs')
MODES+=('code_generation_total_ct')

# Which family of generated outputs to evaluate.
# Options: "base", "our", "full"
CODE_GEN_MODE='full'

# Training variant; only used to build paths when CODE_GEN_MODE is "our".
# Options: "SFT", "RL", "train-SFT", "train-RL"
OUR_MODE='SFT'

# Which identifier is used as task_id.
# Options: "True" (use out_task_id), "False" (use in_task_id)
USE_NONE_LIST='False'

# Kind of contract test case to evaluate against.
# Options: "grammar", "direct"
CONTRACT_TEST_CASE_TYPE='direct'

# Code generation models to evaluate. Enable a model by uncommenting its line.
# Model families listed in the original notes: "DeepSeek", "Mistral", "Qwen", "Phi"
CODE_GENERATION_MODEL_NAMES=()
## base
# CODE_GENERATION_MODEL_NAMES+=('DeepSeek-R1-Distill-Qwen-14B')
# CODE_GENERATION_MODEL_NAMES+=('Qwen3-14B')
# CODE_GENERATION_MODEL_NAMES+=('Phi-4-reasoning-plus')  # ct not done
# CODE_GENERATION_MODEL_NAMES+=('o4-mini')
# CODE_GENERATION_MODEL_NAMES+=('Mistral-Nemo-Base-2407')
## full
# CODE_GENERATION_MODEL_NAMES+=('Qwen3-14B')             # tmux3
# CODE_GENERATION_MODEL_NAMES+=('Llama-3.1-8B-Instruct') # tmux5
CODE_GENERATION_MODEL_NAMES+=('Qwen3-8B')                # tmux5
## not used
# CODE_GENERATION_MODEL_NAMES+=('EXAONE-Deep-7.8B')
# CODE_GENERATION_MODEL_NAMES+=('gemma-2-9b-it')

# Code generation model name -> contract test model directory basename.
declare -A CONTRACT_TEST_MODEL_MAP
CONTRACT_TEST_MODEL_MAP['DeepSeek-R1-Distill-Qwen-14B']='DeepSeek-RL'
CONTRACT_TEST_MODEL_MAP['Mistral-Nemo-Base-2407']='Mistral-RL'
CONTRACT_TEST_MODEL_MAP['o4-mini']='o4-mini'

# Root directory for evaluation results (the trailing slash is relied upon
# when output directories are built by concatenation).
OUTPUT_PATH='../../code/evaluation_code_generation/total/'

# Functionality datasets.
HUMAN_EVAL_FUNCTIONALITY_DATASET_PATH='../../data/evalplus-0.1.1/HumanEvalPlus.jsonl'
MBPP_FUNCTIONALITY_DATASET_PATH='../../data/mbppplus-0.2.0/MbppPlus.jsonl'

# Contract datasets.
HUMAN_EVAL_CONTRACTS_DATASET_PATH='../../code/evaluation_test_case_pass_k/humaneval/pre_filtering/multi_assert_specification/o4-mini/o4-mini_all_results.json'
MBPP_CONTRACTS_DATASET_PATH='../../code/evaluation_test_case_pass_k/mbpp/pre_filtering/multi_assert_specification/o4-mini/o4-mini_all_results.json'
##################################################################################################

#######################################
# Print the value passed as --code_generation_model_path for one run.
# Globals:   CODE_GEN_MODE (read), OUR_MODE (read, "our" mode only)
# Arguments: $1 - evaluation mode
#            $2 - code generation model name
# Outputs:   the path string on stdout; an error message on stderr when
#            CODE_GEN_MODE is not recognised
# Returns:   0 on success, 1 for an unknown CODE_GEN_MODE
#######################################
resolve_model_path() {
  local mode=$1
  local model=$2

  # Only the exact mode "code_generation_total_cs" selects the "cs" outputs;
  # every other mode reads the "ct" outputs.
  local variant="ct"
  if [[ "$mode" == "code_generation_total_cs" ]]; then
    variant="cs"
  fi

  case "$CODE_GEN_MODE" in
    base)
      local humaneval_path="../../code/output_base/humaneval/inference/code_generation_${variant}/${model}/generated_step_all.json"
      local mbpp_path="../../code/output_base/mbpp/inference/code_generation_${variant}/${model}/generated_step_all.json"
      # NOTE(review): this reproduces byte-for-byte what the previous
      # `printf "%s "!next!"${ARRAY[@]}"` emitted: the MBPP path first, then a
      # space, then "!next!" and the HumanEval path. The ordering and the
      # stray space look accidental -- confirm against how
      # evaluation_code_generation.py splits this argument before changing it.
      printf '%s !next!%s\n' "$mbpp_path" "$humaneval_path"
      ;;
    full)
      printf '%s\n' "../../code/output_full/total/inference/${model}/code_generation_${variant}/generated_step_all.jsonl"
      ;;
    our)
      printf '%s\n' "../../code/output_our/total/inference/code_generation_${variant}/${model}/codegen_code_gen_${variant}_${OUR_MODE}@1.jsonl"
      ;;
    *)
      printf 'Unknown CODE_GEN_MODE: %s (expected "base", "full" or "our")\n' "$CODE_GEN_MODE" >&2
      return 1
      ;;
  esac
}

#######################################
# Print the output directory for one run.
# Globals:   OUTPUT_PATH, CONTRACT_TEST_CASE_TYPE, CODE_GEN_MODE (read),
#            OUR_MODE (read, "our" mode only)
# Arguments: $1 - evaluation mode
#            $2 - code generation model name
# Outputs:   the directory (with trailing slash) on stdout; an error message
#            on stderr when CODE_GEN_MODE is not recognised
# Returns:   0 on success, 1 for an unknown CODE_GEN_MODE
#######################################
resolve_out_dir() {
  local mode=$1
  local model=$2
  local prefix="${OUTPUT_PATH}${mode}-${CONTRACT_TEST_CASE_TYPE}"

  case "$CODE_GEN_MODE" in
    our)
      printf '%s\n' "${prefix}-${CODE_GEN_MODE}/${model}-${OUR_MODE}/"
      ;;
    full)
      printf '%s\n' "${prefix}-${CODE_GEN_MODE}/${model}/"
      ;;
    base)
      # Base results carry no CODE_GEN_MODE suffix in the directory name.
      printf '%s\n' "${prefix}/${model}/"
      ;;
    *)
      printf 'Unknown CODE_GEN_MODE: %s (expected "base", "full" or "our")\n' "$CODE_GEN_MODE" >&2
      return 1
      ;;
  esac
}

# Run the evaluation once for every (mode, model) combination.
echo "------------------------------------------------------------"
for MODE in "${MODES[@]}"; do
  for MODEL_NAME in "${CODE_GENERATION_MODEL_NAMES[@]}"; do
    # Fail fast on a misconfigured CODE_GEN_MODE instead of silently
    # evaluating the wrong files.
    TOTAL_EVAL_CODE_GENERATION_MODEL_PATH_STR=$(resolve_model_path "$MODE" "$MODEL_NAME") || exit 1
    OUT_DIR=$(resolve_out_dir "$MODE" "$MODEL_NAME") || exit 1

    mkdir -p "$OUT_DIR"
    echo "→ MODE=$MODE  DATASET=total "
    echo "→ MODEL_NAME: $MODEL_NAME"
    echo "=== Evaluating ==="
    echo "    → output_dir: $OUT_DIR"
    # Log exactly what is handed to the Python script (in base mode this
    # contains both dataset paths, not just the first one).
    echo "    → modeldir : $TOTAL_EVAL_CODE_GENERATION_MODEL_PATH_STR"

    python ../../code/utils/evaluation_code_generation.py \
        --code_generation_model_path "$TOTAL_EVAL_CODE_GENERATION_MODEL_PATH_STR" \
        --code_generation_model_name "$MODEL_NAME" \
        --humaneval_functionality_dataset_path "$HUMAN_EVAL_FUNCTIONALITY_DATASET_PATH" \
        --mbpp_functionality_dataset_path "$MBPP_FUNCTIONALITY_DATASET_PATH" \
        --humaneval_contracts_dataset_path "$HUMAN_EVAL_CONTRACTS_DATASET_PATH" \
        --mbpp_contracts_dataset_path "$MBPP_CONTRACTS_DATASET_PATH" \
        --output_path "$OUT_DIR" \
        --mode "$MODE" \
        --use_None_list "$USE_NONE_LIST" \
        --contract_test_case_type "$CONTRACT_TEST_CASE_TYPE"
  done
done

# Printed once, after every combination has completed. printf is used because
# bash's echo does not interpret "\n" without -e.
printf '\n✅ All evaluations finished.\n'
