#!/bin/bash

# Require the experiment name as the first argument. When it is missing, list
# the sub-directories of ${HOME}/exp_artifacts as candidates and exit with 1.
if [ -z "$1" ]; then
    echo "No exp supplied"
    echo "Available:"
    # -mindepth 1 keeps the exp_artifacts root itself out of the listing;
    # NUL-delimited reading keeps directory names with whitespace intact.
    while IFS= read -r -d '' folder; do
        printf '%s\n' "${folder##*/}"
    done < <(find "${HOME}/exp_artifacts" -mindepth 1 -maxdepth 1 -type d -print0)
    exit 1
fi


# The first positional argument names the experiment; its LoRA artifacts are
# looked up under ${HOME}/exp_artifacts/<name>.
EXP_NAME="${1}"
LORA_PATH="${HOME}/exp_artifacts/${EXP_NAME}"

# Log the moment the run started.
current_time="$(date +'%Y.%m.%d-%H.%M.%S')"
printf 'Start Time : %s\n' "${current_time}"

# Get model if missing: ${LORA_PATH} is fetched through
# scripts/sync_artifacts.py only when it is not already a local directory.
if [ -d "${LORA_PATH}" ]; then
    echo "Model already exists locally"
else
    echo "Downloading model"
    # Quoted so the experiment name reaches the sync script as one argument.
    # Stop on failure: the later merge step would otherwise create
    # ${LORA_PATH}_merged and treat the model as merged on the next run.
    if ! python scripts/sync_artifacts.py "${EXP_NAME}"; then
        echo "Failed to download model for ${EXP_NAME}" >&2
        exit 1
    fi
    sleep 5
fi

# Local directories for evaluation datasets and for temporary downloads.
# (DOWLOAD_PATH keeps its original spelling so the variable name is unchanged.)
EVAL_DATA_PATH=${HOME}/data/eval
DOWLOAD_PATH=${HOME}/data/downloads
# Quoted so a HOME containing whitespace still yields exactly these two paths.
mkdir -p "${EVAL_DATA_PATH}" "${DOWLOAD_PATH}"


# MMLU dataset: downloaded once and unpacked so that the archive's top-level
# data/ directory ends up at exactly ${MMLU_DATA_PATH}.
MMLU_DATA_PATH=${HOME}/data/eval/mmlu/data
if [ -d "${MMLU_DATA_PATH}" ]; then
    echo "MMLU data already exists"
else
    echo "Downloading MMLU data"
    mmlu_archive="${HOME}/data/downloads/mmlu_data.tar"
    mmlu_unpack_dir="${HOME}/data/downloads/mmlu_data"
    # Create the staging directory and the parent of the final location up
    # front. Without the parent, mv would rename data/ to .../eval/mmlu and
    # ${MMLU_DATA_PATH} would still be missing afterwards.
    mkdir -p "${mmlu_unpack_dir}" "$(dirname "${MMLU_DATA_PATH}")"
    if ! { wget -O "${mmlu_archive}" 'https://people.eecs.berkeley.edu/~hendrycks/data.tar' \
        && tar -xvf "${mmlu_archive}" -C "${mmlu_unpack_dir}" \
        && mv "${mmlu_unpack_dir}/data" "${MMLU_DATA_PATH}"; }; then
        echo "Failed to download or unpack MMLU data" >&2
        # Drop the staging leftovers so a retry starts from a clean state.
        rm -rf -- "${mmlu_unpack_dir}" "${mmlu_archive}"
        exit 1
    fi
    rm -r "${mmlu_unpack_dir}" "${mmlu_archive}"

    # Azure disks
    sleep 5
fi

# Big-Bench-Hard dataset: the GitHub archive is unpacked in a staging
# directory and its BIG-Bench-Hard-main/ folder becomes ${BBH_DATA_PATH}.
BBH_DATA_PATH=${HOME}/data/eval/bbh
if [ -d "${BBH_DATA_PATH}" ]; then
    echo "BBH data already exists"
else
    echo "Downloading BBH data"
    bbh_archive="${HOME}/data/downloads/bbh_data.zip"
    bbh_unpack_dir="${HOME}/data/downloads/bbh"
    mkdir -p "${bbh_unpack_dir}" "$(dirname "${BBH_DATA_PATH}")"
    # unzip -o: overwrite without prompting, so stale staging files from an
    # interrupted run cannot stall the script on an interactive question.
    if ! { wget -O "${bbh_archive}" https://github.com/suzgunmirac/BIG-Bench-Hard/archive/refs/heads/main.zip \
        && unzip -o "${bbh_archive}" -d "${bbh_unpack_dir}" \
        && mv "${bbh_unpack_dir}/BIG-Bench-Hard-main/" "${BBH_DATA_PATH}"; }; then
        echo "Failed to download or unpack BBH data" >&2
        # Drop the staging leftovers so a retry starts from a clean state.
        rm -rf -- "${bbh_unpack_dir}" "${bbh_archive}"
        exit 1
    fi
    rm -r "${bbh_unpack_dir}" "${bbh_archive}"

    echo "Done downloading BBH data"
fi

# TyDiQA-GoldP dataset: the dev and train JSON files are stored directly in
# ${TYDIQA_DATA_PATH}.
TYDIQA_DATA_PATH=${HOME}/data/eval/tydiqa
if [ -d "${TYDIQA_DATA_PATH}" ]; then
    echo "TyDiQA data already exists"
else
    echo "Downloading TyDiQA data"
    mkdir -p "${TYDIQA_DATA_PATH}"
    for tydiqa_split in dev train; do
        if ! wget -P "${TYDIQA_DATA_PATH}/" \
            "https://storage.googleapis.com/tydiqa/v1.1/tydiqa-goldp-v1.1-${tydiqa_split}.json"; then
            echo "Failed to download TyDiQA ${tydiqa_split} data" >&2
            # The directory's existence is the "already downloaded" marker
            # above, so it must not survive a failed download.
            rm -rf -- "${TYDIQA_DATA_PATH:?}"
            exit 1
        fi
    done

    echo "Done downloading TyDiQA data"
fi

# GSM dataset: only the test split is fetched, into ${GSM_DATA_PATH}.
GSM_DATA_PATH=${HOME}/data/eval/gsm
if [ -d "${GSM_DATA_PATH}" ]; then
    echo "GSM data already exists"
else
    echo "Downloading GSM data"
    mkdir -p "${GSM_DATA_PATH}"
    if ! wget -P "${GSM_DATA_PATH}/" \
        https://github.com/openai/grade-school-math/raw/master/grade_school_math/data/test.jsonl; then
        echo "Failed to download GSM data" >&2
        # The directory's existence is the "already downloaded" marker above,
        # so it must not survive a failed download.
        rm -rf -- "${GSM_DATA_PATH:?}"
        exit 1
    fi

    echo "Done downloading GSM data"
fi

# Codex HumanEval: the gzipped problem file is stored in ${CODEX_DATA_PATH}.
CODEX_DATA_PATH=${HOME}/data/eval/codex_humaneval
if [ -d "${CODEX_DATA_PATH}" ]; then
    echo "Codex HumanEval data already exists"
else
    echo "Downloading Codex HumanEval data"
    mkdir -p "${CODEX_DATA_PATH}"
    if ! wget -P "${CODEX_DATA_PATH}" \
        https://github.com/openai/human-eval/raw/master/data/HumanEval.jsonl.gz; then
        echo "Failed to download Codex HumanEval data" >&2
        # The directory's existence is the "already downloaded" marker above,
        # so it must not survive a failed download.
        rm -rf -- "${CODEX_DATA_PATH:?}"
        exit 1
    fi

    echo "Done downloading Codex HumanEval data"
fi

# Base model the LoRA weights are merged into, and where the merged model
# is written (next to the LoRA directory, with a "_merged" suffix).
BASE_MODEL_PATH=${HOME}/models/meta_llama_Llama_2_7b_hf
TMP_OUTPUT_DIR=${LORA_PATH}_merged

# Check if the model is already merged
if [ -d "${TMP_OUTPUT_DIR}" ]; then
    echo "Model already merged"
else
    echo "Creating merged model"
    mkdir -p "${TMP_OUTPUT_DIR}"
    if ! python open-instruct/open_instruct/merge_lora.py \
        --base_model_name_or_path "${BASE_MODEL_PATH}" \
        --lora_model_name_or_path "${LORA_PATH}" \
        --output_dir "${TMP_OUTPUT_DIR}"; then
        echo "Merging LoRA weights failed" >&2
        # The directory was created just above and doubles as the "already
        # merged" marker; remove it so the next run retries the merge.
        rm -rf -- "${TMP_OUTPUT_DIR:?}"
        exit 1
    fi

    sleep 5
fi

# Per-experiment directory under which each benchmark gets its own sub-folder.
RESULT_PATH=${HOME}/exp_results/${EXP_NAME}
# Quoted so HOME or an experiment name with whitespace stays a single path.
mkdir -p "${RESULT_PATH}"

echo "Evaluating merged model"

# Presumably lets `python -m eval.*` below resolve modules inside the
# open-instruct checkout; note this replaces any inherited PYTHONPATH.
export PYTHONPATH=open-instruct

# MMLU evaluation (zero-shot, 8-bit, Tulu chat format). Skipped when the
# result directory already exists.
echo "Evaluating MMLU"
MMLU_RESULT_PATH=${RESULT_PATH}/mmlu
if [ -d "${MMLU_RESULT_PATH}" ]; then
    echo "MMLU results already exist"
else
    mkdir -p "${MMLU_RESULT_PATH}"
    if python -m eval.mmlu.run_eval \
        --ntrain 0 \
        --data_dir "${MMLU_DATA_PATH}" \
        --save_dir "${MMLU_RESULT_PATH}" \
        --model_name_or_path "${TMP_OUTPUT_DIR}" \
        --tokenizer_name_or_path "${LORA_PATH}" \
        --eval_batch_size 20 \
        --load_in_8bit \
        --use_chat_format \
        --chat_formatting_function eval.templates.create_prompt_with_tulu_chat_format; then
        echo "Done evaluating MMLU"
    else
        echo "MMLU evaluation failed" >&2
        # An empty result directory would make the next run skip MMLU, so
        # remove it; non-empty output is left in place for inspection.
        if [ -d "${MMLU_RESULT_PATH}" ] && ! rmdir -- "${MMLU_RESULT_PATH}" 2>/dev/null; then
            echo "Partial output left in ${MMLU_RESULT_PATH}; delete it to re-run MMLU" >&2
        fi
    fi
    echo "Syncing results"
    python scripts/sync_artifacts.py
fi


# GSM evaluation (8-shot, at most 200 examples, 8-bit, Tulu chat format).
# Skipped when the result directory already exists.
echo "Evaluating GSM"
GSM_RESULT_PATH=${RESULT_PATH}/gsm
if [ -d "${GSM_RESULT_PATH}" ]; then
    echo "GSM results already exist"
else
    if ! python -m eval.gsm.run_eval \
        --max_num_examples 200 \
        --data_dir "${GSM_DATA_PATH}" \
        --save_dir "${GSM_RESULT_PATH}" \
        --model_name_or_path "${TMP_OUTPUT_DIR}" \
        --tokenizer_name_or_path "${LORA_PATH}" \
        --eval_batch_size 20 \
        --n_shot 8 \
        --load_in_8bit \
        --use_chat_format \
        --chat_formatting_function eval.templates.create_prompt_with_tulu_chat_format; then
        echo "GSM evaluation failed" >&2
        # An empty result directory would make the next run skip GSM, so
        # remove it; non-empty output is left in place for inspection.
        if [ -d "${GSM_RESULT_PATH}" ] && ! rmdir -- "${GSM_RESULT_PATH}" 2>/dev/null; then
            echo "Partial output left in ${GSM_RESULT_PATH}; delete it to re-run GSM" >&2
        fi
    fi
    echo "Syncing results"
    python scripts/sync_artifacts.py
fi

# TyDiQA evaluation (1-shot, 100 examples per language, context capped at
# 512, 8-bit, Tulu chat format). Skipped when the result directory exists.
echo "Evaluating TyDiQA"
TYDIQA_RESULT_PATH=${RESULT_PATH}/tydiqa
if [ -d "${TYDIQA_RESULT_PATH}" ]; then
    echo "TyDiQA results already exist"
else
    mkdir -p "${TYDIQA_RESULT_PATH}"
    if python -m eval.tydiqa.run_eval \
        --n_shot 1 \
        --max_num_examples_per_lang 100 \
        --max_context_length 512 \
        --data_dir "${TYDIQA_DATA_PATH}" \
        --save_dir "${TYDIQA_RESULT_PATH}" \
        --model_name_or_path "${TMP_OUTPUT_DIR}" \
        --tokenizer_name_or_path "${LORA_PATH}" \
        --eval_batch_size 20 \
        --load_in_8bit \
        --use_chat_format \
        --chat_formatting_function eval.templates.create_prompt_with_tulu_chat_format; then
        echo "Done evaluating TyDiQA"
    else
        echo "TyDiQA evaluation failed" >&2
        # An empty result directory would make the next run skip TyDiQA, so
        # remove it; non-empty output is left in place for inspection.
        if [ -d "${TYDIQA_RESULT_PATH}" ] && ! rmdir -- "${TYDIQA_RESULT_PATH}" 2>/dev/null; then
            echo "Partial output left in ${TYDIQA_RESULT_PATH}; delete it to re-run TyDiQA" >&2
        fi
    fi
    echo "Syncing results"
    python scripts/sync_artifacts.py
fi

# BBH evaluation (at most 40 examples per task, 8-bit, Tulu chat format).
# Skipped when the result directory already exists.
echo "Evaluating BBH"
BBH_RESULT_PATH=${RESULT_PATH}/bbh
if [ -d "${BBH_RESULT_PATH}" ]; then
    echo "BBH results already exist"
else
    mkdir -p "${BBH_RESULT_PATH}"
    if python -m eval.bbh.run_eval \
        --data_dir "${BBH_DATA_PATH}" \
        --save_dir "${BBH_RESULT_PATH}" \
        --model_name_or_path "${TMP_OUTPUT_DIR}" \
        --tokenizer_name_or_path "${LORA_PATH}" \
        --eval_batch_size 20 \
        --max_num_examples_per_task 40 \
        --load_in_8bit \
        --use_chat_format \
        --chat_formatting_function eval.templates.create_prompt_with_tulu_chat_format; then
        echo "Done evaluating BBH"
    else
        echo "BBH evaluation failed" >&2
        # An empty result directory would make the next run skip BBH, so
        # remove it; non-empty output is left in place for inspection.
        if [ -d "${BBH_RESULT_PATH}" ] && ! rmdir -- "${BBH_RESULT_PATH}" 2>/dev/null; then
            echo "Partial output left in ${BBH_RESULT_PATH}; delete it to re-run BBH" >&2
        fi
    fi
    echo "Syncing results"
    python scripts/sync_artifacts.py
fi


# Codex HumanEval evaluation (pass@10 from 20 samples at temperature 0.8,
# 8-bit). Skipped when the result directory already exists.
echo "Evaluating Codex HumanEval"
CODEX_RESULT_PATH=${RESULT_PATH}/codex_humaneval
if [ -d "${CODEX_RESULT_PATH}" ]; then
    echo "Codex HumanEval results already exist"
else
    mkdir -p "${CODEX_RESULT_PATH}"
    if ! python -m eval.codex_humaneval.run_eval \
        --data_file "${CODEX_DATA_PATH}/HumanEval.jsonl.gz" \
        --eval_pass_at_ks 10 \
        --unbiased_sampling_size_n 20 \
        --temperature 0.8 \
        --eval_batch_size 32 \
        --save_dir "${CODEX_RESULT_PATH}" \
        --model_name_or_path "${TMP_OUTPUT_DIR}" \
        --tokenizer_name_or_path "${LORA_PATH}" \
        --load_in_8bit; then
        echo "Codex HumanEval evaluation failed" >&2
        # An empty result directory would make the next run skip this
        # benchmark, so remove it; non-empty output is left for inspection.
        if [ -d "${CODEX_RESULT_PATH}" ] && ! rmdir -- "${CODEX_RESULT_PATH}" 2>/dev/null; then
            echo "Partial output left in ${CODEX_RESULT_PATH}; delete it to re-run Codex HumanEval" >&2
        fi
    fi
    echo "Syncing results"
    python scripts/sync_artifacts.py
fi

# Log the moment the run finished, then the two closing status lines.
current_time="$(date +'%Y.%m.%d-%H.%M.%S')"
printf 'End Time : %s\n' "${current_time}"
printf '%s\n' "Done evaluating" "All done"