#!/bin/bash


# Section toggles: a non-zero value runs the section, 0 skips it.
# Every section is enabled by default. A section can be switched off without
# editing this file by setting the matching environment variable, e.g.
#   EVAL_BASE=0 EVAL_MULTIHOP=0 bash <this-script>
eval_math="${EVAL_MATH:-1}"          # checkpoints under trained_models/gsm8k
eval_multihop="${EVAL_MULTIHOP:-1}"  # checkpoints under trained_models/entailment-bank
eval_base="${EVAL_BASE:-1}"          # base models (no distillation)

# -------------- GSM8K checkpoints (math tasks) -----------------

#######################################
# Evaluate one GSM8K checkpoint on the math task list, if it exists.
# The checkpoint is treated as present when its directory contains
# training_args.bin; otherwise a skip message is printed and nothing is
# launched.
# Arguments:
#   $1 - model directory name             (e.g. opt-1.3b)
#   $2 - model name used in log messages  (e.g. OPT-1.3B)
#   $3 - method directory name            (e.g. self_evolve_cl)
#   $4 - method name used in log messages (e.g. Self-Evolve CL)
# Outputs:
#   Progress messages on stdout; lm_eval is pointed at
#   lm_eval_results/gsm8k/<model dir>/<method dir>/.
#######################################
eval_gsm8k_checkpoint() {
    local model_dir=$1 model_name=$2 method_dir=$3 method_name=$4
    local ckpt="trained_models/gsm8k/${model_dir}/${method_dir}"
    local tasks="gsm8k_cot_my,addsub_cot,asdiv_cot,aquarat_cot,multiarith_cot,svamp_cot"
    local tag="GSM8K ${model_name} ${method_name}"

    if [[ ! -f "${ckpt}/training_args.bin" ]]; then
        echo "${tag} model not found, skipping evaluation."
        return 0
    fi

    echo "${tag} found, evaluating..."
    # The exit status of the run is not checked: a failed evaluation does not
    # stop the remaining checkpoints from being evaluated.
    accelerate launch --config_file configs/accelerate_config/multi_gpu.yaml -m lm_eval \
        --model hf --model_args "pretrained=${ckpt}" \
        --tasks "${tasks}" \
        --include_path "eval_tasks/" \
        --batch_size 8 \
        --output_path "lm_eval_results/gsm8k/${model_dir}/${method_dir}/" \
        --write_out --show_config --log_samples
    echo "Finished Evaluating ${tag}"
}

#######################################
# Evaluate every GSM8K checkpoint: for each model, Self-Evolve CL first and
# then KD Original.
#######################################
run_gsm8k_evals() {
    local spec model_dir model_name
    # Each entry is "<directory name>:<name used in log messages>".
    for spec in \
        "opt-1.3b:OPT-1.3B" \
        "opt-2.7b:OPT-2.7B" \
        "pythia-1.4b:Pythia-1.4B" \
        "pythia-2.8b:Pythia-2.8B"; do
        model_dir=${spec%%:*}
        model_name=${spec#*:}
        eval_gsm8k_checkpoint "${model_dir}" "${model_name}" self_evolve_cl "Self-Evolve CL"
        eval_gsm8k_checkpoint "${model_dir}" "${model_name}" kd_original "KD Original"
    done
}

# An unset or empty flag is treated as 0 (section skipped).
if [[ "${eval_math:-0}" -ne 0 ]]; then
    run_gsm8k_evals
fi


# -------------- Entailment Bank checkpoints (multihop tasks) -----------------

#######################################
# Evaluate one Entailment Bank checkpoint on the multihop task list, if it
# exists. The checkpoint is treated as present when its directory contains
# training_args.bin; otherwise a skip message is printed and nothing is
# launched.
# Arguments:
#   $1 - model directory name             (e.g. opt-1.3b)
#   $2 - model name used in log messages  (e.g. OPT-1.3B)
#   $3 - method directory name            (e.g. kd_original)
#   $4 - method name used in log messages (e.g. KD Original)
# Outputs:
#   Progress messages on stdout; lm_eval is pointed at
#   lm_eval_results/entailment-bank/<model dir>/<method dir>/.
#######################################
eval_entailment_checkpoint() {
    local model_dir=$1 model_name=$2 method_dir=$3 method_name=$4
    local ckpt="trained_models/entailment-bank/${model_dir}/${method_dir}"
    local tasks="entailmentbank_cot,boolq_cot,qasc_cot,qasc_cot_option,strategyqa_cot,openbookqa_cot,openbookqa_cot_option,hotpot_support_only,musique_support_only"
    local tag="Entailment Bank ${model_name} ${method_name}"

    if [[ ! -f "${ckpt}/training_args.bin" ]]; then
        echo "${tag} model not found, skipping evaluation."
        return 0
    fi

    echo "${tag} found, evaluating..."
    # Every method writes under lm_eval_results/entailment-bank/, matching the
    # trained_models/ layout. KD Original runs used to be written to
    # lm_eval_results/entailmentbank/ (no hyphen), so results produced before
    # this change may still be in that directory.
    # The exit status of the run is not checked: a failed evaluation does not
    # stop the remaining checkpoints from being evaluated.
    accelerate launch --config_file configs/accelerate_config/multi_gpu.yaml -m lm_eval \
        --model hf --model_args "pretrained=${ckpt}" \
        --tasks "${tasks}" \
        --include_path "eval_tasks/" \
        --batch_size 8 \
        --output_path "lm_eval_results/entailment-bank/${model_dir}/${method_dir}/" \
        --write_out --show_config --log_samples
    echo "Finished Evaluating ${tag}"
}

#######################################
# Evaluate every Entailment Bank checkpoint: for each model, Self-Evolve CL
# first and then KD Original.
#######################################
run_entailment_evals() {
    local spec model_dir model_name
    # Each entry is "<directory name>:<name used in log messages>".
    for spec in \
        "opt-1.3b:OPT-1.3B" \
        "opt-2.7b:OPT-2.7B" \
        "pythia-1.4b:Pythia-1.4B" \
        "pythia-2.8b:Pythia-2.8B"; do
        model_dir=${spec%%:*}
        model_name=${spec#*:}
        eval_entailment_checkpoint "${model_dir}" "${model_name}" self_evolve_cl "Self-Evolve CL"
        eval_entailment_checkpoint "${model_dir}" "${model_name}" kd_original "KD Original"
    done
}

# An unset or empty flag is treated as 0 (section skipped).
if [[ "${eval_multihop:-0}" -ne 0 ]]; then
    run_entailment_evals
fi


# -------------- Base Models (No Distillation) -----------------

#######################################
# Evaluate one base model, first on the math task list and then on the
# multihop task list.
# Arguments:
#   $1 - model id passed to lm_eval as pretrained=<id> (e.g. facebook/opt-1.3b)
#   $2 - model name used in log messages                (e.g. OPT-1.3B)
# Outputs:
#   Progress messages on stdout. Both runs point lm_eval at
#   lm_eval_results/base/<last path component of $1>/.
#######################################
eval_base_model() {
    local model_id=$1 model_name=$2
    local out_dir="lm_eval_results/base/${model_id##*/}/"
    local math_tasks="gsm8k_cot_my,addsub_cot,asdiv_cot,aquarat_cot,multiarith_cot,svamp_cot"
    local multihop_tasks="entailmentbank_cot,boolq_cot,qasc_cot,qasc_cot_option,strategyqa_cot,openbookqa_cot,openbookqa_cot_option,hotpot_support_only,musique_support_only"

    echo "Evaluating ${model_name} on Math..."
    # --include_path is passed for every model; the OPT-2.7B math run used to
    # omit it, unlike every other run in this file.
    accelerate launch --config_file configs/accelerate_config/multi_gpu.yaml -m lm_eval \
        --model hf --model_args "pretrained=${model_id}" \
        --tasks "${math_tasks}" \
        --include_path "eval_tasks/" \
        --batch_size 8 \
        --output_path "${out_dir}" \
        --write_out --show_config --log_samples

    echo "Evaluating ${model_name} on Multihop..."
    # NOTE(review): only the base-model multihop runs pass --limit 500; the
    # math runs here do not. Confirm the cap is intentional.
    accelerate launch --config_file configs/accelerate_config/multi_gpu.yaml -m lm_eval \
        --model hf --model_args "pretrained=${model_id}" \
        --tasks "${multihop_tasks}" \
        --include_path "eval_tasks/" \
        --batch_size 8 \
        --output_path "${out_dir}" \
        --write_out --show_config --log_samples --limit 500
    echo "Finished Evaluating ${model_name}"
}

#######################################
# Evaluate all four base models in order: OPT-1.3B, OPT-2.7B, Pythia-1.4B,
# Pythia-2.8B.
#######################################
run_base_evals() {
    local spec
    echo "Evaluating Base Models (No Distillation)..."
    # Each entry is "<model id>:<name used in log messages>".
    for spec in \
        "facebook/opt-1.3b:OPT-1.3B" \
        "facebook/opt-2.7b:OPT-2.7B" \
        "EleutherAI/pythia-1.4b:Pythia-1.4B" \
        "EleutherAI/pythia-2.8b:Pythia-2.8B"; do
        eval_base_model "${spec%%:*}" "${spec#*:}"
    done
    echo "Finished Evaluating Base Models (No Distillation)."
}

# An unset or empty flag is treated as 0 (section skipped).
if [[ "${eval_base:-0}" -ne 0 ]]; then
    run_base_evals
fi