import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# Names of the models to include in the summary heatmap. Each name is used
# both as the stem of that model's results file
# (`scripts/model_evals/results/<name>.csv`) and as the model's column in the
# plotted table; the order here is the order the per-model columns are added.
MODELS = [
    'gpt-3.5-turbo',
    'flan-alpaca-gpt4-xl',
    'flan-alpaca-xxl',
    'vicuna-7b-v1.3',
    'vicuna-13b-v1.3',
    'Llama-2-7b-chat-hf',
    'Llama-2-13b-chat-hf',
    'stable-vicuna-13B-HF',
]

def load_accuracy_table(models, results_dir):
    """Build an eval-by-model accuracy table from per-model results CSVs.

    Each model's results are read from ``{results_dir}/{model}.csv``, which
    must contain ``eval_name`` and ``accuracy`` columns.

    Args:
        models: Iterable of model names; each becomes one column, in order.
        results_dir: Directory holding one ``<model>.csv`` file per model.

    Returns:
        A DataFrame indexed by ``eval_name`` with one accuracy column per
        model. Rows keep the order in which eval names are first seen; an
        eval missing from a model's file is NaN in that model's column.

    Raises:
        ValueError: If ``models`` is empty, or a results file lists the same
            ``eval_name`` more than once (label alignment would be ambiguous).
    """
    columns = {}
    for model in models:
        results = pd.read_csv(f'{results_dir}/{model}.csv')
        # Key every accuracy by its eval name so models are aligned by label
        # rather than by row position: files that list the evals in a
        # different order must not end up with mislabelled values.
        accuracy = results.set_index('eval_name')['accuracy']
        if accuracy.index.has_duplicates:
            raise ValueError(
                f'duplicate eval_name entries in results for {model}'
            )
        columns[model] = accuracy
    if not columns:
        raise ValueError('no models given; nothing to summarise')

    # The DataFrame constructor may sort the union of differing indexes, so
    # restore a deterministic first-seen row order explicitly.
    row_order = list(dict.fromkeys(
        name for series in columns.values() for name in series.index
    ))
    table = pd.DataFrame(columns)
    return table.reindex(pd.Index(row_order, name='eval_name'))


def plot_accuracy_heatmap(table, output_path):
    """Draw ``table`` as an annotated accuracy heatmap and save it.

    Args:
        table: Eval-by-model accuracy table, as returned by
            ``load_accuracy_table``.
        output_path: Path of the image file to write.
    """
    # The colour scale is pinned to [0, 1] so separate runs are comparable.
    sns.heatmap(table, annot=True, vmin=0, vmax=1, cmap='rocket_r', cbar_kws={'label': 'Accuracy'})
    plt.title('Model Evaluation')
    plt.savefig(output_path, bbox_inches="tight")


if __name__ == '__main__':
    accuracy_table = load_accuracy_table(MODELS, 'scripts/model_evals/results')
    plot_accuracy_heatmap(accuracy_table, 'scripts/model_evals/results_summary.png')
