import os
import sys

# Parent of the directory containing this file; every path below is built
# relative to it.
root_dir = os.path.join(os.path.dirname(__file__), "../")
# Prepend the alpaca_eval sources under third_party/ to sys.path. This must
# run before the `alpaca_eval` import below (presumably so that import
# resolves to this bundled copy -- confirm).
sys.path.insert(0, os.path.join(root_dir, 'third_party/alpaca_eval/src'))

import torch
import pandas as pd
from alpaca_eval.metrics import get_length_controlled_winrate

from text_embeddings import MODELS as EMBEDDING_MODELS, get_embeddings

# Name of the per-model sub-directory whose annotations.json is collected.
ANNOTATOR = "weighted_alpaca_eval_gpt4_turbo"

# One entry per item found in the alpaca_eval results directory.
# os.listdir() returns names in arbitrary, filesystem-dependent order, so the
# listing is sorted to make the processing order -- and therefore the key
# order of the dicts saved below -- reproducible across machines and runs.
MODELS = sorted(os.listdir(os.path.join(root_dir, 'third_party/alpaca_eval/results')))

# Accumulator that is filled per model and finally written with torch.save.
data = {
    # Shared list of instructions, taken from the first usable model.
    "instructions": [],
    # model name -> tensor built from that model's `glm_preference` column.
    "eval_results": {},
    # model name -> that model's `output_2` column.
    "outputs": {},
}

# Number of rows an annotations file must have to be used; files with any
# other row count are reported as "incomplete" and skipped.
EXPECTED_NUM_ANNOTATIONS = 805

# Loop-invariant base directory holding one sub-directory per model.
results_dir = os.path.join(root_dir, 'third_party/alpaca_eval/results')

for model in MODELS:
    annotation_file = os.path.join(results_dir, model, ANNOTATOR, "annotations.json")
    if not os.path.exists(annotation_file):
        print(model, "does not exist")
        continue

    annotations = pd.read_json(annotation_file)
    if len(annotations) != EXPECTED_NUM_ANNOTATIONS:
        print(model, "incomplete")
        continue

    # Every model must have been annotated on the same instructions in the
    # same order, otherwise the per-example results are not comparable.
    # An explicit raise is used instead of `assert` so the check still runs
    # under `python -O`.
    instructions = annotations['instruction'].tolist()
    if not data['instructions']:
        data['instructions'] = instructions
    elif data['instructions'] != instructions:
        raise ValueError(
            f"Instructions for model {model!r} differ from those of the "
            "previously loaded models"
        )

    # NOTE(review): the returned metrics are not used. The call is kept
    # because it presumably adds the `glm_preference` column to `annotations`
    # in place, which is read on the next line -- confirm against the
    # alpaca_eval API before removing.
    get_length_controlled_winrate(annotations)
    probabilities = torch.tensor(annotations['glm_preference'].tolist())

    data['eval_results'][model] = probabilities
    # NOTE(review): this stores a pandas Series, unlike the list / tensor
    # stored in the other entries; left unchanged so the saved file format
    # stays the same for existing consumers.
    data['outputs'][model] = annotations['output_2']
    
# Directory that receives both artifacts; created on demand.
output_dir = os.path.join(root_dir, "data/alpaca_eval")
os.makedirs(output_dir, exist_ok=True)

# Persist the collected instructions, per-model results and outputs.
data_path = os.path.join(output_dir, "data.pth")
torch.save(data, data_path)

# Embed the shared instruction list with the "bert" entry of the embedding
# model registry and store the result next to the data file.
# NOTE(review): the positional 8 is presumably a batch size -- confirm
# against text_embeddings.get_embeddings.
bert_model = EMBEDDING_MODELS["bert"]()
embeddings = get_embeddings(bert_model, data['instructions'], 8)
bert_path = os.path.join(output_dir, "bert.pth")
torch.save(embeddings, bert_path)