import pandas as pd
import re
import Levenshtein

def remove_tools(input_str):
    """Return *input_str* with every ``Tool<digits>: `` marker removed.

    A marker is the literal word ``Tool``, one or more digits, a colon and
    a single space. All occurrences are deleted, wherever they appear.
    """
    tool_marker = re.compile(r'Tool\d+: ')
    return tool_marker.sub('', input_str)

def cat_metric(pred_list, truth_list):
    """Return the F1 score of *pred_list* measured against *truth_list*.

    Every element of ``pred_list`` counts as a true positive when it occurs
    in ``truth_list`` and as a false positive otherwise. Every element of
    ``truth_list`` that does not occur in ``pred_list`` counts as a false
    negative. Duplicates are counted once per occurrence.

    Returns:
        The harmonic mean of precision and recall, or ``0`` when it is
        undefined (both lists empty, or no element matches).
    """
    true_pos = 0
    false_pos = 0
    for item in pred_list:
        if item in truth_list:
            true_pos += 1
        else:
            false_pos += 1
    false_neg = sum(1 for item in truth_list if item not in pred_list)

    predicted_total = true_pos + false_pos
    relevant_total = true_pos + false_neg
    precision = true_pos / predicted_total if predicted_total > 0 else 0
    recall = true_pos / relevant_total if relevant_total > 0 else 0

    pr_sum = precision + recall
    if pr_sum > 0:
        return 2 * precision * recall / pr_sum
    return 0

def eval(path):
    """Score predicted step sequences stored in a JSON Lines file.

    The file is read with ``pd.read_json(path, lines=True)`` and must
    provide ``pred`` and ``label`` columns holding strings. For each row:

    * the prediction is cut at the first ``"</s>"`` and every
      ``"<|endoftext|>"`` is removed;
    * ``Tool<digits>: `` markers are removed from prediction and label;
    * both are lower-cased, stripped of trailing whitespace and split on
      newlines into a list of "nodes";
    * consecutive node pairs form the list of "edges".

    NOTE(review): the function name shadows the builtin ``eval``; it is
    kept because it is the public entry point.

    Args:
        path: Location of the JSON Lines file, passed to ``pd.read_json``.

    Returns:
        A tuple ``(node_f1, edge_f1, ned)`` where ``node_f1`` and
        ``edge_f1`` are the per-row ``cat_metric`` scores over nodes and
        edges averaged over all rows, and ``ned`` is one minus the
        per-row ``Levenshtein.ratio`` of the two node lists averaged over
        all rows.
    """
    df = pd.read_json(path, lines=True)
    node_f1_all, edge_f1_all, ned_all = 0, 0, 0

    # Only the columns that are actually used are read; an "id" column is
    # not required.
    for pred, label in zip(df["pred"], df["label"]):
        # Keep the text before the first end-of-sequence marker only.
        pred_text = pred.split("</s>")[0].replace("<|endoftext|>", "")
        pred_list = remove_tools(pred_text).lower().rstrip().split("\n")
        label_list = remove_tools(label).lower().rstrip().split("\n")

        # Edges are (current, next) pairs of adjacent nodes.
        pred_link_list = list(zip(pred_list, pred_list[1:]))
        label_link_list = list(zip(label_list, label_list[1:]))

        node_f1_all += cat_metric(pred_list, label_list)
        edge_f1_all += cat_metric(pred_link_list, label_link_list)
        # The ratio is a similarity; it is turned into a distance below.
        ned_all += Levenshtein.ratio(pred_list, label_list)

    total = len(df)
    node_f1 = node_f1_all / total
    edge_f1 = edge_f1_all / total
    ned = 1 - (ned_all / total)
    return node_f1, edge_f1, ned
