import json
import pandas as pd
import os
import re

from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction
from rouge import Rouge

def split(a):
    """Break ``a`` into pieces at every single space character.

    The separator is given explicitly, so runs of consecutive spaces produce
    empty-string items and an empty input yields ``[""]``.
    """
    pieces = a.split(" ")
    return pieces

def evaluation_calc(inference: str, target: str) -> dict:
    """Score a generated text against a reference with BLEU-1 and ROUGE-L.

    Args:
        inference: The generated (candidate) text.
        target: The reference text.

    Returns:
        A dict with the keys "BLEU-1", "ROUGE-L_R", "ROUGE-L_P" and
        "ROUGE-L_F". All four values are 0 when ``inference`` is empty or
        consists only of whitespace.

    NOTE(review): ``target`` is not checked for emptiness; an empty reference
    presumably makes the ROUGE call fail -- confirm against the callers.
    """
    # Nothing to score. Whitespace-only text is handled like the empty string:
    # splitting it on single spaces would only yield empty-string tokens, and
    # those must not be scored as if they were words.
    if not inference.strip():
        return {
            "BLEU-1": 0,
            "ROUGE-L_R": 0,
            "ROUGE-L_P": 0,
            "ROUGE-L_F": 0,
        }

    ## BLEU-1
    # Unigram-only weights; sentence_bleu expects a list of tokenised
    # references and a single tokenised candidate.
    candidate_tokens = split(inference)
    reference_tokens = [split(target)]
    smoothie = SmoothingFunction().method1
    BLEU1 = sentence_bleu(
        reference_tokens,
        candidate_tokens,
        weights=(1, 0, 0, 0),
        smoothing_function=smoothie,
    )

    ## ROUGE-L
    # Rouge takes the texts as plain strings, one hypothesis per reference.
    rouge = Rouge()
    rouge_score = rouge.get_scores(hyps=[inference], refs=[target])
    ROUGEL = rouge_score[0]["rouge-l"]

    return {
        "BLEU-1": BLEU1,
        "ROUGE-L_R": ROUGEL["r"],
        "ROUGE-L_P": ROUGEL["p"],
        "ROUGE-L_F": ROUGEL["f"],
    }

def _collect_score_table():
    """Load every run's ``results.json`` and flatten the scores into columns.

    Returns:
        A dict of four equally long lists ("method", "evaluation", "value",
        "dataset") holding one entry per individual score.

    Raises:
        FileNotFoundError: If one of the expected result files is missing.
        KeyError: If a result file lacks "evaluation_list" or contains a
            score name that has no display name below.
    """
    # Display names used in the output table.
    remap = {"Pure-LLM": "NL-LLM-SE", "RAG-LLM": "NL-RAG-LLM-SE", "Ours": "Ours-SE"}
    score_remap = {
        "BLEU-1": "BLEU-1",
        "ROUGE-L_R": "ROUGE-L(Recall)",
        "ROUGE-L_P": "ROUGE-L(Precision)",
        "ROUGE-L_F": "ROUGE-L(F1)",
    }
    datasets = ["Synthesis", "BioEng", "Ecology", "Genetics", "Medical"]
    methods = ["Pure-LLM", "RAG-LLM", "Ours"]

    csv_table = {"method": [], "evaluation": [], "value": [], "dataset": []}

    # The numeric suffix of each run directory counts up by one per
    # (dataset, method) pair, datasets in the outer loop.
    runs = ((dataset, method) for dataset in datasets for method in methods)
    for index, (dataset, method) in enumerate(runs):
        # The directory spelling is kept as-is because it has to match the
        # name on disk. NOTE(review): confirm "mathine" is intended.
        filepath = f"data/mathine_completion_results/{method}_{dataset}_{index}/results/results.json"
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

        for raw_evaluation in data["evaluation_list"]:
            # Each list entry is itself a JSON-encoded string of scores.
            evaluation = json.loads(raw_evaluation)
            for key, value in evaluation.items():
                csv_table["method"].append(remap[method])
                csv_table["evaluation"].append(score_remap[key])
                csv_table["value"].append(value)
                csv_table["dataset"].append(dataset)

    return csv_table


def main():
    """Aggregate all run results, write them to CSV and print the mean scores.

    Writes the long-format table to ``evaluation_result/stage_2.csv`` and
    prints the row count followed by the mean value per (evaluation, method).
    """
    csv_table = _collect_score_table()

    # Number of long-format rows collected (the original note on this line
    # read "575 blanks").
    print(len(csv_table["dataset"]))

    df = pd.DataFrame(csv_table)
    mean_values = df.groupby(["evaluation", "method"])["value"].mean()

    # Create the output directory up front so the write cannot fail on a
    # fresh checkout after all the inputs have been loaded.
    os.makedirs("evaluation_result", exist_ok=True)
    df.to_csv("evaluation_result/stage_2.csv", index=False)
    print(mean_values)


if __name__ == "__main__":
    main()


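# Recorded results, in the layout printed by `print(mean_values)`: the mean
# score per (evaluation, method) pair. NOTE: the "Human-SE" rows have no
# counterpart among the methods this script loads, so they presumably come
# from a separate run.
# evaluation          method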
# BLEU-1              Human-SE         0.559338
#                     NL-LLM-SE        0.233729
#                     NL-RAG-LLM-SE    0.245814
#                     Ours-SE          0.301883
# ROUGE-L(F1)         Human-SE         0.610037
#                     NL-LLM-SE        0.247877
#                     NL-RAG-LLM-SE    0.260974
#                     Ours-SE          0.315540
# ROUGE-L(Precision)  Human-SE         0.668696
#                     NL-LLM-SE        0.246120
#                     NL-RAG-LLM-SE    0.256993
#                     Ours-SE          0.310381
# ROUGE-L(Recall)     Human-SE         0.586377
#                     NL-LLM-SE        0.254928
#                     NL-RAG-LLM-SE    0.276145
#                     Ours-SE          0.327971