import numpy as np
import json
import pandas as pd

from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction
from rouge import Rouge

def split(sentense: str):
    """Flatten a JSON-lines instruction string into a list of lowercase tokens.

    Each non-blank line of ``sentense`` is parsed as a JSON object. For every
    key/value pair the key is tokenized first (except for the keys ``"in"``
    and ``"out"``, which are skipped), followed by the value: a string value
    is tokenized directly, any other value is iterated and each item is
    tokenized. Empty strings are represented by the placeholder token
    ``"none"`` so that they still occupy a position in the sequence.

    Args:
        sentense: Newline-separated JSON objects, one per line.

    Returns:
        A list of at most 500 lowercase tokens.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    result = []
    for json_str in sentense.split("\n"):
        # Tolerate blank and whitespace-only lines (e.g. a trailing newline).
        if not json_str.strip():
            continue
        instrction = json.loads(json_str)
        for k in instrction:
            v = instrction[k]
            # The original test `k != "in" or k != "out"` was always true, so
            # these two keys were never actually excluded.
            if k not in ("in", "out"):
                result.extend(word_tokenize(k.lower()))
            # Treat a bare string as a one-element sequence so both value
            # shapes share the same tokenization path.
            values = [v] if isinstance(v, str) else v
            for item in values:
                if item != "":
                    result.extend(word_tokenize(item.lower()))
                else:
                    # word_tokenize("") yields nothing; keep a placeholder.
                    result.append("none")
    # Cap the sequence length at 500 tokens.
    return result[:500]


def evaluation(inference, target):
    """Score one prediction against its reference with BLEU-1 and ROUGE-L.

    Both strings are tokenized with ``split``; the resulting token lists feed
    BLEU-1 directly and, joined with single spaces, ROUGE-L.

    Args:
        inference: Predicted JSON-lines string.
        target: Ground-truth JSON-lines string.

    Returns:
        A dict with the keys ``"BLEU-1"``, ``"ROUGE-L_R"``, ``"ROUGE-L_P"``
        and ``"ROUGE-L_F"``. All four are 0 when ``inference`` is empty or
        produces no tokens.
    """
    zero_scores = {
        "BLEU-1": 0,
        "ROUGE-L_R": 0,
        "ROUGE-L_P": 0,
        "ROUGE-L_F": 0
    }
    if inference == "":
        return zero_scores

    # Tokenize each side once and reuse the tokens for both metrics.
    candidate_tokens = split(inference)
    if not candidate_tokens:
        # A prediction such as "\n" or "{}" is non-empty but yields no tokens;
        # Rouge cannot score an empty hypothesis, so treat it like "".
        return zero_scores
    reference_tokens = split(target)

    ## BLEU-1
    smoothie = SmoothingFunction().method1
    BLEU1 = sentence_bleu(
        [reference_tokens],
        candidate_tokens,
        weights=(1, 0, 0, 0),  # unigram precision only
        smoothing_function=smoothie,
    )

    ## ROUGE-L
    rouge = Rouge()
    rouge_score = rouge.get_scores(
        hyps=[" ".join(candidate_tokens)],
        refs=[" ".join(reference_tokens)],
    )
    ROUGEL = rouge_score[0]["rouge-l"]

    return {
        "BLEU-1": BLEU1,
        "ROUGE-L_R": ROUGEL["r"],
        "ROUGE-L_P": ROUGEL["p"],
        "ROUGE-L_F": ROUGEL["f"]
    }

def _load_json(path):
    """Read and parse the JSON file at ``path``."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _build_score_table(data, methods, gt_key, display_names=None, verbose=False):
    """Evaluate every method in ``methods`` against ``data[gt_key]``.

    Args:
        data: Parsed result file; holds one list of predictions per method,
            the ground-truth list under ``gt_key`` and a ``"filename"`` list.
        methods: Keys of ``data`` whose predictions are scored.
        gt_key: Key of the ground-truth list in ``data``.
        display_names: Optional mapping from method key to the label written
            to the table; methods without an entry keep their key.
        verbose: When true, print the prediction and ground-truth counts for
            each method.

    Returns:
        A long-format DataFrame with the columns ``method``, ``evaluation``,
        ``value`` and ``dataset`` (one row per sample, method and metric).
    """
    score_remap = {
        "BLEU-1": "BLEU-1",
        "ROUGE-L_R": "ROUGE-L(Recall)",
        "ROUGE-L_P": "ROUGE-L(Precision)",
        "ROUGE-L_F": "ROUGE-L(F1)"
    }
    display_names = display_names or {}
    gt = data[gt_key]
    names = data["filename"]

    csv_table = {"method": [], "evaluation": [], "value": [], "dataset": []}
    for method in methods:
        results = data[method]
        if verbose:
            print(len(results), len(gt))

        # NOTE: zip stops at the shortest list, so extra entries are ignored.
        for inf, target, name in zip(results, gt, names):
            # The dataset label is the filename part before the first "_".
            dataset = name.split("_")[0]
            for key, value in evaluation(inf, target).items():
                csv_table["method"].append(display_names.get(method, method))
                csv_table["evaluation"].append(score_remap[key])
                csv_table["value"].append(value)
                csv_table["dataset"].append(dataset)
    return pd.DataFrame(csv_table)


def _save_and_summarize(df, csv_path):
    """Write ``df`` to ``csv_path`` and print the mean value per metric and method."""
    import os

    mean_values = df.groupby(['evaluation', 'method'])['value'].mean()
    out_dir = os.path.dirname(csv_path)
    if out_dir:
        # Create the output directory so the CSV write cannot fail on it.
        os.makedirs(out_dir, exist_ok=True)
    df.to_csv(csv_path, index=False)
    print(mean_values)


if __name__ == "__main__":
    # Stage 1: scores for the *-SY methods.
    data = _load_json("data/translate/translate_result.json")
    print(len(data["ConDec-SY"]))
    df = _build_score_table(
        data,
        methods=["Ours-SY", "ConDec-SY", "DSL-LLM-SY"],
        gt_key="GT-SY",
        verbose=True,
    )
    _save_and_summarize(df, "evaluation_result/stage_1.csv")

    # Stage 3: scores for the *-E methods, relabelled for reporting.
    data = _load_json("data/translate/reagent_flow_graph.json")
    df = _build_score_table(
        data,
        methods=["Ours-E", "DSL-LLM-E"],
        gt_key="GT-E",
        display_names={"Ours-E": "Ours", "DSL-LLM-E": "Best-Baseline"},
    )
    _save_and_summarize(df, "evaluation_result/stage_3.csv")

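# Mean scores for the *-SY methods, presumably pasted from an earlier run;
# the metric and method labels differ from what the script currently prints.
# evaluation  method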
# BLEU-1      ConDec-SY     0.325319
#             DSL-LLM-SY    0.398418
#             Human-SY      0.833614
#             Ours-SY       0.772129
# ROUGE-L_F   ConDec-SY     0.383718
#             DSL-LLM-SY    0.369566
#             Human-SY      0.923745
#             Ours-SY       0.855052
# ROUGE-L_P   ConDec-SY     0.337166
#             DSL-LLM-SY    0.312133
#             Human-SY      0.989432
#             Ours-SY       0.947911
# ROUGE-L_R   ConDec-SY     0.490997
#             DSL-LLM-SY    0.487519
#             Human-SY      0.876000
#             Ours-SY       0.787290

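# Mean scores for the *-E methods, presumably pasted from an earlier run;
# the metric and method labels differ from what the script currently prints.
# evaluation  method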
# BLEU-1      Human-E    0.889986
#             LLM-E      0.387294
#             Ours-E     0.810767
# ROUGE-L_F   Human-E    0.933353
#             LLM-E      0.271329
#             Ours-E     0.874679
# ROUGE-L_P   Human-E    0.987337
#             LLM-E      0.212531
#             Ours-E     0.978608
# ROUGE-L_R   Human-E    0.895059
#             LLM-E      0.444946
#             Ours-E     0.811830
