from eval_tool import load_all_questions, load_answers, enrich_answers, analysis, summary, summary_xidk
from eval_relation import load_relations, load_relation_clf, relation_summary, merge_relations_by_action, update_summary_by_relations
from eval_pvalue import compute_pvals, p_value_matrixs
from split_heatmap import dataframe_to_heatmap_csvs
from split import split
import os
import json
import datetime

if __name__ == "__main__":
    root_dir = os.path.dirname(os.path.abspath(__name__))
    print("Root directory:", root_dir)
    datasets=["spinach", "qawiki",'synthetic','LC-QuAD']

    llm_path = f"{root_dir}/data/llm_info.json"
    with open(llm_path, "r", encoding="utf-8") as f:
        llms = json.load(f).keys()
    actions = ["wikidata", "fixing", "classification","star","zero-shot"]
    tasks = ['equal', 'sup-sub', "minus"]
    languages = ['en']


    # Define output folder path
    output_folder = os.path.join(root_dir, "output")
    os.makedirs(output_folder, exist_ok=True)

    # Relation Classification
    df_relation = load_relations(root_dir + "/data/answers/zero-shot/", datasets, llms)
    df_relation_clf = load_relation_clf(root_dir + "/data/answers/rel_classification_and_questions/", datasets, llms, tasks)
    # From your df_relation:
    df_relation_summery = relation_summary(df_relation, include_overall=True, round_digits=4)

    # Save if you want:
    time = datetime.datetime.now()
    relation_file_format = time.strftime("relations_%Y-%m-%d_%H-%M.csv")
    summary_file_format = time.strftime("relation_summary_%Y-%m-%d_%H-%M.csv")
    summary_file_format_excel = time.strftime("relation_summary_%Y-%m-%d_%H-%M.xlsx")

    # df_relation.to_csv(os.path.join(output_folder, relation_file_format), index=False)
    df_relation_summery.to_csv(os.path.join(output_folder, "relation_summery.csv"), index=False)
    # df_relation_summery.to_excel(os.path.join(output_folder, summary_file_format_excel), index=False)


    df_questions = load_all_questions(root_dir, datasets, languages)
    df_answers = load_answers(
        folder=root_dir + "/data/answers/",
        datasets = datasets,
        llms=llms,
        actions=actions,
        tasks=tasks,
        languages=languages,
        questions=["Q1", "Q2", "Q3", "Q4"]
    )

    df_answers = enrich_answers(df_answers, df_questions)
    df_analysis = analysis(df_answers)
    df_analysis = merge_relations_by_action(df_analysis, df_relation, df_relation_clf)
    
    
    # Save results
    # analysis_file_format = time.strftime("analysis_%Y-%m-%d_%H-%M.csv")
    df_analysis.to_csv(os.path.join(output_folder, "analysis.csv"), index=False)

    # p-values
    df_pval = compute_pvals(df_analysis)

    df_summary = summary(df_analysis)
    
    df_summary = update_summary_by_relations(df_analysis, df_summary, task="zero-shot")
    df_summary = update_summary_by_relations(df_analysis, df_summary, task="classification")
    df_summary = df_summary.merge(df_pval, on=["dataset","llm","action"], how="left")
    df_summary.to_csv(os.path.join(output_folder, "summary.csv"), index=False)

    # summary_file_format = time.strftime("summary_%Y-%m-%d_%H-%M.csv")
    # summary_file_format_excel = time.strftime("summary_%Y-%m-%d_%H-%M.xlsx")


    df_summary_xidk = summary_xidk(df_analysis)
    df_summary_xidk = update_summary_by_relations(df_analysis, df_summary_xidk, task="zero-shot")
    df_summary_xidk = update_summary_by_relations(df_analysis, df_summary, task="classification")
    df_summary_xidk = df_summary_xidk.merge(df_pval, on=["dataset","llm","action"], how="left")
    
    df_summary_xidk.to_csv(os.path.join(output_folder, "summary_xidk.csv"), index=False)
    # df_summary.to_excel(os.path.join(output_folder, summary_file_format_excel), index=False)

    split(df_summary, "summary")
    split(df_summary_xidk, "summary_xidk")
    df_pvalue = p_value_matrixs(df_analysis, actions)
    p_value_matrixs_file_format = time.strftime("p_value_matrices_%Y-%m-%d_%H-%M.csv")
    df_pvalue.to_csv(os.path.join(output_folder, "p_value_matrices.csv"), index=False)
    dataframe_to_heatmap_csvs(df_pvalue, output_folder)
    print("Analysis and summary saved to:", output_folder)