import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from argparse import ArgumentParser, Namespace
from pathlib import Path
import json
import pandas as pd
from evaluate.evaluator import Evaluator
from evaluate.bootstrap import BootStraper
from util.scenario import Scenario
from util.interface import CausalModel


def parse_args() -> Namespace:
    """Parse the command-line options of the result-export script.

    Returns:
        Namespace with:
          * ``llm_names`` -- list of model-name strings; an empty list means
            no explicit selection was made.
          * ``scenario_begin`` / ``scenario_end`` -- integer scenario bounds.
    """
    parser = ArgumentParser(description="Output Results")
    # nargs="*" collects whole model names. The previous type=list converter
    # was applied to a single argument string and split it into characters
    # (e.g. "Pika" -> ["P", "i", "k", "a"]).
    parser.add_argument("--llm_names", type=str, nargs="*",
                        default=["CogVideoX1.5-5b", "Hailuo", "Pika", "pyramid_flux", "CogVideoX-2b", "videocrafter2_base",
                                 "CogVideoX-5b", "HunyuanVideo", "Gen-3-Alpha", "Kling"],
                        help="Model to evaluate. Stay empty to evaluate all models")
    parser.add_argument("--scenario_begin", type=int, default=1, help="Ask scenarios from scenario_begin to scenario_end")
    parser.add_argument("--scenario_end", type=int, default=60, help="Ask scenarios from scenario_begin to scenario_end, -1 to ask all scenarios")
    args = parser.parse_args()
    # Drop empty strings so that `--llm_names ""` still produces an empty
    # list, as it did with the old converter; this also copies the default
    # list so the parser's default object is never shared with callers.
    args.llm_names = [name for name in args.llm_names if name]
    return args

def main():
    """Aggregate per-model result statistics into ``final_res.json``.

    For each selected model, reads ``results_<model>.csv`` from the model's
    folder inside the ``database`` directory (located two levels above this
    file), pairs the column means with the standard deviations returned by
    ``BootStraper.bootstrap_std()``, adds overall and level-1 count ratios,
    and writes everything as JSON into the ``database`` directory.
    """
    cli_args: Namespace = parse_args()
    db_root = Path(__file__).resolve().parent.parent / "database"
    # Result is not used below; the call is retained as in the original flow.
    _scenarios = Scenario.get_all_scenarios()

    if cli_args.llm_names:
        model_names = cli_args.llm_names
    else:
        # No explicit selection: take every sub-folder of the database
        # directory whose name does not end with "sample".
        model_names = [
            entry.name
            for entry in db_root.iterdir()
            if entry.is_dir() and not entry.name.endswith("sample")
        ]

    # Maps a metric key from the bootstrap output to its CSV column name.
    column_for_metric = {
        "metric_1_all_ignore": "metric1_all_ignore",
        "metric_1_roots_ignore": "metric1_roots_ignore",
        "metric_1_all_fault": "metric1_all_fault",
        "metric_1_roots_fault": "metric1_roots_fault",
        "metric_2_truth": "metric2_by_truth",
        "metric_2_observe": "metric2_by_observe",
        "metric_3_truth": "metric3_by_truth",
        "metric_3_observe": "metric3_by_observe"
    }

    summaries = {}
    for model in model_names:
        model_dir = db_root / model
        sampler = BootStraper(llm_name=model)
        frame = pd.read_csv(model_dir / f"results_{model}.csv", index_col=0)
        column_means = frame.mean()
        sampler.add_samples_small_scenarios(scenario_num=cli_args.scenario_end)
        metric_stds = sampler.bootstrap_std()

        summary = {
            metric: {
                "mean": column_means[column_for_metric[metric]],
                "std": metric_stds[metric]
            }
            for metric in metric_stds.keys()
        }
        summary["nan_ratio"] = frame["nan_cnt"].sum() / frame["total_cnt"].sum()

        level_1_nan = frame["level_1_nan_cnt"].sum()
        level_1_total = frame["level_1_total_cnt"].sum()
        level_1_correct = frame["level_1_correct_cnt"].sum()
        summary["level_1"] = {
            "nan_ratio": level_1_nan / level_1_total,
            "correct_ratio": level_1_correct / level_1_total
        }
        summaries[model] = summary

    destination = db_root / "final_res.json"
    with destination.open("w") as out_fp:
        json.dump(summaries, out_fp, indent=4)

# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()