import json
from dotenv import load_dotenv
import pandas as pd
from pathlib import Path

from llm_utils import TextGenApi
from tp_lodge.utils.pddl_domain_syntax import parse_predicate
from state_estimation.predicate_optim_params import compute_decision_metric, compute_decision_metric_all


def _load_predictions(results_dir: Path) -> pd.DataFrame:
    """Flatten every ``<variant>/sample-*/predictions.json`` below *results_dir* into one table.

    Each ``predictions.json`` is read as a mapping ``state_hash -> {predicate -> evaluation}``.

    Args:
        results_dir: Directory that contains one sub-directory per variant, each holding
            ``sample-*`` directories.

    Returns:
        A DataFrame with one row per (state_hash, predicate) entry and the columns
        ``state_hash``, ``variant``, ``seed``, ``predicate`` and ``evaluation``.
        ``variant`` and ``seed`` are the two path components of the sample directory
        relative to *results_dir*. The frame is empty (but keeps its columns) when
        nothing is found.

    Raises:
        FileNotFoundError: If a sample directory has no ``predictions.json``.
    """
    columns = ["state_hash", "variant", "seed", "predicate", "evaluation"]
    rows = []

    # Sorted so that row order (and therefore the summary files) is reproducible;
    # glob order is otherwise unspecified.
    for sample_dir in sorted(results_dir.glob("*/sample-*")):
        if not sample_dir.is_dir():
            continue

        predictions_file = sample_dir / "predictions.json"
        if not predictions_file.is_file():
            # Explicit error instead of `assert`, which is stripped under `python -O`.
            raise FileNotFoundError(f"Missing predictions file: {predictions_file}")

        # The glob pattern guarantees exactly two components: <variant>/<sample-*>.
        # Computed once per sample directory rather than once per predicate.
        variant, seed = sample_dir.relative_to(results_dir).parts

        predictions = json.loads(predictions_file.read_text())
        for state_hash, preds in predictions.items():
            for pred_name, pred_eval in preds.items():
                rows.append(
                    {
                        "state_hash": state_hash,
                        "variant": variant,
                        "seed": seed,
                        "predicate": pred_name,
                        "evaluation": pred_eval,
                    }
                )

    # Passing `columns` keeps the schema even when no rows were collected.
    return pd.DataFrame(rows, columns=columns)


def _score_variants(df: pd.DataFrame, reference_variant: str) -> pd.DataFrame:
    """Score every (variant, seed) pair in *df* against *reference_variant* for the same seed.

    Args:
        df: Table as produced by ``_load_predictions``.
        reference_variant: Name of the variant whose evaluations are used as the
            comparison baseline.

    Returns:
        A DataFrame with the columns ``variant``, ``seed`` and ``score``, where ``score``
        is the unweighted mean of the values returned by ``compute_decision_metric``.
        Pairs that share no (state_hash, predicate) entry with the reference are skipped.
    """
    seeds = df["seed"].unique()

    # The reference slice depends only on the seed, so build it once per seed
    # instead of once per (variant, seed) pair.
    reference_by_seed = {
        seed: df[(df["variant"] == reference_variant) & (df["seed"] == seed)].rename(
            columns={"evaluation": "vlm"}
        )
        for seed in seeds
    }

    rows = []
    for variant in df["variant"].unique():
        for seed in seeds:
            # NOTE(review): the evaluated variant is labelled "grounder" and the reference
            # variant is labelled "vlm". Given the default reference "lodge-grounder" this
            # looks swapped; kept as-is because compute_decision_metric is not visible
            # here -- confirm whether the metric is symmetric.
            pred_df = df[(df["variant"] == variant) & (df["seed"] == seed)].rename(
                columns={"evaluation": "grounder"}
            )
            gt_df = reference_by_seed[seed]

            merged_df = pred_df.merge(gt_df, on=["state_hash", "predicate"], how="inner")
            if merged_df.empty:
                # Seeds are collected across all variants, so a pair may have no overlap
                # with the reference; averaging nothing would divide by zero.
                print(
                    f"Skipping variant={variant!r} seed={seed!r}: "
                    f"no predictions shared with {reference_variant!r}"
                )
                continue

            merged_df["pred"] = merged_df["predicate"].apply(
                lambda p: parse_predicate(p, only_variables=False).name
            )
            merged_df = merged_df[["pred", "grounder", "vlm"]]

            decision_metric = compute_decision_metric(merged_df)
            # `len(...) == 0` rather than truthiness: only `.values()` and `len()` are
            # known to be supported by the returned object.
            if len(decision_metric) == 0:
                print(f"Skipping variant={variant!r} seed={seed!r}: empty decision metric")
                continue
            avg_decision_metric = sum(decision_metric.values()) / len(decision_metric)

            rows.append(
                {
                    "variant": variant,
                    "seed": seed,
                    "score": avg_decision_metric,
                }
            )

    # Passing `columns` keeps the schema (needed by the later groupby) even if every
    # pair was skipped.
    return pd.DataFrame(rows, columns=["variant", "seed", "score"])


def main(
    domain: str = "fb-lamp",
    vlm_llm: str = "gpt4.1-mini",
    reference_variant: str = "lodge-grounder",
) -> None:
    """Summarise predicate-learning evaluation results for one domain and model.

    Reads all ``predictions.json`` files below
    ``<root>/results/<domain>/predicate-learning-eval/<model_dir>``, scores each
    (variant, seed) pair against *reference_variant*, prints the per-seed and the
    per-variant mean scores, and writes them to ``summary.csv`` and
    ``summary_mean.csv`` in that same directory.

    Args:
        domain: Name of the results sub-directory to evaluate.
        vlm_llm: Model identifier passed to ``TextGenApi.default``; only used to
            resolve the model-specific results directory.
        reference_variant: Variant every other variant is compared against.

    Raises:
        FileNotFoundError: If a sample directory has no ``predictions.json``.
        RuntimeError: If no predictions are found at all.
    """
    root_dir = Path(__file__).parent.parent.parent

    textgen_api = TextGenApi.default(vlm_llm)
    model_dir = textgen_api.connections.connections[0].model_dir
    results_dir = root_dir / "results" / domain / "predicate-learning-eval" / model_dir

    df = _load_predictions(results_dir)
    if df.empty:
        # Fail with a clear message instead of an opaque KeyError on a column lookup.
        raise RuntimeError(f"No predictions found under {results_dir}")

    sum_df = _score_variants(df, reference_variant)
    summed_df = sum_df.groupby("variant").mean(numeric_only=True)

    print(sum_df)
    print("-" * 100)
    print(summed_df)

    # Let pandas open the files itself: writing the string returned by `to_csv()` through
    # a text-mode file translates the line terminators a second time on Windows.
    sum_df.to_csv(results_dir / "summary.csv")
    summed_df.to_csv(results_dir / "summary_mean.csv")


# Script entry point: runs only when the file is executed directly, not on import.
if __name__ == "__main__":
    # load_dotenv() is called before main() so that anything it loads is in place first.
    # NOTE(review): presumably this supplies the credentials TextGenApi needs -- confirm.
    load_dotenv()
    main()
