import os, csv, time
from typing import Dict, Any, List
from eval.utils import (
    read_json, write_json, ensure_dir,
    find_pred_files, kind_from_path, safe_set
)
from eval.metrics import (
    binary_confusion, binary_metrics,
    set_metrics, aggregate_macro, aggregate_micro
)

def evaluate(
    root_dir: str,
    out_root: str,
    kind: str = "all",
    pred_prefixes: List[str] | None = None,
) -> Dict[str, Any]:
    """Score prediction JSON files and write a timestamped evaluation report.

    Prediction files are discovered under ``root_dir`` with
    ``find_pred_files``. Each file is read as JSON and is expected to hold a
    ``"prediction"`` object and a ``"label"`` object. Two things are scored:

    * step level: ``prediction["step_anomaly"]`` against
      ``label["step_error"]``, accumulated into one binary confusion matrix;
    * operator level: the set in ``prediction["op_error_names"]`` against the
      set in ``label["op_error_names"]``, scored per file with ``set_metrics``
      and then aggregated (macro, micro, and macro over files whose label set
      is non-empty).

    Three files are written into ``<out_root>/<YYYYmmdd_HHMMSS>/``:
    ``summary.json``, ``step_records.csv`` and ``skipped.json``.

    Args:
        root_dir: Directory searched for prediction files.
        out_root: Parent directory in which the timestamped output directory
            is created.
        kind: ``"all"`` to evaluate every discovered file; any other value
            keeps only files for which ``kind_from_path(path) == kind``.
        pred_prefixes: Forwarded to ``find_pred_files`` as ``prefixes``.

    Returns:
        A dict with ``"out_dir"`` (the created directory), ``"files"`` (paths
        of the three written files, keyed ``summary`` / ``step_records`` /
        ``skipped``) and ``"meta"`` (the ``meta`` section of the summary).
    """
    # Take a single clock reading so the directory name and the recorded
    # ``created_at`` can never disagree across a second boundary.
    now = time.localtime()
    ts = time.strftime("%Y%m%d_%H%M%S", now)
    out_dir = os.path.join(out_root, ts)
    ensure_dir(out_dir)

    files = find_pred_files(root_dir, prefixes=pred_prefixes)
    if kind != "all":
        files = [p for p in files if kind_from_path(p) == kind]

    step_counts = {"tp": 0, "fp": 0, "tn": 0, "fn": 0}
    step_records: List[Dict[str, Any]] = []
    op_records_macro_list: List[Dict[str, float]] = []
    op_pred_sets: List[set] = []
    op_label_sets: List[set] = []
    skipped: List[Dict[str, Any]] = []

    for p in files:
        obj = read_json(p)
        if not isinstance(obj, dict):
            # A JSON root that is not an object cannot carry label/prediction.
            skipped.append({"path": p, "reason": "pred_json_not_an_object"})
            continue

        label = obj.get("label")
        if not label:
            skipped.append({"path": p, "reason": "no_label_in_pred_json"})
            continue

        # ``.get(key, {})`` only covers a *missing* key; an explicit JSON
        # ``null`` would come back as None and crash on ``.get`` below.
        pred = obj.get("prediction") or {}
        if not isinstance(label, dict) or not isinstance(pred, dict):
            # Record malformed entries instead of aborting the whole run.
            skipped.append({"path": p, "reason": "malformed_label_or_prediction"})
            continue

        step_pred = bool(pred.get("step_anomaly", False))
        step_label = bool(label.get("step_error", False))
        tp, fp, tn, fn = binary_confusion(step_pred, step_label)
        step_counts["tp"] += tp
        step_counts["fp"] += fp
        step_counts["tn"] += tn
        step_counts["fn"] += fn

        op_pred = safe_set(pred.get("op_error_names", []))
        op_true = safe_set(label.get("op_error_names", []))
        op_rec = set_metrics(op_pred, op_true)

        op_records_macro_list.append(op_rec)
        op_pred_sets.append(op_pred)
        op_label_sets.append(op_true)

        step_records.append({
            "pred_path": p,
            "kind": kind_from_path(p),
            "step_pred": int(step_pred),
            "step_label": int(step_label),
            "op_pred_count": len(op_pred),
            "op_label_count": len(op_true),
            "op_precision": op_rec["precision"],
            "op_recall": op_rec["recall"],
            "op_f1": op_rec["f1"],
            "op_jaccard": op_rec["jaccard"],
        })

    step_metrics = binary_metrics(
        step_counts["tp"], step_counts["fp"], step_counts["tn"], step_counts["fn"]
    )
    op_macro = aggregate_macro(op_records_macro_list)
    op_micro = aggregate_micro(op_pred_sets, op_label_sets)
    # Macro average restricted to files whose label set is non-empty.
    op_macro_pos = aggregate_macro([
        rec
        for rec, lab in zip(op_records_macro_list, op_label_sets)
        if len(lab) > 0
    ])

    summary = {
        "meta": {
            "root_dir": root_dir,
            "filter_kind": kind,
            "created_at": time.strftime("%Y-%m-%d %H:%M:%S", now),
            "pred_files_total": len(files),
            "evaluated": len(step_records),
            "skipped": len(skipped),
            # NOTE(review): ["pred"] presumably mirrors the default prefix
            # used by find_pred_files -- confirm against eval.utils.
            "pred_prefixes": pred_prefixes or ["pred"],
        },
        "step_level": {"counts": step_counts, **step_metrics},
        "operator_level": {
            "macro": op_macro,
            "micro": op_micro,
            "macro_positive_only": op_macro_pos,
        },
    }

    summary_path = os.path.join(out_dir, "summary.json")
    write_json(summary_path, summary)

    csv_path = os.path.join(out_dir, "step_records.csv")
    csv_fields = [
        "pred_path", "kind", "step_pred", "step_label",
        "op_pred_count", "op_label_count",
        "op_precision", "op_recall", "op_f1", "op_jaccard",
    ]
    # newline="" lets the csv module control line endings itself.
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.DictWriter(f, fieldnames=csv_fields)
        w.writeheader()
        w.writerows(step_records)

    skipped_path = os.path.join(out_dir, "skipped.json")
    write_json(skipped_path, {"skipped": skipped})

    return {
        "out_dir": out_dir,
        "files": {
            "summary": summary_path,
            "step_records": csv_path,
            "skipped": skipped_path,
        },
        "meta": summary["meta"],
    }
