from __future__ import annotations

import json
from typing import Any, Dict, List, Optional, Sequence

import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix


def evaluate_predictions(
    df: pd.DataFrame,
    pred_col: str,
    gold_col: str,
    labels: Sequence[str] = ("Up", "Down", "Still"),
    output_file_json: Optional[str] = None,
) -> Dict[str, Any]:
    """Score a prediction column against a gold column of ``df``.

    Only rows whose gold value and predicted value are both members of
    ``labels`` take part in the evaluation; every other row is ignored.

    Args:
        df: Table holding both the reference and the predicted labels.
        pred_col: Name of the column with predicted labels.
        gold_col: Name of the column with reference (gold) labels.
        labels: Label set to evaluate. It is also handed to scikit-learn as
            the explicit label list for the PRF scores and confusion matrix.
        output_file_json: If truthy, path the result dict is written to as
            indented UTF-8 JSON.

    Returns:
        A dict that always contains ``n_samples_eval``. When at least one row
        qualifies it also holds ``accuracy``, macro- and weighted-averaged
        precision/recall/F1, the ``labels`` list and ``confusion_matrix`` as
        nested lists. When no row qualifies it holds ``n_samples_eval`` equal
        to 0 and an explanatory ``note`` instead.
    """
    gold_ok = df[gold_col].isin(labels)
    pred_ok = df[pred_col].isin(labels)
    scored = df[gold_ok & pred_ok]

    results: Dict[str, Any]
    if not scored.empty:
        label_list = list(labels)
        y_true = scored[gold_col].tolist()
        y_pred = scored[pred_col].tolist()

        results = {
            "n_samples_eval": len(y_true),
            "accuracy": float(accuracy_score(y_true, y_pred)),
        }
        # Same call for both averaging modes; keys are inserted as
        # precision_*, recall_*, f1_* so the JSON field order stays stable.
        for average in ("macro", "weighted"):
            scores = precision_recall_fscore_support(
                y_true,
                y_pred,
                labels=label_list,
                average=average,
                zero_division=0,
            )
            # zip stops after the three scores and skips the trailing support.
            for metric, value in zip(("precision", "recall", "f1"), scores):
                results[f"{metric}_{average}"] = float(value)

        results["labels"] = label_list
        results["confusion_matrix"] = confusion_matrix(
            y_true, y_pred, labels=label_list
        ).tolist()
    else:
        results = {
            "n_samples_eval": 0,
            "note": "No valid (gold,pred) pairs among labels.",
        }

    if output_file_json:
        with open(output_file_json, "w", encoding="utf-8") as out_fh:
            json.dump(results, out_fh, indent=2)
    return results
