from typing import Any
import pandas as pd
from mledojo.metrics.base import CompetitionMetrics, InvalidSubmissionError


class AmateurRadioSimplexFMMetrics(CompetitionMetrics):
    """Macro-F1 metric for Amateur Radio 2-Meter FM Simplex callsign identification.

    Expected CSV/DataFrame schema:
    - Columns: ["id", "callsign"] (column order does not matter)
    - One row per id; ids in submission must match ground truth exactly.

    Ids and labels are compared after conversion to ``str``, so an integer id
    ``7`` and the string ``"7"`` refer to the same row.
    """

    def __init__(self, value: str = "callsign", higher_is_better: bool = True):
        """Remember which column holds the label.

        Args:
            value: Name of the label column in both submission and ground truth.
            higher_is_better: Forwarded unchanged to the base-class initializer.
        """
        super().__init__(higher_is_better)
        self.value = value

    def _macro_f1(self, y_true_labels: list[str], y_pred_labels: list[str]) -> float:
        """Return the unweighted mean of per-class F1 scores.

        The average runs over the labels that occur in ``y_true_labels`` only.
        A predicted label that never occurs in the ground truth counts as a
        miss for the true class of that row but does not add a class of its
        own to the average.

        Args:
            y_true_labels: Ground-truth labels, one per sample.
            y_pred_labels: Predicted labels, positionally aligned with
                ``y_true_labels``.

        Returns:
            Macro-averaged F1 in ``[0.0, 1.0]``; ``0.0`` when there are no
            ground-truth labels.
        """
        # Compute per-class precision/recall/F1 and macro-average without sklearn
        labels = sorted(set(y_true_labels))
        tp = {lab: 0 for lab in labels}
        pred_pos = {lab: 0 for lab in labels}
        act_pos = {lab: 0 for lab in labels}

        for t, p in zip(y_true_labels, y_pred_labels):
            act_pos[t] += 1
            # Predictions of labels unseen in the ground truth are not tallied.
            if p in pred_pos:
                pred_pos[p] += 1
            if p == t:
                tp[t] += 1

        f1s = []
        for lab in labels:
            a = act_pos[lab]
            b = pred_pos[lab]
            t = tp[lab]
            prec = t / b if b > 0 else 0.0
            rec = t / a if a > 0 else 0.0
            f1 = (2 * prec * rec) / (prec + rec) if (prec + rec) > 0 else 0.0
            f1s.append(max(0.0, min(1.0, float(f1))))
        return float(sum(f1s) / len(f1s)) if f1s else 0.0

    def _labels_by_id(self, frame: pd.DataFrame) -> list[str]:
        """Return the label column of *frame* as strings, ordered by string id."""
        # Work positionally on fresh RangeIndex copies so that a non-unique or
        # non-default index on the caller's frame cannot affect the ordering.
        ids = frame["id"].astype(str).reset_index(drop=True)
        labels = frame[self.value].astype(str).reset_index(drop=True)
        order = ids.sort_values(kind="mergesort").index.to_numpy()
        return labels.iloc[order].tolist()

    def evaluate(self, y_true: pd.DataFrame, y_pred: pd.DataFrame) -> float:
        """Validate the submission and score it with macro-F1.

        Args:
            y_true: Ground-truth frame with columns ``"id"`` and the label column.
            y_pred: Submission frame with the same columns.

        Returns:
            The macro-F1 score of ``y_pred`` against ``y_true``.

        Raises:
            InvalidSubmissionError: If ``validate_submission`` rejects the input.
        """
        # Validate basic structure first
        self.validate_submission(y_pred, y_true)

        # Align rows on the "id" column by name. Validation accepts the columns
        # in any order, so the first column is not guaranteed to be the id.
        true_labels = self._labels_by_id(y_true)
        pred_labels = self._labels_by_id(y_pred)
        return self._macro_f1(true_labels, pred_labels)

    def validate_submission(self, submission: Any, ground_truth: Any) -> str:
        """Check that *submission* is structurally compatible with *ground_truth*.

        Args:
            submission: Candidate predictions; must be a DataFrame.
            ground_truth: Reference labels; must be a DataFrame.

        Returns:
            The string ``"Submission is valid."`` when every check passes.

        Raises:
            InvalidSubmissionError: On a wrong type, wrong columns, row-count
                mismatch, duplicate ids, ids containing path separators, ids
                that differ from the ground truth, or missing/blank labels.
        """
        # Type checks
        if not isinstance(submission, pd.DataFrame):
            raise InvalidSubmissionError(
                "Submission must be a pandas DataFrame. Please provide a valid pandas DataFrame."
            )
        if not isinstance(ground_truth, pd.DataFrame):
            raise InvalidSubmissionError(
                "Ground truth must be a pandas DataFrame. Please provide a valid pandas DataFrame."
            )

        # Column checks. The length test rejects duplicated column names, which
        # a set comparison alone would let through.
        sub_cols = list(submission.columns)
        gt_cols = list(ground_truth.columns)
        expected_cols = ["id", self.value]
        if len(sub_cols) != len(expected_cols) or set(sub_cols) != set(expected_cols):
            raise InvalidSubmissionError(
                f"Submission must have exactly these columns: {expected_cols}. Found: {sub_cols}"
            )
        if len(gt_cols) != len(expected_cols) or set(gt_cols) != set(expected_cols):
            raise InvalidSubmissionError(
                f"Ground truth must have exactly these columns: {expected_cols}. Found: {gt_cols}"
            )

        # Row count must match
        if len(submission) != len(ground_truth):
            raise InvalidSubmissionError(
                f"Number of rows in submission ({len(submission)}) does not match ground truth ({len(ground_truth)})."
            )

        # Compare ids as strings. Casting before sorting keeps the order
        # consistent when the two frames store ids with different dtypes.
        sub_ids = submission["id"].astype(str)
        gt_ids = ground_truth["id"].astype(str)

        # Report the most specific problem first: duplicates and path
        # separators would otherwise be masked by the generic mismatch error.
        if sub_ids.duplicated().any():
            raise InvalidSubmissionError("Duplicate ids found in submission.")
        if any(('/' in i) or ('\\' in i) for i in sub_ids):
            raise InvalidSubmissionError("IDs must not contain path separators.")

        sub_sorted_ids = sub_ids.sort_values(kind="mergesort").reset_index(drop=True)
        gt_sorted_ids = gt_ids.sort_values(kind="mergesort").reset_index(drop=True)
        if not sub_sorted_ids.equals(gt_sorted_ids):
            raise InvalidSubmissionError("Submission ids do not match ground truth ids.")

        # callsign values must be non-empty strings
        calls = submission[self.value]
        if calls.isnull().any() or any(str(x).strip() == "" for x in calls):
            raise InvalidSubmissionError("All callsign values must be non-empty strings.")

        return "Submission is valid."
