from __future__ import annotations

import inspect
from typing import Any, Dict, List, Tuple

from pydantic import BaseModel, Field, ConfigDict

from learn.modules.base.evaluation import Evaluation, EvaluationResult, EvaluationMetric


class BenchmarkEvaluation(Evaluation):
    """Evaluation adapter that runs a candidate across benchmark worlds via a runner.

    All of the work is delegated to ``runner``; this class only repackages the
    runner's output as an ``EvaluationResult``.

    Runner contract (expected):
      - run_candidate(candidate) -> Tuple[List[Dict], Dict[EvaluationMetric, float]]
        returns (trajectories, metrics)
      - run_candidate may be either a coroutine function (``async def``) or a
        plain function: an awaitable return value is awaited, any other return
        value is used as is.
    """

    # Let pydantic accept field types it has no built-in validator for.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Required field (no default); typed as Any, so the contract above is not
    # enforced when the model is constructed.
    runner: Any = Field(..., description="Object exposing run_candidate(candidate)")

    async def evaluate_candidate(self, candidate) -> EvaluationResult:  # noqa: ANN001
        """Run ``candidate`` through the runner and wrap the output.

        Args:
            candidate: Object passed unchanged to ``runner.run_candidate``. Its
                ``round`` attribute, when present, is recorded on the result.

        Returns:
            An ``EvaluationResult`` carrying the runner's trajectories and
            metrics. ``candidate_round`` is ``-1`` when the candidate has no
            ``round`` attribute.

        Raises:
            TypeError, ValueError: If the runner's (awaited) return value cannot
                be unpacked into exactly two items. Exceptions raised by the
                runner itself propagate unchanged.
        """
        outcome = self.runner.run_candidate(candidate)
        # The documented contract shows a plain return value, so do not assume
        # a coroutine: awaiting a tuple would raise TypeError.
        if inspect.isawaitable(outcome):
            outcome = await outcome
        trajectories, metrics = outcome
        return EvaluationResult(
            candidate_round=getattr(candidate, "round", -1),
            metrics=metrics,
            trajectories=trajectories,
        )

