"""
Site-specific evaluator for WebArena Verified tasks with dynamic evaluation function calls.

This module provides a site-specific evaluation system that dynamically calls
evaluation functions from resource-specific utility classes for domain-specific
validation logic.
"""

import inspect
import logging
from typing import Any, Dict, Optional, Type

from .models import AllocationResource

from .types import (
    WebArenaSite,
    WebArenaTask,
    WebarenaTaskEvalResult,
    WebArenaTaskResponse,
    EvalFunc,
)
from .gitlab.validator import GitLabValidator

# Import validator classes directly
from .magento.validator import MagentoValidator
from .reddit.validator import RedditValidator

# Future implementations - uncomment when available
# from .wikipedia.wikipedia_validator import WikipediaValidator
# from .map.map_validator import MapValidator

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


class SiteSpecificEvaluator:
    """
    Site-specific evaluator for WebArena Verified tasks.

    Looks up the validator class registered for a site, instantiates it, and
    invokes a named evaluation function on it, normalizing whatever that
    function returns into a ``WebarenaTaskEvalResult``.

    The evaluator keeps no per-instance state: a new validator is built on
    every ``get_validator_for_site`` call and nothing is cached.
    """

    # Value stored in ``task_type`` on every result built by this evaluator.
    _TASK_TYPE = "site_specific_evaluation"

    # Direct mapping from resource site names to their validator classes
    VALIDATOR_CLASS_MAPPING: Dict[WebArenaSite, Type[Any]] = {
        WebArenaSite.SHOPPING_ADMIN: MagentoValidator,
        WebArenaSite.SHOPPING: MagentoValidator,
        WebArenaSite.GITLAB: GitLabValidator,
        WebArenaSite.REDDIT: RedditValidator,
        # Future implementations - uncomment when available
        # WebArenaSite.WIKIPEDIA: WikipediaValidator,
        # WebArenaSite.MAP: MapValidator,
    }

    async def get_validator_for_site(
        self, site: WebArenaSite, resource: AllocationResource
    ) -> Optional[Any]:
        """
        Get the appropriate validator instance for the given site.
        Creates a new validator instance for each call (stateless).

        Args:
            site: The WebArena site used as the key into
                ``VALIDATOR_CLASS_MAPPING``.
            resource: The allocated resource. Currently unused here: the
                validator is built with its no-argument constructor.

        Returns:
            Optional[Any]: The validator instance, or None when the site has
            no mapping or the validator constructor raises.
        """
        validator_class = self.VALIDATOR_CLASS_MAPPING.get(site)

        if validator_class is None:
            logger.warning(f"No validator mapping found for site: {site}")
            return None

        try:
            # Create validator instance with default constructor
            return validator_class()
        except Exception as e:
            # Best-effort by design: a broken validator must not crash the
            # caller. logger.exception keeps the traceback for diagnosis.
            logger.exception(f"Failed to create validator for site {site}: {e}")
            return None

    async def call_evaluation_function(
        self,
        validator: Any,
        function_name: str,
        task: WebArenaTask,
        task_result: WebArenaTaskResponse,
        resource: AllocationResource,
        eval_func: EvalFunc,
        **kwargs,
    ) -> WebarenaTaskEvalResult:
        """
        Dynamically call the evaluation function on the validator.

        The attribute named ``function_name`` is looked up on ``validator``
        and called with keyword arguments ``task``, ``task_result``,
        ``resource``, ``eval_func`` plus any extra ``kwargs``. Both plain and
        awaitable return values are supported.

        Args:
            validator: The validator instance
            function_name: Name of the evaluation function to call
            task: The WebArena task
            task_result: The task execution result
            resource: The allocated resource
            eval_func: The evaluation function descriptor, forwarded to the
                validator unchanged
            **kwargs: Additional keyword arguments forwarded to the validator

        Returns:
            WebarenaTaskEvalResult: The evaluation result. A zero-score result
            with an explanatory message is returned when the function is
            missing, is not callable, or raises.
        """
        try:
            try:
                eval_function = getattr(validator, function_name)
            except AttributeError:
                return self._build_result(
                    task,
                    0.0,
                    [f"Evaluation function '{function_name}' not found on validator"],
                )

            if not callable(eval_function):
                return self._build_result(
                    task, 0.0, [f"'{function_name}' is not callable"]
                )

            eval_args = self._prepare_evaluation_arguments(
                task, task_result, resource, eval_func, **kwargs
            )

            # Handle both sync and async functions: call first, then await
            # only if what came back is awaitable.
            result = eval_function(**eval_args)
            if inspect.isawaitable(result):
                result = await result

            # Convert the result to WebarenaTaskEvalResult if needed
            return self._convert_to_eval_result(result, task)

        except Exception as e:
            # Any failure inside the validator is reported as a failed
            # evaluation rather than propagated to the caller.
            logger.exception(
                f"Error calling task {task.task_id}'s evaluation function '{function_name}': {e}"
            )
            return self._build_result(
                task,
                0.0,
                [f"Error calling task {task.task_id}'s evaluation function: {str(e)}"],
            )

    def _prepare_evaluation_arguments(
        self,
        task: WebArenaTask,
        task_result: WebArenaTaskResponse,
        resource: AllocationResource,
        eval_func: EvalFunc,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        Prepare arguments to pass to the evaluation function.

        Args:
            task: The WebArena task
            task_result: The task execution result
            resource: The allocated resource
            eval_func: The evaluation function descriptor
            **kwargs: Additional arguments, merged in after the four above

        Returns:
            Dict[str, Any]: Arguments dictionary for the evaluation function
        """
        return {
            "task": task,
            "task_result": task_result,
            "resource": resource,
            "eval_func": eval_func,
            **kwargs,
        }

    def _build_result(
        self, task: WebArenaTask, score: float, assertion_msgs: list
    ) -> WebarenaTaskEvalResult:
        """
        Build a ``WebarenaTaskEvalResult`` for ``task``.

        Args:
            task: The WebArena task; supplies ``task_id`` and ``intent``
            score: The score to record
            assertion_msgs: Messages to record (empty on success)

        Returns:
            WebarenaTaskEvalResult: Result tagged with this evaluator's
            task type.
        """
        return WebarenaTaskEvalResult(
            score=score,
            assertion_msgs=assertion_msgs,
            task_id=task.task_id,
            task_description=task.intent,
            task_type=self._TASK_TYPE,
        )

    def _convert_to_eval_result(
        self, result: Any, task: WebArenaTask
    ) -> WebarenaTaskEvalResult:
        """
        Convert the evaluation function result to WebarenaTaskEvalResult.

        Accepted shapes, checked in this order:
          1. a ``WebarenaTaskEvalResult`` (returned unchanged);
          2. any object with ``success`` and ``message`` attributes;
          3. a ``bool``;
          4. an ``int`` or ``float`` score (used as-is, not range-checked).
        Anything else is reported as a failure.

        Args:
            result: The result from the evaluation function
            task: The WebArena task

        Returns:
            WebarenaTaskEvalResult: The standardized evaluation result
        """
        # If it's already a WebarenaTaskEvalResult, return as-is
        if isinstance(result, WebarenaTaskEvalResult):
            return result

        # If it's a validation response with success/message pattern
        if hasattr(result, "success") and hasattr(result, "message"):
            if result.success:
                return self._build_result(task, 1.0, [])
            return self._build_result(task, 0.0, [result.message])

        # bool must be tested before the numeric case: bool is a subclass
        # of int and would otherwise be treated as a score.
        if isinstance(result, bool):
            if result:
                return self._build_result(task, 1.0, [])
            return self._build_result(task, 0.0, ["Site-specific evaluation failed"])

        # If it's a numeric score; anything other than exactly 1.0 carries
        # a failure message.
        if isinstance(result, (int, float)):
            score = float(result)
            messages = [] if score == 1.0 else ["Site-specific evaluation failed"]
            return self._build_result(task, score, messages)

        # Default case - treat as failure
        return self._build_result(
            task, 0.0, [f"Unknown evaluation result type: {type(result)}"]
        )
