"""
Main evaluator orchestrator for WebArena Verified tasks.

This module provides the primary evaluation interface that determines whether
to use custom evaluation (for webarena_verified tasks) or standard evaluation,
and coordinates the evaluation process.
"""

import logging
from pathlib import Path
from urllib.parse import urlparse

from .models import AllocationResource

from .types import (
    EvalFunc,
    WebArenaTask,
    WebarenaTaskEvalResult,
    WebArenaTaskResponse,
)
from .evaluator_functions import execute_eval_function

logger = logging.getLogger(__name__)


class WebArenaEvaluator:
    """Orchestrate the evaluation of a single WebArena task.

    A task may declare up to three groups of checks on ``task.eval``:
    ``expected_ui_state``, ``expected_retrieve_value`` and
    ``expected_backend_state``.  Every non-empty group is evaluated, in that
    order, against one selected resource, and the individual results are
    concatenated into a single list.
    """

    def __init__(self, base_dir: Path | None = None):
        """Initialize the evaluator.

        Args:
            base_dir: Base directory for the run data.  It is only stored on
                the instance; no method of this class reads it.
        """
        self.base_dir = base_dir

    async def evaluate_task(
        self,
        *,
        task: WebArenaTask,
        task_result: WebArenaTaskResponse,
        resources: list[AllocationResource],
        **kwargs,
    ) -> list[WebarenaTaskEvalResult]:
        """Run every evaluation group declared by ``task`` and collect the results.

        Args:
            task: The WebArena task being evaluated.
            task_result: The result from the test execution.
            resources: Allocated resources used for the task; one of them is
                selected via ``_select_eval_resource``.
            **kwargs: Accepted for call-site compatibility and ignored.

        Returns:
            One result per executed evaluation, ordered UI, retrieve, backend.
            The list is empty when the task declares no evaluations.  If
            anything raises during orchestration (including an empty
            ``resources`` list), a single zero-score result with
            ``task_type="orchestration_error"`` is returned instead.
        """
        try:
            # All evaluation groups of a task run against the same resource.
            resource = self._select_eval_resource(task, resources)
            eval_results: list[WebarenaTaskEvalResult] = []

            if self._has_ui_eval(task):
                logger.info(f"Using UI evaluation for task {task.task_id}")
                eval_results.extend(
                    await self._process_ui_eval(
                        task=task, task_result=task_result, resource=resource
                    )
                )

            if self._has_retrieve_eval(task):
                logger.info(f"Using retrieve evaluation for task {task.task_id}")
                eval_results.extend(
                    await self._process_eval_list(
                        task=task,
                        evals=task.eval.expected_retrieve_value,
                        task_result=task_result,
                        resource=resource,
                    )
                )

            if self._has_backend_eval(task):
                logger.info(f"Using backend evaluation for task {task.task_id}")
                eval_results.extend(
                    await self._process_eval_list(
                        task=task,
                        evals=task.eval.expected_backend_state,
                        task_result=task_result,
                        resource=resource,
                    )
                )

            return eval_results

        except Exception as e:
            # Top-level boundary: never let an evaluation crash the caller,
            # but keep the traceback in the log for diagnosis.
            logger.exception(
                f"Error during evaluation orchestration for task {task.task_id}: {e}"
            )
            return [
                WebarenaTaskEvalResult(
                    score=0.0,
                    assertion_msgs=[f"Evaluation orchestration error: {str(e)}"],
                    task_id=task.task_id,
                    task_description=task.intent,
                    task_type="orchestration_error",
                )
            ]

    def _has_ui_eval(self, task: WebArenaTask) -> bool:
        """Return True when ``task.eval.expected_ui_state`` is non-empty."""
        return bool(task.eval.expected_ui_state)

    def _has_backend_eval(self, task: WebArenaTask) -> bool:
        """Return True when ``task.eval.expected_backend_state`` is non-empty."""
        return bool(task.eval.expected_backend_state)

    def _has_retrieve_eval(self, task: WebArenaTask) -> bool:
        """Return True when ``task.eval.expected_retrieve_value`` is non-empty."""
        return bool(task.eval.expected_retrieve_value)

    def _select_eval_resource(
        self, task: WebArenaTask, resources: list[AllocationResource]
    ) -> AllocationResource:
        """Pick the resource whose ``website_type`` equals ``task.eval.site``.

        Falls back to the first resource (with a warning) when none matches.

        Raises:
            ValueError: If ``resources`` is empty.
        """
        if not resources:
            raise ValueError("No resources available for evaluation")

        eval_site = task.eval.site
        matching = next(
            (candidate for candidate in resources if candidate.website_type == eval_site),
            None,
        )
        if matching is not None:
            return matching

        logger.warning(
            f"No resource found matching eval site '{eval_site}'. "
            f"Available resource types: {[r.website_type for r in resources]}. "
            f"Using first available resource."
        )
        return resources[0]

    async def _process_eval_list(
        self,
        task: WebArenaTask,
        evals: list[EvalFunc],
        task_result: WebArenaTaskResponse,
        resource: AllocationResource,
    ) -> list[WebarenaTaskEvalResult]:
        """Evaluate each entry of ``evals`` in order via ``_process_eval``.

        Returns a single zero-score ``"no_evaluations"`` result when ``evals``
        is empty or ``None``.
        """
        # NOTE(review): the annotation says ``list[EvalFunc]`` but each item is
        # read through ``.eval_func`` - the items look like wrappers around an
        # EvalFunc; confirm the element type and fix the annotation.
        results = [
            await self._process_eval(
                task=task,
                eval_func=eval_item.eval_func,
                task_result=task_result,
                resource=resource,
            )
            for eval_item in evals or ()
        ]
        if results:
            return results

        return [
            WebarenaTaskEvalResult(
                score=0.0,
                assertion_msgs=["No evaluations found in eval list"],
                task_id=task.task_id,
                task_description=task.intent,
                task_type="no_evaluations",
            )
        ]

    async def _process_ui_eval(
        self,
        task: WebArenaTask,
        task_result: WebArenaTaskResponse,
        resource: AllocationResource,
    ) -> list[WebarenaTaskEvalResult]:
        """Evaluate every entry of ``task.eval.expected_ui_state``.

        An entry with an ``eval_func`` is dispatched through ``_process_eval``
        with the entry's URL added to the eval params under ``"url"``; an
        entry without one is checked by URL comparison only.
        """
        ui_states = task.eval.expected_ui_state
        if not ui_states:
            return [
                WebarenaTaskEvalResult(
                    score=0.0,
                    assertion_msgs=["No UI state evaluations found"],
                    task_id=task.task_id,
                    task_description=task.intent,
                    task_type="no_ui_evaluations",
                )
            ]

        results = []
        for ui_state in ui_states:
            if ui_state.eval_func:
                # Copy before adding the URL so the task definition's own
                # eval_params dict is not mutated by running an evaluation.
                eval_params = dict(ui_state.eval_func.eval_params or {})
                eval_params["url"] = ui_state.url

                logger.debug(
                    f"Adding URL {ui_state.url} to eval params for {ui_state.eval_func.name}"
                )
                modified_eval_func = EvalFunc(
                    name=ui_state.eval_func.name,
                    eval_params=eval_params,
                    expected_data=ui_state.eval_func.expected_data,
                )

                result = await self._process_eval(
                    task=task,
                    eval_func=modified_eval_func,
                    task_result=task_result,
                    resource=resource,
                )
            else:
                # No eval function - just check URL
                result = await self._process_url_only_eval(
                    task=task,
                    expected_url=ui_state.url,
                    task_result=task_result,
                    resource=resource,
                    ignored_query_parameters=ui_state.ignored_query_parameters,
                )

            results.append(result)

        return results

    async def _process_eval(
        self,
        task: WebArenaTask,
        eval_func: EvalFunc,
        task_result: WebArenaTaskResponse,
        resource: AllocationResource,
    ) -> WebarenaTaskEvalResult:
        """Run one evaluation function through ``execute_eval_function``.

        Any exception is logged and converted into a zero-score
        ``"eval_processing_error"`` result so one failing check does not
        abort the remaining ones.
        """
        try:
            return await execute_eval_function(
                eval_func=eval_func,
                task=task,
                task_result=task_result,
                resource=resource,
            )

        except Exception as e:
            # getattr: the handler must not raise itself if eval_func is None
            # or otherwise lacks a name.
            func_name = getattr(eval_func, "name", None)
            logger.exception(
                f"Error processing evaluation function '{func_name}': {e}"
            )
            return WebarenaTaskEvalResult(
                score=0.0,
                assertion_msgs=[f"Error processing evaluation function: {str(e)}"],
                task_id=task.task_id,
                task_description=task.intent,
                task_type="eval_processing_error",
            )

    @staticmethod
    def _normalize_url(
        url: str,
        ignored_params: list[str] | None,
        resource: AllocationResource,
    ) -> tuple[str, frozenset[str]]:
        """Reduce a URL to a comparable ``(path, query parameters)`` pair.

        The ``__<WEBSITE_TYPE>__`` placeholder is replaced by the resource's
        base URL, scheme/host/fragment are dropped, one trailing slash is
        removed, and every ``key=value`` query segment that starts with one of
        ``ignored_params`` is discarded (prefix match).  ``None`` for
        ``ignored_params`` means nothing is ignored.
        """
        placeholder = f"__{resource.website_type.upper()}__"
        cleaned_url = url.replace(placeholder, resource.base_url).removesuffix("/")

        parts = urlparse(cleaned_url)
        # Also drop a trailing slash that is followed by a query or fragment
        # ("/cart/?id=5"), which the whole-string strip above cannot see.
        path = parts.path.removesuffix("/")

        prefixes = tuple(ignored_params or ())
        if not parts.query:
            return (path, frozenset())
        kept = frozenset(
            segment
            for segment in parts.query.split("&")
            if not segment.startswith(prefixes)
        )
        return (path, kept)

    @staticmethod
    def _strip_url(url: str) -> str:
        """Return ``url`` without its scheme and network location."""
        return urlparse(url)._replace(scheme="", netloc="").geturl()

    @staticmethod
    def _format_bullet_list(
        items: list[str] | None, strip_urls: bool = False
    ) -> list[str]:
        """Format ``items`` as markdown bullet lines; ``(None)`` when empty."""
        if not items:
            return ["  - (None)"]
        if strip_urls:
            items = [WebArenaEvaluator._strip_url(item) for item in items]
        return [f"  - `{item}`" for item in items]

    @staticmethod
    def _sorted_urls(
        normalized_urls: set[tuple[str, frozenset[str]]],
    ) -> list[tuple[str, frozenset[str]]]:
        """Return normalized URLs in a stable order (by path, then params)."""
        return sorted(normalized_urls, key=lambda item: (item[0], sorted(item[1])))

    @staticmethod
    def _format_urls_for_assertion(
        normalized_urls: set[tuple[str, frozenset[str]]],
    ) -> list[str]:
        """Format normalized URLs as markdown bullet lines in a stable order."""
        if not normalized_urls:
            return ["  - (No URLs)"]

        formatted_urls = []
        for path, params in WebArenaEvaluator._sorted_urls(normalized_urls):
            if params:
                param_str = "&".join(sorted(params))
                formatted_urls.append(f"  - `{path}?{param_str}`")
            else:
                formatted_urls.append(f"  - `{path}`")
        return formatted_urls

    async def _process_url_only_eval(
        self,
        task: WebArenaTask,
        expected_url: str | list[str],
        task_result: WebArenaTaskResponse,
        resource: AllocationResource,
        ignored_query_parameters: list[str],
    ) -> WebarenaTaskEvalResult:
        """Compare the URLs in ``task_result.last_urls`` with the expected URL(s).

        Both sides are normalized with ``_normalize_url``.  The check succeeds
        when at least one actual URL equals at least one expected URL.

        Args:
            task: Task being evaluated (supplies id, intent and eval site).
            expected_url: One expected URL or a list of acceptable URLs.
            task_result: Execution result; its ``last_urls`` are checked.
            resource: Resource whose ``website_type``/``base_url`` resolve the
                URL placeholder.
            ignored_query_parameters: Prefixes of query segments to ignore;
                ``None`` is treated as an empty list.

        Returns:
            A success or failure result carrying human-readable assertion
            messages and, when URLs were present, the normalized data.
        """
        if not task_result.last_urls:
            return WebarenaTaskEvalResult.create_failed(
                assertion_msgs=[
                    "No URLs were provided in the response for evaluation."
                ],
                site=task.eval.site,
                task_id=task.task_id,
                task_description=task.intent,
                task_type="url_eval_failure",
            )

        expected_urls = (
            [expected_url] if isinstance(expected_url, str) else expected_url
        )

        # Normalize both sides to a canonical (path, query_params) form.
        normalized_expected = {
            self._normalize_url(url, ignored_query_parameters, resource)
            for url in expected_urls
        }
        normalized_actual = {
            self._normalize_url(url, ignored_query_parameters, resource)
            for url in task_result.last_urls
        }

        # Sorted lists (not sets/frozensets) so the payload is JSON
        # serializable and identical from run to run.
        expected_sorted = self._sorted_urls(normalized_expected)
        actual_sorted = self._sorted_urls(normalized_actual)
        validation_data = {
            "expected": {
                "path": [path for path, _ in expected_sorted],
                "params": [sorted(params) for _, params in expected_sorted],
            },
            "actual": {
                "path": [path for path, _ in actual_sorted],
                "params": [sorted(params) for _, params in actual_sorted],
            },
        }

        # A single overlap between actual and expected URLs is enough.
        is_match = not normalized_expected.isdisjoint(normalized_actual)

        base_assertion_msgs = [
            "**Expected URLs (one of):**",
            *self._format_urls_for_assertion(normalized_expected),
            "**Actual URLs:**",
            *self._format_bullet_list(task_result.last_urls, strip_urls=True),
            "**Ignored Query Parameters:**",
            *self._format_bullet_list(ignored_query_parameters),
        ]

        if is_match:
            return WebarenaTaskEvalResult.create_success(
                assertion_msgs=[
                    "The final URL(s) match the expected URL(s).",
                    *base_assertion_msgs,
                ],
                site=task.eval.site,
                validation_data=validation_data,
                task_id=task.task_id,
                task_description=task.intent,
                task_type="url_eval_success",
            )

        return WebarenaTaskEvalResult.create_failed(
            assertion_msgs=[
                "The final URL(s) do not match the expected URL(s).",
                *base_assertion_msgs,
            ],
            site=task.eval.site,
            validation_data=validation_data,
            task_id=task.task_id,
            task_description=task.intent,
            task_type="url_eval_failure",
        )
