import json
import logging
from datetime import datetime
from enum import StrEnum, auto
from pathlib import Path
from typing import Any, Self

import pandas as pd
from pydantic import BaseModel, ConfigDict, Field, field_serializer, model_validator

logger = logging.getLogger(__name__)

# Score value that marks an evaluation as fully successful.
SUCCESS_SCORE = 1.0

# Directory containing this module; bundled assets are resolved relative to it.
project_root = Path(__file__).parent

# The agent response JSON schema, re-serialised as a string. Loading and then
# dumping the file provides minimal validation: the import fails immediately if
# the asset is missing or is not well-formed JSON.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform's
# locale default.
RESPONSE_SCHEMA = json.dumps(
    json.loads(
        (project_root / "assets/agent_response_schema.json").read_text(
            encoding="utf-8"
        )
    )
)


class ActionType(StrEnum):
    """The type of action performed"""

    RETRIEVE = "retrieve"
    """Retrieve information without modifying the test environment's state."""

    MUTATE = "mutate"
    """Modify the state of the test environment. Eg., create, update, or delete resources."""

    NAVIGATE = "navigate"
    """Navigate to a specific page or view without retrieving data. Use only for navigation actions that satisfy the user request by reaching a destination."""


class StatusType(StrEnum):
    """The outcome status of the task"""

    SUCCESS = "SUCCESS"
    """Task completed successfully"""

    ACTION_NOT_ALLOWED_ERROR = "ACTION_NOT_ALLOWED_ERROR"
    """Platform doesn't support or allow this operation"""

    PERMISSION_DENIED_ERROR = "PERMISSION_DENIED_ERROR"
    """Lacks authorization to perform the action"""

    NOT_FOUND_ERROR = "NOT_FOUND_ERROR"
    """Target entity doesn't exist (e.g., issue, user, product)"""

    DATA_VALIDATION_ERROR = "DATA_VALIDATION_ERROR"
    """Input doesn't meet requirements (e.g., invalid value format, missing required fields)"""

    UNKNOWN_ERROR = "UNKNOWN_ERROR"
    """Unexpected failure that doesn't fit other categories"""


class WebArenaVerifiedAgentResponse(BaseModel):
    """Response format for WebArena Verified"""

    action: ActionType = Field(description="The type of action performed")
    status: StatusType = Field(description="The outcome status of the task")
    results: list[Any] | None = Field(
        default=None,
        description="Only populated when action is 'retrieve' and status is 'SUCCESS'",
    )
    error_details: str | None = Field(
        default=None,
        max_length=500,
        description="Optional human-readable error details when status indicates an error",
    )

    @model_validator(mode="after")
    def validate_results(self) -> Self:
        """Allow ``results`` only on a successful retrieve response.

        Any other action/status combination must leave ``results`` as None;
        otherwise a ``ValueError`` is raised.
        """
        is_successful_retrieve = (
            self.action == ActionType.RETRIEVE
            and self.status == StatusType.SUCCESS
        )
        # Nothing to reject: either results are permitted, or none were given.
        if is_successful_retrieve or self.results is None:
            return self

        raise ValueError(
            "results must be null when action is not 'retrieve' or status is not 'SUCCESS'"
        )


class WebArenaSite(StrEnum):
    GITLAB = "gitlab"
    MAP = "map"
    REDDIT = "reddit"
    SHOPPING_ADMIN = "shopping_admin"
    SHOPPING = "shopping"
    WIKIPEDIA = "wikipedia"
    HOMEPAGE = "homepage"

    @property
    def url_name(self) -> str:
        # The name that appears in the URL for this site.
        return f"__{self.value.upper()}__"

    @classmethod
    def from_str(cls, s: str) -> Self:
        return cls[s.strip("_").lower()]


class ChangelogCategory(StrEnum):
    """Categories for changelog entries."""

    TASK_AMBIGUITY = auto()
    """When an intent is ambiguous and needs clarification (e.g., 'show' -> 'get me')."""

    REFERENCE_ALIGNMENT = auto()
    """When fixing an invalid target/intent pair."""

    PERMISSIVE_STRING_MATCH = auto()
    """For evaluations that allow for flexible string matching."""

    CONTEXT_FREE_EVALUATION = auto()
    """For evaluations that do not depend on the page's context."""

    UNACHIEVABLE_TASKS = auto()
    """For tasks that are impossible to complete as stated."""

    DOM_DEPENDENT_EVAL = auto()
    """For evaluations that depend on the structure of the DOM."""

    SPELLING_OR_GRAMMAR = auto()
    """For updates to correct spelling or grammar in the task description."""

    MISC = auto()
    """For updates that don't fit into any other category. Including changes based on framework updates."""

    LLM_AS_JUDGE_EVAL = auto()
    """Indicates replacing LLM-as-judge eval with programmatic verification for better reliability"""


class Changelog(BaseModel):
    """Represents a changelog entry for task overrides."""

    # NOTE(review): presumably names what was overridden (e.g. a task field) —
    # confirm against the code that produces these entries.
    key: str
    # Kind of change this entry records; one of the ChangelogCategory values.
    category: ChangelogCategory
    # Free-form text attached to the entry.
    note: str


class EvalFunc(BaseModel):
    """Evaluation function specification for verification."""

    # Name of the evaluation function. How the name is resolved to an
    # implementation is not visible in this module.
    name: str
    # Optional string-keyed parameters for the function; None when omitted.
    eval_params: dict[str, Any] | None = None
    # Optional expected data, given as either a mapping or a list; None when
    # omitted.
    expected_data: dict[str, Any] | list[Any] | None = None


class ExpectedEval(BaseModel):
    """Expected evaluation for task validation."""

    # The evaluation function to apply (required).
    eval_func: EvalFunc


class ExpectedUIState(BaseModel):
    """Expected UI state for task validation."""

    # Expected URL, or a list of URLs.
    # NOTE(review): a list presumably means "any of these is acceptable" —
    # confirm against the evaluator.
    url: str | list[str]
    # Names of query parameters to ignore; defaults to a fresh empty list per
    # instance.
    ignored_query_parameters: list[str] = Field(default_factory=list)
    # Optional evaluation function; None when omitted.
    eval_func: EvalFunc | None = None


class WebArenaEval(BaseModel):
    # Evaluation specification attached to a task (see WebArenaTask.eval).
    # Each of the three expectation lists is optional and defaults to None.

    # Site identifier as a plain string (not validated against WebArenaSite).
    site: str
    # Expected UI states, if any.
    expected_ui_state: list[ExpectedUIState] | None = None
    # Expected backend-state evaluations, if any.
    expected_backend_state: list[ExpectedEval] | None = None
    # Expected retrieved-value evaluations, if any.
    expected_retrieve_value: list[ExpectedEval] | None = None


class WebArenaTask(BaseModel):
    # A single (frozen) WebArena task definition.
    #
    # Besides the declared fields, two private values are taken from the raw
    # constructor input rather than validated as model fields:
    # ``_recording_path`` (from the ``recording_path`` key) and ``_is_valid``
    # (from the ``is_valid`` key).
    sites: list[WebArenaSite]
    task_id: int
    intent: str
    intent_template: str
    start_url: str | list[str]
    require_login: bool
    require_reset: bool
    start_url_context: str | None = None
    format_specification: str | None = None
    eval: WebArenaEval
    changelogs: list[Changelog] | None = None
    intent_template_id: int
    instantiation_dict: dict[str, object]
    storage_state: str | None = None
    geolocation: dict | None = None

    # Custom fields
    # recording: bool | None = None # TODO: Place back when excel files are fixed
    _recording_path: Path | None = None
    _is_valid: bool = False

    model_config = ConfigDict(frozen=True)

    def __init__(self, **data: Any) -> None:
        """Build the task from raw keyword data.

        In addition to the declared fields, ``recording_path`` (a ``str`` or
        ``Path``, or a missing value) and ``is_valid`` (anything accepted by
        ``_parse_bool``) are read from ``data`` and stored privately.

        Raises:
            pydantic.ValidationError: If the declared fields fail validation.
            ValueError: If ``recording_path`` or ``is_valid`` has an
                unsupported type or value.
            FileNotFoundError: If a recording path is given but does not exist.
        """
        super().__init__(**data)
        self._recording_path = self._parse_path_value(data.get("recording_path"))
        self._is_valid = self._parse_bool(data.get("is_valid", False))
        # The "after" validator already ran inside super().__init__(), while
        # _recording_path still held its default of None, so the existence
        # check must be repeated now that the real value is known.
        self._ensure_recording_path_exists()

    @model_validator(mode="after")
    def validate_model(self) -> Self:
        """Check the recording path whenever pydantic validates the instance."""
        self._ensure_recording_path_exists()
        return self

    def _ensure_recording_path_exists(self) -> None:
        """Raise ``FileNotFoundError`` if a recording path is set but missing on disk."""
        if self._recording_path is not None and not self._recording_path.exists():
            raise FileNotFoundError(
                f"Recording path does not exist: {self._recording_path}"
            )

    @property
    def recording_path(self) -> Path | None:
        """Path parsed from the ``recording_path`` input, or None if absent."""
        return self._recording_path

    @property
    def sites_str(self) -> str:
        """The task's site values, sorted and joined with underscores."""
        return "_".join(sorted(self.sites))

    @property
    def eval_types(self) -> list[str]:
        """Return the evaluation type names of this task.

        NOTE(review): ``self.eval`` is a ``WebArenaEval`` model, which declares
        no ``eval_types`` field and is not subscriptable, so this lookup looks
        like a leftover from a dict-based ``eval`` and is expected to raise
        ``TypeError``. It is left unchanged because the intended mapping onto
        the ``expected_*`` fields is not visible here — confirm, then fix or
        remove this property and the ``has_*_eval`` helpers that rely on it.
        """
        return self.eval["eval_types"]

    @property
    def has_url_match_eval(self) -> bool:
        """Whether ``"url_match"`` is among ``eval_types``."""
        return "url_match" in self.eval_types

    @property
    def has_string_match_eval(self) -> bool:
        """Whether ``"string_match"`` is among ``eval_types``."""
        return "string_match" in self.eval_types

    @property
    def has_program_html_eval(self) -> bool:
        """Whether ``"program_html"`` is among ``eval_types``."""
        return "program_html" in self.eval_types

    @staticmethod
    def _parse_path_value(v: object) -> Path | None:
        """Convert a raw ``recording_path`` value to a ``Path`` or None.

        ``Path`` instances pass through, values pandas considers missing
        (``None``, NaN, ...) become None, and strings are wrapped in ``Path``.

        Raises:
            ValueError: For any other type.
        """
        if isinstance(v, Path):
            return v
        elif pd.isna(v):
            return None
        elif isinstance(v, str):
            return Path(v)
        else:
            raise ValueError(
                f"Expected recording_path to be a string or Path, got {type(v)}"
            )

    @staticmethod
    def _parse_bool(v: object) -> bool:
        """Convert a raw ``is_valid`` value to a bool.

        Accepts booleans, ints (via ``bool(v)``), values pandas considers
        missing (treated as False), and the strings yes/true/1 and no/false/0
        (case-insensitive, surrounding whitespace ignored).

        Raises:
            ValueError: For any other type, or an unrecognised string.
        """
        if isinstance(v, bool):
            return v
        elif isinstance(v, int):
            return bool(v)
        elif pd.isna(v):
            return False
        elif isinstance(v, str):
            v_lower = v.strip().lower()
            if v_lower in {"yes", "true", "1"}:
                return True
            if v_lower in {"no", "false", "0"}:
                return False

        # Reached for unsupported types and for strings outside the known sets.
        raise ValueError(
            f"Expected is_valid to be a boolean, int, or string, got {type(v)} with value {v}"
        )


class WebArenaTaskStatus(StrEnum):
    """The outcome status of the task"""

    SUCCESS = "SUCCESS"
    """Task completed successfully"""

    AGENT_FAILURE = "AGENT_FAILURE"
    """Agent failed when running the task"""


class WebArenaTaskResponse(BaseModel):
    # What came back from running an agent on a task.

    # The agent's structured response. The field is required but may be None.
    response: WebArenaVerifiedAgentResponse | None
    # NOTE(review): presumably the URL(s) open when the run ended — confirm
    # against the code that fills this in.
    last_urls: list[str]
    # Whether the run succeeded or the agent failed (see WebArenaTaskStatus).
    status: WebArenaTaskStatus
    # Optional list of error messages; None when omitted.
    error_details: list[str] | None = None


class WebarenaTaskEvalResult(BaseModel):
    # Immutable result of one evaluation. A result counts as successful only
    # when ``score`` equals SUCCESS_SCORE; every other score must come with at
    # least one assertion message explaining the failure.
    score: float
    site: str | None = None
    exec_time: float = 0.0
    assertion_msgs: tuple[str, ...]
    validation_data: Any | None = None
    task_id: int | None = None
    task_description: str = ""
    task_type: str = "unknown"

    model_config = ConfigDict(frozen=True)

    def __init__(
        self,
        score: float,
        assertion_msgs: list[str] | tuple[str, ...],
        **data,
    ):
        """Create a result, storing ``assertion_msgs`` as a tuple.

        Raises:
            AssertionError: If ``score`` is not SUCCESS_SCORE and
                ``assertion_msgs`` is empty.
            pydantic.ValidationError: If the field values fail validation.
        """
        if score != SUCCESS_SCORE and not assertion_msgs:
            # Raised explicitly instead of via ``assert`` so the check is not
            # stripped when Python runs with -O; the exception type is the same.
            raise AssertionError(
                f"assertion_msgs must be provided when score is not {SUCCESS_SCORE}"
            )

        super().__init__(score=score, assertion_msgs=tuple(assertion_msgs), **data)

    @field_serializer("validation_data")
    def serialize_validation_data(self, value: Any) -> Any:
        """Custom serializer to handle nested Pydantic models and other non-serializable types.

        Walks dicts and lists recursively; models are dumped with
        ``model_dump()``, datetimes become ISO-8601 strings, and objects with
        a ``value`` attribute are replaced by that attribute. Anything else is
        returned unchanged.
        """

        def to_serializable(obj: Any) -> Any:
            if isinstance(obj, BaseModel):
                # Continue with the dumped dict so nested values are converted.
                obj = obj.model_dump()

            if isinstance(obj, dict):
                return {k: to_serializable(v) for k, v in obj.items()}
            if isinstance(obj, list):
                return [to_serializable(i) for i in obj]
            if isinstance(obj, datetime):
                return obj.isoformat()
            # Duck-typed enum handling: matches any object exposing ``value``,
            # not only Enum members.
            if hasattr(obj, "value"):
                return obj.value
            return obj

        return to_serializable(value)

    @classmethod
    def create_failed(
        cls,
        assertion_msgs: list[str],
        site: str,
        validation_data: Any | None = None,
        **kwargs,
    ) -> Self:
        """Build a failed result (score 0.0).

        ``exec_time`` defaults to 0.0 but may be overridden through
        ``kwargs``, like any other field.

        Raises:
            AssertionError: If ``assertion_msgs`` is empty.
        """
        # setdefault avoids a duplicate-keyword TypeError when the caller
        # supplies exec_time themselves.
        kwargs.setdefault("exec_time", 0.0)
        return cls(
            score=0.0,
            assertion_msgs=assertion_msgs,
            site=site,
            validation_data=validation_data,
            **kwargs,
        )

    @classmethod
    def create_success(
        cls,
        assertion_msgs: list[str],
        site: str,
        validation_data: Any | None = None,
        **kwargs,
    ) -> Self:
        """Build a successful result (score SUCCESS_SCORE).

        ``assertion_msgs`` may be empty. ``exec_time`` defaults to 0.0 but may
        be overridden through ``kwargs``, like any other field.
        """
        # setdefault avoids a duplicate-keyword TypeError when the caller
        # supplies exec_time themselves.
        kwargs.setdefault("exec_time", 0.0)
        return cls(
            score=SUCCESS_SCORE,
            assertion_msgs=assertion_msgs,
            site=site,
            validation_data=validation_data,
            **kwargs,
        )

    @property
    def is_success(self) -> bool:
        """True when ``score`` equals SUCCESS_SCORE."""
        return self.score == SUCCESS_SCORE

    @property
    def assertion_msg(self) -> str:
        """All assertion messages joined with newlines."""
        return "\n".join(self.assertion_msgs)


class Site(BaseModel):
    # Connection details for one site. All fields are required plain strings.

    # Site identifier (a plain string; not validated against WebArenaSite).
    site: str
    # Base URL of the site.
    base_url: str
    # NOTE(review): presumably a Chrome DevTools Protocol endpoint — confirm.
    cdp_url: str
    # NOTE(review): presumably a VNC endpoint for viewing the session — confirm.
    vnc_url: str
    # Login credentials; the password is held as an ordinary string.
    username: str
    password: str


class TaskData(BaseModel):
    # Task description bundle; all fields are required.
    # NOTE(review): presumably the payload handed to an agent — confirm with
    # the code that builds it.

    # Numeric task identifier.
    task_id: int
    # Response schema as a string (cf. the module-level RESPONSE_SCHEMA).
    response_schema: str
    # The task intent text.
    intent: str
    # Connection details for the sites involved.
    sites: list[Site]
    # A single start URL (WebArenaTask.start_url may also be a list).
    start_url: str


class WebArenaTaskResult(BaseModel):
    # Full record for one task run: the task, the agent's response, timing and
    # the evaluation results.

    # Numeric task identifier.
    task_id: int
    # Start time as a string; the format is not constrained here.
    execution_start_time: str
    # Score for the run, with its assertion messages.
    # NOTE(review): how these are derived from eval_results is not visible in
    # this module.
    score: float
    assertion_msgs: list[str]
    # Whether execution completed.
    execution_completed: bool
    # Duration in seconds; required, but may be None.
    execution_time_seconds: float | None
    # The task that was run.
    task: WebArenaTask
    # Optional trace of any type; None when omitted.
    trace: Any | None = None
    # The agent's task response, if one was recorded.
    agent_response: WebArenaTaskResponse | None = None
    # Individual evaluation results (required).
    eval_results: list[WebarenaTaskEvalResult]
