from __future__ import annotations

import argparse
import json
import os
import time
import urllib.error
import urllib.request
from typing import Any, Dict, List, Optional


class LLMClient:
    """Minimal interface for chat-style LLM backends.

    Subclasses override :meth:`generate`; the base implementation only
    signals that no backend is available.
    """

    def generate(self, messages: List[Dict[str, str]]) -> str:
        """Produce a text reply for ``messages``.

        Args:
            messages: Chat messages, each a string-to-string mapping.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()


# Last-resort model name. It is empty, so _build_llm_from_env exits with an
# error unless a model is supplied via args or the ANUM_LLM_MODEL env var.
DEFAULT_LLM_MODEL = ""
# NOTE(review): not referenced in the visible part of this file;
# _build_llm_from_env treats "thinking" as off when neither args nor
# ANUM_LLM_ENABLE_THINKING set it — confirm which default is intended.
DEFAULT_LLM_ENABLE_THINKING = True
# Reasoning effort used when the caller does not choose one explicitly.
DEFAULT_REASONING_EFFORT = "low"


def _is_truthy(value: Any) -> bool:
    """Return True when ``value`` spells an affirmative flag.

    The value is converted with ``str()``, trimmed and lower-cased; only
    "1", "true", "yes" and "on" count as true. Everything else, including
    ``None`` and the empty string, is false.
    """
    normalized = str(value).strip().lower()
    return normalized in {"1", "true", "yes", "on"}


def build_production_llm_namespace(
    *,
    llm_api_base: Optional[str] = None,
    llm_api_key: Optional[str] = None,
    llm_model: Optional[str] = None,
    llm_timeout_s: Optional[int] = None,
    llm_enable_thinking: Optional[bool] = None,
    llm_reasoning_effort: Optional[str] = None,
) -> argparse.Namespace:
    """Bundle LLM settings into an ``argparse.Namespace``.

    All arguments are keyword-only and are stored unchanged under their own
    names, except ``llm_reasoning_effort``: a falsy value (``None`` or an
    empty string) is replaced by ``DEFAULT_REASONING_EFFORT``.

    Returns:
        A namespace carrying the six ``llm_*`` attributes.
    """
    settings: Dict[str, Any] = {
        "llm_api_base": llm_api_base,
        "llm_api_key": llm_api_key,
        "llm_model": llm_model,
        "llm_timeout_s": llm_timeout_s,
        "llm_enable_thinking": llm_enable_thinking,
    }
    if llm_reasoning_effort:
        settings["llm_reasoning_effort"] = llm_reasoning_effort
    else:
        settings["llm_reasoning_effort"] = DEFAULT_REASONING_EFFORT
    return argparse.Namespace(**settings)


class OpenAICompatibleClient(LLMClient):
    """Blocking client for an OpenAI-style ``/chat/completions`` endpoint.

    Requests are sent with ``urllib`` (streaming is rejected). Retriable HTTP
    statuses and connection-level errors are retried with capped exponential
    backoff. Terminal HTTP/network failures, undecodable or non-JSON bodies,
    and responses without usable content are reported as ``RuntimeError``.
    """

    # HTTP status codes for which the request is retried.
    RETRIABLE_HTTP_STATUS = {408, 409, 425, 429, 500, 502, 503, 504}

    def __init__(
        self,
        api_base: str,
        api_key: str,
        model: str,
        temperature: float = 0.2,
        timeout_s: int = 120,
        retry_max_attempts: int = 3,
        retry_backoff_s: float = 2.0,
        retry_max_backoff_s: float = 30.0,
        extra_body: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Store connection and retry settings.

        Args:
            api_base: Base URL; trailing slashes are stripped.
            api_key: Sent as a ``Bearer`` token.
            model: Model name placed in every request payload.
            temperature: Sampling temperature placed in every payload.
            timeout_s: Timeout passed to ``urlopen`` for each attempt.
            retry_max_attempts: Total attempts per request, clamped to >= 1.
            retry_backoff_s: Delay before the first retry, clamped to >= 0.1.
            retry_max_backoff_s: Upper bound on the delay, clamped so it is
                never below ``retry_backoff_s``.
            extra_body: Extra payload fields merged into every request.
        """
        self.api_base = api_base.rstrip("/")
        self.api_key = api_key
        self.model = model
        self.temperature = temperature
        self.timeout_s = timeout_s
        self.retry_max_attempts = max(1, int(retry_max_attempts))
        self.retry_backoff_s = max(0.1, float(retry_backoff_s))
        self.retry_max_backoff_s = max(self.retry_backoff_s, float(retry_max_backoff_s))
        self.extra_body = extra_body or {}

    def _retry_backoff(self, attempt: int) -> None:
        """Sleep before the retry that follows 1-based ``attempt``.

        The delay doubles with each attempt and is capped at
        ``retry_max_backoff_s``. Nothing happens once ``attempt`` has reached
        ``retry_max_attempts``, since no retry will follow.
        """
        if attempt >= self.retry_max_attempts:
            return
        delay = min(self.retry_max_backoff_s, self.retry_backoff_s * (2 ** (attempt - 1)))
        time.sleep(delay)

    def _build_request(self, messages: List[Dict[str, str]]) -> Dict[str, Any]:
        """Return ``{"url": ..., "payload": ...}`` for a chat completion call.

        ``extra_body`` is merged last, so its keys override the defaults. A
        ``reasoning_effort`` of ``None`` is dropped rather than sent.
        """
        payload: Dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "temperature": self.temperature,
        }
        # Work on a copy so the instance's extra_body is never mutated.
        extra = dict(self.extra_body) if self.extra_body else {}
        effort = extra.pop("reasoning_effort", None)
        if effort is not None:
            payload["reasoning_effort"] = effort
        payload.update(extra)
        return {"url": f"{self.api_base}/chat/completions", "payload": payload}

    @staticmethod
    def _extract_chat_content(result: Dict[str, Any]) -> str:
        """Return the first choice's message content.

        Raises:
            RuntimeError: If the decoded response is not an object with a
                non-empty ``choices`` list, or the first choice carries no
                non-empty ``message.content``.
        """
        # The body is whatever JSON the server sent; it may be null, a list
        # or a string, so check shapes instead of assuming a dict.
        choices = result.get("choices") if isinstance(result, dict) else None
        if not isinstance(choices, (list, tuple)) or not choices:
            raise RuntimeError("LLM response missing choices.")
        first = choices[0]
        message = first.get("message") if isinstance(first, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        if not content:
            raise RuntimeError("LLM response missing content.")
        return content

    def _post_json(self, url: str, payload: Dict[str, Any]) -> Any:
        """POST ``payload`` as JSON to ``url`` and return the decoded reply.

        Retries retriable HTTP statuses and connection-level errors up to
        ``retry_max_attempts`` times in total.

        Raises:
            RuntimeError: On a non-retriable or final HTTP error, on a
                network failure after the last attempt, or when the response
                body is not valid UTF-8 JSON.
        """
        request = urllib.request.Request(
            url,
            data=json.dumps(payload).encode("utf-8"),
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )
        raw = b""
        for attempt in range(1, self.retry_max_attempts + 1):
            try:
                with urllib.request.urlopen(request, timeout=self.timeout_s) as response:
                    raw = response.read()
                break
            except urllib.error.HTTPError as exc:
                # HTTPError must be handled before URLError (its base class).
                try:
                    err_body = exc.read().decode("utf-8", errors="replace")
                except Exception:
                    # Best effort: the error body is only used for the message.
                    err_body = str(exc)
                if exc.code in self.RETRIABLE_HTTP_STATUS and attempt < self.retry_max_attempts:
                    self._retry_backoff(attempt)
                    continue
                raise RuntimeError(f"LLM HTTP {exc.code}: {err_body[:600]}") from exc
            except (urllib.error.URLError, TimeoutError, ConnectionError) as exc:
                if attempt < self.retry_max_attempts:
                    self._retry_backoff(attempt)
                    continue
                raise RuntimeError(
                    f"LLM request failed after {self.retry_max_attempts} attempts: {exc}"
                ) from exc
        # Decode outside the retry loop so a malformed body is reported as a
        # RuntimeError instead of escaping as UnicodeDecodeError.
        try:
            return json.loads(raw.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError) as exc:
            preview = raw.decode("utf-8", errors="replace")[:600]
            raise RuntimeError(f"LLM returned invalid JSON payload: {preview}") from exc

    def generate(self, messages: List[Dict[str, str]]) -> str:
        """Send ``messages`` to the endpoint and return the reply text.

        Raises:
            RuntimeError: If ``extra_body`` requests streaming, the request
                fails, or the response has no usable content.
        """
        request_spec = self._build_request(messages)
        if self.extra_body and self.extra_body.get("stream") is True:
            raise RuntimeError("Streaming is not supported in this client.")
        result = self._post_json(request_spec["url"], request_spec["payload"])
        return self._extract_chat_content(result)


def _build_llm_from_env(args: argparse.Namespace) -> LLMClient:
    """Build an ``OpenAICompatibleClient`` from ``args`` and ``ANUM_LLM_*`` env vars.

    For API base, API key, model, timeout, thinking flag and reasoning
    effort, a value on ``args`` wins over the matching environment variable.
    Retry settings come from the environment only.

    Environment variables read: ``ANUM_LLM_API_BASE``, ``ANUM_LLM_API_KEY``,
    ``ANUM_LLM_MODEL``, ``ANUM_LLM_TIMEOUT_S``, ``ANUM_LLM_MAX_ATTEMPTS``,
    ``ANUM_LLM_RETRY_BACKOFF_S``, ``ANUM_LLM_RETRY_MAX_BACKOFF_S``,
    ``ANUM_LLM_ENABLE_THINKING``, ``ANUM_LLM_REASONING_EFFORT``.

    Args:
        args: Namespace that may carry ``llm_api_base``, ``llm_api_key``,
            ``llm_model``, ``llm_timeout_s``, ``llm_enable_thinking`` and
            ``llm_reasoning_effort``; missing attributes count as unset.

    Returns:
        A configured ``OpenAICompatibleClient``.

    Raises:
        SystemExit: If the API key or model is missing, the timeout is not
            an integer, or the reasoning effort is not a recognised value.
    """

    def _read_int_env(name: str, default: int, minimum: int = 1) -> int:
        """Read an int env var, clamped to ``minimum``; ``default`` if unset or invalid."""
        raw = os.getenv(name)
        if raw is None:
            return default
        try:
            return max(minimum, int(raw))
        except ValueError:
            # Deliberately lenient: a malformed tuning knob falls back.
            return default

    def _read_float_env(name: str, default: float, minimum: float = 0.1) -> float:
        """Read a float env var, clamped to ``minimum``; ``default`` if unset or invalid."""
        raw = os.getenv(name)
        if raw is None:
            return default
        try:
            return max(minimum, float(raw))
        except ValueError:
            return default

    def _resolve_enable_thinking() -> bool:
        """Return the thinking flag: ``args`` value if set, else the env var."""
        arg_value = getattr(args, "llm_enable_thinking", None)
        if arg_value is not None:
            return bool(arg_value)
        return _is_truthy(os.getenv("ANUM_LLM_ENABLE_THINKING"))

    def _resolve_reasoning_effort(default_when_enabled: Optional[str] = None) -> Optional[str]:
        """Return the validated reasoning effort, or ``None`` when none applies."""
        valid = {"none", "minimal", "low", "medium", "high", "xhigh"}
        selected = getattr(args, "llm_reasoning_effort", None) or os.getenv(
            "ANUM_LLM_REASONING_EFFORT"
        )
        # Use the resolved flag so that thinking enabled through
        # ANUM_LLM_ENABLE_THINKING gets the same default effort as thinking
        # enabled through args.
        if selected is None and _resolve_enable_thinking():
            selected = default_when_enabled
        if not selected:
            return None
        token = str(selected).strip().lower()
        if token in valid:
            return token
        raise SystemExit(
            "Invalid reasoning effort. Use one of: none|minimal|low|medium|high|xhigh."
        )

    def _resolve_timeout_s(default: int = 120) -> int:
        """Return the request timeout in seconds.

        An explicit value (args, then env) is floored at 10 seconds. With no
        explicit value the default is raised to at least 600 seconds when
        thinking is enabled.
        """
        arg_timeout = getattr(args, "llm_timeout_s", None)
        selected = arg_timeout if arg_timeout is not None else os.getenv("ANUM_LLM_TIMEOUT_S")
        if selected is None:
            return max(default, 600) if _resolve_enable_thinking() else default
        try:
            value = int(selected)
        except (TypeError, ValueError) as exc:
            raise SystemExit(
                "Invalid LLM timeout. Use integer seconds, e.g. 120 or 600."
            ) from exc
        return max(10, value)

    # getattr keeps these lookups consistent with the other optional fields:
    # a namespace lacking the attribute is treated as "not provided".
    api_base = getattr(args, "llm_api_base", None) or os.getenv("ANUM_LLM_API_BASE")
    if not api_base:
        api_base = "https://api.openai.com/v1"
    api_key = getattr(args, "llm_api_key", None) or os.getenv("ANUM_LLM_API_KEY")
    model = getattr(args, "llm_model", None) or os.getenv("ANUM_LLM_MODEL") or DEFAULT_LLM_MODEL
    if not api_key:
        raise SystemExit("LLM API key is required (ANUM_LLM_API_KEY).")
    if not model:
        raise SystemExit("LLM model is required (ANUM_LLM_MODEL or --llm_model).")

    timeout_s = _resolve_timeout_s(default=120)
    retry_max_attempts = _read_int_env("ANUM_LLM_MAX_ATTEMPTS", default=3, minimum=1)
    retry_backoff_s = _read_float_env("ANUM_LLM_RETRY_BACKOFF_S", default=2.0, minimum=0.1)
    # The cap may never be lower than the initial backoff.
    retry_max_backoff_s = _read_float_env(
        "ANUM_LLM_RETRY_MAX_BACKOFF_S",
        default=30.0,
        minimum=retry_backoff_s,
    )

    extra_body: Dict[str, Any] = {}
    if _resolve_enable_thinking():
        effort = _resolve_reasoning_effort(default_when_enabled=DEFAULT_REASONING_EFFORT)
        if effort is not None:
            extra_body["reasoning_effort"] = effort

    return OpenAICompatibleClient(
        api_base=api_base,
        api_key=api_key,
        model=model,
        timeout_s=timeout_s,
        retry_max_attempts=retry_max_attempts,
        retry_backoff_s=retry_backoff_s,
        retry_max_backoff_s=retry_max_backoff_s,
        extra_body=extra_body,
    )
