from __future__ import annotations

from typing import List, Dict, Any, Optional

from src.a4s.llm_client import LLMClient


class CallLimitedClient:
    """Wrap an ``LLMClient`` and refuse ``chat`` calls beyond a fixed quota.

    The wrapper keeps a running count of ``chat`` invocations and raises once
    that count reaches ``max_calls``, so that each baseline adheres to the
    per-experiment budget.
    """

    def __init__(self, base: LLMClient, max_calls: int) -> None:
        """Store the wrapped client and its quota; the counter starts at 0."""
        self._num_calls = 0
        self._max_calls = max_calls
        self._base = base

    @property
    def remaining(self) -> int:
        """Return how many calls are left in the budget, floored at zero."""
        left = self._max_calls - self._num_calls
        return 0 if left < 0 else left

    def chat(
        self,
        messages: List[Dict[str, Any]],
        model: Optional[str] = None,
        temperature: float = 0.2,
        max_tokens: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Forward one chat request to the wrapped client, charging the budget.

        ``messages`` is passed positionally; ``model``, ``temperature`` and
        ``max_tokens`` are passed as keyword arguments. The wrapped client's
        return value is returned unchanged.

        Raises:
            RuntimeError: If the number of calls made so far has already
                reached ``max_calls``; the wrapped client is not invoked.
        """
        used, cap = self._num_calls, self._max_calls
        if used >= cap:
            raise RuntimeError(f"API call budget exceeded: {used}/{cap}")

        # Charge the budget before delegating, so a request that fails inside
        # the wrapped client still counts as a spent call.
        self._num_calls = used + 1
        forwarded = {
            "model": model,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        return self._base.chat(messages, **forwarded)


