import math
from typing import Any, Dict, List, Optional, Tuple


class WangBidding:
    """Grid-search bidder with a dual (Lagrangian) budget-pacing multiplier.

    Each round the agent scores every bid on a uniform grid by
    ``r_hat - lambda_ * c_hat`` where, for a bid ``b`` and value ``v``,
    ``r_hat = p(b) * (v - b)`` and ``c_hat = p(b) * b`` (i.e. the winner is
    modelled as paying its own bid) and ``p(b)`` is the empirical fraction of
    observed competing bids that are ``<= b``.  After each round ``lambda_``
    is moved by ``eta * (c_hat(b_t) - rho)`` and clipped at zero.

    Usage protocol: call :meth:`act` once per round, then :meth:`update` with
    the realised outcome of that round.
    """

    # Tolerance used when comparing the objective of two candidate bids.
    _TIE_EPS = 1e-12
    # If the budget left after the selected bid would be below this margin
    # (and the whole remaining budget is below ``vbar``), bid everything left.
    _FLUSH_MARGIN = 0.1

    def __init__(self, T: int,
                 B: Optional[float] = None,
                 eta: float = 0.1,
                 rho: Optional[float] = None,
                 delta: float = 0.01,
                 grid_size: int = 101,
                 init_lambda: float = 0.0,
                 use_optimism: bool = False):
        """Create the agent.

        Args:
            T: Horizon (number of rounds); used for the default ``rho`` and
                inside the confidence bonus.
            B: Total budget.  Defaults to ``T`` when omitted.
            eta: Step size of the multiplier update.
            rho: Target per-round spend.  Defaults to ``B / max(1, T)``.
            delta: Confidence parameter of the optimism bonus.
            grid_size: Number of candidate bids per round (at least 2).
            init_lambda: Initial multiplier, clipped to be non-negative.
            use_optimism: When true, add a confidence bonus to the reward
                estimate and subtract it from the cost estimate.
        """
        self.T = int(T)
        self.B = float(B) if B is not None else float(T)
        self.eta = float(eta)
        self.rho = float(rho) if rho is not None else self.B / float(max(1, self.T))
        self.delta = float(delta)
        self.grid_size = max(2, int(grid_size))
        self.lambda_ = max(0.0, float(init_lambda))
        self.use_optimism = use_optimism
        self.remaining_budget = float(self.B)
        self.history_d: List[float] = []       # observed competing bids
        self.history_b: List[float] = []       # bids returned by act()
        self.history_reward: List[float] = []  # rewards passed to update()
        self.history_cost: List[float] = []    # costs passed to update()
        self._last_ctx: Optional[Dict[str, Any]] = None
        self.t = 1  # 1-based index of the current round

    def _empirical_win_prob(self, b: float) -> float:
        """Return the fraction of recorded competing bids that are ``<= b``.

        Returns 0.0 while no competing bid has been recorded.
        """
        if not self.history_d:
            return 0.0
        wins = sum(1 for d in self.history_d if d <= b)
        return wins / len(self.history_d)

    def _confidence_radius(self) -> float:
        """Return ``sqrt(log(2T / delta) / (2n))`` with ``n = max(1, t - 1)``."""
        n = max(1, self.t - 1)
        # The divisor is parenthesised on purpose: ``x / 2 * n`` would multiply
        # by n and make the bonus grow with the round index instead of shrink.
        return math.sqrt(math.log(2 * max(1, self.T) / self.delta) / (2 * n))

    def _estimate_r_c(self, v_t: float, b: float) -> Tuple[float, float]:
        """Estimate expected reward and expected cost of bidding ``b``.

        Args:
            v_t: Value of the item in the current round.
            b: Candidate bid.

        Returns:
            ``(r_hat, c_hat)``.  With ``use_optimism`` the confidence radius is
            added to ``r_hat`` and subtracted from ``c_hat`` (so ``c_hat`` may
            be negative).
        """
        p = self._empirical_win_prob(b)
        r_hat = p * (v_t - b)
        c_hat = p * b
        if self.use_optimism:
            bonus = self._confidence_radius()
            r_hat += bonus
            c_hat -= bonus
        return r_hat, c_hat

    def _best_grid_bid(self, v: float, vbar: float) -> float:
        """Pick the grid bid maximising ``r_hat - lambda_ * c_hat``.

        The grid is uniform on ``[0, min(v, vbar, remaining_budget)]``.
        Objective values within ``_TIE_EPS`` of each other are treated as
        ties and resolved in favour of the smaller bid.
        """
        max_bid = min(v, vbar, self.remaining_budget)
        steps = self.grid_size - 1
        best_b = 0.0
        best_val = -float("inf")
        for i in range(self.grid_size):
            b = max_bid * i / steps
            r_hat, c_hat = self._estimate_r_c(v, b)
            val = r_hat - self.lambda_ * c_hat
            strictly_better = val > best_val + self._TIE_EPS
            tie_with_lower_bid = val > best_val - self._TIE_EPS and b < best_b
            if strictly_better or tie_with_lower_bid:
                best_val = val
                best_b = b
        # Spend the leftover outright when what would remain is negligible.
        nearly_exhausted = self.remaining_budget - best_b < self._FLUSH_MARGIN
        if nearly_exhausted and self.remaining_budget < vbar:
            best_b = self.remaining_budget
        return float(max(0.0, min(best_b, vbar)))

    def act(self, ctx: Dict[str, Any]) -> float:
        """Choose the bid for the current round and record it.

        Args:
            ctx: Round context.  Reads ``"v"`` (value, default 0.0) and
                ``"vbar"`` (upper bound on bids, default 1.0).

        Returns:
            The bid.  It is 0.0 in the first round and whenever the budget is
            exhausted; otherwise the result of the grid search.
        """
        self._last_ctx = dict(ctx)
        v = float(ctx.get("v", 0.0))
        vbar = float(ctx.get("vbar", 1.0))
        if self.remaining_budget <= 0 or self.t == 1:
            bid = 0.0
        else:
            bid = self._best_grid_bid(v, vbar)
        self.history_b.append(bid)
        return bid

    def update(self, reward: float, cost: float, info: Dict[str, Any]) -> None:
        """Record the round outcome, charge the budget and update ``lambda_``.

        Args:
            reward: Realised reward of the round.
            cost: Realised payment of the round.
            info: Optional feedback; ``info["d_compete"]`` (if present and
                numeric) is stored as the observed competing bid.

        Raises:
            TypeError, ValueError: If ``reward`` or ``cost`` is not convertible
                to ``float``.
        """
        self.history_reward.append(float(reward))
        spent = float(cost)
        self.history_cost.append(spent)

        d_compete = info.get("d_compete") if isinstance(info, dict) else None
        if d_compete is not None:
            try:
                self.history_d.append(float(d_compete))
            except (TypeError, ValueError):
                # Best effort: feedback that is not numeric is ignored.
                pass

        self.remaining_budget = max(0.0, self.remaining_budget - spent)

        # The multiplier step uses the *estimated* cost of the bid just placed
        # (computed after the new competing bid has been recorded).
        b_t = self.history_b[-1] if self.history_b else 0.0
        v_t = float(self._last_ctx.get("v", 0.0)) if self._last_ctx is not None else 0.0
        _, c_hat_bt = self._estimate_r_c(v_t, b_t)
        self.lambda_ = max(0.0, self.lambda_ + self.eta * (c_hat_bt - self.rho))
        self.t += 1

    def get_state(self) -> Dict[str, Any]:
        """Return a snapshot of the agent's counters and budget."""
        return {
            "t": self.t,
            "lambda": self.lambda_,
            "history_len": len(self.history_d),
            "total_spent": sum(self.history_cost),
            "total_reward": sum(self.history_reward),
            "B": self.B,
            "T": self.T,
            "remaining_budget": self.remaining_budget,
        }


class WangNoBudgetControlBidding:
    """Grid-search bidder that maximises estimated reward without pacing.

    Each round the agent scores every bid ``b`` on a uniform grid by
    ``p(b) * (v - b)``, where ``p(b)`` is the empirical fraction of observed
    competing bids that are ``<= b``.  There is no multiplier: the budget only
    caps the grid and stops bidding once it is exhausted.

    Usage protocol: call :meth:`act` once per round, then :meth:`update` with
    the realised outcome of that round.
    """

    # Tolerance used when comparing the estimated reward of two bids.
    _TIE_EPS = 1e-12
    # If the budget left after the selected bid would be below this margin
    # (and the whole remaining budget is below ``vbar``), bid everything left.
    _FLUSH_MARGIN = 0.2

    def __init__(self, B,  grid_size: int = 101):
        """Create the agent.

        Args:
            B: Total budget (converted to ``float``).
            grid_size: Number of candidate bids per round (at least 2).
        """
        self.grid_size = max(2, int(grid_size))
        self.history_d: List[float] = []       # observed competing bids
        self.history_b: List[float] = []       # bids returned by act()
        self.history_reward: List[float] = []  # rewards passed to update()
        self.history_cost: List[float] = []    # costs passed to update()
        self.B = float(B)
        self.remaining_budget = self.B
        self._last_ctx: Optional[Dict[str, Any]] = None
        self.t = 1  # 1-based index of the current round

    def _empirical_win_prob(self, b: float) -> float:
        """Return the fraction of recorded competing bids that are ``<= b``.

        Returns 0.0 while no competing bid has been recorded.
        """
        if not self.history_d:
            return 0.0
        return sum(1 for d in self.history_d if d <= b) / len(self.history_d)

    def _estimate_r(self, v_t: float, b: float) -> float:
        """Estimate the expected reward ``p(b) * (v_t - b)`` of bidding ``b``."""
        return self._empirical_win_prob(b) * (v_t - b)

    def _best_grid_bid(self, v: float, vbar: float) -> float:
        """Pick the grid bid with the highest estimated reward.

        The grid is uniform on ``[0, min(v, vbar, remaining_budget)]``.
        Estimates within ``_TIE_EPS`` of the incumbent are treated as ties and
        resolved in favour of the smaller bid.
        """
        max_bid = min(v, vbar, self.remaining_budget)
        steps = self.grid_size - 1
        best_b = 0.0
        best_val = -float("inf")
        for i in range(self.grid_size):
            b = max_bid * i / steps
            r_hat = self._estimate_r(v, b)
            strictly_better = r_hat > best_val + self._TIE_EPS
            tie_with_lower_bid = abs(r_hat - best_val) < self._TIE_EPS and b < best_b
            if strictly_better or tie_with_lower_bid:
                best_val = r_hat
                best_b = b
        # Spend the leftover outright when what would remain is negligible.
        nearly_exhausted = self.remaining_budget - best_b < self._FLUSH_MARGIN
        if nearly_exhausted and self.remaining_budget < vbar:
            best_b = self.remaining_budget
        return float(max(0.0, min(best_b, vbar)))

    def act(self, ctx: Dict[str, Any]) -> float:
        """Choose the bid for the current round and record it.

        Args:
            ctx: Round context.  Reads ``"v"`` (value, default 0.0) and
                ``"vbar"`` (upper bound on bids, default 1.0).

        Returns:
            The bid.  It is 0.0 in the first round and whenever the budget is
            exhausted; otherwise the result of the grid search.
        """
        self._last_ctx = dict(ctx)
        v = float(ctx.get("v", 0.0))
        vbar = float(ctx.get("vbar", 1.0))
        if self.remaining_budget <= 0 or self.t == 1:
            bid = 0.0
        else:
            bid = self._best_grid_bid(v, vbar)
        self.history_b.append(bid)
        return bid

    def update(self, reward: float, cost: float, info: Dict[str, Any]) -> None:
        """Record the round outcome and charge the budget.

        Args:
            reward: Realised reward of the round.
            cost: Realised payment of the round.
            info: Optional feedback; ``info["d_compete"]`` (if present and
                numeric) is stored as the observed competing bid.

        Raises:
            TypeError, ValueError: If ``reward`` or ``cost`` is not convertible
                to ``float``.
        """
        self.history_reward.append(float(reward))
        spent = float(cost)
        self.history_cost.append(spent)

        d_compete = info.get("d_compete") if isinstance(info, dict) else None
        if d_compete is not None:
            try:
                self.history_d.append(float(d_compete))
            except (TypeError, ValueError):
                # Best effort: feedback that is not numeric is ignored.
                pass

        self.remaining_budget = max(0.0, self.remaining_budget - spent)
        self.t += 1

    def get_state(self) -> Dict[str, Any]:
        """Return a snapshot of the agent's counters and budget."""
        return {
            "t": self.t,
            "history_len": len(self.history_d),
            "total_spent": sum(self.history_cost),
            "total_reward": sum(self.history_reward),
            "B": self.B,
            "remaining_budget": self.remaining_budget,
        }
