import random
import numpy as np
import math
from numpy.random import seed
from numpy.random import rand
from Environment import *
from tqdm import tqdm



class ETD_LCBT:
    """Explore-then-decide stopping rule based on a lower confidence bound (LCB).

    Rounds ``t <= l_n`` feed a ridge-regression accumulator ``(V, b)``.  At
    ``t == l_n`` the estimate ``theta_hat`` and ``V^{-1}`` are frozen and a
    threshold ``alpha`` is estimated by Monte Carlo.  For ``t > l_n`` the
    first candidate whose LCB reaches ``alpha`` is selected; if none does,
    the candidate seen at ``t == n - 1`` is taken and ``tau`` is set to ``n``.
    """

    def __init__(self, seed, d, n):
        """Seed the global RNGs and initialise all state.

        Args:
            seed: seed passed to ``np.random.seed`` and ``random.seed``.
            d: feature dimension (converted with ``int``).
            n: horizon / number of rounds (converted with ``int``).
        """
        print('LCB-Threshold Algorithm (Explore-then-Decide)')
        self.seed = seed
        np.random.seed(seed)
        random.seed(seed)

        self.n = int(n)
        self.d = int(d)
        self.lambd = 1
        # Constants of the (commented-out) full confidence-width formula.
        # They used to be assigned as a side effect of _xi; they are kept as
        # attributes so existing readers of .S / .L keep working.
        self.S = 1
        self.L = 1

        # Exploration length n^(2/3).
        self.l_n = int((self.n)**(2/3))

        # Number of Monte-Carlo draws used to estimate alpha.
        self.alpha_mc_samples = 10000

        # Running (exploration) accumulators: V = lambd*I + sum x x^T, b = sum y x
        self.V = self.lambd * np.eye(self.d)
        self.b = np.zeros(self.d)

        # Frozen (decision-phase) params computed once at t == l_n
        self.Vinv = None
        self.theta_hat = None

        # Bookkeeping
        self.stopped = False
        self.tau = None
        # Never updated by this class; only returned by get_lcb_values().
        self.lcb_values = np.array([np.nan])
        self.rewards = np.array([0.0])

        # Placeholder threshold; replaced at t == l_n.
        self.alpha = 100

    def _width_scale(self):
        """Return the multiplier sqrt(d * log(n)) applied to the V^{-1}-norm."""
        return np.sqrt(self.d * np.log(self.n))

    def _xi(self, x, Vinv):
        """Return the confidence width sqrt(x^T Vinv x) * sqrt(d * log(n))."""
        rad = np.sqrt(float(x @ Vinv @ x))
        return rad * self._width_scale()

    def _compute_alpha_from_empirical_cdf(self):
        """Set ``alpha`` to the (1 - 1/n)-quantile of simulated LCB values.

        Feature vectors are drawn uniformly from [0, 1/sqrt(d)]^d and scored
        with the frozen ``theta_hat`` / ``Vinv``.  All draws are made in one
        vectorised call instead of one Python iteration per sample.
        """
        upper = 1 / np.sqrt(self.d)
        # Kept for backward compatibility: a sampler for a single vector.
        self.Z_dist = (lambda: np.random.uniform(0, upper, size=self.d))
        self.Z = np.random.uniform(0, upper, size=(self.alpha_mc_samples, self.d))

        # Row-wise quadratic form z^T Vinv z, then Z^{LCB} = z^T theta_hat - xi(z).
        quad = np.sum((self.Z @ self.Vinv) * self.Z, axis=1)
        z_lcbs = self.Z @ self.theta_hat - np.sqrt(quad) * self._width_scale()

        q = np.clip(1.0 - 1.0 / self.n, 0.0, 1.0)
        self.alpha = float(np.quantile(z_lcbs, q))
        print('alpha', self.alpha)

    def _stop(self, tau, y):
        """Record the stopping time and the reward of the selected candidate."""
        self.tau = tau
        self.stopped = True
        self.rewards = y

    def run(self, t, x, y):
        """Process round ``t`` with feature vector ``x`` and reward ``y``.

        Once a decision has been made, later calls are ignored.  Previously a
        call with ``t == n - 1`` after an earlier stop overwrote ``tau`` and
        ``rewards`` with the last round.
        """
        if self.stopped:
            return

        if t <= self.l_n:
            self.V += np.outer(x, x)
            self.b += x * y
        if t == self.l_n:
            # Freeze the estimate and derive the threshold from it.
            self.theta_hat = np.linalg.solve(self.V, self.b)
            self.Vinv = np.linalg.inv(self.V)
            self._compute_alpha_from_empirical_cdf()

        if t > self.l_n:
            lcb = float(x @ self.theta_hat - self._xi(x, self.Vinv))
            if lcb >= self.alpha:
                print(lcb, self.alpha)
                print('self.theta', self.theta_hat)
                self._stop(t, y)
            elif t == self.n - 1:
                # Not stopped by the threshold: take the last candidate.
                self._stop(self.n, y)

    def get_stopping_time(self):
        """Return ``tau`` (``None`` until a decision has been made)."""
        return self.tau

    def get_lcb_values(self):
        """Return the ``lcb_values`` placeholder array."""
        return self.lcb_values

    def get_rewards(self):
        """Return the recorded reward (initial placeholder until stopped)."""
        return self.rewards

    def reset(self):
        """Re-seed the global RNGs; algorithm state is NOT cleared."""
        np.random.seed(self.seed)
        random.seed(self.seed)

    def name(self):
        """Return the display name of the algorithm."""
        return 'ETD-LCBT(iid)'
    



class Secretary:
    """Reward-threshold stopping rule (reported by ``name()`` as 'Gusein-Zade').

    Rewards of rounds ``t < ceil(n / e)`` are recorded.  At
    ``t == ceil(n / e)`` the threshold ``alpha`` becomes the largest recorded
    reward.  Afterwards the first reward ``>= alpha`` is selected; if none is
    found, the candidate at ``t == n - 1`` is taken and ``tau`` is set to ``n``.
    """

    def __init__(self, seed, d, n):
        """Seed the global RNGs and initialise all state.

        Args:
            seed: seed passed to ``np.random.seed`` and ``random.seed``.
            d: feature dimension (converted with ``int``).
            n: horizon / number of rounds (converted with ``int``).
        """
        print('LCB-Threshold Algorithm (Explore-then-Decide)')
        self.seed = seed
        np.random.seed(seed)
        random.seed(seed)

        self.n = int(n)
        self.d = int(d)
        self.lambd = 1

        # The attributes below are not read by run(); they mirror the other
        # algorithm classes in this file and are kept for compatibility.
        self.l_n = int(np.sqrt(self.n))
        self.V = self.lambd * np.eye(self.d)
        self.b = np.zeros(self.d)
        self.Vinv = None
        self.theta_hat = None

        # Bookkeeping
        self.stopped = False
        self.tau = None
        # Previously missing, which made get_lcb_values() raise AttributeError.
        self.lcb_values = np.array([np.nan])
        self.rewards = np.array([0.0])
        # Rewards observed during the observation phase.
        self.y_his = []

        # Placeholder threshold; replaced at t == ceil(n / e).
        self.alpha = 100

    def _stop(self, tau, y):
        """Record the stopping time and the reward of the selected candidate."""
        self.tau = tau
        self.stopped = True
        self.rewards = y

    def run(self, t, x, y):
        """Process round ``t``; ``x`` is accepted for interface parity but unused.

        Once a decision has been made, later calls are ignored.  Previously a
        call with ``t == n - 1`` after an earlier stop overwrote ``tau`` and
        ``rewards`` with the last round.
        """
        if self.stopped:
            return

        cutoff = math.ceil(self.n / math.e)
        if t < cutoff:
            self.y_his.append(y)
        elif t == cutoff:
            # NOTE(review): the candidate at t == cutoff is neither recorded
            # nor considered for selection -- confirm this is intended.
            self.alpha = max(self.y_his)
        elif y >= self.alpha:
            print(y, self.alpha)
            self._stop(t, y)
        elif t == self.n - 1:
            print(y, self.alpha)
            # Not stopped by the threshold: take the last candidate.
            self._stop(self.n, y)

    def get_stopping_time(self):
        """Return ``tau`` (``None`` until a decision has been made)."""
        return self.tau

    def get_lcb_values(self):
        """Return the ``lcb_values`` placeholder array."""
        return self.lcb_values

    def get_rewards(self):
        """Return the recorded reward (initial placeholder until stopped)."""
        return self.rewards

    def reset(self):
        """Re-seed the global RNGs; algorithm state is NOT cleared."""
        np.random.seed(self.seed)
        random.seed(self.seed)

    def name(self):
        """Return the display name of the algorithm."""
        return 'Gusein-Zade'

class greedy:
    """Epsilon-greedy variant of the LCB-threshold stopping rule.

    Every round is an exploration round with probability ``epsilon`` (except
    ``t == n - 1``): the ridge accumulators ``(V, b)`` are updated and no
    decision is made.  Otherwise the candidate's LCB is compared with the
    threshold ``alpha``, which is re-estimated whenever new exploration data
    has arrived since the previous decision round.
    """

    def __init__(self, seed, d, n):
        """Seed the global RNGs and initialise all state.

        Args:
            seed: seed passed to ``np.random.seed`` and ``random.seed``.
            d: feature dimension (converted with ``int``).
            n: horizon / number of rounds (converted with ``int``).
        """
        print('ε-Greedy-LCBT')
        self.seed = seed
        np.random.seed(seed)
        random.seed(seed)

        self.n = int(n)
        self.d = int(d)
        self.lambd = 1.0
        # Not read by run(); kept as an attribute for compatibility.
        self.beta = np.sqrt(self.lambd) + np.sqrt(self.d * np.log(self.n))
        # Constants of the full confidence-width formula.  They used to be
        # assigned as a side effect of _xi; kept for readers of .S / .L.
        self.S = 1
        self.L = 1

        # Nominal exploration budget n^(2/3); only used to derive epsilon.
        self.l_n = int((self.n)**(2/3))

        # Number of Monte-Carlo draws used to estimate alpha.
        self.alpha_mc_samples = 10000

        # Running (exploration) accumulators: V = lambd*I + sum x x^T, b = sum y x
        self.V = self.lambd * np.eye(self.d)
        self.b = np.zeros(self.d)

        # Refreshed on a decision round whenever new data has arrived.
        self.Vinv = None
        self.theta_hat = None

        # Bookkeeping
        self.stopped = False
        self.tau = None
        # Never updated by this class; only returned by get_lcb_values().
        self.lcb_values = np.array([np.nan])
        self.rewards = np.array([0.0])
        # Per-round exploration probability.
        self.epsilon = np.sqrt(self.l_n / self.n)
        # True when (V, b) changed since theta_hat / Vinv / alpha were computed.
        self.bool = True
        # Placeholder threshold; replaced on the first decision round.
        self.alpha = 100

    def _width_scale(self):
        """Return the multiplier sqrt(d * log(n)) applied to the V^{-1}-norm."""
        return np.sqrt(self.d * np.log(self.n))

    def _xi(self, x, Vinv):
        """Return the confidence width sqrt(x^T Vinv x) * sqrt(d * log(n))."""
        rad = np.sqrt(float(x @ Vinv @ x))
        return rad * self._width_scale()

    def _compute_alpha_from_empirical_cdf(self):
        """Set ``alpha`` to the (1 - 1/n)-quantile of simulated LCB values.

        Feature vectors are drawn uniformly from [0, 1/sqrt(d)]^d and scored
        with the current ``theta_hat`` / ``Vinv``.  All draws are made in one
        vectorised call instead of one Python iteration per sample.
        """
        upper = 1 / np.sqrt(self.d)
        # Kept for backward compatibility: a sampler for a single vector.
        self.Z_dist = (lambda: np.random.uniform(0, upper, size=self.d))
        self.Z = np.random.uniform(0, upper, size=(self.alpha_mc_samples, self.d))

        # Row-wise quadratic form z^T Vinv z, then Z^{LCB} = z^T theta_hat - xi(z).
        quad = np.sum((self.Z @ self.Vinv) * self.Z, axis=1)
        z_lcbs = self.Z @ self.theta_hat - np.sqrt(quad) * self._width_scale()

        q = np.clip(1.0 - (1.0 / self.n), 0.0, 1.0)
        self.alpha = float(np.quantile(z_lcbs, q))

    def _stop(self, tau, y):
        """Record the stopping time and the reward of the selected candidate."""
        self.tau = tau
        self.stopped = True
        self.rewards = y

    def run(self, t, x, y):
        """Process round ``t`` with feature vector ``x`` and reward ``y``.

        Once a decision has been made, later calls are ignored (no RNG draw,
        no update).  Previously a call with ``t == n - 1`` after an earlier
        stop overwrote ``tau`` and ``rewards`` with the last round.
        """
        if self.stopped:
            return

        explore = np.random.binomial(1, self.epsilon) == 1
        if explore and t != self.n - 1:
            self.V += np.outer(x, x)
            self.b += x * y
            self.bool = True
            return

        if self.bool:
            # (V, b) changed since the last fit: refresh estimate and threshold.
            # With unchanged (V, b) the previous values are still exact, so
            # solve/inv are skipped.
            self.theta_hat = np.linalg.solve(self.V, self.b)
            self.Vinv = np.linalg.inv(self.V)
            self._compute_alpha_from_empirical_cdf()
            self.bool = False

        lcb = float(x @ self.theta_hat - self._xi(x, self.Vinv))
        if lcb >= self.alpha:
            print(lcb, self.alpha)
            print('self.theta', self.theta_hat)
            print('x', x)
            self._stop(t, y)
        elif t == self.n - 1:
            # Not stopped by the threshold: take the last candidate.
            self._stop(self.n, y)

    def get_stopping_time(self):
        """Return ``tau`` (``None`` until a decision has been made)."""
        return self.tau

    def get_lcb_values(self):
        """Return the ``lcb_values`` placeholder array."""
        return self.lcb_values

    def get_rewards(self):
        """Return the recorded reward (initial placeholder until stopped)."""
        return self.rewards

    def reset(self):
        """Re-seed the global RNGs; algorithm state is NOT cleared."""
        np.random.seed(self.seed)
        random.seed(self.seed)

    def name(self):
        """Return the display name of the algorithm."""
        return 'ε-Greedy-LCBT'



class ETD_LCBT_NonIID:
    """Explore-then-decide LCB-threshold rule with per-round feature bounds.

    Rounds ``t <= l_n`` feed a ridge-regression accumulator ``(V, b)``.  At
    ``t == l_n`` the estimate is frozen and ``alpha`` is set to half of the
    Monte-Carlo mean of ``max_i x_i . theta_hat``, with ``x_i`` drawn
    uniformly between ``lows[i]`` and ``highs[i]`` for ``i`` in
    ``[l_n, n)``.  For ``t > l_n`` the first candidate whose LCB reaches
    ``alpha`` is selected; if none does, the candidate at ``t == n - 1`` is
    taken and ``tau`` is set to ``n``.
    """

    def __init__(self, seed, d, n):
        """Seed the global RNGs and initialise all state.

        Args:
            seed: seed passed to ``np.random.seed`` and ``random.seed``.
            d: feature dimension (converted with ``int``).
            n: horizon / number of rounds (converted with ``int``).
        """
        print('LCB-Threshold Algorithm (Explore-then-Decide)')
        self.seed = seed
        np.random.seed(seed)
        random.seed(seed)

        self.n = int(n)
        self.d = int(d)
        self.lambd = 1
        # Constants of the (commented-out) full confidence-width formula.
        # They used to be assigned as a side effect of _xi; kept for readers
        # of .S / .L.
        self.S = 1
        self.L = 1

        # Bound placeholders, replaced on every run() call.  Built from the
        # int-converted sizes so that a float n or d no longer breaks np.zeros.
        self.highs = np.zeros((self.n, self.d))
        self.lows = np.zeros((self.n, self.d))

        # Exploration length n^(2/3).
        self.l_n = int(self.n**(2/3))

        # Number of Monte-Carlo draws used to estimate alpha.
        self.alpha_mc_samples = 1000

        # Running (exploration) accumulators: V = lambd*I + sum x x^T, b = sum y x
        self.V = self.lambd * np.eye(self.d)
        self.b = np.zeros(self.d)

        # Frozen (decision-phase) params computed once at t == l_n
        self.Vinv = None
        self.theta_hat = None

        # Bookkeeping
        self.stopped = False
        self.tau = None
        # Never updated by this class; only returned by get_lcb_values().
        self.lcb_values = np.array([np.nan])
        self.rewards = np.array([0.0])

        # Placeholder threshold; replaced at t == l_n.
        self.alpha = 100

    def _xi(self, x, Vinv):
        """Return the confidence width sqrt(x^T Vinv x) * sqrt(d * log(n))."""
        rad = np.sqrt(float(x @ Vinv @ x))
        term = np.sqrt(self.d * np.log(self.n))
        return rad * term

    def _bounds(self, start, stop):
        """Return rows ``start:stop`` of lows/highs as 2-D float arrays.

        1-D bounds (one scalar per round) get a trailing axis so that they
        broadcast against a ``(rounds, d)`` sample.
        """
        lo = np.asarray(self.lows, dtype=float)[start:stop]
        hi = np.asarray(self.highs, dtype=float)[start:stop]
        if lo.ndim == 1:
            lo = lo[:, None]
        if hi.ndim == 1:
            hi = hi[:, None]
        return lo, hi

    def _compute_alpha_from_empirical_cdf(self):
        """Set ``alpha`` to the Monte-Carlo mean of ``0.5 * max_i x_i . theta_hat``.

        Each replication draws all candidates of rounds ``[l_n, n)`` in one
        vectorised call instead of one Python-level call per round.
        """
        lo, hi = self._bounds(self.l_n, self.n)
        shape = (lo.shape[0], self.d)
        total = 0
        for _ in range(self.alpha_mc_samples):
            # The most recent sample is kept on the instance, as before.
            self.items = np.random.uniform(lo, hi, size=shape)
            total += (1/2) * np.max(self.items @ self.theta_hat)
        self.alpha = (1/self.alpha_mc_samples) * total

    def _stop(self, tau, y):
        """Record the stopping time and the reward of the selected candidate."""
        self.tau = tau
        self.stopped = True
        self.rewards = y

    def run(self, t, x, y, l, h):
        """Process round ``t``.

        Args:
            t: round index.
            x: feature vector of the current candidate.
            y: reward of the current candidate.
            l: lower bounds, indexed by round, stored as ``self.lows``.
            h: upper bounds, indexed by round, stored as ``self.highs``.

        Once a decision has been made, later calls only refresh the stored
        bounds.  Previously a call with ``t == n - 1`` after an earlier stop
        overwrote ``tau`` and ``rewards`` with the last round.
        """
        self.lows = l
        self.highs = h
        if self.stopped:
            return

        if t <= self.l_n:
            self.V += np.outer(x, x)
            self.b += x * y
        if t == self.l_n:
            # Freeze the estimate and derive the threshold from it.
            self.theta_hat = np.linalg.solve(self.V, self.b)
            self.Vinv = np.linalg.inv(self.V)
            self._compute_alpha_from_empirical_cdf()

        if t > self.l_n:
            lcb = float(x @ self.theta_hat - self._xi(x, self.Vinv))
            if lcb >= self.alpha:
                self._stop(t, y)
            elif t == self.n - 1:
                # Not stopped by the threshold: take the last candidate.
                self._stop(self.n, y)

    def get_stopping_time(self):
        """Return ``tau`` (``None`` until a decision has been made)."""
        return self.tau

    def get_lcb_values(self):
        """Return the ``lcb_values`` placeholder array."""
        return self.lcb_values

    def get_rewards(self):
        """Return the recorded reward (initial placeholder until stopped)."""
        return self.rewards

    def reset(self):
        """Re-seed the global RNGs; algorithm state is NOT cleared."""
        np.random.seed(self.seed)
        random.seed(self.seed)

    def name(self):
        """Return the display name of the algorithm."""
        return 'ETD-LCBT(non-iid)'
    



class ETD_LCBT_NonIID_Window:
    """Explore-then-decide LCB-threshold rule that may pick a past candidate.

    Rounds ``t <= l_n`` feed a ridge-regression accumulator ``(V, b)``.  At
    ``t == l_n + 1`` the estimate is frozen, ``alpha`` is estimated by Monte
    Carlo over all ``n`` rounds, and every candidate seen so far is scored:
    if the best LCB reaches ``alpha`` that candidate is selected (``tau`` is
    its position in the history).  Afterwards the first candidate whose LCB
    reaches ``alpha`` is selected; if none does, the candidate at
    ``t == n - 1`` is taken and ``tau`` is set to ``n``.
    """

    def __init__(self, seed, d, n):
        """Seed the global RNGs and initialise all state.

        Args:
            seed: seed passed to ``np.random.seed`` and ``random.seed``.
            d: feature dimension (converted with ``int``).
            n: horizon / number of rounds (converted with ``int``).
        """
        print('LCB-Threshold Algorithm (Explore-then-Decide)')
        self.seed = seed
        np.random.seed(seed)
        random.seed(seed)

        self.n = int(n)
        self.d = int(d)
        self.lambd = 1
        # Constants of the (commented-out) full confidence-width formula.
        # They used to be assigned as a side effect of _xi; kept for readers
        # of .S / .L.
        self.S = 1
        self.L = 1

        # Bound placeholders, replaced on every run() call.  Built from the
        # int-converted sizes so that a float n or d no longer breaks np.zeros.
        self.highs = np.zeros((self.n, self.d))
        self.lows = np.zeros((self.n, self.d))

        # Exploration length n^(2/3).
        self.l_n = int(self.n**(2/3))

        # Number of Monte-Carlo draws used to estimate alpha.
        self.alpha_mc_samples = 10000

        # Running (exploration) accumulators: V = lambd*I + sum x x^T, b = sum y x
        self.V = self.lambd * np.eye(self.d)
        self.b = np.zeros(self.d)
        # Candidates (and their rewards) seen so far, in call order.
        self.x_hist = []
        self.y_hist = []

        # Frozen (decision-phase) params computed once at t == l_n + 1
        self.Vinv = None
        self.theta_hat = None

        # Bookkeeping
        self.stopped = False
        self.tau = None
        # Never updated by this class; only returned by get_lcb_values().
        self.lcb_values = np.array([np.nan])
        self.rewards = np.array([0.0])

        # Placeholder threshold; replaced at t == l_n + 1.
        self.alpha = 100

    def _xi(self, x, Vinv):
        """Return the confidence width sqrt(x^T Vinv x) * sqrt(d * log(n))."""
        rad = np.sqrt(float(x @ Vinv @ x))
        term = np.sqrt(self.d * np.log(self.n))
        return rad * term

    def _lcb(self, x):
        """Return the lower confidence bound of ``x`` under the frozen estimate."""
        return float(x @ self.theta_hat - self._xi(x, self.Vinv))

    def _bounds(self, start, stop):
        """Return rows ``start:stop`` of lows/highs as 2-D float arrays.

        1-D bounds (one scalar per round) get a trailing axis so that they
        broadcast against a ``(rounds, d)`` sample.
        """
        lo = np.asarray(self.lows, dtype=float)[start:stop]
        hi = np.asarray(self.highs, dtype=float)[start:stop]
        if lo.ndim == 1:
            lo = lo[:, None]
        if hi.ndim == 1:
            hi = hi[:, None]
        return lo, hi

    def _compute_alpha_from_empirical_cdf(self):
        """Set ``alpha`` to the Monte-Carlo mean of ``0.5 * max_i x_i . theta_hat``.

        Each replication draws the candidates of all rounds ``[0, n)`` in one
        vectorised call instead of one Python-level call per round.
        """
        lo, hi = self._bounds(0, self.n)
        shape = (lo.shape[0], self.d)
        total = 0
        for _ in range(self.alpha_mc_samples):
            # The most recent sample is kept on the instance, as before.
            self.items = np.random.uniform(lo, hi, size=shape)
            total += (1/2) * np.max(self.items @ self.theta_hat)
        self.alpha = (1/self.alpha_mc_samples) * total
        print('alpha', self.alpha)

    def _stop(self, tau, y):
        """Record the stopping time and the reward of the selected candidate."""
        self.tau = tau
        self.stopped = True
        self.rewards = y

    def run(self, t, x, y, l, h):
        """Process round ``t``.

        Args:
            t: round index.
            x: feature vector of the current candidate.
            y: reward of the current candidate.
            l: lower bounds, indexed by round, stored as ``self.lows``.
            h: upper bounds, indexed by round, stored as ``self.highs``.

        Once a decision has been made, later calls only refresh the stored
        bounds.  Previously a call with ``t == n - 1`` after an earlier stop
        overwrote ``tau`` and ``rewards`` with the last round.
        """
        self.lows = l
        self.highs = h
        if self.stopped:
            return

        self.x_hist.append(x)
        self.y_hist.append(y)

        if t <= self.l_n:
            self.V += np.outer(x, x)
            self.b += x * y

        if t == self.l_n + 1:
            # Freeze the estimate and derive the threshold from it.
            self.theta_hat = np.linalg.solve(self.V, self.b)
            self.Vinv = np.linalg.inv(self.V)
            self._compute_alpha_from_empirical_cdf()

            # Score every candidate seen so far (computed once, reused for
            # both the maximum and its position).
            lcbs = [self._lcb(z) for z in self.x_hist]
            best = int(np.argmax(lcbs))
            if lcbs[best] >= self.alpha:
                print(lcbs[best], self.alpha)
                print('self.theta', self.theta_hat)
                # The recorded reward belongs to the selected past candidate;
                # previously the current round's y was stored alongside
                # tau == best, so the two disagreed.
                self._stop(best, self.y_hist[best])
            elif t == self.n - 1:
                # Window round is also the last round: take the last candidate.
                self._stop(self.n, y)

        if t > self.l_n + 1:
            lcb = self._lcb(x)
            if lcb >= self.alpha:
                print(lcb, self.alpha)
                print('self.theta', self.theta_hat)
                self._stop(t, y)
            elif t == self.n - 1:
                # Not stopped by the threshold: take the last candidate.
                self._stop(self.n, y)

    def get_stopping_time(self):
        """Return ``tau`` (``None`` until a decision has been made)."""
        return self.tau

    def get_lcb_values(self):
        """Return the ``lcb_values`` placeholder array."""
        return self.lcb_values

    def get_rewards(self):
        """Return the recorded reward (initial placeholder until stopped)."""
        return self.rewards

    def reset(self):
        """Re-seed the global RNGs; algorithm state is NOT cleared."""
        np.random.seed(self.seed)
        random.seed(self.seed)

    def name(self):
        """Return the display name of the algorithm."""
        return 'ETD-LCBT-WA'
    

# class Oracle:
#     def __init__(self, seed, d, n):
#         print('LCB-Threshold Algorithm (Explore-then-Decide)')
#         self.seed = seed
#         np.random.seed(seed)
#         random.seed(seed)

#         self.n = int(n)
#         self.d = int(d)
#         self.lambd = 1
#         # self.beta = np.sqrt(self.d * np.log(self.n))  # Confidence parameter

#         # Exploration length (can be overridden via setter)
#         self.l_n = int(np.sqrt(self.n))
    
#         # Running (exploration) accumulators: V = lambd*I + sum x x^T, b = sum y x
#         self.V = self.lambd * np.eye(self.d)
#         self.b = np.zeros(self.d)

#         # Frozen (decision-phase) params computed once at t == l_n
#         self.Vinv= None
#         self.theta_hat= None
    
#         # Bookkeeping
#         self.stopped = False
#         self.tau = None
#         self.lcb_values = np.array([np.nan])
#         self.rewards = np.array([0.0])

#         # Placeholder: before freezing, alpha is not known
#         # Keep the param but the *actual* alpha for ETD will be computed from theta_hat at t == l_n
#         self.alpha = 100

#     # ---------- optional: let user change l_n while keeping the original constructor ----------
#     #     self.l_n = int(l_n)

        
#     def _compute_alpha_from_empirical_cdf(self,theta):
#         # Bootstrap z from exploration features (proxy for 𝔻_x)
#         # X = np.asarray(self._x_hist, dtype=float)
#         # if X.shape[0] == 0:
#         #     # Fallback (no exploration data; should not happen when l_n >= 1)
#         #     self.alpha = -np.inf
#         #     return
        
#         self.Z_dist = (lambda: np.random.uniform(0, 1/np.sqrt(self.d), size=self.d))
#         sample=100
#         self.Z = [self.Z_dist() for _ in range(sample)]
#         # Sample with replacement
#         # idx = np.random.randint(0, X.shape[0], size=min(self.alpha_mc_samples, max(1, X.shape[0] * 10)))
#         # Z = X[idx]

#         # Compute Z^{LCB} = z^T θ̂ - ξ(z)
#         z_lcbs = []
#         for z in self.Z:
#             z_lcbs.append(float(z @ theta))
#         z_lcbs = np.array(z_lcbs)
#         # print('z_lcbs',z_lcbs)
#         # Set α so that F(α | θ̂, V) = 1 - 1/n  ⇒ α is (1 - 1/n)-quantile
#         q = 1.0 - 1.0 / self.n
#         q = np.clip(q, 0.0, 1.0)
#         self.alpha = float(np.quantile(z_lcbs, q))
#         print('alpha',self.alpha)
#     def run(self, t, x, y, theta):

#         if t==0:    
#             self._compute_alpha_from_empirical_cdf(theta)


#         # Compute LCB_t(X_t)
#         # V_inv = np.linalg.inv(self.V)
#         # lcb = x_t @ theta_hat - self.beta * np.linalg.norm(x_t, ord=2) / max(1, np.sqrt(np.linalg.det(self.V)))
#         # Use the correct LCB formula: x_t^T theta_hat - beta * ||x_t||_{V^{-1}}
#         else:
#             lcb=float(x @ theta)

#             if not self.stopped and lcb >= self.alpha:
#                 print(lcb, self.alpha)
#                 self.tau = t
#                 self.stopped = True
#                 self.rewards = y
#             elif t == self.n - 1:
#                 # If not stopped by threshold, stop at last round
#                 self.tau = self.n
#                 self.stopped = True
#                 self.rewards = y

#     def get_stopping_time(self):
#         return self.tau

#     def get_lcb_values(self):
#         return self.lcb_values

#     def get_rewards(self):
#         return self.rewards

#     def reset(self):
#         np.random.seed(self.seed)
#         random.seed(self.seed)

#     def name(self):
#         return 'Oracle'


# class Oracle_NonIID:
#     def __init__(self, seed, d, n):
#         print('LCB-Threshold Algorithm (Explore-then-Decide)')
#         self.seed = seed
#         np.random.seed(seed)
#         random.seed(seed)
#         self.highs=np.zeros((n, d))
#         self.lows=np.zeros((n, d))  
#         self.n = int(n)
#         self.d = int(d)
#         self.lambd = 1
#         # self.beta = np.sqrt(self.d * np.log(self.n))  # Confidence parameter

#         # Exploration length (can be overridden via setter)
#         self.l_n = int(np.sqrt(self.n))
    
#         # Running (exploration) accumulators: V = lambd*I + sum x x^T, b = sum y x
#         self.V = self.lambd * np.eye(self.d)
#         self.b = np.zeros(self.d)

#         # Frozen (decision-phase) params computed once at t == l_n
#         self.Vinv= None
#         self.theta_hat= None

#         # Bookkeeping
#         self.stopped = False
#         self.tau = None
#         self.lcb_values = np.array([np.nan])
#         self.rewards = np.array([0.0])

#         # Placeholder: before freezing, alpha is not known
#         # Keep the param but the *actual* alpha for ETD will be computed from theta_hat at t == l_n
#         self.alpha = 100

#     # ---------- optional: let user change l_n while keeping the original constructor ----------
#     #     self.l_n = int(l_n)
#     def _xi(self, x, Vinv):
#         self.S=1
#         self.L=1
#         # ξ(x) := sqrt(x^T V^{-1} x) * ( sqrt(d * log(n + n^2 L^2 / λ)) + S * sqrt(λ) )
#         # print('x',x)
#         # print('Vinv',Vinv)
#         rad = np.sqrt(float(x @ Vinv @ x ))
#         # term = np.sqrt(self.d * np.log(self.n + (self.n**2) * (self.L**2) / self.lambd)) + self.S * np.sqrt(self.lambd)
#         # term = 1
#         term = np.sqrt(self.d * np.log(self.n))
#         return rad * term
        
#     def _compute_alpha_from_empirical_cdf(self, theta):
#         # Bootstrap z from exploration features (proxy for 𝔻_x)
#         # X = np.asarray(self._x_hist, dtype=float)
#         # if X.shape[0] == 0:
#         #     # Fallback (no exploration data; should not happen when l_n >= 1)
#         #     self.alpha = -np.inf
#         #     return

#         # self.item_dist = (lambda: np.random.uniform(0, 1/np.sqrt(d), size=d))
#         sum=0
#         sample=50
#         sample=10000

#         for k in range(sample):
#             self.items = [np.random.uniform(self.lows[i], self.highs[i], size=self.d) for i in range(self.l_n+1,self.n)]
#             sum+=(1/2)*max([x.dot(theta) for x in self.items])
#         self.alpha=(1/sample)*sum
#         print('alpha',self.alpha)
#     def run(self, t, x, y,theta, l,h):
#         self.lows=l
#         self.highs=h


#         if t ==0:
#             self._compute_alpha_from_empirical_cdf(theta)


#         # Compute LCB_t(X_t)
#         # V_inv = np.linalg.inv(self.V)
#         # lcb = x_t @ theta_hat - self.beta * np.linalg.norm(x_t, ord=2) / max(1, np.sqrt(np.linalg.det(self.V)))
#         # Use the correct LCB formula: x_t^T theta_hat - beta * ||x_t||_{V^{-1}}
#         if t>0:
#             lcb=float(x @ theta)

#             if not self.stopped and lcb >= self.alpha:
#                 print(lcb, self.alpha)
#                 self.tau = t
#                 self.stopped = True
#                 self.rewards = y
#             elif t == self.n - 1:
#                 # If not stopped by threshold, stop at last round
#                 self.tau = self.n
#                 self.stopped = True
#                 self.rewards = y

#     def get_stopping_time(self):
#         return self.tau

#     def get_lcb_values(self):
#         return self.lcb_values

#     def get_rewards(self):
#         return self.rewards

#     def reset(self):
#         np.random.seed(self.seed)
#         random.seed(self.seed)

#     def name(self):
#         return 'Oracle'
    