import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from scipy.optimize import minimize
import warnings

warnings.filterwarnings('ignore')

def get_feature(U, V, u_idx, i_idx):
    """Build the joint feature vector for one (user, item) pair.

    The result is the outer product of the user row ``U[u_idx]`` and the
    item row ``V[i_idx]``, flattened to one dimension. No bias entry is
    appended.
    """
    user_vec, item_vec = U[u_idx], V[i_idx]
    return np.outer(user_vec, item_vec).ravel()

class CLogUCB:
    """Logistic UCB bandit that recommends ``K`` items per round.

    Each candidate feature ``x`` is scored as
    ``sigmoid(x @ theta) + alpha_t * sqrt(x @ V_t^{-1} @ x)`` with
    ``V_t = Gram_raw + (lambda_t / kappa) * I``. ``theta`` is refit by
    L2-regularised logistic regression on every observation seen so far.

    One entry is appended to each ``avg_*_history`` list per
    ``select_arms`` call. Despite the ``avg_`` prefix, the bonus and norm
    entries hold the *maximum* over the selected arms.
    """

    def __init__(self, dim, T, K, S, kappa):
        """Create an untrained bandit.

        Args:
            dim: Length of every feature vector.
            T: Horizon; the confidence level is ``delta = 1 / T``.
            K: Number of arms returned per round.
            S: Constant entering the confidence radius (presumably a bound
                on the parameter norm -- TODO confirm).
            kappa: Constant that divides ``dim`` inside the radius and
                scales the ridge term of the design matrix.
        """
        self.dim = dim
        self.T = T
        self.K = K
        self.S = S
        self.kappa = kappa

        self.t = 1
        self.delta = 1 / (self.T)
        # Regularisation is held fixed; no time-varying schedule is applied.
        self.lambda_t = 1.0
        self.alpha_t = self._confidence_radius()

        self.theta = np.zeros(dim)
        self.Gram_raw = np.zeros((dim, dim))

        self.X_history = []
        self.y_history = []

        self.model = LogisticRegression(
            penalty='l2',
            solver='lbfgs',
            fit_intercept=False,
        )

        self.avg_bonus_history = []
        self.avg_alpha_history = []
        self.avg_norm_history = []
        self.avg_det_history = []

    def _confidence_radius(self):
        """Return the exploration multiplier ``alpha_t`` for the current ``t``."""
        log_term = np.log(4 * (1 + self.K * self.t) / self.delta)
        return 0.1 * (self.S**2 + 4 * self.S + 19/4) * np.sqrt((self.dim / self.kappa) * log_term)

    @staticmethod
    def _log_det(matrix):
        """Return ``log|det(matrix)|`` without forming the determinant.

        ``np.log(np.linalg.det(...))`` overflows to ``inf`` once the
        determinant leaves the float range, which happens quickly as
        ``dim`` grows; ``slogdet`` works in log space instead.
        """
        _, log_abs_det = np.linalg.slogdet(matrix)
        return log_abs_det

    def _compute_mle(self, lambda_t):
        """Refit ``theta`` on the stored history and return it.

        The current ``theta`` is returned unchanged while fewer than two
        distinct labels have been observed, because the classifier cannot
        be fit on a single class.
        """
        if len(np.unique(self.y_history)) < 2:
            return self.theta

        self.model.C = 1.0 / lambda_t
        self.model.fit(self.X_history, self.y_history)
        return self.model.coef_[0]

    def _sigmoid(self, z):
        """Logistic function applied element-wise."""
        return 1.0 / (1.0 + np.exp(-z))

    def select_arms(self, U, V, u_idx, item_pool_indices):
        """Pick the ``K`` candidates with the highest UCB score.

        Args:
            U: User embedding table, indexed by ``u_idx``.
            V: Item embedding table, indexed by each pool entry.
            u_idx: Index of the current user.
            item_pool_indices: Candidate item indices for this round.

        Returns:
            ``(top_k_indices, top_k_features)``. ``top_k_indices`` are
            positions *within* ``item_pool_indices`` (best first), not the
            item indices themselves.
        """
        features = np.array(
            [get_feature(U, V, u_idx, i_idx) for i_idx in item_pool_indices]
        )

        mu = self._sigmoid(np.dot(features, self.theta))

        Gram_t = self.Gram_raw + (1 / self.kappa) * self.lambda_t * np.eye(self.dim)
        V_t_inv = np.linalg.inv(Gram_t)

        # Clip at zero so round-off in the quadratic form cannot yield NaN.
        quad_form = np.sum(np.dot(features, V_t_inv) * features, axis=1)
        norms_V = np.sqrt(np.maximum(quad_form, 0.0))

        ucb_bonus = self.alpha_t * norms_V
        ucb_scores = mu + ucb_bonus

        top_k_indices = np.argsort(ucb_scores)[-self.K:][::-1]

        self.avg_bonus_history.append(np.max(ucb_bonus[top_k_indices]))
        self.avg_alpha_history.append(self.alpha_t)
        self.avg_norm_history.append(np.max(norms_V[top_k_indices]))
        self.avg_det_history.append(self._log_det(Gram_t))

        return top_k_indices, features[top_k_indices]

    def update(self, features_observed, rewards_observed):
        """Absorb one round of feedback.

        Args:
            features_observed: Feature rows of the arms whose reward was
                seen; an ndarray or any nested sequence of equal-length rows.
            rewards_observed: Matching binary rewards.
        """
        self.t += 1
        # The radius depends only on t, so keep it in step even on rounds
        # that bring no feedback.
        self.alpha_t = self._confidence_radius()

        if len(features_observed) == 0:
            return

        batch = np.asarray(features_observed, dtype=float)

        self.X_history.extend(batch)
        self.y_history.extend(rewards_observed)

        self.theta = self._compute_mle(self.lambda_t)

        self.Gram_raw += np.dot(batch.T, batch)

class VACLogUCB:
    """Variance-aware logistic UCB bandit that recommends ``K`` items per round.

    Two design matrices are maintained: ``H_raw`` weights each observed
    feature by ``p * (1 - p)`` at the current estimate, while ``Gram_raw``
    is the unweighted sum of outer products. The bonus of a candidate ``x``
    is ``sigma_t * p(1-p) * ||x||_{H_t^{-1}}
    + sigma_t**2 / (8 * kappa) * ||x||_{V_t^{-1}}**2``.

    One scalar is appended to each ``avg_*_history`` list per
    ``select_arms`` call. Despite the ``avg_`` prefix, the bonus, alpha and
    norm entries hold the *maximum* over the selected arms.
    """

    def __init__(self, dim, T, K, S, kappa):
        """Create an untrained bandit.

        Args:
            dim: Length of every feature vector.
            T: Horizon; the confidence level is ``delta = 1 / T``.
            K: Number of arms returned per round.
            S: Constant entering the confidence radius; also returned as
                the projection bound while no data has been seen.
            kappa: Constant scaling the ridge term of ``Gram_t`` and the
                second-order bonus term.
        """
        self.dim = dim
        self.K = K
        self.T = T
        self.S = S
        self.delta = 1.0 / T
        self.kappa = kappa

        self.t = 1

        # Regularisation is held fixed; no time-varying schedule is applied.
        self.lambda_t = 1.0
        self.sigma_t = self._confidence_radius()

        self.theta = np.zeros(dim)

        self.H_raw = np.zeros((dim, dim))
        self.Gram_raw = np.zeros((dim, dim))

        self.X_history = []  # observed feature rows
        self.y_history = []  # observed labels

        self.model = LogisticRegression(
            penalty='l2',
            solver='lbfgs',
            fit_intercept=False
        )

        self.avg_bonus_history = []
        self.avg_alpha_history = []
        self.avg_norm_history = []
        self.avg_det_history = []

    def _confidence_radius(self):
        """Return the confidence radius ``sigma_t`` for the current ``t``."""
        log_term = np.log(4 * (1 + self.t * self.K) / self.delta)
        return 0.1 * (2 * self.S + 1) * (2 * self.S + 3) * np.sqrt(self.dim * log_term)

    @staticmethod
    def _log_det(matrix):
        """Return ``log|det(matrix)|`` without forming the determinant.

        ``np.log(np.linalg.det(...))`` overflows to ``inf`` once the
        determinant leaves the float range; ``slogdet`` works in log space.
        """
        _, log_abs_det = np.linalg.slogdet(matrix)
        return log_abs_det

    def _compute_mle(self, lambda_t):
        """Refit ``theta`` on the stored history and return it.

        The current ``theta`` is returned unchanged while fewer than two
        distinct labels exist, or when the fit raises (best effort: the
        failure is printed and the previous estimate kept).
        """
        if len(np.unique(self.y_history)) < 2:
            return self.theta

        self.model.C = 1.0 / lambda_t

        try:
            self.model.fit(self.X_history, self.y_history)
        except Exception as e:
            print(f"[Warning] MLE Fit Failed: {e}")
            return self.theta
        return self.model.coef_[0]

    def _sigmoid(self, z):
        """Logistic function applied element-wise."""
        return 1.0 / (1.0 + np.exp(-z))

    def _compute_Q_t_bound(self, sigma_t):
        """Return ``sigma_t`` times the largest stored feature norm.

        Falls back to ``S`` while the history is empty.
        """
        if not self.X_history:
            return self.S
        row_norms = np.linalg.norm(np.asarray(self.X_history), axis=1)
        return np.max(sigma_t * row_norms)

    def _compute_g_t(self, theta, lambda_t, X=None):
        """Evaluate ``g(theta) = X^T sigmoid(X theta) + lambda_t * theta``.

        Args:
            theta: Point at which to evaluate.
            lambda_t: Ridge coefficient.
            X: Optional pre-built history matrix. Passing it avoids
                re-stacking ``X_history`` on every call, which matters
                inside an optimiser loop.
        """
        if X is None:
            X = np.array(self.X_history)
        if len(X) == 0:
            return lambda_t * theta

        mu = self._sigmoid(np.dot(X, theta))
        return np.dot(X.T, mu) + lambda_t * theta

    def _project_mle(self, theta, H_inv, Q_t_bound, lambda_t):
        """Pull ``theta`` back into the ball of radius ``Q_t_bound``.

        Minimises ``||g(theta') - g(theta)||_{H_inv}^2`` subject to
        ``||theta'|| <= Q_t_bound`` with SLSQP. If the solver reports
        failure the input ``theta`` is returned unchanged.
        """
        # Stack the history once; the objective is evaluated many times.
        X = np.array(self.X_history)
        target_g = self._compute_g_t(theta, lambda_t, X)

        def objective(candidate):
            diff = self._compute_g_t(candidate, lambda_t, X) - target_g
            return np.dot(diff, np.dot(H_inv, diff))

        constraints = {
            'type': 'ineq',
            'fun': lambda candidate: Q_t_bound - np.linalg.norm(candidate),
        }

        result = minimize(objective, theta, constraints=constraints, method='SLSQP', tol=1e-4)

        return result.x if result.success else theta

    def select_arms(self, U, V, u_idx, item_pool_indices):
        """Pick the ``K`` candidates with the highest UCB score.

        Args:
            U: User embedding table, indexed by ``u_idx``.
            V: Item embedding table, indexed by each pool entry.
            u_idx: Index of the current user.
            item_pool_indices: Candidate item indices for this round.

        Returns:
            ``(top_k_indices, top_k_features)``. ``top_k_indices`` are
            positions *within* ``item_pool_indices`` (best first), not the
            item indices themselves.
        """
        features = np.array(
            [get_feature(U, V, u_idx, i_idx) for i_idx in item_pool_indices]
        )

        mu = self._sigmoid(np.dot(features, self.theta))
        dot_ell = mu * (1 - mu)

        H_t = self.H_raw + self.lambda_t * np.eye(self.dim)
        Gram_t = self.Gram_raw + (1 / self.kappa) * self.lambda_t * np.eye(self.dim)

        H_t_inv = np.linalg.inv(H_t)
        V_t_inv = np.linalg.inv(Gram_t)

        # Clip at zero so round-off in the quadratic forms cannot yield NaN.
        quad_H = np.maximum(np.sum(np.dot(features, H_t_inv) * features, axis=1), 0.0)
        quad_V = np.maximum(np.sum(np.dot(features, V_t_inv) * features, axis=1), 0.0)
        norms_H = np.sqrt(quad_H)

        ucb_bonus = (self.sigma_t * dot_ell * norms_H) + \
            (0.125 * (1 / self.kappa) * (self.sigma_t**2) * quad_V)

        ucb_scores = mu + ucb_bonus

        top_k_indices = np.argsort(ucb_scores)[-self.K:][::-1]

        self.avg_bonus_history.append(np.max(ucb_bonus[top_k_indices]))
        # Store one scalar per round (max over the chosen arms) so this
        # history has the same shape as the other diagnostics.
        self.avg_alpha_history.append(np.max(self.sigma_t * dot_ell[top_k_indices]))
        self.avg_norm_history.append(np.max(norms_H[top_k_indices]))
        self.avg_det_history.append(self._log_det(H_t))

        return top_k_indices, features[top_k_indices]

    def update(self, features_observed, rewards_observed):
        """Absorb one round of feedback.

        Args:
            features_observed: Feature rows of the arms whose reward was
                seen; an ndarray or any nested sequence of equal-length rows.
            rewards_observed: Matching binary rewards.
        """
        self.t += 1
        self.sigma_t = self._confidence_radius()
        if len(features_observed) == 0:
            return

        batch = np.asarray(features_observed, dtype=float)

        self.X_history.extend(batch)
        self.y_history.extend(rewards_observed)

        self.theta = self._compute_mle(self.lambda_t)

        Q_t_bound = self._compute_Q_t_bound(self.sigma_t)

        if np.linalg.norm(self.theta) > Q_t_bound:
            H_t = self.H_raw + self.lambda_t * np.eye(self.dim)
            H_t_inv = np.linalg.inv(H_t)
            self.theta = self._project_mle(self.theta, H_t_inv, Q_t_bound, self.lambda_t)

        # Accumulate both design matrices for the whole batch at once.
        preds = self._sigmoid(np.dot(batch, self.theta))
        weights = preds * (1 - preds)
        self.H_raw += np.dot(batch.T * weights, batch)
        self.Gram_raw += np.dot(batch.T, batch)


class UCBCCA:
    """UCB bandit that returns an ordered list of ``K`` items per round.

    Candidates are ranked by the linear score ``x @ theta`` plus
    ``alpha_t * sqrt(x @ A^{-1} @ x)``, where ``A`` is the ridge-regularised
    sum of outer products of observed features. After ranking, the selected
    arm with the largest exploration norm is moved to the first position.
    ``theta`` is refit by logistic regression on all observations so far.

    One entry is appended to each ``avg_*_history`` list per
    ``select_arms`` call. Despite the ``avg_`` prefix, the bonus and norm
    entries hold the *maximum* over the selected arms.
    """

    def __init__(self, dim, T, K, S, kappa):
        """Create an untrained bandit.

        Args:
            dim: Length of every feature vector.
            T: Horizon; the confidence level is ``delta = 1 / T**2``.
            K: Number of arms returned per round.
            S: Constant entering the confidence radius (presumably a bound
                on the parameter norm -- TODO confirm).
            kappa: Constant whose inverse scales the confidence radius.
        """
        self.dim = dim
        self.T = T
        self.K = K
        self.S = S
        self.lambda_ = 1.0
        self.kappa = kappa

        self.delta = 1 / (self.T ** 2)

        self.t = 1
        self.alpha_t = self._confidence_radius()

        self.X_history = []
        self.y_history = []

        self.A = np.eye(dim) * self.lambda_
        self.A_inv = np.eye(dim) * (1.0 / self.lambda_)

        self.theta = np.zeros(dim)

        self.model = LogisticRegression(fit_intercept=False, solver='lbfgs', C=(1.0 / self.lambda_))

        self.avg_bonus_history = []
        self.avg_alpha_history = []
        self.avg_norm_history = []
        self.avg_det_history = []

    def _confidence_radius(self):
        """Return the exploration multiplier ``alpha_t`` for the current ``t``."""
        growth = self.dim * np.log(1 + (self.t * self.K) / (self.dim * self.lambda_))
        inner = (1/2) * np.sqrt(growth + np.log(1 / self.delta)) + np.sqrt(self.lambda_) * self.S
        return 0.1 * (1 / self.kappa) * inner

    @staticmethod
    def _log_det(matrix):
        """Return ``log|det(matrix)|`` without forming the determinant.

        ``np.log(np.linalg.det(...))`` overflows to ``inf`` once the
        determinant leaves the float range; ``slogdet`` works in log space.
        """
        _, log_abs_det = np.linalg.slogdet(matrix)
        return log_abs_det

    def select_arms(self, U, V, u_idx, item_pool_indices):
        """Pick and order ``K`` candidates by UCB score.

        Args:
            U: User embedding table, indexed by ``u_idx``.
            V: Item embedding table, indexed by each pool entry.
            u_idx: Index of the current user.
            item_pool_indices: Candidate item indices for this round.

        Returns:
            ``(selected_items, selected_features)``. ``selected_items`` is
            a list of entries taken from ``item_pool_indices``; its first
            element is the selected arm with the largest exploration norm.
        """
        features = np.array(
            [get_feature(U, V, u_idx, i_idx) for i_idx in item_pool_indices]
        )

        mle_scores = np.dot(features, self.theta)

        # Clip at zero so round-off in the quadratic form cannot yield NaN.
        quad_form = np.sum(np.dot(features, self.A_inv) * features, axis=1)
        exploration_scores = np.sqrt(np.maximum(quad_form, 0.0))

        ucb_bonus = self.alpha_t * exploration_scores
        ucb_scores = mle_scores + ucb_bonus

        top_k_indices = np.argsort(ucb_scores)[-self.K:][::-1]

        # Put the most uncertain of the chosen arms in the first slot.
        lead = int(np.argmax(exploration_scores[top_k_indices]))
        if lead != 0:
            top_k_indices[[0, lead]] = top_k_indices[[lead, 0]]

        selected_items = [item_pool_indices[i] for i in top_k_indices]

        self.avg_bonus_history.append(np.max(ucb_bonus[top_k_indices]))
        self.avg_alpha_history.append(self.alpha_t)
        self.avg_norm_history.append(np.max(exploration_scores[top_k_indices]))
        self.avg_det_history.append(self._log_det(self.A))

        return selected_items, features[top_k_indices]

    def update(self, features_observed, rewards_observed):
        """Absorb one round of feedback.

        Args:
            features_observed: Feature rows of the arms whose reward was
                seen; an ndarray or any nested sequence of equal-length rows.
            rewards_observed: Matching binary rewards.
        """
        self.t += 1
        self.alpha_t = self._confidence_radius()

        # Nothing observed: the data, the fit and A are all unchanged.
        if len(features_observed) == 0:
            return

        batch = np.asarray(features_observed, dtype=float)

        self.X_history.extend(batch)
        self.y_history.extend(rewards_observed)

        # The classifier cannot be fit until both labels have been seen.
        if len(np.unique(self.y_history)) >= 2:
            self.model.fit(self.X_history, self.y_history)
            self.theta = self.model.coef_[0]

        self.A += np.dot(batch.T, batch)
        self.A_inv = np.linalg.inv(self.A)

    
class UCBCLB:
    """UCB bandit with an online (per-sample) parameter update.

    Candidates are ranked by ``x @ theta + alpha_t * sqrt(x @ H^{-1} @ x)``.
    ``theta`` is updated one observation at a time with a preconditioned
    gradient step that is rescaled back onto the ball of radius ``S``, and
    ``H`` accumulates ``p * (1 - p) * x x^T`` evaluated after each step.

    One entry is appended to each ``avg_*_history`` list per
    ``select_arms`` call. Despite the ``avg_`` prefix, the bonus and norm
    entries hold the *maximum* over the selected arms.
    """

    def __init__(self, dim, T, K, S, kappa):
        """Create an untrained bandit.

        Args:
            dim: Length of every feature vector.
            T: Horizon (stored, not used by this class's own formulas).
            K: Number of arms returned per round.
            S: Radius of the ball ``theta`` is kept inside; also enters
                the step size and the exploration multiplier.
            kappa: Accepted for signature parity with the sibling
                policies; not used here.
        """
        self.dim = dim
        self.T = T
        self.K = K
        self.S = S

        self.t = 1

        self.eta = (1/2) * np.log(2) + self.S + 1
        self.alpha_t = self._exploration_radius()
        # Regularisation is held fixed.
        self.lambda_t = 1.0

        self.theta = np.zeros(dim)

        self.H = np.eye(dim) * self.lambda_t
        self.H_inv = np.eye(dim) * (1.0 / self.lambda_t)

        self.avg_bonus_history = []
        self.avg_alpha_history = []
        self.avg_norm_history = []
        self.avg_det_history = []

    def _exploration_radius(self):
        """Return the exploration multiplier ``alpha_t`` for the current ``t``.

        NOTE(review): the leading 0.1 scales only the ``log(t)`` term, not
        the constant term; the sibling policies scale their whole radius.
        Kept as written -- confirm this is intended.
        """
        time_term = 0.1 * self.S * np.sqrt(self.dim) * np.log(self.t) * np.log(2)
        const_term = (self.S**(3/2)) * np.sqrt(self.dim * np.log(2))
        return time_term + const_term

    @staticmethod
    def _log_det(matrix):
        """Return ``log|det(matrix)|`` without forming the determinant.

        ``np.log(np.linalg.det(...))`` overflows to ``inf`` once the
        determinant leaves the float range; ``slogdet`` works in log space.
        """
        _, log_abs_det = np.linalg.slogdet(matrix)
        return log_abs_det

    def _sigmoid(self, z):
        """Logistic function applied element-wise."""
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def _sigmoid_derivative(z):
        """Return ``sigmoid(z) * (1 - sigmoid(z))`` element-wise.

        Written in terms of ``exp(-|z|)`` so that large ``|z|`` gives 0
        rather than the ``inf / inf = nan`` produced by
        ``exp(z) / (1 + exp(z))**2``.
        """
        decay = np.exp(-np.abs(z))
        return decay / (1.0 + decay) ** 2

    def select_arms(self, U, V, u_idx, item_pool_indices):
        """Pick and order ``K`` candidates by UCB score.

        After ranking, the selected arm maximising
        ``sigmoid'(score) * norm`` is placed first. If a different arm
        maximises the plain norm, that one is placed second; the remaining
        arms keep their relative order.

        Args:
            U: User embedding table, indexed by ``u_idx``.
            V: Item embedding table, indexed by each pool entry.
            u_idx: Index of the current user.
            item_pool_indices: Candidate item indices for this round.

        Returns:
            ``(selected_items, selected_features)`` where
            ``selected_items`` is a list of entries taken from
            ``item_pool_indices``.
        """
        features = np.array(
            [get_feature(U, V, u_idx, i_idx) for i_idx in item_pool_indices]
        )

        omd_scores = np.dot(features, self.theta)

        # Clip at zero so round-off in the quadratic form cannot yield NaN.
        quad_form = np.sum(np.dot(features, self.H_inv) * features, axis=1)
        exploration_scores_H = np.sqrt(np.maximum(quad_form, 0.0))

        ucb_bonus = self.alpha_t * exploration_scores_H
        ucb_scores = omd_scores + ucb_bonus

        top_k_indices = np.argsort(ucb_scores)[-self.K:][::-1]

        chosen_norms = exploration_scores_H[top_k_indices]
        chosen_curvature = self._sigmoid_derivative(omd_scores)[top_k_indices]

        lead_weighted = int(np.argmax(chosen_curvature * chosen_norms))
        lead_plain = int(np.argmax(chosen_norms))

        if lead_weighted == lead_plain:
            if lead_weighted != 0:
                top_k_indices[[0, lead_weighted]] = top_k_indices[[lead_weighted, 0]]
        else:
            rest = np.delete(top_k_indices, [lead_weighted, lead_plain])
            top_k_indices = np.concatenate(
                ([top_k_indices[lead_weighted]], [top_k_indices[lead_plain]], rest)
            )

        selected_items = [item_pool_indices[i] for i in top_k_indices]

        self.avg_bonus_history.append(np.max(ucb_bonus[top_k_indices]))
        self.avg_alpha_history.append(self.alpha_t)
        self.avg_norm_history.append(np.max(exploration_scores_H[top_k_indices]))
        self.avg_det_history.append(self._log_det(self.H))

        return selected_items, features[top_k_indices]

    def update(self, features_observed, rewards_observed):
        """Absorb one round of feedback, one observation at a time.

        Args:
            features_observed: Iterable of feature vectors.
            rewards_observed: Matching binary rewards.
        """
        self.t += 1
        self.alpha_t = self._exploration_radius()

        for x, y in zip(features_observed, rewards_observed):
            xxT = np.outer(x, x)

            pred = self._sigmoid(np.dot(x, self.theta))
            grad = (pred - y) * x

            # Look-ahead preconditioner: current H plus this sample's
            # curvature scaled by the step size.
            H_tilde = self.H + self.eta * pred * (1 - pred) * xxT
            step_direction = np.linalg.solve(H_tilde, grad)
            theta_prime = self.theta - self.eta * step_direction

            # Rescale onto the ball of radius S when the step leaves it.
            norm_theta = np.linalg.norm(theta_prime)
            if norm_theta > self.S:
                theta_prime = theta_prime * (self.S / norm_theta)

            self.theta = theta_prime

            # H accumulates curvature evaluated at the *updated* estimate.
            pred_new = self._sigmoid(np.dot(x, self.theta))
            self.H += pred_new * (1.0 - pred_new) * xxT
            self.H_inv = np.linalg.inv(self.H)
            