import numpy as np

class KNNBandit:
    """k-nearest-neighbour bandit.

    The reward for a context is estimated as the mean reward of the ``k``
    stored observations closest to it in Euclidean distance. Only the most
    recent ``max_history`` observations are retained.
    """

    def __init__(self, k: int = 5, max_history: int = 5000):
        """
        kNN Bandit implementation.
        Estimates reward based on the average reward of the k-nearest neighbors in history.

        Args:
            k: Number of neighbors to consider. Must be at least 1.
            max_history: Maximum number of historical observations to store.
                Must be non-negative; 0 means nothing is ever retained.

        Raises:
            ValueError: If ``k < 1`` or ``max_history < 0``.
        """
        # Fail fast: k == 0 would average an empty slice (NaN) and a negative
        # k would make the ``[:k]`` slice drop the farthest points instead of
        # selecting the nearest ones.
        if k < 1:
            raise ValueError(f"k must be >= 1, got {k!r}")
        if max_history < 0:
            raise ValueError(f"max_history must be >= 0, got {max_history!r}")

        self.k = k
        self.max_history = max_history
        # Parallel lists, oldest observation first: history_x[i] is the
        # flattened context whose observed reward is history_y[i].
        self.history_x = []
        self.history_y = []

    def calc_ucb(self, x):
        """Estimate the reward for context ``x`` from its nearest neighbours.

        Args:
            x: Context features (any array-like); flattened to 1-D. Its
                length must match the stored contexts.

        Returns:
            A 3-tuple ``(pred, pred, 0.0)`` where ``pred`` is the mean reward
            of the ``min(k, len(history))`` nearest stored contexts. The first
            two entries are always identical and the third is always 0.0
            because no exploration term is computed (greedy kNN estimate).
            NOTE(review): presumably consumed as (ucb, mean, bonus) - confirm
            against the caller. With no history, ``(0.0, 0.0, 0.0)`` is
            returned.
        """
        if not self.history_x:
            # Nothing to compare against; the caller is expected to handle
            # this case (e.g. with a random fallback).
            return 0.0, 0.0, 0.0

        # Work in floating point: subtracting unsigned-integer contexts wraps
        # around (uint8: 0 - 1 == 255) and boolean subtraction is rejected by
        # numpy, either of which would corrupt or break the distances.
        query = np.asarray(x, dtype=float).ravel()
        contexts = np.asarray(self.history_x, dtype=float)

        # Euclidean distance from the query to every stored context.
        dists = np.linalg.norm(contexts - query, axis=1)

        # Use every observation while fewer than k are available.
        k = min(self.k, len(dists))
        nearest = np.argsort(dists)[:k]

        # Plain float so both return paths yield the same type.
        pred = float(np.mean(np.asarray(self.history_y)[nearest]))

        # We return pred as the UCB value (Greedy strategy based on kNN estimate)
        return pred, pred, 0.0

    def update(self, x, reward) -> None:
        """Record one observation, evicting the oldest ones beyond ``max_history``.

        Args:
            x: Context features (any array-like); stored as a flattened
                float copy, so later changes to ``x`` do not affect history.
            reward: Reward observed for ``x``.
        """
        self.history_x.append(np.array(x, dtype=float).ravel())
        self.history_y.append(reward)

        # Trim after appending so that max_history == 0 keeps nothing instead
        # of popping from an empty list, and so that a max_history lowered
        # after construction is honoured in a single call.
        overflow = len(self.history_x) - self.max_history
        if overflow > 0:
            del self.history_x[:overflow]
            del self.history_y[:overflow]

    def train(self, contexts, rewards, **kwargs) -> None:
        """Do nothing; present so the class offers a ``train`` method.

        kNN is a lazy learner: all work happens at query time in
        ``calc_ucb``. The arguments are accepted and ignored; in particular
        they are NOT added to the history.
        """
        # No batch training needed for lazy learning kNN
        pass
