#!/usr/bin/env python
# coding: utf-8

# Importing python packages
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, Matern

# ### Useful functions ###
# Different Goodness value functions
def get_goodness_value(utility, agent_weights=None, agent=None):
    """Return the weighted Gini social welfare of a utility vector.

    The utilities are arranged in ascending order and each one is multiplied
    by the weight sitting at the same position in ``agent_weights``; the
    products are then summed (sum_i w_i * u_(i), with u_(i) the i-th
    smallest utility).

    Args:
        utility: NumPy array with the cumulative utility of every agent.
        agent_weights: Array of weights, one per rank position.
        agent: Index of the agent under consideration. It is not used by the
            weighted Gini objective; it is kept so alternative objectives
            (see the notes below) can share the same signature.

    Returns:
        The scalar goodness value.
    """
    # Alternative objectives that have been tried with this signature:
    #   Targeted fairness [rho=3]:
    #       agent_weights[agent]*np.sum(utility) - utility[agent]
    #   Nash social welfare (NSW):
    #       np.power(np.prod(utility), 1.0/len(utility))
    #   log NSW:
    #       (1.0/len(utility))*np.log(np.prod(utility))

    # Rank the agents from worst-off to best-off.
    ascending_order = np.argsort(utility)
    ranked_utility = utility[ascending_order]

    # Position-wise weighting of the ranked utilities.
    return np.sum(agent_weights * ranked_utility)


# Compute Gini coefficient
def compute_gini_coefficient(x):
    """Compute the Gini coefficient of a vector of values.

    The coefficient is half of the relative mean absolute difference, i.e.
    ``0.5 * mean(|x_i - x_j|) / mean(x)`` with the mean taken over all
    ordered pairs (i, j).

    Args:
        x: One-dimensional array-like of values (e.g. agents' utilities).

    Returns:
        The Gini coefficient. When the mean of ``x`` is exactly zero (for
        instance when every utility is still zero) the relative difference
        is undefined; 0.0 is returned in that case instead of NaN.
    """
    mean_value = np.mean(x)

    # Guard against 0/0: without it an all-zero vector yields NaN plus a
    # RuntimeWarning, which then poisons every downstream statistic.
    if mean_value == 0:
        return 0.0

    # Mean absolute difference over all ordered pairs
    mad = np.abs(np.subtract.outer(x, x)).mean()

    # Relative mean absolute difference
    rmad = mad / mean_value

    # Gini coefficient
    return 0.5 * rmad


# Different latent reward functions
def get_utility(utility_function, x, sigma):
    """Return a noisy, non-negative utility for the latent score ``x``.

    The latent score is passed through the selected reward shape, Gaussian
    noise with standard deviation ``sigma`` is added, and the result is
    clipped from below at 0.

    Args:
        utility_function: One of 'linear', 'square', 'cosine' or 'sine'.
        x: Scalar latent score.
        sigma: Standard deviation of the additive Gaussian noise.

    Returns:
        The clipped noisy utility.

    Raises:
        ValueError: If ``utility_function`` is not one of the known names.
            No random number is drawn in that case.
    """
    # Noise-free reward for the requested shape
    if utility_function == 'linear':
        latent = x
    elif utility_function == 'square':
        latent = 10*(x**2)
    elif utility_function == 'cosine':
        latent = np.cos(3*x)
    elif utility_function == 'sine':
        latent = 3*np.sin(x)
    else:
        raise ValueError('Unknown utility function: {}'.format(utility_function))

    # Exactly one Gaussian draw per call, then clip at zero
    noise = np.random.normal(0, sigma)
    return max(latent + noise, 0)


# ### Online Fair Division Linear Algorithms ###
# OFD algorithm with uniform allocation given weights of agents
def ofd_uniform(ofd_problem, utility_function='linear'):
    """Baseline that hands every item to an agent chosen uniformly at random.

    Args:
        ofd_problem: Indexable problem instance. This function reads
            index 0 (item-agent feature vectors, one row of K vectors per
            item), index 1 (true parameter vector), index 6 (noise standard
            deviation) and index 8 (agent weights).
        utility_function: Reward shape forwarded to ``get_utility``.

    Returns:
        A pair ``(instantaneous_regret, other_statistics)`` where
        ``other_statistics`` is the list ``[total_utility, gini_coefficient,
        min_total_utility_ratio, target_weights_diff]``. Every list holds
        one entry per round after the first K warm-up rounds.
    """
    contexts      = ofd_problem[0]      # Item-agent feature vectors
    theta         = ofd_problem[1]      # True parameter vector
    sigma         = ofd_problem[6]      # Noise parameter of the utility
    agent_weights = ofd_problem[8]      # Weight of each agent

    num_items  = len(contexts)          # Total number of items
    num_agents = len(contexts[0])       # Total number of agents
    cumulative = np.zeros(num_agents)   # Utility collected by each agent

    # Per-round traces
    regrets = []
    welfare_trace = []
    min_share_trace = []
    gini_trace = []
    weight_gap_trace = []

    # Warm-up: item t goes to agent t. No regret is recorded for these
    # rounds because they are common to all algorithms.
    for agent in range(num_agents):
        features = contexts[agent][agent]
        cumulative[agent] += get_utility(utility_function, features.dot(theta), sigma)

    # Remaining items
    for step in range(num_agents, num_items):
        round_contexts = contexts[step]

        # Uniformly random allocation (drawn before the oracle scores so the
        # random stream is consumed in a fixed order)
        chosen = np.random.choice(num_agents)

        # Oracle goodness value of giving the item to each agent, used for
        # the regret only
        oracle_scores = np.zeros(num_agents)
        for agent in range(num_agents):
            hypothetical = cumulative.copy()
            hypothetical[agent] += get_utility(
                utility_function, round_contexts[agent].dot(theta), 0)
            oracle_scores[agent] = get_goodness_value(hypothetical, agent_weights, agent)

        # Allocate the item and observe the noisy utility
        observed = get_utility(utility_function, round_contexts[chosen].dot(theta), sigma)
        cumulative[chosen] += observed

        # Instantaneous regret against the best oracle allocation
        regrets.append(max(oracle_scores) - oracle_scores[chosen])

        # Other statistics
        total = np.sum(cumulative)
        welfare_trace.append(total)
        min_share_trace.append(np.min(cumulative)/total)
        gini_trace.append(compute_gini_coefficient(cumulative))
        weight_gap_trace.append(np.linalg.norm(agent_weights - (cumulative/total)))

    return regrets, [welfare_trace, gini_trace, min_share_trace, weight_gap_trace]


# Online Fair Division (OFD) algorithm with linear contextual bandits
def ofd_linear(ofd_problem, strategy='ucb', utility_function='linear'):
    """Online fair division with a linear contextual bandit learner.

    A ridge-regression estimate ``theta_hat`` of the utility parameter is
    maintained from the observed (feature, utility) pairs. In every round
    each agent receives an index utility (Thompson sample, upper confidence
    bound or plain estimate) and the item goes to the agent whose
    hypothetical allocation maximizes ``get_goodness_value``.

    Args:
        ofd_problem: Indexable problem instance with entries
            [item_agents, theta, d, lambda, L, S, sigma, delta,
            agent_weights]. ``item_agents[t]`` must support ``.dot`` (a
            K x d NumPy array).
        strategy: 'ts', 'ucb' or 'greedy' (greedy explores uniformly with
            probability 0.1).
        utility_function: Reward shape forwarded to ``get_utility``.

    Returns:
        A pair ``(instantaneous_regret, other_statistics)`` where
        ``other_statistics`` is ``[total_utility, gini_coefficient,
        min_total_utility_ratio, target_weights_diff]``; one entry per round
        after the first K warm-up rounds.

    Raises:
        RuntimeError: If ``strategy`` is not recognised (raised on the first
            learning round).
    """
    all_item_agents = ofd_problem[0]        # Item-agent feature vectors
    theta           = ofd_problem[1]        # True parameter vector (environment only)
    d               = ofd_problem[2]        # Dimension of item-agent feature vector
    lambda_value    = ofd_problem[3]        # Regularizer keeping V invertible
    L               = ofd_problem[4]        # Value of L, i.e., max ||x_i||
    S               = ofd_problem[5]        # Value of S, i.e., max ||theta||
    sigma           = ofd_problem[6]        # Sub-gaussian noise parameter of utility
    delta           = ofd_problem[7]        # Confidence parameter
    agent_weights   = ofd_problem[8]        # Weights of each agent

    # Initialization of different variables
    T               = len(all_item_agents)          # Total number of items
    K               = len(all_item_agents[0])       # Total number of agents
    agent_utility   = np.zeros(K)                   # Utility of each agent
    XY_sum          = np.zeros(d)                   # Sum of x * y
    V               = lambda_value * np.identity(d) # Regularized design matrix

    # Per-round traces
    instantaneous_regret = []
    total_utility = []
    min_total_utility_ratio = []
    gini_coefficient = []
    target_weights_diff = []

    # Warm-up: item t goes to agent t. Regret is not recorded for these
    # rounds as they are common to all algorithms.
    for t in range(K):
        item_agent = all_item_agents[t][t]
        noisy_utility = get_utility(utility_function, item_agent.dot(theta), sigma)

        agent_utility[t]    += noisy_utility
        XY_sum              += item_agent*noisy_utility
        V                   += np.outer(item_agent, item_agent)

    # Fixed terms in confidence bound used in UCB strategy
    alpha_fix   = S*np.sqrt(lambda_value)   # Fix term for alpha
    conf_ratio  = (L*L)/lambda_value        # Ratio term in confidence bound

    # Ridge estimate after the warm-up
    V_inv       = np.linalg.inv(V)
    theta_hat   = np.inner(V_inv, XY_sum)

    for t in range(K, T):
        # Observe item-agent vectors
        item_agents = all_item_agents[t]

        # Current estimate of the latent reward. This must come from the
        # learned theta_hat: the true theta is only available to the
        # environment (utility generation and regret computation).
        est_utility = item_agents.dot(theta_hat)

        # Quantities shared by all agents within the round
        if strategy == 'ts':
            # One posterior sample per round, shared by all agents
            # (following the Linear Contextual TS approach)
            alpha_t = max(sigma*np.sqrt(9.0*d*np.log((t+1)/delta)), 0)
            theta_tilde = np.random.multivariate_normal(theta_hat, alpha_t*alpha_t*V_inv)
        elif strategy == 'ucb':
            # The confidence radius depends on t only, not on the agent.
            # NOTE(review): the usual OFUL radius takes the square root of
            # the log term; kept as in the original - confirm intent.
            log_cnfterm = d*np.log((1.0 + ((t+1)*conf_ratio))/delta)
            alpha_t = alpha_fix + (sigma*log_cnfterm)

        # Score every candidate allocation
        agents_score = np.zeros(K)
        true_agents_score = np.zeros(K)
        for a in range(K):
            xt_a = item_agents[a]

            # Index utility of agent a under the chosen strategy
            if strategy == 'ts':
                index_utility = max(xt_a.dot(theta_tilde), 0)
            elif strategy == 'ucb':
                conf_term = alpha_t * np.sqrt(np.inner(np.inner(xt_a, V_inv), xt_a))
                index_utility = est_utility[a] + conf_term
            elif strategy == 'greedy':
                index_utility = est_utility[a]
            else:
                raise RuntimeError('Exploration strategy not set')

            # Goodness value if the item were given to agent a
            candidate_utility = np.copy(agent_utility)
            candidate_utility[a] += index_utility
            agents_score[a] = get_goodness_value(candidate_utility, agent_weights, a)

            # True goodness value of the same allocation: for regret only
            agent_true_utility = np.copy(agent_utility)
            noisy_free_utility = get_utility(utility_function, xt_a.dot(theta), 0)
            agent_true_utility[a] += noisy_free_utility
            true_agents_score[a] = get_goodness_value(agent_true_utility, agent_weights, a)

        # Selecting agent with maximum score
        a_t = np.argmax(agents_score)

        # For greedy strategy, do exploration with epsilon probability
        if strategy == 'greedy':
            epsilon = 0.1
            if np.random.uniform(0, 1) < epsilon:
                a_t = np.random.choice(K)

        # Assigning item to the selected agent and observing the utility
        item_agent = item_agents[a_t]
        noisy_utility = get_utility(utility_function, item_agent.dot(theta), sigma)

        # Updating sufficient statistics and the ridge estimate
        agent_utility[a_t]  += noisy_utility
        XY_sum              += item_agent*noisy_utility
        V                   += np.outer(item_agent, item_agent)
        V_inv               = np.linalg.inv(V)
        theta_hat           = np.inner(V_inv, XY_sum)

        # Instantaneous regret
        regret = max(true_agents_score) - true_agents_score[a_t]
        instantaneous_regret.append(regret)

        # Update other statistics
        total_utility.append(np.sum(agent_utility))
        min_total_utility_ratio.append(np.min(agent_utility)/np.sum(agent_utility))
        gini_coefficient.append(compute_gini_coefficient(agent_utility))
        target_weights_diff.append(np.linalg.norm(agent_weights - (agent_utility/np.sum(agent_utility))))

    other_statistics = [total_utility, gini_coefficient, min_total_utility_ratio, target_weights_diff]
    return instantaneous_regret, other_statistics


# Online Fair Division (OFD) algorithm with Gaussian process
def ofd_gp(ofd_problem, strategy='ucb', utility_function='linear'):
    """Online fair division with a Gaussian-process surrogate of the utility.

    A GP with a Matern-5/2 kernel is fitted on the observed
    (feature, utility) pairs, first right after the warm-up and then every
    20 rounds. Its posterior mean and standard deviation give each agent an
    index utility (Thompson sample, upper confidence bound or plain mean),
    and the item goes to the agent whose hypothetical allocation maximizes
    ``get_goodness_value``.

    Args:
        ofd_problem: Indexable problem instance with entries
            [item_agents, theta, d, lambda, L, S, sigma, delta,
            agent_weights]. Only indices 0, 1, 6 and 8 are used here.
        strategy: 'ts', 'ucb' or 'greedy' (greedy explores uniformly with
            probability 0.1).
        utility_function: Reward shape forwarded to ``get_utility``.

    Returns:
        A pair ``(instantaneous_regret, other_statistics)`` where
        ``other_statistics`` is ``[total_utility, gini_coefficient,
        min_total_utility_ratio, target_weights_diff]``; one entry per round
        after the first K warm-up rounds.

    Raises:
        RuntimeError: If ``strategy`` is not recognised (raised on the first
            learning round).
    """
    all_item_agents = ofd_problem[0]        # Item-agent feature vectors
    theta           = ofd_problem[1]        # True parameter vector (environment only)
    sigma           = ofd_problem[6]        # Sub-gaussian noise parameter of utility
    agent_weights   = ofd_problem[8]        # Weights of each agent

    # Initialization of different variables
    T               = len(all_item_agents)          # Total number of items
    K               = len(all_item_agents[0])       # Total number of agents
    agent_utility   = np.zeros(K)                   # Utility of each agent

    # Per-round traces
    instantaneous_regret = []
    total_utility = []
    min_total_utility_ratio = []
    gini_coefficient = []
    target_weights_diff = []

    # Gaussian process with Matern kernel as surrogate model
    m52 = ConstantKernel(1.0) * Matern(length_scale=1.0, nu=2.5)
    gpr = GaussianProcessRegressor(kernel=m52, alpha=sigma**2)

    # Training data, accumulated in lists and stacked only when refitting
    observed_contexts = []
    observed_utilities = []

    # Multiplier on the posterior standard deviation (both TS and UCB use a
    # fixed scale of 1.0) and number of rounds between two GP refits
    exploration_scale = 1.0
    refit_interval = 20

    # Warm-up: item t goes to agent t. Regret is not recorded for these
    # rounds as they are common to all algorithms.
    for t in range(K):
        item_agent = all_item_agents[t][t]
        noisy_utility = get_utility(utility_function, item_agent.dot(theta), sigma)

        agent_utility[t] += noisy_utility
        observed_contexts.append(item_agent)
        observed_utilities.append(noisy_utility)

    for t in range(K, T):
        # Refit the GP periodically. The extra fit at t == K makes sure the
        # warm-up observations are used from the first decision on, even
        # when K is not a multiple of the refit interval.
        if t == K or t % refit_interval == 0:
            gpr.fit(np.vstack(observed_contexts),
                    np.asarray(observed_utilities, dtype=float))

        # Observe item-agent vectors
        item_agents = all_item_agents[t]

        # Posterior mean and standard deviation of the latent reward.
        # predict(return_std=True) already returns the standard deviation,
        # so no further square root is taken.
        est_utility, utility_sigma = gpr.predict(item_agents, return_std=True)

        # Score every candidate allocation
        agents_score = np.zeros(K)
        true_agents_score = np.zeros(K)
        for a in range(K):
            xt_a = item_agents[a]

            # Index utility of agent a under the chosen strategy
            if strategy == 'ts':
                index_utility = np.random.normal(
                    loc=est_utility[a], scale=exploration_scale*utility_sigma[a])
            elif strategy == 'ucb':
                index_utility = est_utility[a] + exploration_scale*utility_sigma[a]
            elif strategy == 'greedy':
                index_utility = est_utility[a]
            else:
                raise RuntimeError('Exploration strategy not set')

            # Goodness value if the item were given to agent a
            candidate_utility = np.copy(agent_utility)
            candidate_utility[a] += index_utility
            agents_score[a] = get_goodness_value(candidate_utility, agent_weights, a)

            # True goodness value of the same allocation: for regret only.
            # The noise-free utility goes through the same reward shape as
            # the observed utilities, as in the other OFD algorithms.
            agent_true_utility = np.copy(agent_utility)
            noisy_free_utility = get_utility(utility_function, xt_a.dot(theta), 0)
            agent_true_utility[a] += noisy_free_utility
            true_agents_score[a] = get_goodness_value(agent_true_utility, agent_weights, a)

        # Selecting agent with maximum score
        a_t = np.argmax(agents_score)

        # For greedy strategy, do exploration with epsilon probability
        if strategy == 'greedy':
            epsilon = 0.1
            if np.random.uniform(0, 1) < epsilon:
                a_t = np.random.choice(K)

        # Assigning item to the selected agent and observing the utility
        item_agent = item_agents[a_t]
        noisy_utility = get_utility(utility_function, item_agent.dot(theta), sigma)

        # Updating variables
        agent_utility[a_t] += noisy_utility
        observed_contexts.append(item_agent)
        observed_utilities.append(noisy_utility)

        # Instantaneous regret
        regret = max(true_agents_score) - true_agents_score[a_t]
        instantaneous_regret.append(regret)

        # Update other statistics
        total_utility.append(np.sum(agent_utility))
        min_total_utility_ratio.append(np.min(agent_utility)/np.sum(agent_utility))
        gini_coefficient.append(compute_gini_coefficient(agent_utility))
        target_weights_diff.append(np.linalg.norm(agent_weights - (agent_utility/np.sum(agent_utility))))

    other_statistics = [total_utility, gini_coefficient, min_total_utility_ratio, target_weights_diff]
    return instantaneous_regret, other_statistics


# Online Fair Division (OFD) algorithm with Neural contextual bandits
def ofd_neural(ofd_problem, strategy='ucb', utility_function='linear'):
    """Online fair division driven by a ``NeuralOFD`` learner.

    The learner supplies an optimistic utility for every agent; the item is
    given to the agent whose hypothetical allocation maximizes
    ``get_goodness_value``. With ``strategy == 'greedy'`` a uniformly random
    agent is used instead with probability 0.1.

    Args:
        ofd_problem: Indexable problem instance. This function reads
            index 0 (item-agent feature vectors), index 1 (true parameter
            vector), index 2 (feature dimension), index 6 (noise standard
            deviation) and index 8 (agent weights).
        strategy: Exploration strategy forwarded to ``NeuralOFD``.
        utility_function: Reward shape forwarded to ``get_utility``.

    Returns:
        A pair ``(instantaneous_regret, other_statistics)`` where
        ``other_statistics`` is ``[total_utility, gini_coefficient,
        min_total_utility_ratio, target_weights_diff]``; one entry per round
        after the first K warm-up rounds.
    """
    contexts      = ofd_problem[0]      # Item-agent feature vectors
    theta         = ofd_problem[1]      # True parameter vector
    feature_dim   = ofd_problem[2]      # Dimension of item-agent feature vector
    sigma         = ofd_problem[6]      # Noise parameter of the utility
    agent_weights = ofd_problem[8]      # Weight of each agent

    num_items  = len(contexts)          # Total number of items
    num_agents = len(contexts[0])       # Total number of agents
    cumulative = np.zeros(num_agents)   # Utility collected by each agent

    # Per-round traces
    regrets = []
    welfare_trace = []
    min_share_trace = []
    gini_trace = []
    weight_gap_trace = []

    # Neural learner
    learner = NeuralOFD(feature_dim, strategy=strategy)

    # Warm-up: item t goes to agent t; the samples are stored without
    # training the model. No regret is recorded for these rounds because
    # they are common to all algorithms.
    for agent in range(num_agents):
        features = contexts[agent][agent]
        observed = get_utility(utility_function, features.dot(theta), sigma)
        cumulative[agent] += observed
        learner.update(features, observed, update_model=False)

    # Remaining items
    for step in range(num_agents, num_items):
        round_contexts = contexts[step]

        # Optimistic utility of every agent according to the neural model
        optimistic = learner.get_optimistic_score(round_contexts)

        model_scores = np.zeros(num_agents)
        oracle_scores = np.zeros(num_agents)
        for agent in range(num_agents):
            # Goodness value of the allocation under the optimistic utility
            hypothetical = cumulative.copy()
            hypothetical[agent] += optimistic[agent]
            model_scores[agent] = get_goodness_value(hypothetical, agent_weights, agent)

            # Goodness value under the noise-free utility: for regret only
            oracle = cumulative.copy()
            oracle[agent] += get_utility(
                utility_function, round_contexts[agent].dot(theta), 0)
            oracle_scores[agent] = get_goodness_value(oracle, agent_weights, agent)

        # Agent with the best model score
        chosen = np.argmax(model_scores)

        # Epsilon-greedy exploration for the greedy strategy
        if strategy == 'greedy':
            explore_probability = 0.1
            if np.random.uniform(0, 1) < explore_probability:
                chosen = np.random.choice(num_agents)

        # Allocate the item, observe the noisy utility and train the learner
        features = round_contexts[chosen]
        observed = get_utility(utility_function, features.dot(theta), sigma)
        cumulative[chosen] += observed
        learner.update(features, observed)

        # Instantaneous regret against the best oracle allocation
        regrets.append(max(oracle_scores) - oracle_scores[chosen])

        # Other statistics
        total = np.sum(cumulative)
        welfare_trace.append(total)
        min_share_trace.append(np.min(cumulative)/total)
        gini_trace.append(compute_gini_coefficient(cumulative))
        weight_gap_trace.append(np.linalg.norm(agent_weights - (cumulative/total)))

    return regrets, [welfare_trace, gini_trace, min_share_trace, weight_gap_trace]


# Online Fair Division (OFD) algorithm that only cares about efficiency
def ofd_efficient(ofd_problem, strategy='ucb', utility_function='linear'):
    """Online fair division baseline that only cares about efficiency.

    A ridge-regression estimate ``theta_hat`` of the utility parameter is
    maintained as in a linear contextual bandit. The item goes to the agent
    with the largest index utility (Thompson sample, upper confidence bound
    or plain estimate); the goodness function is used for the regret only.

    Args:
        ofd_problem: Indexable problem instance with entries
            [item_agents, theta, d, lambda, L, S, sigma, delta,
            agent_weights]. ``item_agents[t]`` must support ``.dot`` (a
            K x d NumPy array).
        strategy: 'ts', 'ucb' or 'greedy' (greedy explores uniformly with
            probability 0.1).
        utility_function: Reward shape forwarded to ``get_utility``.

    Returns:
        A pair ``(instantaneous_regret, other_statistics)`` where
        ``other_statistics`` is ``[total_utility, gini_coefficient,
        min_total_utility_ratio, target_weights_diff]``; one entry per round
        after the first K warm-up rounds.

    Raises:
        RuntimeError: If ``strategy`` is not recognised (raised on the first
            learning round).
    """
    all_item_agents = ofd_problem[0]        # Item-agent feature vectors
    theta           = ofd_problem[1]        # True parameter vector (environment only)
    d               = ofd_problem[2]        # Dimension of item-agent feature vector
    lambda_value    = ofd_problem[3]        # Regularizer keeping V invertible
    L               = ofd_problem[4]        # Value of L, i.e., max ||x_i||
    S               = ofd_problem[5]        # Value of S, i.e., max ||theta||
    sigma           = ofd_problem[6]        # Sub-gaussian noise parameter of utility
    delta           = ofd_problem[7]        # Confidence parameter
    agent_weights   = ofd_problem[8]        # Weights of each agent

    # Initialization of different variables
    T               = len(all_item_agents)          # Total number of items
    K               = len(all_item_agents[0])       # Total number of agents
    agent_utility   = np.zeros(K)                   # Utility of each agent
    XY_sum          = np.zeros(d)                   # Sum of x * y
    V               = lambda_value * np.identity(d) # Regularized design matrix

    # Per-round traces
    instantaneous_regret = []
    total_utility = []
    min_total_utility_ratio = []
    gini_coefficient = []
    target_weights_diff = []

    # Warm-up: item t goes to agent t. Regret is not recorded for these
    # rounds as they are common to all algorithms.
    for t in range(K):
        item_agent = all_item_agents[t][t]
        noisy_utility = get_utility(utility_function, item_agent.dot(theta), sigma)

        agent_utility[t]    += noisy_utility
        XY_sum              += item_agent*noisy_utility
        V                   += np.outer(item_agent, item_agent)

    # Fixed terms in confidence bound used in UCB strategy
    alpha_fix   = S*np.sqrt(lambda_value)   # Fix term for alpha
    conf_ratio  = (L*L)/lambda_value        # Ratio term in confidence bound

    # Ridge estimate after the warm-up
    V_inv       = np.linalg.inv(V)
    theta_hat   = np.inner(V_inv, XY_sum)

    for t in range(K, T):
        # Observe item-agent vectors
        item_agents = all_item_agents[t]

        # Current estimate of the latent reward. This must come from the
        # learned theta_hat: the true theta is only available to the
        # environment (utility generation and regret computation).
        est_utility = item_agents.dot(theta_hat)

        # Quantities shared by all agents within the round
        if strategy == 'ts':
            # One posterior sample per round, shared by all agents
            # (following the Linear Contextual TS approach)
            alpha_t = max(sigma*np.sqrt(9.0*d*np.log((t+1)/delta)), 0)
            theta_tilde = np.random.multivariate_normal(theta_hat, alpha_t*alpha_t*V_inv)
        elif strategy == 'ucb':
            # The confidence radius depends on t only, not on the agent.
            # NOTE(review): the usual OFUL radius takes the square root of
            # the log term; kept as in the original - confirm intent.
            log_cnfterm = d*np.log((1.0 + ((t+1)*conf_ratio))/delta)
            alpha_t = alpha_fix + (sigma*log_cnfterm)

        # Score every candidate allocation
        agents_score = np.zeros(K)
        true_agents_score = np.zeros(K)
        for a in range(K):
            xt_a = item_agents[a]

            # Efficiency-only score: the index utility of agent a itself
            if strategy == 'ts':
                agents_score[a] = max(xt_a.dot(theta_tilde), 0)
            elif strategy == 'ucb':
                conf_term = alpha_t * np.sqrt(np.inner(np.inner(xt_a, V_inv), xt_a))
                agents_score[a] = est_utility[a] + conf_term
            elif strategy == 'greedy':
                agents_score[a] = est_utility[a]
            else:
                raise RuntimeError('Exploration strategy not set')

            # True goodness value of the allocation: for regret only
            agent_true_utility = np.copy(agent_utility)
            noisy_free_utility = get_utility(utility_function, xt_a.dot(theta), 0)
            agent_true_utility[a] += noisy_free_utility
            true_agents_score[a] = get_goodness_value(agent_true_utility, agent_weights, a)

        # Selecting agent with maximum index utility
        a_t = np.argmax(agents_score)

        # For greedy strategy, do exploration with epsilon probability
        if strategy == 'greedy':
            epsilon = 0.1
            if np.random.uniform(0, 1) < epsilon:
                a_t = np.random.choice(K)

        # Assigning item to the selected agent and observing the utility
        item_agent = item_agents[a_t]
        noisy_utility = get_utility(utility_function, item_agent.dot(theta), sigma)

        # Updating sufficient statistics and the ridge estimate
        agent_utility[a_t]  += noisy_utility
        XY_sum              += item_agent*noisy_utility
        V                   += np.outer(item_agent, item_agent)
        V_inv               = np.linalg.inv(V)
        theta_hat           = np.inner(V_inv, XY_sum)

        # Instantaneous regret
        regret = max(true_agents_score) - true_agents_score[a_t]
        instantaneous_regret.append(regret)

        # Update other statistics
        total_utility.append(np.sum(agent_utility))
        min_total_utility_ratio.append(np.min(agent_utility)/np.sum(agent_utility))
        gini_coefficient.append(compute_gini_coefficient(agent_utility))
        target_weights_diff.append(np.linalg.norm(agent_weights - (agent_utility/np.sum(agent_utility))))

    other_statistics = [total_utility, gini_coefficient, min_total_utility_ratio, target_weights_diff]
    return instantaneous_regret, other_statistics


# Online Fair Division (OFD) algorithm that only cares about fairness
def ofd_fair(ofd_problem, strategy='ucb', utility_function='linear'):
    """Run the fairness-only Online Fair Division (OFD) algorithm.

    The first K items are handed out round-robin (item t goes to agent t).
    From round K onwards each item goes to the agent whose (optimistic)
    utility gain maximizes the minimum cumulative utility across agents,
    where the gain is computed from the ridge-regression estimate
    ``theta_hat`` according to ``strategy``.

    Args:
        ofd_problem: Indexable problem instance
            ``[item_agents, theta, d, lambda, L, S, sigma, delta,
            agent_weights]``. ``item_agents[t]`` is indexed by agent and
            must support ``.dot`` with a length-``d`` vector.
        strategy: Exploration strategy: ``'ts'`` (Thompson sampling),
            ``'ucb'`` or ``'greedy'`` (epsilon-greedy with epsilon = 0.1).
        utility_function: Name of the latent utility function, forwarded to
            ``get_utility``.

    Returns:
        A pair ``(instantaneous_regret, other_statistics)``.
        ``instantaneous_regret`` has one entry per round after the
        round-robin phase. The regret is measured with
        ``get_goodness_value`` on noise-free utilities, not with the
        min-utility score used to select the agent.
        ``other_statistics`` is ``[total_utility, gini_coefficient,
        min_total_utility_ratio, target_weights_diff]``, each a list with
        one entry per round after the round-robin phase.

    Raises:
        RuntimeError: If ``strategy`` is not one of the supported values
            (raised in the first round after the round-robin phase).
    """
    # OFD problem instance:
    # [item_agents, theta, d, lambda, L, S, sigma, delta, agent_weights]
    all_item_agents = ofd_problem[0]        # Item-agent feature vectors
    theta           = ofd_problem[1]        # True parameter vector
    d               = ofd_problem[2]        # Dimension of item-agent feature vector
    lambda_value    = ofd_problem[3]        # Lambda value to ensure invertibility
    L               = ofd_problem[4]        # Value of L, i.e., max ||x_i||
    S               = ofd_problem[5]        # Value of S, i.e., max ||\theta||
    sigma           = ofd_problem[6]        # Sub-gaussian noise parameter of utility
    delta           = ofd_problem[7]        # Confidence in the regret
    agent_weights   = ofd_problem[8]        # Weights of each agent

    # Initialization of different variables
    T               = len(all_item_agents)          # Total number of items
    K               = len(all_item_agents[0])       # Total number of agents
    agent_utility   = np.zeros(K)                   # Utility of each agent
    XY_sum          = np.zeros(d)                   # Sum of XY
    V               = lambda_value * np.identity(d) # Initialization of data matrix

    # Per-round statistics (recorded only after the round-robin phase)
    instantaneous_regret = []
    total_utility = []
    min_total_utility_ratio = []
    gini_coefficient = []
    target_weights_diff = []

    # Allocating item to each agent in round-robin fashion
    for t in range(K):
        # Item_agents vector
        item_agents = all_item_agents[t]

        # Select agent in round-robin fashion
        a_t = t

        # Assigning item to the selected agent and observing the utility
        item_agent = item_agents[a_t]
        noisy_utility = get_utility(utility_function, item_agent.dot(theta), sigma)

        # Updating variables
        agent_utility[a_t]  += noisy_utility
        XY_sum              += item_agent*noisy_utility
        V                   += np.outer(item_agent, item_agent)

        # Regret is not calculated for first K rounds as it is common for all algorithms

    # Fixed terms in confidence bound used in UCB strategy
    alpha_fix   = S*np.sqrt(lambda_value)   # Fix term for alpha
    conf_ratio  = (L*L)/lambda_value        # Ratio term in confidence bound

    # Ridge-regression estimate of theta after the round-robin phase
    V_inv       = np.linalg.inv(V)
    theta_hat   = np.inner(V_inv, XY_sum)

    for t in range(K, T):
        # Observe item-agent vectors
        item_agents = all_item_agents[t]

        # Current estimate of latent reward. This must use the learned
        # theta_hat: the true theta is unknown to the algorithm and is only
        # used to simulate observations and to measure regret.
        est_utility = item_agents.dot(theta_hat)

        # Quantities that depend on the round but not on the agent
        if strategy == 'ts':
            # Sample once per round so that the same parameter is used for
            # all agents. Following Linear Contextual TS (ICML 2023) paper approach
            alpha_t = max(sigma*np.sqrt(9.0*d*np.log((t+1)/delta)), 0)
            theta_tilde = np.random.multivariate_normal(theta_hat, alpha_t*alpha_t*V_inv)
        elif strategy == 'ucb':
            # Confidence radius
            # NOTE(review): the usual OFUL radius takes the square root of
            # d*log(.); here the log term enters linearly - confirm intended.
            log_cnfterm = d*np.log((1.0 + ((t+1)*conf_ratio))/delta)
            alpha_t = alpha_fix + (sigma*log_cnfterm)

        # Selecting the agent who maximizes the minimum cumulative utility
        agents_score = np.zeros(K)
        true_agents_score = np.zeros(K)
        for a in range(K):
            xt_a = item_agents[a]

            # Utility the agent is assumed to gain, based on the strategy
            if strategy == 'ts':
                assumed_gain = max(xt_a.dot(theta_tilde), 0)
            elif strategy == 'ucb':
                conf_term = alpha_t * np.sqrt(np.inner(np.inner(xt_a, V_inv), xt_a))
                assumed_gain = est_utility[a] + conf_term
            elif strategy == 'greedy':
                assumed_gain = est_utility[a]
            else:
                raise RuntimeError('Exploration strategy not set')

            # Score: minimum cumulative utility if agent a received the item
            hypothetical_utility = np.copy(agent_utility)
            hypothetical_utility[a] += assumed_gain
            agents_score[a] = np.min(hypothetical_utility)

            # True goodness value with noise-free utility: for regret calculation
            agent_true_utility = np.copy(agent_utility)
            noisy_free_utility = get_utility(utility_function, xt_a.dot(theta), 0)
            agent_true_utility[a] += noisy_free_utility
            true_agents_score[a] = get_goodness_value(agent_true_utility, agent_weights, a)

        # Selecting agent with maximum score
        a_t = np.argmax(agents_score)

        # For greedy strategy, do exploration with epsilon probability
        if strategy == 'greedy':
            epsilon = 0.1
            if np.random.uniform(0, 1) < epsilon:
                a_t = np.random.choice(K)

        # Assigning item to the selected agent and observing the utility
        item_agent = item_agents[a_t]
        noisy_utility = get_utility(utility_function, item_agent.dot(theta), sigma)

        # Updating variables
        agent_utility[a_t]  += noisy_utility
        XY_sum              += item_agent*noisy_utility
        V                   += np.outer(item_agent, item_agent)
        V_inv               = np.linalg.inv(V)

        # Updating theta estimate
        theta_hat = np.inner(V_inv, XY_sum)

        # Instantaneous Regret
        regret = np.max(true_agents_score) - true_agents_score[a_t]
        instantaneous_regret.append(regret)

        # Update other statistics
        utility_sum = np.sum(agent_utility)
        total_utility.append(utility_sum)
        min_total_utility_ratio.append(np.min(agent_utility)/utility_sum)
        gini_coefficient.append(compute_gini_coefficient(agent_utility))
        target_weights_diff.append(np.linalg.norm(agent_weights - (agent_utility/utility_sum)))

    other_statistics = [total_utility, gini_coefficient, min_total_utility_ratio, target_weights_diff]
    return instantaneous_regret, other_statistics
