#!/usr/bin/env python
# coding: utf-8

# Importing python packages
import sys
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import scipy.stats as ss
from scipy.optimize import minimize


# ########################## Plotting functions #########################
# Getting Average regret and Confidence interval
def cumulative_regret_error(regret):
    """Summarise several runs of per-round regret as batched cumulative regret.

    The horizon is cut into checkpoints every ``max(1, samples // 20)``
    rounds (plus a final checkpoint at ``samples`` when the horizon is not a
    multiple of the batch size).  At each checkpoint the cumulative regret is
    averaged over the runs and a confidence half-width is computed.

    The batch size is an integer on purpose: a fractional batch would never
    match the integer round counter, leaving the regret curve shorter than
    the list of checkpoints.

    Parameters
    ----------
    regret : 2-D array-like, shape (runs, samples)
        Instantaneous regret of every round of every run.

    Returns
    -------
    time_horizon : list of int
        Checkpoints, starting with 0.
    regret : numpy.ndarray
        Mean cumulative regret at each checkpoint (0 at checkpoint 0).
    conf_regret : list of float
        ``t.ppf(0.95, runs - 1) * sem`` at each checkpoint.  With a single
        run these are NaN because the standard error is undefined.
    """
    regret = np.asarray(regret, dtype=float)
    runs, samples = regret.shape
    batch = max(1, samples // 20)

    # Checkpoints: every full batch, then the leftover rounds (if any)
    checkpoints = list(range(batch, samples + 1, batch))
    if samples % batch != 0:
        checkpoints.append(samples)
    time_horizon = [0] + checkpoints

    # Cumulative regret of every run, read off at the checkpoints
    cumulative = np.cumsum(regret, axis=1)
    batched = np.zeros((runs, len(time_horizon)))
    batched[:, 1:] = cumulative[:, np.asarray(checkpoints, dtype=int) - 1]

    mean_regret = batched.mean(axis=0)

    # Confidence half-width from the Student-t quantile and the standard error
    t_value = ss.t.ppf(0.95, runs - 1)
    conf_regret = list(t_value * ss.sem(batched, axis=0))
    return time_horizon, mean_regret, conf_regret


# Regret Plotting
def cumulative_regret_plotting(regret, cases, file_name, plot_location):
    """Plot mean cumulative regret with error bars for every case and save it.

    ``regret`` is indexed as ``[run][case][round]``; ``cases`` holds the
    legend label of each case.  The figure is written to ``file_name`` and
    the legend is placed at ``plot_location``.  Only eight colour/marker
    combinations are defined, so at most eight cases can be drawn.
    """
    palette = list("gbcmrykb")
    markers = ['--^', '--v', '--H', '--d', '--+', '--*', '--v', '--^']
    regret_array = np.array(regret)

    # One error-bar curve plus one marker curve per case
    for idx, label in enumerate(cases):
        horizon, mean_regret, half_width = cumulative_regret_error(regret_array[:, idx])
        plt.errorbar(horizon, mean_regret, half_width, color=palette[idx])
        plt.plot(horizon, mean_regret, palette[idx] + markers[idx], label=label)

    # Fonts, legend and axis labels
    plt.rc('font', size=12)
    plt.legend(loc=plot_location, numpoints=1)
    plt.xlabel("Rounds", fontsize=20)
    plt.ylabel("Regret", fontsize=20)

    # Optional tweaks, disabled by default:
    # plt.title("Comparison of Algorithms")
    # plt.axis([0, samples, -20, samples])
    # plt.xscale('log')

    # Write the figure to disk and release it
    plt.savefig(file_name, bbox_inches='tight', dpi=600)
    plt.close()


# ############################# Algorithms #############################
# Computing best parameters
def next_sample(theta_est, V_in, max_value, beta_val, dim):
    """Pick the action maximising the optimistic (upper-confidence) reward.

    Maximises ``x . theta_est + beta_val * sqrt(x^T V_in x)`` by minimising
    its negative with ``scipy.optimize.minimize``, using the analytic
    gradient.  Every coordinate is boxed to ``[-3, 3]``; the optimiser is
    started from a uniform random point in ``[-max_value, max_value]^dim``.

    Returns the optimiser's solution as a flat 1-D array of length ``dim``.
    """
    symmetrised = V_in + V_in.T

    def width_sq(x):
        # Quadratic form x^T V_in x
        return V_in.dot(x).dot(x)

    def neg_ucb(x):
        return -(np.dot(x, theta_est) + beta_val * np.sqrt(width_sq(x)))

    def neg_ucb_grad(x):
        return -(theta_est + beta_val * symmetrised.dot(x) / (2 * np.sqrt(width_sq(x))))

    # cons = {'type': 'ineq', 'fun': lambda x: max_value ** 2 - x.dot(x)}
    box = np.asarray([(-3, 3)] * dim)
    start = np.random.uniform(low=-1 * max_value, high=max_value, size=dim)
    result = minimize(neg_ucb, start, jac=neg_ucb_grad, bounds=box)  # constraints=cons)
    return np.array(result.x).flatten()


# OFUL algorithm
def oful(theta, algorithm_parameters):
    """Simulate plain OFUL on a linear bandit and return per-round regret.

    ``algorithm_parameters`` is ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.
    Rewards are ``x . theta`` plus Gaussian noise with standard deviation
    ``sqrt(v_sigma**2 + w_sigma**2)``.  Regret is measured against the value
    of the action that ``next_sample`` finds for the true ``theta`` with no
    exploration bonus.

    Returns a list of ``T`` instantaneous regrets.
    """
    d, lambda_value, L, S, v_sigma, w_sigma, delta, T = algorithm_parameters[:8]

    # Learner state
    sigma = np.sqrt(v_sigma**2 + w_sigma**2)    # Observation standard deviation
    gram = lambda_value * np.identity(d)        # Regularised data matrix
    gram_inv = np.linalg.inv(gram)
    xy_sum = np.zeros(d)                        # Sum of x * y
    theta_hat = np.ones(d)/d                    # Initial estimate of theta
    conf_term = 1.0                             # Initial confidence term

    # Benchmark: reward of the best action under the true parameter
    best_x = next_sample(theta, gram_inv, L, 0, d)
    max_value = best_x.dot(theta)

    instantaneous_regret = []
    for t in range(T):
        # Optimistic action, then its noisy reward
        x = next_sample(theta_hat, gram_inv, L, conf_term, d)
        y = np.inner(x, theta) + np.random.normal(0, sigma, 1)[0]

        # Regularised least-squares update
        xy_sum += x*y
        gram += np.outer(x, x)
        gram_inv = np.linalg.inv(gram)
        theta_hat = np.inner(gram_inv, xy_sum)

        # Confidence term used for the next round
        log_det_bound = d * np.log(1.0 + ((t * L * np.sqrt(d)) / (lambda_value * d)))
        log_cnfterm = (2.0 * np.log(1.0 / delta)) + log_det_bound
        conf_term = (S * np.sqrt(lambda_value)) + (sigma * log_cnfterm)

        instantaneous_regret.append(max_value - x.dot(theta))

    return instantaneous_regret


# OFUL-AF for linear function with known variance and unknown correlation-coefficient
def oful_af(theta, algorithm_parameters):
    """Simulate OFUL-AF (auxiliary feedback with its exact mean) and return regret.

    OFUL-AF is the zero-bias special case of OFUL-BE: the mean of the
    auxiliary feedback used in the covariance/variance estimates is the true
    ``x_t . theta_w``.  The previous body was a line-for-line copy of
    ``oful_be`` with the bias fixed at zero, so this function now delegates
    to it; the sequence of random draws and the arithmetic are unchanged.

    ``algorithm_parameters`` is ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.
    Returns a list of ``T`` instantaneous regrets.
    """
    return oful_be(theta, algorithm_parameters, 0.0)


# OFUL-BE with biased auxiliary estimated function
def oful_be(theta, algorithm_parameters, w_error):
    """Simulate OFUL with a biased estimate of the auxiliary-feedback mean.

    Each round the reward is ``y_t = v_t + w_t``: ``v_t`` is driven by the
    odd-indexed entries of ``theta`` and ``w_t`` (the auxiliary feedback) by
    the even-indexed ones, each with its own Gaussian noise.  The learner
    assumes the mean of ``w_t`` is ``x_t . theta_w + w_error``.  From round 3
    onwards that assumed mean feeds the covariance and variance estimates
    behind the control-variate coefficient and the correlation estimate
    ``rho_hat``, which in turn shrinks the confidence term.

    ``algorithm_parameters`` is ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.
    Returns a list of ``T`` instantaneous regrets.
    """
    d, lambda_value, L, S, v_sigma, w_sigma, delta, T = algorithm_parameters[:8]

    # Learner state
    design = lambda_value * np.identity(d)      # Regularised data matrix
    design_inv = np.linalg.inv(design)
    sigma = np.sqrt(v_sigma**2 + w_sigma**2)    # Observation standard deviation
    conf_term = 1.0                             # Initial confidence term
    xy_sum = np.zeros(d)                        # Sum of x * y
    x_wnoise_sum = np.zeros(d)                  # Sum of x * (noise of w)
    theta_zhat = np.ones(d)/d                   # Initial estimate of theta
    rho_hat = 0                                 # Estimated correlation coefficient

    # Running sums over past rounds (g = assumed mean of auxiliary feedback)
    sum_yw = 0                                  # y * w
    sum_yg = 0                                  # y * g
    sum_ww = 0                                  # w ** 2
    sum_gg = 0                                  # g ** 2
    sum_wg = 0                                  # w * g
    sum_xw = np.zeros(d)                        # x * w
    sum_xg = np.zeros(d)                        # x * g

    # Benchmark: reward of the best action under the true parameter
    best_x = next_sample(theta, design_inv, L, 0, d)
    max_value = best_x.dot(theta)

    # Even coordinates generate the auxiliary feedback, odd ones the rest
    theta_w = np.zeros(d)
    theta_w[::2] = np.asarray(theta)[:d:2]
    theta_v = np.zeros(d)
    theta_v[1::2] = np.asarray(theta)[1:d:2]

    instantaneous_regret = []
    for t in range(T):
        x_t = next_sample(theta_zhat, design_inv, L, conf_term, d)

        # Simulate latent part, auxiliary feedback and total reward
        v_t = np.inner(x_t, theta_v) + np.random.normal(0, v_sigma, 1)[0]
        mean_wt = np.inner(x_t, theta_w)
        wt_noise = np.random.normal(0, w_sigma, 1)[0]
        w_t = mean_wt + wt_noise
        y_t = v_t + w_t

        # Design matrix update [@TODO: Use Sherman-Morrison formula]
        design += np.outer(x_t, x_t)
        design_inv = np.linalg.inv(design)

        xy_sum += x_t*y_t
        x_wnoise_sum += x_t*wt_noise

        if t > 2:
            # Control-variate correction built from the previous rounds' sums
            theta_hat = np.inner(design_inv, xy_sum)
            cov_yw = (sum_yw - sum_yg - sum_xw.dot(theta_hat) + sum_xg.dot(theta_hat))/(t-1)
            af_var = (sum_ww + sum_gg - (2.0*sum_wg))/(t-1)
            xz_sum = xy_sum - ((cov_yw/af_var)*x_wnoise_sum)

            # Known sigma, unknown covariance and w_sigma (i.e., rho)
            rho_hat = cov_yw/(np.sqrt(af_var)*sigma)
        else:
            # First three rounds: plain least squares, no correction yet
            xz_sum = xy_sum

        theta_zhat = np.inner(design_inv, xz_sum)

        # Noise level left after removing the correlated part
        observation_sd = np.sqrt(1 - min(rho_hat**2, 1))*sigma

        # Confidence term used for the next round
        log_det_bound = d * np.log(1.0 + ((t * L * np.sqrt(d)) / (lambda_value * d)))
        log_cnfterm = (2.0 * np.log(1.0 / delta)) + log_det_bound
        conf_term = (S * np.sqrt(lambda_value)) + (observation_sd * log_cnfterm)

        # Fold this round into the running sums, using the biased mean
        g_t = mean_wt + w_error
        sum_yw += y_t*w_t
        sum_yg += y_t*g_t
        sum_xw += x_t*w_t
        sum_xg += x_t*g_t
        sum_ww += w_t**2
        sum_gg += g_t**2
        sum_wg += w_t*g_t

        instantaneous_regret.append(max_value - np.inner(x_t, theta))

    return instantaneous_regret


# OFUL-EH: auxiliary feedback function estimated from historical data
def oful_eh(theta, algorithm_parameters, est_theta):
    """Simulate OFUL with an auxiliary-feedback mean estimated beforehand.

    Same simulation as the other auxiliary-feedback variants: the reward is
    ``y_t = v_t + w_t`` with ``w_t`` driven by the even-indexed entries of
    ``theta``.  Here the learner takes the mean of ``w_t`` to be
    ``x_t . theta_est_w``, where ``theta_est_w`` keeps only the even-indexed
    entries of the supplied ``est_theta``.

    ``algorithm_parameters`` is ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.
    Returns a list of ``T`` instantaneous regrets.
    """
    d, lambda_value, L, S, v_sigma, w_sigma, delta, T = algorithm_parameters[:8]

    # Learner state
    design = lambda_value * np.identity(d)      # Regularised data matrix
    design_inv = np.linalg.inv(design)
    sigma = np.sqrt(v_sigma**2 + w_sigma**2)    # Observation standard deviation
    conf_term = 1.0                             # Initial confidence term
    xy_sum = np.zeros(d)                        # Sum of x * y
    x_wnoise_sum = np.zeros(d)                  # Sum of x * (noise of w)
    theta_zhat = np.ones(d)/d                   # Initial estimate of theta
    rho_hat = 0                                 # Estimated correlation coefficient

    # Running sums over past rounds (g = estimated mean of auxiliary feedback)
    sum_yw = 0                                  # y * w
    sum_yg = 0                                  # y * g
    sum_ww = 0                                  # w ** 2
    sum_gg = 0                                  # g ** 2
    sum_wg = 0                                  # w * g
    sum_xw = np.zeros(d)                        # x * w
    sum_xg = np.zeros(d)                        # x * g

    # Benchmark: reward of the best action under the true parameter
    best_x = next_sample(theta, design_inv, L, 0, d)
    max_value = best_x.dot(theta)

    # Even coordinates generate the auxiliary feedback, odd ones the rest
    theta_w = np.zeros(d)
    theta_w[::2] = np.asarray(theta)[:d:2]
    theta_est_w = np.zeros(d)
    theta_est_w[::2] = np.asarray(est_theta)[:d:2]
    theta_v = np.zeros(d)
    theta_v[1::2] = np.asarray(theta)[1:d:2]

    instantaneous_regret = []
    for t in range(T):
        x_t = next_sample(theta_zhat, design_inv, L, conf_term, d)

        # Simulate latent part, auxiliary feedback and total reward
        v_t = np.inner(x_t, theta_v) + np.random.normal(0, v_sigma, 1)[0]
        mean_wt = np.inner(x_t, theta_w)
        wt_noise = np.random.normal(0, w_sigma, 1)[0]
        w_t = mean_wt + wt_noise
        y_t = v_t + w_t

        # Design matrix update [@TODO: Use Sherman-Morrison formula]
        design += np.outer(x_t, x_t)
        design_inv = np.linalg.inv(design)

        xy_sum += x_t*y_t
        x_wnoise_sum += x_t*wt_noise

        if t > 2:
            # Control-variate correction built from the previous rounds' sums
            theta_hat = np.inner(design_inv, xy_sum)
            cov_yw = (sum_yw - sum_yg - sum_xw.dot(theta_hat) + sum_xg.dot(theta_hat))/(t-1)
            af_var = (sum_ww + sum_gg - (2.0*sum_wg))/(t-1)
            xz_sum = xy_sum - ((cov_yw/af_var)*x_wnoise_sum)

            # Known sigma, unknown covariance and w_sigma (i.e., rho)
            rho_hat = cov_yw/(np.sqrt(af_var)*sigma)
        else:
            # First three rounds: plain least squares, no correction yet
            xz_sum = xy_sum

        theta_zhat = np.inner(design_inv, xz_sum)

        # Noise level left after removing the correlated part
        observation_sd = np.sqrt(1 - min(rho_hat**2, 1))*sigma

        # Confidence term used for the next round
        log_det_bound = d * np.log(1.0 + ((t * L * np.sqrt(d)) / (lambda_value * d)))
        log_cnfterm = (2.0 * np.log(1.0 / delta)) + log_det_bound
        conf_term = (S * np.sqrt(lambda_value)) + (observation_sd * log_cnfterm)

        # Fold this round into the running sums, using the estimated mean
        est_wt = np.inner(x_t, theta_est_w)
        sum_yw += y_t*w_t
        sum_yg += y_t*est_wt
        sum_xw += x_t*w_t
        sum_xg += x_t*est_wt
        sum_ww += w_t**2
        sum_gg += est_wt**2
        sum_wg += w_t*est_wt

        instantaneous_regret.append(max_value - np.inner(x_t, theta))

    return instantaneous_regret


# OFUL-IS: auxiliary feedback function estimated from independent samples
def oful_is(theta, algorithm_parameters, r):
    """Simulate OFUL with the auxiliary-feedback mean re-estimated every round.

    Same simulation as the other auxiliary-feedback variants: the reward is
    ``y_t = v_t + w_t`` with ``w_t`` driven by the even-indexed entries of
    ``theta``.  Each round ``r`` fresh points are drawn uniformly from
    ``[-1, 1]^d`` and added to a growing data set; ``estimated_theta_data``
    turns that data set (together with the bandit's own data) into a
    parameter estimate whose even-indexed entries give the assumed mean of
    ``w_t``.  Because that estimate changes every round, the sums involving
    the assumed mean are recomputed from accumulated moments rather than
    incremented.

    The sampling box and the even-index mask follow ``d`` instead of being
    fixed to five dimensions.  The moment sums use a running sum of
    ``x x^T`` rather than looping over all past actions each round.

    ``algorithm_parameters`` is ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.
    Returns a list of ``T`` instantaneous regrets.
    """
    d, lambda_value, L, S, v_sigma, w_sigma, delta, T = algorithm_parameters[:8]

    # Learner state
    XY_sum = np.zeros(d)                        # Sum of x * y
    V = lambda_value * np.identity(d)           # Regularised data matrix
    V_inv = np.linalg.inv(V)
    sigma = np.sqrt(v_sigma**2 + w_sigma**2)    # Observation standard deviation
    conf_term = 1.0                             # Initial confidence term
    XWnoise_sum = np.zeros(d)                   # Sum of x * (noise of w)
    theta_zhat = np.ones(d)/d                   # Initial estimate of theta
    rho_hat = 0                                 # Estimated correlation coefficient

    # Statistics of the auxiliary feedback over past rounds
    af_seq = 0                                  # Sum of w ** 2
    mean_af = 0                                 # Sum of (assumed mean of w) ** 2
    af_mean_af = 0                              # Sum of w * (assumed mean of w)
    reward_af = 0                               # Sum of y * w
    reward_mean_af = 0                          # Sum of y * (assumed mean of w)
    x_ta_af = np.zeros(d)                       # Sum of x * w
    x_ta_mean_af = np.zeros(d)                  # Sum of x * (assumed mean of w)
    XX_sum = np.zeros((d, d))                   # Sum of x x^T (no regulariser)
    af_data = []                                # Independent samples drawn so far

    # 1 on even coordinates (auxiliary feedback), 0 on odd ones
    even_mask = (np.arange(d) % 2 == 0).astype(float)

    # Benchmark: reward of the best action under the true parameter
    best_x = next_sample(theta, V_inv, L, 0, d)
    max_value = best_x.dot(theta)

    # Even coordinates generate the auxiliary feedback, odd ones the rest
    theta_w = np.zeros(d)
    theta_v = np.zeros(d)
    for i in range(d):
        if i % 2 == 0:
            theta_w[i] = theta[i]
        else:
            theta_v[i] = theta[i]

    instantaneous_regret = []
    for t in range(T):
        x_t = next_sample(theta_zhat, V_inv, L, conf_term, d)

        # Simulate latent part, auxiliary feedback and total reward
        v_t = np.inner(x_t, theta_v) + np.random.normal(0, v_sigma, 1)[0]
        mean_wt = np.inner(x_t, theta_w)
        wt_noise = np.random.normal(0, w_sigma, 1)[0]
        w_t = mean_wt + wt_noise
        y_t = v_t + w_t

        # Design matrix update [@TODO: Use Sherman-Morrison formula]
        V += np.outer(x_t, x_t)
        V_inv = np.linalg.inv(V)

        XY_sum += x_t*y_t
        XWnoise_sum += x_t*wt_noise
        if t <= 2:
            # First three rounds: plain least squares, no correction yet
            XZ_sum = XY_sum
        else:
            # Control-variate correction built from the previous rounds' sums
            theta_hat = np.inner(V_inv, XY_sum)
            cov_yw = (reward_af - reward_mean_af - x_ta_af.dot(theta_hat) + x_ta_mean_af.dot(theta_hat))/(t-1)
            af_var = (af_seq + mean_af - (2.0*af_mean_af))/(t-1)
            beta = cov_yw/af_var
            XZ_sum = XY_sum - (beta*XWnoise_sum)

            # Known sigma, unknown covariance and w_sigma (i.e., rho)
            rho_hat = cov_yw/(np.sqrt(af_var)*sigma)

        theta_zhat = np.inner(V_inv, XZ_sum)

        # Noise level left after removing the correlated part
        observation_sd = np.sqrt(1 - min(rho_hat**2, 1))*sigma

        # Confidence term used for the next round
        log_cnfterm = (2.0 * np.log(1.0 / delta)) + (d * np.log(1.0 + ((t * L * np.sqrt(d)) / (lambda_value * d))))
        conf_term = (S * np.sqrt(lambda_value)) + (observation_sd * log_cnfterm)

        # r new independent points in [-1, 1]^d for the auxiliary estimate
        af_data.extend(np.random.uniform(low=-1.0, high=1.0, size=(r, d)))

        # Re-estimate the auxiliary feedback function from all data so far
        est_theta = estimated_theta_data(theta, af_data, V, XY_sum, algorithm_parameters)
        theta_est_w = est_theta * even_mask

        # Sums that do not involve the estimate are incremented ...
        XX_sum += np.outer(x_t, x_t)
        reward_af += y_t*w_t
        x_ta_af += x_t*w_t
        af_seq += w_t**2

        # ... those that do are rebuilt with the newest estimate:
        #   sum_s x_s (x_s . th)   = (sum_s x_s x_s^T) th
        #   sum_s (x_s . th) ** 2  = th^T (sum_s x_s x_s^T) th
        reward_mean_af = np.inner(XY_sum, theta_est_w)
        x_ta_mean_af = XX_sum.dot(theta_est_w)
        mean_af = theta_est_w.dot(x_ta_mean_af)
        af_mean_af = np.inner(x_ta_af, theta_est_w)

        instantaneous_regret.append(max_value - np.inner(x_t, theta))

    return instantaneous_regret


# ######################### Experiment Setting #########################
# Linear bandit problem instance
def problem_instance_oful(rounds):
    """Create a random 5-dimensional linear bandit instance.

    The true parameter is drawn uniformly from ``[0, 1]^5`` and rescaled to
    Euclidean norm ``S = 1``.

    Returns ``(theta_vector, alg_parameters)`` where ``alg_parameters`` is
    ``[dimension, lambda, L, S, v_sigma, w_sigma, delta, rounds]``.
    """
    # Fixed settings of the experiment
    dimension, lambda_val, l_value, s_value = 5, 0.01, 2.236, 1
    v_noise, w_noise, delta_val = 0.1, 0.1, 0.05

    # Random non-negative direction, scaled to norm s_value
    direction = np.random.uniform(low=0, high=1, size=dimension)
    theta_vector = s_value * direction/np.linalg.norm(direction)

    alg_parameters = [
        dimension, lambda_val, l_value, s_value,
        v_noise, w_noise, delta_val, rounds,
    ]
    return theta_vector, alg_parameters


# Estimating auxiliary function using historical data
def get_estimated_af(theta, parameters, n):
    """Estimate the parameter vector from ``n`` simulated historical samples.

    Draws ``n`` feature vectors uniformly from ``[-1, 1]^d``, simulates a
    noisy linear response for each with the true ``theta``, and returns the
    ridge-regression estimate ``(lambda I + X^T X)^{-1} X^T y``.

    ``parameters`` uses the layout ``[d, lambda, L, S, v_sigma, w_sigma, ...]``;
    the noise standard deviation is ``sqrt(v_sigma**2 + w_sigma**2)``.
    """
    dim, ridge = parameters[0], parameters[1]
    noise_sd = np.sqrt(parameters[4]**2 + parameters[5]**2)

    # Historical feature vectors
    history = np.random.uniform(low=-1, high=1, size=(n, dim))

    # Accumulate the normal equations one sample at a time
    gram = ridge * np.identity(dim)
    moment = np.zeros(dim)
    for features in history:
        response = np.inner(features, theta) + np.random.normal(0, noise_sd, 1)[0]
        gram += np.outer(features, features)
        moment += features*response

    # Ridge solution
    return np.inner(np.linalg.inv(gram), moment)


# Estimating auxiliary function using given data
def estimated_theta_data(theta, data, pV, pXY_sum, parameters):
    """Estimate the parameter vector from given points plus prior statistics.

    A noisy linear response is simulated for every point in ``data`` using
    the true ``theta``.  The resulting normal equations (started from
    ``lambda I``) are added to the supplied matrix ``pV`` and vector
    ``pXY_sum`` before solving.

    ``parameters`` uses the layout ``[d, lambda, L, S, v_sigma, w_sigma, ...]``;
    the noise standard deviation is ``sqrt(v_sigma**2 + w_sigma**2)``.
    Fresh noise is drawn on every call.
    """
    dim, ridge = parameters[0], parameters[1]
    noise_sd = np.sqrt(parameters[4]**2 + parameters[5]**2)

    # Normal equations of the supplied points
    gram = ridge * np.identity(dim)
    moment = np.zeros(dim)
    for point in data:
        response = np.inner(point, theta) + np.random.normal(0, noise_sd, 1)[0]
        gram += np.outer(point, point)
        moment += np.multiply(point, response)

    # Combine with the prior statistics and solve
    combined_inv = np.linalg.inv(gram + pV)
    return np.inner(combined_inv, (moment + pXY_sum))


# ### Experiment 1: Comparing algorithms ###
def compare_algorithms(theta, algorithm_parameters, T, R, save_regret_data):
    """Experiment 1: run every algorithm ``R`` times and plot their regret.

    ``T`` is only used to name the output files; the horizon actually
    simulated is the one stored in ``algorithm_parameters``.  When
    ``save_regret_data`` is true the raw regret is written to
    ``results/oful_compAlgos_<T>_<R>.npy``; the plot always goes to
    ``plots/oful_compAlgos_<T>_<R>.png``.  Both directories are created if
    missing, so a finished experiment is not lost to a missing folder.
    """
    import os

    # Historical estimate shared by all runs of OFUL-EH (n_h = 10 samples)
    htheta_hat = get_estimated_af(theta, algorithm_parameters, 10)

    # Legend label -> simulation, in plotting order
    algorithms = [
        ('OFUL', lambda: oful(theta, algorithm_parameters)),
        (r'OFUL-EH $(n_h=10)$', lambda: oful_eh(theta, algorithm_parameters, htheta_hat)),
        (r'OFUL-BE $(\epsilon_g=0.1)$', lambda: oful_be(theta, algorithm_parameters, 0.1)),
        (r'OFUL-IS/MF $(r=2)$', lambda: oful_is(theta, algorithm_parameters, 2)),
        ('OFUL-AF', lambda: oful_af(theta, algorithm_parameters)),
    ]
    cases = [label for label, _ in algorithms]

    # algos_regret[run][case] is the list of per-round regrets
    algos_regret = []
    for _ in tqdm(range(R)):
        algos_regret.append([simulate() for _, simulate in algorithms])

    # Save the file
    if save_regret_data:
        os.makedirs("results", exist_ok=True)
        np.save("results/oful_compAlgos_{}_{}.npy".format(T, R), algos_regret)

    # ### Plotting Regret ###
    os.makedirs("plots", exist_ok=True)
    file_to_save = "plots/oful_compAlgos_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, cases, file_to_save, 'lower right')


# ### Experiment 2: Varying correlation ###
def varying_correlation(theta, algorithm_parameters, T, R, save_regret_data):
    """Run OFUL-AF for several values of ``algorithm_parameters[4]`` and plot regret.

    For each repetition, ``algorithm_parameters[4]`` is set in turn to every
    value in the sweep and ``oful_af`` is run once per value. The entry is
    restored to its original value before the function returns, so the
    caller's parameters are left as they were passed in.

    Args:
        theta: parameter vector handed unchanged to ``oful_af``.
        algorithm_parameters: mutable, index-assignable parameter container;
            index 4 is overwritten temporarily during the sweep.
        T: only used here to name the output files.
        R: number of independent repetitions of the sweep.
        save_regret_data: when truthy, the collected regret is written with
            ``np.save`` under ``results/``.

    Returns:
        None. The regret curves are plotted via ``cumulative_regret_plotting``.
    """
    # Swept values of the standard deviation stored at index 4
    sigma_v = [0.3, 0.2, 0.1528, 0.1, 0.0655]

    # Legend entries show rho^2 = 0.01 / (sigma_v^2 + 0.01), rounded to 3 decimals.
    # NOTE(review): the constant 0.01 presumably is the variance of the other
    # noise component (algorithm_parameters[5] ** 2) -- confirm it stays in sync.
    sigma_v_algs = [
        r'OFUL-AF $(\rho^2 = $'
        + str(float("{:.3f}".format(0.01 / ((sigma ** 2) + 0.01))))
        + ')'
        for sigma in sigma_v
    ]

    algos_regret = []
    original_sigma = algorithm_parameters[4]
    try:
        for _ in tqdm(range(R)):
            run_regret = []
            for sigma in sigma_v:
                algorithm_parameters[4] = sigma
                run_regret.append(oful_af(theta, algorithm_parameters))
            algos_regret.append(run_regret)
    finally:
        # Undo the in-place sweep so the caller's parameters are not left at
        # the last swept value (also on error).
        algorithm_parameters[4] = original_sigma

    # Save the regret data
    if save_regret_data:
        np.save("results/oful_vs_correlation_{}_{}.npy".format(T, R), algos_regret)

    # ### Plotting Regret ###
    file_to_save = "plots/oful_vs_correlation_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, sigma_v_algs, file_to_save, 'upper left')


# ### Experiment 3: biased estimated auxiliary feedback function ###
def biased_af(theta, algorithm_parameters, T, R, save_regret_data):
    """Compare OFUL with OFUL-BE under several error levels and plot regret.

    The last error level is 0.0; that run of ``oful_be`` is labelled
    'OFUL-AF' in the plot.

    Args:
        theta: parameter vector handed unchanged to every algorithm.
        algorithm_parameters: shared parameter container handed to every algorithm.
        T: only used here to name the output files.
        R: number of independent repetitions.
        save_regret_data: when truthy, the collected regret is written with
            ``np.save`` under ``results/``.

    Returns:
        None. The regret curves are plotted via ``cumulative_regret_plotting``.
    """
    # Error levels passed as third argument to oful_be; the final 0.0 is the
    # zero-error special case.
    w_errors = [1, 0.2, 0.1, 0.07, 0.05, 0.0]

    # Legend: plain OFUL, one entry per non-zero error, then the zero-error case
    be_labels = [r'OFUL-BE $(\epsilon_g=$' + str(err) + ')' for err in w_errors[:-1]]
    plot_labels = ['OFUL'] + be_labels + ['OFUL-AF']

    algos_regret = []
    for _ in tqdm(range(R)):
        # Baseline first, then every error level in list order
        run_regret = [oful(theta, algorithm_parameters)]
        for err in w_errors:
            run_regret.append(oful_be(theta, algorithm_parameters, err))
        algos_regret.append(run_regret)

    # Optionally persist the raw regret
    if save_regret_data:
        np.save("results/oful_biasAF_{}_{}.npy".format(T, R), algos_regret)

    # Plot the cumulative regret of all cases
    plot_path = "plots/oful_biasAF_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, plot_labels, plot_path, 'upper left')
    

# ### Experiment 4: Varying history size (AF estimates computed once and reused in every run) ###
def estimated_history(theta, algorithm_parameters, T, R, save_regret_data):
    """Compare OFUL with OFUL-EH for several history sizes and plot regret.

    One estimated AF function is computed per history size before the
    repetitions start and is reused in every repetition. The true ``theta``
    is appended as a final "estimate"; running ``oful_eh`` with it produces
    the curve labelled 'OFUL-AF'.

    Args:
        theta: parameter vector handed unchanged to every algorithm.
        algorithm_parameters: shared parameter container handed to every algorithm.
        T: only used here to name the output files.
        R: number of independent repetitions.
        save_regret_data: when truthy, the collected regret is written with
            ``np.save`` under ``results/``.

    Returns:
        None. The regret curves are plotted via ``cumulative_regret_plotting``.
    """
    # Different number of history data (nhd)
    nhd_list = [5, 7, 10, 15, 20]
    est_theta_list = [get_estimated_af(theta, algorithm_parameters, nhd) for nhd in nhd_list]
    nhd_algs = [r'OFUL-EH $(n_h=$' + str(nhd) + ')' for nhd in nhd_list]

    # Adding known AF function as special case
    est_theta_list.append(theta)
    nhd_algs.append('OFUL-AF')

    algos_regret = []
    for _ in tqdm(range(R)):
        # OFUL
        run_regret = [oful(theta, algorithm_parameters)]

        # OFUL-EH for every history size and OFUL-AF (last entry). Every entry
        # of est_theta_list must be simulated so that the number of regret
        # columns matches the number of plot labels.
        for est_theta in est_theta_list:
            run_regret.append(oful_eh(theta, algorithm_parameters, est_theta))

        algos_regret.append(run_regret)

    # Save the regret data
    if save_regret_data:
        np.save("results/oful_eh_{}_{}.npy".format(T, R), algos_regret)

    # ### Plotting Regret ###
    file_to_save = "plots/oful_eh_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, ['OFUL'] + nhd_algs, file_to_save, 'upper left')


# ### Experiment 5: IS or MF ###
def compare_is(theta, algorithm_parameters, T, R, save_regret_data):
    """Compare OFUL, OFUL-IS for several values of r, and OFUL-AF; plot regret.

    Args:
        theta: parameter vector handed unchanged to every algorithm.
        algorithm_parameters: shared parameter container handed to every algorithm.
        T: only used here to name the output files.
        R: number of independent repetitions.
        save_regret_data: when truthy, the collected regret is written with
            ``np.save`` under ``results/``.

    Returns:
        None. The regret curves are plotted via ``cumulative_regret_plotting``.
    """
    # Values of r passed as third argument to oful_is
    r_list = [2, 3, 4, 5, 6]
    is_labels = [r'OFUL-IS $(r=$' + str(r_value) + ')' for r_value in r_list]
    plot_labels = ['OFUL'] + is_labels + ['OFUL-AF']

    algos_regret = []
    for _ in tqdm(range(R)):
        # Baseline OFUL opens each run
        run_regret = [oful(theta, algorithm_parameters)]

        # One OFUL-IS simulation per value of r, in list order
        for r_value in r_list:
            run_regret.append(oful_is(theta, algorithm_parameters, r_value))

        # OFUL-AF closes each run
        run_regret.append(oful_af(theta, algorithm_parameters))

        algos_regret.append(run_regret)

    # Optionally persist the raw regret
    if save_regret_data:
        np.save("results/oful_is_{}_{}.npy".format(T, R), algos_regret)

    # Plot the cumulative regret of all cases
    plot_path = "plots/oful_is_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, plot_labels, plot_path, 'upper left')



# ### Experiment 6: Varying history size with the AF estimate re-drawn in every run ###
def estimated_random_history(theta, algorithm_parameters, T, R, save_regret_data):
    """Compare OFUL with OFUL-EH, re-estimating the AF function in every run.

    Inside each repetition a fresh estimate is obtained from
    ``get_estimated_af`` for every history size right before the matching
    ``oful_eh`` simulation. The curve labelled 'OFUL-AF' is ``oful_eh`` run
    with the true ``theta`` as the estimate.

    Args:
        theta: parameter vector handed unchanged to every algorithm.
        algorithm_parameters: shared parameter container handed to every algorithm.
        T: only used here to name the output files.
        R: number of independent repetitions.
        save_regret_data: when truthy, the collected regret is written with
            ``np.save`` under ``results/``.

    Returns:
        None. The regret curves are plotted via ``cumulative_regret_plotting``.
    """
    # History sizes passed as third argument to get_estimated_af
    nhd_list = [5, 7, 10, 15, 20]
    eh_labels = [r'OFUL-EH $(n_h=$' + str(history_size) + ')' for history_size in nhd_list]
    plot_labels = ['OFUL'] + eh_labels + ['OFUL-AF']

    algos_regret = []
    for _ in tqdm(range(R)):
        # Baseline OFUL opens each run
        run_regret = [oful(theta, algorithm_parameters)]

        # OFUL-EH: draw a new estimate, then simulate with it, size by size
        for history_size in nhd_list:
            fresh_estimate = get_estimated_af(theta, algorithm_parameters, history_size)
            run_regret.append(oful_eh(theta, algorithm_parameters, fresh_estimate))

        # OFUL-AF: OFUL-EH with the true theta used as the estimated AF function
        run_regret.append(oful_eh(theta, algorithm_parameters, theta))

        algos_regret.append(run_regret)

    # NOTE(review): both output paths below use the same "oful_eh" names as
    # estimated_history, so the two experiments overwrite each other's files.
    if save_regret_data:
        np.save("results/oful_eh_{}_{}.npy".format(T, R), algos_regret)

    # Plot the cumulative regret of all cases
    plot_path = "plots/oful_eh_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, plot_labels, plot_path, 'upper left')


# ########################### Bandit problem ###########################
# Samples and rounds
rounds = 5000       # passed to problem_instance_oful and used as T in every experiment
runs = 50           # number of repetitions (R) of each experiment
save_data = False   # set to True to also store the raw regret with np.save
np.random.seed(0)   # fixed seed so that repeated executions are reproducible

# Synthetic dataset (built before dispatching, whatever argument was given)
theta_vector, algo_parameters = problem_instance_oful(rounds)

# ### Running different experiments ###
# Command-line keyword -> experiment function
experiments = {
    "compare": compare_algorithms,
    "correlation": varying_correlation,
    "bias": biased_af,
    "history": estimated_history,
    "is": compare_is,
    "random_history": estimated_random_history,
}

# Without an argument the algorithm comparison is run
selected_experiment = "compare" if len(sys.argv) == 1 else sys.argv[1]

if selected_experiment in experiments:
    experiments[selected_experiment](theta_vector, algo_parameters, rounds, runs, save_data)
else:
    print("Invalid argument passed")