# !/usr/bin/env python
# coding: utf-8

# Importing python packages
import sys
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import scipy.stats as ss
from sklearn.preprocessing import PolynomialFeatures


# ########################## Plotting functions #########################
# Getting Average regret and Confidence interval
def cumulative_regret_error(regret):
    """Batch the cumulative regret of several runs and compute error bars.

    The horizon is cut into batches of ``samples // 20`` rounds (at least one
    round per batch); a final, shorter batch covers any remainder so the last
    point is always the full horizon.

    Args:
        regret: 2-D array-like of shape ``(runs, samples)`` holding the
            instantaneous regret of every round of every run.

    Returns:
        tuple: ``(time_horizon, regret, conf_regret)`` where
            ``time_horizon`` is the list of round counts at which the
            cumulative regret is reported (starting at 0), ``regret`` is the
            cumulative regret at those points averaged over runs, and
            ``conf_regret`` is a list with the 0.95 Student-t quantile times
            the standard error of the mean at each point.
    """
    regret_runs = np.asarray(regret, dtype=float)
    runs, samples = regret_runs.shape

    # The batch size must be an integer: a fractional batch never matches the
    # integer round counter, which left the horizon and the batched regret
    # with different lengths whenever samples was not a multiple of 20.
    batch = max(1, samples // 20)

    # Reporting points: every full batch plus the end of the horizon.
    time_horizon = list(range(0, samples + 1, batch))
    if time_horizon[-1] != samples:
        time_horizon.append(samples)

    # Cumulative regret per run, with a leading 0 for "no rounds played yet".
    cumulative = np.concatenate(
        (np.zeros((runs, 1)), np.cumsum(regret_runs, axis=1)), axis=1
    )
    batched_regret = cumulative[:, time_horizon]

    mean_regret = np.mean(batched_regret, axis=0)

    # Confidence half-width across runs at every reporting point.
    t_quantile = ss.t.ppf(0.95, runs - 1)
    conf_regret = list(t_quantile * ss.sem(batched_regret, axis=0))
    return time_horizon, mean_regret, conf_regret


# Regret Plotting
def cumulative_regret_plotting(regret, cases, file_name, plot_location):
    """Plot the batched cumulative regret of every case and save the figure.

    Args:
        regret: Array-like indexed as ``[run, case, round]``; the slice
            ``[:, c]`` is handed to ``cumulative_regret_error`` for case ``c``.
        cases: Sequence of legend labels, one per case.
        file_name: Path the figure is written to.
        plot_location: Legend location forwarded to ``plt.legend``.
    """
    colors = list("gbcmrykb")
    shape = ['--^', '--v', '--H', '--d', '--+', '--*', '--v', '--^']
    regret_array = np.array(regret)     # converted once instead of per case

    # Error bars plus a marker line for every case
    for c, case_label in enumerate(cases):
        # Cycle through the styles so more than len(colors) cases no longer
        # raise IndexError; the first len(colors) cases keep their old style,
        # later ones reuse colours with a shifted marker.
        color = colors[c % len(colors)]
        marker = shape[(c + c // len(colors)) % len(shape)]

        horizon, batched_regret, error = cumulative_regret_error(regret_array[:, c])
        plt.errorbar(horizon, batched_regret, error, color=color)
        plt.plot(horizon, batched_regret, color + marker, label=case_label)

    # Plot details
    plt.rc('font', size=12)                     # controls default text sizes
    plt.legend(loc=plot_location, numpoints=1)  # Location of the legend
    plt.xlabel("Rounds", fontsize=20)
    plt.ylabel("Regret", fontsize=20)

    # Saving plot
    plt.savefig(file_name, bbox_inches='tight', dpi=600)
    plt.close()


# Lin-UCB algorithm
def lin_ucb(context, theta, algorithm_parameters):
    """Run Lin-UCB for ``T`` rounds and return the regret of every round.

    Each round builds six 8-dimensional context-action vectors from the first
    four entries of the round's context, plays the one with the largest upper
    confidence bound, observes a reward with Gaussian noise and refits the
    regularised least-squares estimate of ``theta``.

    Args:
        context: Indexable by round; ``context[t]`` needs at least four entries.
        theta: Parameter vector used to generate rewards and measure regret.
        algorithm_parameters: ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.

    Returns:
        list: Instantaneous regret of each round.

    Note:
        The feature construction is hard-coded (six actions, eight features),
        so it only matches the ``2*d`` sized state when ``d == 4``.
    """
    d, lambda_value, L, S, v_sigma, w_sigma, delta, T = algorithm_parameters[:8]

    # Sign applied to the second copy of the context; one row per action
    sign_patterns = np.array([
        [1, 1, -1, -1],
        [1, -1, 1, -1],
        [-1, 1, 1, -1],
        [1, -1, -1, 1],
        [-1, 1, -1, 1],
        [-1, -1, 1, 1],
    ])
    n_actions = len(sign_patterns)

    # Estimator state
    d_ca = 2*d
    sigma = np.sqrt(v_sigma**2 + w_sigma**2)
    XY_sum = np.zeros(d_ca)
    V = lambda_value * np.identity(d_ca)
    V_inv = np.linalg.inv(V)
    theta_hat = np.ones(d_ca)/d_ca

    # Round-independent pieces of the confidence radius
    log_delta_term = 2.0*np.log(1.0/delta)
    root_d_ca = np.sqrt(d_ca)
    lambda_d_ca = lambda_value*d_ca
    bias_term = S*np.sqrt(lambda_value)

    instantaneous_regret = []
    for t in range(T):
        x_t = context[t]

        # Context-action feature vectors: [x, sign * x] for every action
        head = np.array([x_t[0], x_t[1], x_t[2], x_t[3]])
        x_ta = np.hstack((np.tile(head, (n_actions, 1)), sign_patterns*head))

        # Confidence radius for this round
        log_cnfterm = log_delta_term + (d_ca*np.log(1.0 + ((t*L*root_d_ca)/lambda_d_ca)))
        alpha = bias_term + (sigma*log_cnfterm)

        # Optimistic index of every action, then the greedy choice
        action_ucb = np.array([
            x.dot(theta_hat) + alpha*np.sqrt(np.inner(np.inner(x, V_inv), x))
            for x in x_ta
        ])
        a_t = np.argmax(action_ucb)
        played = x_ta[a_t]

        # Noisy reward of the played action
        epsilon_t = np.random.normal(0, sigma, 1)[0]
        y_t = np.inner(played, theta) + epsilon_t

        # Refit the estimate
        XY_sum += played*y_t
        V += np.outer(played, played)
        V_inv = np.linalg.inv(V)
        theta_hat = np.inner(V_inv, XY_sum)

        # Gap between the best action and the played one
        instantaneous_regret.append(np.max(x_ta.dot(theta)) - played.dot(theta))

    return instantaneous_regret


# Lin-UCB-AF for linear function with known variance and unknown correlation-coefficient
def lin_ucb_af(context, theta, algorithm_parameters):
    """Run Lin-UCB-AF (Lin-UCB with auxiliary feedback) and return the regret.

    The reward is the sum of a latent part (odd entries of ``theta``) and an
    auxiliary part (even entries of ``theta``), each with its own Gaussian
    noise.  From the fourth round on, the auxiliary noise scaled by an
    estimated coefficient ``beta`` is subtracted from the reward statistics,
    and the confidence radius is shrunk by the estimated correlation
    ``rho_hat``.  The true auxiliary mean ``x . theta_w`` is used throughout.

    Args:
        context: Indexable by round; ``context[t]`` needs at least four entries.
        theta: Parameter vector used to generate rewards and measure regret.
        algorithm_parameters: ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.

    Returns:
        list: Instantaneous regret of each round.

    Note:
        The feature construction is hard-coded (six actions, eight features),
        so it only matches the ``2*d`` sized state when ``d == 4``.
    """
    d, lambda_value, L, S, v_sigma, w_sigma, delta, T = algorithm_parameters[:8]

    # Sign applied to the second copy of the context; one row per action
    sign_patterns = np.array([
        [1, 1, -1, -1],
        [1, -1, 1, -1],
        [-1, 1, 1, -1],
        [1, -1, -1, 1],
        [-1, 1, -1, 1],
        [-1, -1, 1, 1],
    ])
    n_actions = len(sign_patterns)

    # Estimator state
    d_ca = 2*d
    sigma = np.sqrt(v_sigma**2 + w_sigma**2)
    XY_sum = np.zeros(d_ca)
    XZ_sum = np.zeros(d_ca)
    XWnoise_sum = np.zeros(d_ca)
    V = lambda_value * np.identity(d_ca)
    V_inv = np.linalg.inv(V)
    theta_zhat = np.ones(d_ca)/d_ca
    rho_hat = 0

    # Running sums used for the covariance / variance estimates
    reward_af = 0                       # sum of y * w
    reward_mean_af = 0                  # sum of y * mean(w)
    x_ta_af = np.zeros(d_ca)            # sum of x * w
    x_ta_mean_af = np.zeros(d_ca)       # sum of x * mean(w)
    af_seq = 0                          # sum of w^2
    mean_af = 0                         # sum of mean(w)^2
    af_mean_af = 0                      # sum of w * mean(w)

    # Even coordinates drive the auxiliary feedback, odd ones the latent part
    theta_w = np.array([theta[i] if i % 2 == 0 else 0.0 for i in range(d_ca)], dtype=float)
    theta_v = np.array([theta[i] if i % 2 != 0 else 0.0 for i in range(d_ca)], dtype=float)

    # Round-independent pieces of the confidence radius
    log_delta_term = 2.0*np.log(1.0/delta)
    root_d_ca = np.sqrt(d_ca)
    lambda_d_ca = lambda_value*d_ca
    bias_term = S*np.sqrt(lambda_value)

    instantaneous_regret = []
    for t in range(T):
        x_t = context[t]

        # Context-action feature vectors: [x, sign * x] for every action
        head = np.array([x_t[0], x_t[1], x_t[2], x_t[3]])
        x_ta = np.hstack((np.tile(head, (n_actions, 1)), sign_patterns*head))

        # Confidence radius, scaled by the residual noise after correction
        log_cnfterm = log_delta_term + (d_ca*np.log(1.0 + ((t*L*root_d_ca)/lambda_d_ca)))
        observation_sd = np.sqrt(1 - min(rho_hat**2, 1))*sigma
        alpha = bias_term + (observation_sd*log_cnfterm)

        # Optimistic index of every action, then the greedy choice
        action_ucb = np.array([
            x.dot(theta_zhat) + alpha*np.sqrt(np.inner(np.inner(x, V_inv), x))
            for x in x_ta
        ])
        a_t = np.argmax(action_ucb)
        played = x_ta[a_t]

        # Latent and auxiliary feedback (latent noise is drawn first)
        mean_vt = np.inner(played, theta_v)
        v_t = mean_vt + np.random.normal(0, v_sigma, 1)[0]
        mean_wt = np.inner(played, theta_w)
        wt_noise = np.random.normal(0, w_sigma, 1)[0]
        w_t = mean_wt + wt_noise
        y_t = v_t + w_t

        # Design matrix update
        V += np.outer(played, played)
        V_inv = np.linalg.inv(V)

        # Reward statistics, corrected once enough rounds have been seen
        XY_sum += played*y_t
        XWnoise_sum += played*wt_noise
        if t > 2:
            theta_hat = np.inner(V_inv, XY_sum)
            cov_yw = (reward_af - reward_mean_af - x_ta_af.dot(theta_hat) + x_ta_mean_af.dot(theta_hat))/(t-1)
            af_var = (af_seq + (mean_af) - (2.0*af_mean_af))/(t-1)
            beta = cov_yw/af_var
            XZ_sum = XY_sum - (beta*XWnoise_sum)
            rho_hat = (cov_yw)/(np.sqrt(af_var)*sigma)
        else:
            XZ_sum = XY_sum

        theta_zhat = np.inner(V_inv, XZ_sum)

        # Auxiliary-feedback sums (include this round only from the next one)
        reward_af += (y_t*w_t)
        reward_mean_af += (y_t*mean_wt)
        x_ta_af += played*w_t
        x_ta_mean_af += played*mean_wt
        af_seq += (w_t**2)
        mean_af += (mean_wt**2)
        af_mean_af += w_t*mean_wt

        # Gap between the best action and the played one
        instantaneous_regret.append(np.max(x_ta.dot(theta)) - played.dot(theta))

    return instantaneous_regret


# Lin-UCB-BE with biased auxiliary estimated function
def lin_ucb_be(context, theta, algorithm_parameters, w_error):
    """Run Lin-UCB-BE (biased auxiliary-mean estimate) and return the regret.

    Same procedure as the auxiliary-feedback variant, except that the
    running sums involving the auxiliary mean use ``x . theta_w + w_error``
    instead of the true mean ``x . theta_w``.

    Args:
        context: Indexable by round; ``context[t]`` needs at least four entries.
        theta: Parameter vector used to generate rewards and measure regret.
        algorithm_parameters: ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.
        w_error: Offset added to the auxiliary mean in the running statistics.

    Returns:
        list: Instantaneous regret of each round.

    Note:
        The feature construction is hard-coded (six actions, eight features),
        so it only matches the ``2*d`` sized state when ``d == 4``.
    """
    d, lambda_value, L, S, v_sigma, w_sigma, delta, T = algorithm_parameters[:8]

    # Sign applied to the second copy of the context; one row per action
    sign_patterns = np.array([
        [1, 1, -1, -1],
        [1, -1, 1, -1],
        [-1, 1, 1, -1],
        [1, -1, -1, 1],
        [-1, 1, -1, 1],
        [-1, -1, 1, 1],
    ])
    n_actions = len(sign_patterns)

    # Estimator state
    d_ca = 2*d
    sigma = np.sqrt(v_sigma**2 + w_sigma**2)
    XY_sum = np.zeros(d_ca)
    XZ_sum = np.zeros(d_ca)
    XWnoise_sum = np.zeros(d_ca)
    V = lambda_value * np.identity(d_ca)
    V_inv = np.linalg.inv(V)
    theta_zhat = np.ones(d_ca)/d_ca
    rho_hat = 0

    # Running sums used for the covariance / variance estimates
    reward_af = 0                       # sum of y * w
    reward_mean_af = 0                  # sum of y * (biased mean of w)
    x_ta_af = np.zeros(d_ca)            # sum of x * w
    x_ta_mean_af = np.zeros(d_ca)       # sum of x * (biased mean of w)
    af_seq = 0                          # sum of w^2
    mean_af = 0                         # sum of (biased mean of w)^2
    af_mean_af = 0                      # sum of w * (biased mean of w)

    # Even coordinates drive the auxiliary feedback, odd ones the latent part
    theta_w = np.array([theta[i] if i % 2 == 0 else 0.0 for i in range(d_ca)], dtype=float)
    theta_v = np.array([theta[i] if i % 2 != 0 else 0.0 for i in range(d_ca)], dtype=float)

    # Round-independent pieces of the confidence radius
    log_delta_term = 2.0*np.log(1.0/delta)
    root_d_ca = np.sqrt(d_ca)
    lambda_d_ca = lambda_value*d_ca
    bias_term = S*np.sqrt(lambda_value)

    instantaneous_regret = []
    for t in range(T):
        x_t = context[t]

        # Context-action feature vectors: [x, sign * x] for every action
        head = np.array([x_t[0], x_t[1], x_t[2], x_t[3]])
        x_ta = np.hstack((np.tile(head, (n_actions, 1)), sign_patterns*head))

        # Confidence radius, scaled by the residual noise after correction
        log_cnfterm = log_delta_term + (d_ca*np.log(1.0 + ((t*L*root_d_ca)/lambda_d_ca)))
        observation_sd = np.sqrt(1 - min(rho_hat**2, 1))*sigma
        alpha = bias_term + (observation_sd*log_cnfterm)

        # Optimistic index of every action, then the greedy choice
        action_ucb = np.array([
            x.dot(theta_zhat) + alpha*np.sqrt(np.inner(np.inner(x, V_inv), x))
            for x in x_ta
        ])
        a_t = np.argmax(action_ucb)
        played = x_ta[a_t]

        # Latent and auxiliary feedback (latent noise is drawn first)
        mean_vt = np.inner(played, theta_v)
        v_t = mean_vt + np.random.normal(0, v_sigma, 1)[0]
        mean_wt = np.inner(played, theta_w)
        wt_noise = np.random.normal(0, w_sigma, 1)[0]
        w_t = mean_wt + wt_noise
        y_t = v_t + w_t

        # Design matrix update
        V += np.outer(played, played)
        V_inv = np.linalg.inv(V)

        # Reward statistics, corrected once enough rounds have been seen
        XY_sum += played*y_t
        XWnoise_sum += played*wt_noise
        if t > 2:
            theta_hat = np.inner(V_inv, XY_sum)
            cov_yw = (reward_af - reward_mean_af - x_ta_af.dot(theta_hat) + x_ta_mean_af.dot(theta_hat))/(t-1)
            af_var = (af_seq + (mean_af) - (2.0*af_mean_af))/(t-1)
            beta = cov_yw/af_var
            XZ_sum = XY_sum - (beta*XWnoise_sum)
            rho_hat = (cov_yw)/(np.sqrt(af_var)*sigma)
        else:
            XZ_sum = XY_sum

        theta_zhat = np.inner(V_inv, XZ_sum)

        # Auxiliary-feedback sums, using the biased mean of w
        biased_mean_wt = mean_wt + w_error
        reward_af += (y_t*w_t)
        reward_mean_af += (y_t*biased_mean_wt)
        x_ta_af += played*w_t
        x_ta_mean_af += played*biased_mean_wt
        af_seq += (w_t**2)
        mean_af += (biased_mean_wt**2)
        af_mean_af += w_t*biased_mean_wt

        # Gap between the best action and the played one
        instantaneous_regret.append(np.max(x_ta.dot(theta)) - played.dot(theta))

    return instantaneous_regret


# Lin-UCB-EH with auxiliary estimated function using historical data
def lin_ucb_eh(context, theta, algorithm_parameters, est_theta):
    """Run Lin-UCB-EH (externally estimated auxiliary mean) and return the regret.

    Same procedure as the auxiliary-feedback variant, except that the
    running sums involving the auxiliary mean use ``x . theta_est_w``, where
    ``theta_est_w`` keeps the even entries of ``est_theta``.

    Args:
        context: Indexable by round; ``context[t]`` needs at least four entries.
        theta: Parameter vector used to generate rewards and measure regret.
        algorithm_parameters: ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.
        est_theta: Estimate of ``theta``; only its even entries are used.

    Returns:
        list: Instantaneous regret of each round.

    Note:
        The feature construction is hard-coded (six actions, eight features),
        so it only matches the ``2*d`` sized state when ``d == 4``.
    """
    d, lambda_value, L, S, v_sigma, w_sigma, delta, T = algorithm_parameters[:8]

    # Sign applied to the second copy of the context; one row per action
    sign_patterns = np.array([
        [1, 1, -1, -1],
        [1, -1, 1, -1],
        [-1, 1, 1, -1],
        [1, -1, -1, 1],
        [-1, 1, -1, 1],
        [-1, -1, 1, 1],
    ])
    n_actions = len(sign_patterns)

    # Estimator state
    d_ca = 2*d
    sigma = np.sqrt(v_sigma**2 + w_sigma**2)
    XY_sum = np.zeros(d_ca)
    XZ_sum = np.zeros(d_ca)
    XWnoise_sum = np.zeros(d_ca)
    V = lambda_value * np.identity(d_ca)
    V_inv = np.linalg.inv(V)
    theta_zhat = np.ones(d_ca)/d_ca
    rho_hat = 0

    # Running sums used for the covariance / variance estimates
    reward_af = 0                       # sum of y * w
    reward_mean_af = 0                  # sum of y * (estimated mean of w)
    x_ta_af = np.zeros(d_ca)            # sum of x * w
    x_ta_mean_af = np.zeros(d_ca)       # sum of x * (estimated mean of w)
    af_seq = 0                          # sum of w^2
    mean_af = 0                         # sum of (estimated mean of w)^2
    af_mean_af = 0                      # sum of w * (estimated mean of w)

    # Even coordinates drive the auxiliary feedback, odd ones the latent part
    theta_w = np.zeros(d_ca)
    theta_est_w = np.zeros(d_ca)
    theta_v = np.zeros(d_ca)
    for i in range(0, d_ca, 2):
        theta_w[i] = theta[i]
        theta_est_w[i] = est_theta[i]
    for i in range(1, d_ca, 2):
        theta_v[i] = theta[i]

    # Round-independent pieces of the confidence radius
    log_delta_term = 2.0*np.log(1.0/delta)
    root_d_ca = np.sqrt(d_ca)
    lambda_d_ca = lambda_value*d_ca
    bias_term = S*np.sqrt(lambda_value)

    instantaneous_regret = []
    for t in range(T):
        x_t = context[t]

        # Context-action feature vectors: [x, sign * x] for every action
        head = np.array([x_t[0], x_t[1], x_t[2], x_t[3]])
        x_ta = np.hstack((np.tile(head, (n_actions, 1)), sign_patterns*head))

        # Confidence radius, scaled by the residual noise after correction
        log_cnfterm = log_delta_term + (d_ca*np.log(1.0 + ((t*L*root_d_ca)/lambda_d_ca)))
        observation_sd = np.sqrt(1 - min(rho_hat**2, 1))*sigma
        alpha = bias_term + (observation_sd*log_cnfterm)

        # Optimistic index of every action, then the greedy choice
        action_ucb = np.array([
            x.dot(theta_zhat) + alpha*np.sqrt(np.inner(np.inner(x, V_inv), x))
            for x in x_ta
        ])
        a_t = np.argmax(action_ucb)
        played = x_ta[a_t]

        # Latent and auxiliary feedback (latent noise is drawn first)
        mean_vt = np.inner(played, theta_v)
        v_t = mean_vt + np.random.normal(0, v_sigma, 1)[0]
        mean_wt = np.inner(played, theta_w)
        wt_noise = np.random.normal(0, w_sigma, 1)[0]
        w_t = mean_wt + wt_noise
        y_t = v_t + w_t

        # Design matrix update
        V += np.outer(played, played)
        V_inv = np.linalg.inv(V)

        # Reward statistics, corrected once enough rounds have been seen
        XY_sum += played*y_t
        XWnoise_sum += played*wt_noise
        if t > 2:
            theta_hat = np.inner(V_inv, XY_sum)
            cov_yw = (reward_af - reward_mean_af - x_ta_af.dot(theta_hat) + x_ta_mean_af.dot(theta_hat))/(t-1)
            af_var = (af_seq + (mean_af) - (2.0*af_mean_af))/(t-1)
            beta = cov_yw/af_var
            XZ_sum = XY_sum - (beta*XWnoise_sum)
            rho_hat = (cov_yw)/(np.sqrt(af_var)*sigma)
        else:
            XZ_sum = XY_sum

        theta_zhat = np.inner(V_inv, XZ_sum)

        # Auxiliary-feedback sums, using the estimated mean of w
        est_wt = np.inner(played, theta_est_w)
        reward_af += (y_t*w_t)
        reward_mean_af += (y_t*est_wt)
        x_ta_af += played*w_t
        x_ta_mean_af += played*est_wt
        af_seq += (w_t**2)
        mean_af += ((est_wt)**2)
        af_mean_af += w_t*(est_wt)

        # Gap between the best action and the played one
        instantaneous_regret.append(np.max(x_ta.dot(theta)) - played.dot(theta))

    return instantaneous_regret


# Lin-UCB-IS with auxiliary estimated function using independent samples
def lin_ucb_is(context, theta, algorithm_parameters, r):
    """Run Lin-UCB-IS and return the instantaneous regret of every round.

    Follows the same loop as the other auxiliary-feedback variants, but the
    auxiliary mean is taken from an estimate that is refreshed every round:
    ``r`` new uniformly drawn 2-D points are appended to ``af_data``, the
    whole set is lifted with degree-2 polynomial features, and the result is
    passed to ``estimated_theta_data``.  The statistics that depend on the
    estimated mean are then rebuilt from the full history of played vectors.

    Args:
        context: Indexable by round; ``context[t]`` needs at least four entries.
        theta: Parameter vector used to generate rewards and measure regret.
        algorithm_parameters: ``[d, lambda, L, S, v_sigma, w_sigma, delta, T]``.
        r: Number of new auxiliary sample points drawn per round.

    Returns:
        list: Instantaneous regret of each round.

    NOTE(review): ``estimated_theta_data`` is defined outside this chunk; its
    return value is assumed to have eight entries because it is multiplied
    element-wise by a hard-coded length-8 mask -- confirm.
    """
    # Algorithm parameters
    d               = algorithm_parameters[0]       # Dimension of x
    lambda_value    = algorithm_parameters[1]       # Lambda value to ensure invertibility
    L               = algorithm_parameters[2]       # Value of L, i.e., max ||x_i||
    S               = algorithm_parameters[3]       # Value of S, i.e., max ||\theta||
    v_sigma         = algorithm_parameters[4]       # Sub-gaussian noise parameter of latent feedback
    w_sigma         = algorithm_parameters[5]       # Sub-gaussian noise parameter of auxiliary feedback
    delta           = algorithm_parameters[6]       # Confidence in the regret
    T               = algorithm_parameters[7]       # Number of contexts used for experiments
    
    # Initialization of different variables
    A           = 6                                 # Number of actions
    d_ca        = 2*d                               # Dimension of context-action feature vector
    XY_sum      = np.zeros(d_ca)                    # Sum of XY 
    V           = lambda_value * np.identity(d_ca)  # Initialization of data matrix
    V_inv       = np.linalg.inv(V)                  # Initialization of inverse of data matrix
    sigma       = np.sqrt(v_sigma**2 + w_sigma**2)  # Standard deviation of the total reward noise
    
    # Additional variables for handling Auxiliary feedback
    af_seq          = 0                             # Sum of squares of auxiliary feedback
    mean_af         = 0                             # Sum of squared estimated means of auxiliary feedback
    af_mean_af      = 0                             # Sum of product of auxiliary feedback and its estimated mean
    XZ_sum          = np.zeros(d_ca)                # Sum of XZ (product of hybrid reward and context-action vector)
    reward_af       = 0                             # Sum of product of reward and auxiliary feedback
    reward_mean_af  = 0                             # Sum of product of reward and estimated mean auxiliary feedback
    x_ta_af         = np.zeros(d_ca)                # Sum of product of x_ta and auxiliary feedback
    x_ta_mean_af    = np.zeros(d_ca)                # Sum of product of x_ta and estimated mean auxiliary feedback
    XWnoise_sum     = np.zeros(d_ca)                # Sum of product of x_ta and the auxiliary noise
    theta_zhat      = np.ones(d_ca)/d_ca            # Initial estimate of theta using AF
    rho_hat         = 0                             # Estimated correlation-coefficient between Reward and its auxiliary feedback
    beta            = 1.0                           # Initial beta estimate
    af_data         = []                            # Store new auxiliary feedback data
    all_X           = []                            # Store all X (context-action pairs)

    # Sub-Thetas: even entries generate the auxiliary feedback, odd entries the latent part
    theta_w     = np.zeros(d_ca)
    theta_v     = np.zeros(d_ca)
    for i in range(d_ca):
        if i % 2 == 0:
            theta_w[i]      = theta[i]
        else:
            theta_v[i] = theta[i]

    # Stores instantaneous regret of each round
    instantaneous_regret = []  

    # ### Main part ###
    for t in range(T):
        x_t = context[t]
        
        # Get context-action feature vectors (hard-coded: six actions, eight features)
        x_ta = np.array([
                [x_t[0], x_t[1], x_t[2], x_t[3], x_t[0], x_t[1], -x_t[2], -x_t[3]],
                [x_t[0], x_t[1], x_t[2], x_t[3], x_t[0], -x_t[1], x_t[2], -x_t[3]],
                [x_t[0], x_t[1], x_t[2], x_t[3], -x_t[0], x_t[1], x_t[2], -x_t[3]],
                [x_t[0], x_t[1], x_t[2], x_t[3], x_t[0], -x_t[1], -x_t[2], x_t[3]],
                [x_t[0], x_t[1], x_t[2], x_t[3], -x_t[0], x_t[1], -x_t[2], x_t[3]],
                [x_t[0], x_t[1], x_t[2], x_t[3], -x_t[0], -x_t[1], x_t[2], x_t[3]]
            ])

        # ## Selecting action for context x_t
        # Confidence term without variance
        log_cnfterm = (2.0*np.log(1.0/delta)) + (d_ca*np.log(1.0 + ((t*L*np.sqrt(d_ca))/(lambda_value*d_ca))) )
        
        # Observation standard deviation, shrunk by the estimated correlation
        observation_sd = np.sqrt(1 - min(rho_hat**2, 1))*sigma  

        # Calculating the UCBs for each action
        alpha = (S*np.sqrt(lambda_value)) + (observation_sd*log_cnfterm)
        action_ucb = np.zeros(A)
        for a in range(A):
            conf_term = alpha*np.sqrt(np.inner(np.inner(x_ta[a], V_inv), x_ta[a])) 
            action_ucb[a] =  x_ta[a].dot(theta_zhat) + conf_term

        # Selecting action with maximum UCB index value
        a_t = np.argmax(action_ucb)

        # ## Updating variables
        # Generate noisy outputs (latent noise is drawn before auxiliary noise)
        mean_vt     = np.inner(x_ta[a_t], theta_v)
        v_t         = mean_vt + np.random.normal(0, v_sigma, 1)[0]
        mean_wt     = np.inner(x_ta[a_t], theta_w)
        wt_noise    = np.random.normal(0, w_sigma, 1)[0]
        
        # Observing noisy output
        w_t         = mean_wt + wt_noise
        y_t         = v_t + w_t

        # Updating context variables [@TODO: Use Sherman-Morrison formula]
        V           += np.outer(x_ta[a_t], x_ta[a_t])
        V_inv       = np.linalg.inv(V)

        # Updating variables needed to estimate theta_z
        XY_sum += x_ta[a_t]*y_t
        XWnoise_sum += x_ta[a_t]*wt_noise
        if t <= 2:
            # First three rounds: no correction, use the raw reward statistics
            XZ_sum = XY_sum

        else:
            theta_hat       = np.inner(V_inv, XY_sum)
            cov_yw          = (reward_af - reward_mean_af - x_ta_af.dot(theta_hat) + x_ta_mean_af.dot(theta_hat))/(t-1)
            af_var          = (af_seq + (mean_af) - (2.0*af_mean_af))/(t-1)
            beta            = cov_yw/af_var
            XZ_sum          = XY_sum - (beta*XWnoise_sum)

            # Updating estimated correlation-coefficient
            rho_hat = (cov_yw)/(np.sqrt(af_var)*sigma)
        
        # Updating theta_z estimate
        theta_zhat  = np.inner(V_inv, XZ_sum)

        # Adding r new uniformly drawn 2-D points for estimating the auxiliary feedback function
        for _ in range(r):
             af_data.append(list(np.random.uniform(low=[-1, -1], high=[1,1])))

        # Dataset pre-processing (re-lifts the whole, growing af_data every round)
        poly            = PolynomialFeatures(2)                 # Polynomial kernel
        hd_af_data      = poly.fit_transform(af_data)           # Lifting the data
        hd_af_data_no1  = np.delete(hd_af_data, 0, axis=1)      # Removing first column as it is 1 only
        hd_af_data      = np.delete(hd_af_data_no1,-1,axis=1)   # Dropping the last lifted feature (not a constant column)

        # Estimating auxiliary feedback function
        # NOTE(review): estimated_theta_data is not visible in this chunk; verify its contract.
        est_theta   = estimated_theta_data(theta, hd_af_data, V, XY_sum, algorithm_parameters)
        theta_est_w = np.multiply(est_theta, [1, 0, 1, 0, 1, 0, 1, 0])  # Keep even entries only

        # Updating variables: sums that depend on theta_est_w are rebuilt from
        # the full history because the estimate changes every round
        all_X.append(x_ta[a_t])
        reward_af       += (y_t*w_t)                 
        reward_mean_af  = np.inner(XY_sum, theta_est_w)                
        x_ta_af         += x_ta[a_t]*w_t   
        x_ta_mean_af    = sum((x*np.inner(x, theta_est_w)) for x in all_X)
        af_seq          += (w_t**2)        
        mean_af         = sum((np.inner(x, theta_est_w)**2) for x in all_X)
        af_mean_af      = np.inner(x_ta_af, theta_est_w)           

        # Instantaneous Regret
        round_regret = np.max(x_ta.dot(theta)) - x_ta[a_t].dot(theta)   
        instantaneous_regret.append(round_regret)
    return instantaneous_regret


# ######################### Experiment Setting #########################
# Non-linear contextual bandit problem instance
def problem_instance_non_lin_ucb(rounds, degree):
    """Draw a synthetic non-linear contextual bandit instance.

    Two-dimensional contexts are sampled uniformly from [-1, 1]^2 and lifted
    with a polynomial feature map of the given ``degree``. The first lifted
    column (the constant bias term) and the last lifted column are dropped.
    NOTE: the last column produced by ``PolynomialFeatures`` is the
    highest-order term of the final input (x2**2 for degree 2), not a
    constant; it is removed here so that, for degree 2, four lifted features
    remain, which is what the context-action builders in this file index.

    Parameters
    ----------
    rounds : int
        Number of contexts to generate (also stored as the horizon T).
    degree : int
        Degree passed to ``PolynomialFeatures``.

    Returns
    -------
    hd_contexts : ndarray
        Lifted contexts, one row per round.
    theta_vector : ndarray
        Parameter of length ``2 * lifted_dim``, scaled to norm ``S``.
    alg_parameters : list
        ``[lifted_dim, lambda, L, S, v_sigma, w_sigma, delta, T]``.
    """
    # Fixed constants of the problem instance
    dimension = 2
    lambda_val, l_value, s_value = 0.01, 2, 1
    v_noise, w_noise = 0.1, 0.1
    delta_val = 0.05

    # Raw contexts, uniform on the square [-1, 1] x [-1, 1]
    contexts = np.random.uniform(low=[-1, -1], high=[1, 1], size=(rounds, dimension))

    # Lift the contexts, then keep everything except the first and last column
    lifted = PolynomialFeatures(degree).fit_transform(contexts)
    hd_contexts = lifted[:, 1:-1].copy()
    hdcontx_dim = hd_contexts.shape[1]

    # Underlying parameter: a random half followed by -0.5 times that half,
    # rescaled so that its Euclidean norm equals s_value
    theta_half = np.random.uniform(low=0, high=1, size=hdcontx_dim)
    raw_theta = np.concatenate((theta_half, -0.5*theta_half))
    theta_vector = s_value * raw_theta/np.linalg.norm(raw_theta)

    # Algorithm parameters: [hd_context_dimension, lambda, L, S, v_sigma, w_sigma, delta, T]
    alg_parameters = [
        hdcontx_dim, lambda_val, l_value, s_value,
        v_noise, w_noise, delta_val, rounds,
    ]
    return hd_contexts, theta_vector, alg_parameters


# Estimating auxiliary function using historical data
def get_estimated_af(theta, parameters, n):
    """Estimate the parameter vector from ``n`` freshly simulated history samples.

    A new set of ``n`` lifted contexts is drawn with
    ``problem_instance_non_lin_ucb(n, 2)``. For every context one of the six
    actions is picked uniformly at random, a noisy reward is generated from
    ``theta``, and a regularised least-squares estimate is returned.

    Parameters
    ----------
    theta : array_like
        Parameter used to generate the simulated rewards.
    parameters : sequence
        Algorithm parameters; entries 0 (lifted dimension), 1 (lambda),
        4 and 5 (the two noise standard deviations) are read.
    n : int
        Number of history samples.

    Returns
    -------
    ndarray
        Estimate ``inv(lambda*I + sum x x^T) . sum x*y``.
    """
    # Sign applied to each of the four lifted features in the second half of
    # the context-action vector; one row per action
    action_signs = np.array([
        [1, 1, -1, -1],
        [1, -1, 1, -1],
        [-1, 1, 1, -1],
        [1, -1, -1, 1],
        [-1, 1, -1, 1],
        [-1, -1, 1, 1],
    ])

    # History contexts (the other outputs of the generator are not needed)
    history = problem_instance_non_lin_ucb(n, 2)[0]

    feat_dim = 2*parameters[0]
    gram = parameters[1] * np.identity(feat_dim)
    xy_total = np.zeros(feat_dim)
    noise_std = np.sqrt(parameters[4]**2 + parameters[5]**2)

    for hx_s in history:
        base = np.array([hx_s[0], hx_s[1], hx_s[2], hx_s[3]])

        # Random action, then its context-action feature vector
        a_s = np.random.randint(0, 6)
        feature = np.concatenate((base, action_signs[a_s]*base))

        # Noisy reward and running least-squares statistics
        y_s = np.inner(feature, theta) + np.random.normal(0, noise_std, 1)[0]
        gram += np.outer(feature, feature)
        xy_total += feature*y_s

    # Regularised least-squares solution
    return np.inner(np.linalg.inv(gram), xy_total)


# Estimating auxiliary function using given data
def estimated_theta_data(theta, data, pV, pXY_sum, parameters):
    """Estimate the parameter vector from given contexts plus prior statistics.

    For every row of ``data`` one of the six actions is picked uniformly at
    random and a noisy reward is simulated from ``theta``. The resulting
    least-squares statistics are combined with the supplied ``pV`` and
    ``pXY_sum`` before solving.

    Parameters
    ----------
    theta : array_like
        Parameter used to generate the simulated rewards.
    data : sequence
        Lifted contexts; the first four entries of each row are used.
    pV : ndarray
        Matrix added to the locally built Gram matrix before inversion.
    pXY_sum : ndarray
        Vector added to the locally built feature-reward sum.
    parameters : sequence
        Algorithm parameters; entries 0 (lifted dimension), 1 (lambda),
        4 and 5 (the two noise standard deviations) are read.

    Returns
    -------
    ndarray
        ``inv(lambda*I + sum x x^T + pV) . (sum x*y + pXY_sum)``.
    """
    # Sign applied to each of the four lifted features in the second half of
    # the context-action vector; one row per action
    action_signs = np.array([
        [1, 1, -1, -1],
        [1, -1, 1, -1],
        [-1, 1, 1, -1],
        [1, -1, -1, 1],
        [-1, 1, -1, 1],
        [-1, -1, 1, 1],
    ])

    feat_dim = 2*parameters[0]
    gram = parameters[1] * np.identity(feat_dim)
    xy_total = np.zeros(feat_dim)
    noise_std = np.sqrt(parameters[4]**2 + parameters[5]**2)

    for x_s in data:
        base = np.array([x_s[0], x_s[1], x_s[2], x_s[3]])

        # Random action, then its context-action feature vector
        a_s = np.random.randint(0, 6)
        feature = np.concatenate((base, action_signs[a_s]*base))

        # Noisy reward and running least-squares statistics
        y_s = np.inner(feature, theta) + np.random.normal(0, noise_std, 1)[0]
        gram += np.outer(feature, feature)
        xy_total += feature*y_s

    # Combine with the supplied statistics and solve
    combined_inv = np.linalg.inv(gram + pV)
    return np.inner(combined_inv, (xy_total + pXY_sum))


# ### Experiment 1: Comparing algorithms ###
def compare_algorithms(contexts, theta, algorithm_parameters, T, R, save_regret_data):
    """Experiment 1: run the five algorithm variants and plot their regret.

    Parameters
    ----------
    contexts : ndarray
        Lifted contexts; shuffled in place at the start of every run.
    theta : ndarray
        Underlying parameter vector.
    algorithm_parameters : list
        Algorithm parameters forwarded to every variant.
    T, R : int
        Horizon and number of runs; ``R`` controls the number of repetitions
        and both are used in the output file names.
    save_regret_data : bool
        When true, the collected regret is written to ``results/``.
    """
    # Estimate built once from 10 history samples and reused in every run
    htheta_hat = get_estimated_af(theta, algorithm_parameters, 10)

    # (label, runner) pairs in plotting order
    variants = [
        ('NLin-UCB',
         lambda: lin_ucb(contexts, theta, algorithm_parameters)),
        (r'NLin-UCB-EH $(n_h=10)$',
         lambda: lin_ucb_eh(contexts, theta, algorithm_parameters, htheta_hat)),
        (r'NLin-UCB-BE $(\epsilon_g=0.1)$',
         lambda: lin_ucb_be(contexts, theta, algorithm_parameters, 0.1)),
        (r'NLin-UCB-IS/MF $(r=2)$',
         lambda: lin_ucb_is(contexts, theta, algorithm_parameters, 2)),
        ('NLin-UCB-AF',
         lambda: lin_ucb_af(contexts, theta, algorithm_parameters)),
    ]
    cases = [label for label, _ in variants]

    algos_regret = []
    for _ in tqdm(range(R)):
        # Every variant of one run sees the same context ordering
        np.random.shuffle(contexts)
        algos_regret.append([run() for _, run in variants])

    # Save the file
    if save_regret_data:
        np.save("results/non_lin_ucb_compAlgos_{}_{}.npy".format(T, R), algos_regret)

    # Plot the regret
    file_to_save = "plots/non_lin_ucb_compAlgos_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, cases, file_to_save, 'lower right')


# ### Experiment 2: Varying correlation ###
def varying_correlation(contexts, theta, algorithm_parameters, T, R, save_regret_data):
    """Experiment 2: run NLin-UCB-AF for several values of the v-noise sigma.

    Each value in ``sigma_v`` replaces entry 4 (v_sigma) of the algorithm
    parameters for one call of ``lin_ucb_af``. The replacement is done on a
    copy, so the caller's ``algorithm_parameters`` list is left untouched
    (previously it was overwritten and kept the last sigma after returning).

    Parameters
    ----------
    contexts : ndarray
        Lifted contexts; shuffled in place at the start of every run.
    theta : ndarray
        Underlying parameter vector.
    algorithm_parameters : list
        Algorithm parameters; not modified by this function.
    T, R : int
        Horizon and number of runs; ``R`` controls the number of repetitions
        and both are used in the output file names.
    save_regret_data : bool
        When true, the collected regret is written to ``results/``.
    """
    # Different value of standard deviation
    sigma_v = [0.3, 0.2, 0.1528, 0.1, 0.0655]

    # Legend labels showing rho^2 rounded to three decimals.
    # NOTE(review): the constant 0.01 presumably is w_sigma**2 for the default
    # w_sigma = 0.1 -- confirm before changing w_sigma.
    sigma_v_algs = [
        r'NLin-UCB-AF $(\rho^2 = $'
        + str(float("{:.3f}".format(0.01/((sigma**2) + 0.01))))
        + ')'
        for sigma in sigma_v
    ]

    algos_regret = []
    for _ in tqdm(range(R)):
        np.random.shuffle(contexts)

        run_regret = []
        for sigma in sigma_v:
            # Work on a copy so the caller's parameter list is not mutated
            case_parameters = list(algorithm_parameters)
            case_parameters[4] = sigma
            run_regret.append(lin_ucb_af(contexts, theta, case_parameters))

        algos_regret.append(run_regret)

    # Save the regret data
    if save_regret_data:
        np.save("results/non_lin_ucb_vs_correlation_{}_{}.npy".format(T, R), algos_regret)

    # Plot the regret
    file_to_save = "plots/non_lin_ucb_vs_correlation_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, sigma_v_algs, file_to_save, 'upper left')


# ### Experiment 3: biased estimated auxiliary feedback function ###
def biased_af(contexts, theta, algorithm_parameters, T, R, save_regret_data):
    """Experiment 3: compare NLin-UCB with NLin-UCB-BE at several bias levels.

    Every run executes ``lin_ucb`` once and then ``lin_ucb_be`` once per bias
    level. The final level (0.0) is plotted under the label 'NLin-UCB-AF'.

    Parameters
    ----------
    contexts : ndarray
        Lifted contexts; shuffled in place at the start of every run.
    theta : ndarray
        Underlying parameter vector.
    algorithm_parameters : list
        Algorithm parameters forwarded to every variant.
    T, R : int
        Horizon and number of runs; ``R`` controls the number of repetitions
        and both are used in the output file names.
    save_regret_data : bool
        When true, the collected regret is written to ``results/``.
    """
    # Bias levels; the last one (no bias) is the NLin-UCB-AF special case
    bias_levels = [1, 0.2, 0.1, 0.07, 0.05, 0.0]

    # Legend labels in the same order as the regret columns
    labels = ['NLin-UCB']
    labels.extend(
        r'NLin-UCB-BE $(\epsilon_g=$' + str(eps) + ')' for eps in bias_levels[:-1]
    )
    labels.append('NLin-UCB-AF')

    algos_regret = []
    for _ in tqdm(range(R)):
        np.random.shuffle(contexts)

        # Baseline first, then one NLin-UCB-BE run per bias level
        per_run = [lin_ucb(contexts, theta, algorithm_parameters)]
        for eps in bias_levels:
            per_run.append(lin_ucb_be(contexts, theta, algorithm_parameters, eps))

        algos_regret.append(per_run)

    # Save the regret data
    if save_regret_data:
        np.save("results/non_lin_ucb_biasAF_{}_{}.npy".format(T, R), algos_regret)

    # Plot the regret
    file_to_save = "plots/non_lin_ucb_biasAF_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, labels, file_to_save, 'upper left')
    

# ### Experiment 4: Varying history ###
def estimated_history(contexts, theta, algorithm_parameters, T, R, save_regret_data):
    """Experiment 4: NLin-UCB-EH with estimates from different history sizes.

    One estimate per history size is computed up front with
    ``get_estimated_af`` and reused in every run. The true ``theta`` is added
    as a final entry and plotted under the label 'NLin-UCB-AF'.

    Parameters
    ----------
    contexts : ndarray
        Lifted contexts; shuffled in place at the start of every run.
    theta : ndarray
        Underlying parameter vector.
    algorithm_parameters : list
        Algorithm parameters forwarded to every variant.
    T, R : int
        Horizon and number of runs; ``R`` controls the number of repetitions
        and both are used in the output file names.
    save_regret_data : bool
        When true, the collected regret is written to ``results/``.
    """
    # Number of history samples behind each estimate
    history_sizes = [5, 7, 10, 15, 20]

    # Estimates are fixed across runs; the true theta is the special case
    estimates = []
    for n_h in history_sizes:
        estimates.append(get_estimated_af(theta, algorithm_parameters, n_h))
    estimates.append(theta)

    # Legend labels in the same order as the regret columns
    labels = ['NLin-UCB']
    labels += [r'NLin-UCB-EH $(n_h=$' + str(n_h) + ')' for n_h in history_sizes]
    labels += ['NLin-UCB-AF']

    algos_regret = []
    for _ in tqdm(range(R)):
        np.random.shuffle(contexts)

        # Baseline first, then NLin-UCB-EH once per estimate
        per_run = [lin_ucb(contexts, theta, algorithm_parameters)]
        for estimate in estimates:
            per_run.append(lin_ucb_eh(contexts, theta, algorithm_parameters, estimate))

        algos_regret.append(per_run)

    # Save the regret data
    if save_regret_data:
        np.save("results/non_lin_ucb_eh_{}_{}.npy".format(T, R), algos_regret)

    # Plot the regret
    file_to_save = "plots/non_lin_ucb_eh_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, labels, file_to_save, 'upper left')


# ### Experiment 5: IS or MF ###
def compare_is(contexts, theta, algorithm_parameters, T, R, save_regret_data):
    """Experiment 5: compare NLin-UCB-IS for several values of ``r``.

    Every run executes ``lin_ucb`` once, ``lin_ucb_is`` once per value of
    ``r``, and finally ``lin_ucb_af``.

    Parameters
    ----------
    contexts : ndarray
        Lifted contexts; shuffled in place at the start of every run.
    theta : ndarray
        Underlying parameter vector.
    algorithm_parameters : list
        Algorithm parameters forwarded to every variant.
    T, R : int
        Horizon and number of runs; ``R`` controls the number of repetitions
        and both are used in the output file names.
    save_regret_data : bool
        When true, the collected regret is written to ``results/``.
    """
    # Values of r handed to lin_ucb_is
    r_values = [2, 3, 4, 5, 6]

    # Legend labels in the same order as the regret columns
    labels = ['NLin-UCB']
    labels += [r'NLin-UCB-IS $(r=$' + str(r_val) + ')' for r_val in r_values]
    labels += ['NLin-UCB-AF']

    algos_regret = []
    for _ in tqdm(range(R)):
        np.random.shuffle(contexts)

        # Baseline, then the IS variants, then NLin-UCB-AF
        per_run = [lin_ucb(contexts, theta, algorithm_parameters)]
        for r_val in r_values:
            per_run.append(lin_ucb_is(contexts, theta, algorithm_parameters, r_val))
        per_run.append(lin_ucb_af(contexts, theta, algorithm_parameters))

        algos_regret.append(per_run)

    # Save the regret data
    if save_regret_data:
        np.save("results/non_lin_ucb_is_{}_{}.npy".format(T, R), algos_regret)

    # Plot the regret
    file_to_save = "plots/non_lin_ucb_is_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, labels, file_to_save, 'upper left')


# ### Experiment 6: Varying history and random selection for each round ###
def estimated_random_history(contexts, theta, algorithm_parameters, T, R, save_regret_data):
    """Experiment 6: NLin-UCB-EH with a fresh history estimate in every run.

    Unlike experiment 4, ``get_estimated_af`` is called again inside each run
    for every history size. The last column uses the true ``theta`` as the
    estimate and is plotted under the label 'NLin-UCB-AF'.

    Parameters
    ----------
    contexts : ndarray
        Lifted contexts; shuffled in place at the start of every run.
    theta : ndarray
        Underlying parameter vector.
    algorithm_parameters : list
        Algorithm parameters forwarded to every variant.
    T, R : int
        Horizon and number of runs; ``R`` controls the number of repetitions
        and both are used in the output file names.
    save_regret_data : bool
        When true, the collected regret is written to ``results/``.
    """
    # Number of history samples behind each estimate
    history_sizes = [5, 7, 10, 15, 20]

    # Legend labels in the same order as the regret columns
    labels = ['NLin-UCB']
    labels += [r'NLin-UCB-EH $(n_h=$' + str(n_h) + ')' for n_h in history_sizes]
    labels += ['NLin-UCB-AF']

    algos_regret = []
    for _ in tqdm(range(R)):
        np.random.shuffle(contexts)

        # Baseline
        per_run = [lin_ucb(contexts, theta, algorithm_parameters)]

        # NLin-UCB-EH: re-estimate from new history, then run
        for n_h in history_sizes:
            fresh_estimate = get_estimated_af(theta, algorithm_parameters, n_h)
            per_run.append(lin_ucb_eh(contexts, theta, algorithm_parameters, fresh_estimate))

        # NLin-UCB-AF (NLin-UCB-EH with theta itself as the estimate)
        per_run.append(lin_ucb_eh(contexts, theta, algorithm_parameters, theta))

        algos_regret.append(per_run)

    # Save the regret data
    if save_regret_data:
        np.save("results/non_lin_ucb_erh_{}_{}.npy".format(T, R), algos_regret)

    # Plot the regret
    file_to_save = "plots/non_lin_ucb_erh_{}_{}.png".format(T, R)
    cumulative_regret_plotting(algos_regret, labels, file_to_save, 'upper left')


# ########################### Bandit problem ###########################
# Samples and rounds
rounds = 5000       # horizon of every experiment
runs = 50           # independent repetitions per experiment
save_data = False   # whether the regret arrays are written to disk
degree = 2          # degree of the polynomial lifting
np.random.seed(0)   # fixed seed so the instance is reproducible

# Synthetic dataset
context_data, theta_vector, algo_parameters = problem_instance_non_lin_ucb(rounds, degree)

# ### Running different experiments ###
# Experiment name (first command-line argument) -> experiment function
_experiments = {
    "compare": compare_algorithms,
    "correlation": varying_correlation,
    "bias": biased_af,
    "history": estimated_history,
    "is": compare_is,
    "random_history": estimated_random_history,
}

# With no argument the algorithm comparison is run
_selected = "compare" if len(sys.argv) == 1 else sys.argv[1]
_experiment = _experiments.get(_selected)

if _experiment is None:
    print ("Invalid argument passed")
else:
    _experiment(context_data, theta_vector, algo_parameters, rounds, runs, save_data)