"""
Plot End of Optimism (compare IDS, LinUCB, LinTS and LinIMED)

"""

import numpy as np
import math
import random
from numpy.linalg import inv
import matplotlib.pyplot as plt
from matplotlib import pyplot
from scipy.io import loadmat
import time
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so pick whichever name this
# installation actually provides instead of hard-coding one of them.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Colour map the regret curves are drawn from.
palette = pyplot.get_cmap('Set1')
# Font properties handed to the axes legend.
font1 = {
    'family': 'Times New Roman',
    'weight': 'normal',
    'size': 30,
}
# Wall-clock reference used by the progress output of the experiment loops.
start = time.time()
# Horizon (number of rounds) of every simulated run.
T = 1000000
fig = plt.figure(figsize=(10, 10))
# x-coordinates (round indices) shared by all plotted curves.
iters = list(range(T))
def IDS(n,eeps):
    """Simulate the information-directed-sampling style policy of this script.

    The instance has d=2 features and three arms: (0, 1),
    (1-eeps, 2*eeps) and (1, 0).  The true parameter is (1, 0), so the mean
    rewards are 0, 1-eeps and 1, and observations carry Gaussian noise with
    standard deviation 0.1.

    Args:
        n: number of rounds to simulate (stored in a local ``T`` that shadows
            the module-level ``T``).
        eeps: the epsilon that positions the near-optimal arm.

    Returns:
        List with one entry per round: the gap (best mean reward minus the
        mean reward of the arm played) of the arm chosen in that round.
    """
    T=n
    k=2
    d=2
    epsilon=eeps
    # Build the k+1 arms; each is reshaped in place into a (d,1) column.
    x=[np.array([0,1]).T]
    x_axis=0
    for i in range(k):
        if i<k-1:        
            # Near-optimal arm (1-epsilon, 2*epsilon).
            x_axis=1-epsilon
            y_axis=2*epsilon
            x.append(np.array([x_axis,y_axis]).T) 
            x[i].shape=(d,1)
        else:
            # Optimal arm (1, 0).
            x_axis=1
            y_axis=math.sqrt(1-x_axis**2)
            x.append(np.array([x_axis,y_axis]).T) 
            x[i].shape=(d,1)
    x[k].shape=(d,1)
    theta_true=np.array([1,0]).T
    # True mean reward of every arm and its gap to the best arm.
    reward=[]
    for i in range(k+1):
        reward.append(theta_true.T.dot(x[i])[0])
    rew=np.array(reward)
    ###rew=np.linspace(0.0, 2.0, num=11)
    gap=max(rew)*np.ones([k+1,])-rew
    
    # nam is the ridge regularisation added to the design matrix V.
    nam=1
    V=nam*np.eye(d)
    # NOTE(review): hmu and hgap are overwritten inside the loop, and weight,
    # numberList, pp and N are never read again in this function.
    hmu=np.zeros(k+1,)
    hgap=np.zeros(k+1,)
    weight=np.exp(-hgap**2)
    numberList=np.linspace(0, k, num=k+1)
    pp=[]
    gamma=np.zeros([d,1])
    N=np.zeros(k+1)
    # y / X collect the observed rewards and played arms of every round.
    y=[]
    GAP=[]
    # s is only advanced in the non-greedy branch below; it controls how many
    # leading entries of X / y enter the estimate and the schedules using s.
    s=1
    MS=float('-inf')
    X=[]
    for t in range(T):
        # NOTE(review): prints the round index on every iteration.
        print(t)
        # NOTE(review): delta is only used by the commented-out beta formula.
        delta=1/s**2
        # Regularised least squares built from the first s-1 recorded rounds.
        # NOTE(review): X and y grow every round (also in the greedy branch,
        # where s is not advanced), so range(s-1) selects the first s-1
        # recorded rounds - confirm this is the intended data set.
        if s==1:
            V=nam*np.eye(d)
        else:
            V=nam*np.eye(d)+sum([X[i].dot(X[i].T) for i in range(s-1)])
        in_V=inv(V)
        if s==1:
            theta=in_V.dot(gamma)
        else:
            theta=in_V.dot(sum([X[i]*y[i] for i in range(s-1)]))
        # Arm with the highest estimated mean reward.
        greedy_index=np.argmax([theta.T.dot(i) for i in x])
        #beta=(0.1*(2*np.log(1/delta)+np.log(np.linalg.det(V)))**0.5+1)**2
        #beta=(0.1*(d*3*np.log(1+t))**0.5+2**0.5)**2
        beta=(0.1**2*(2*np.log(t+1)+d*np.log(1+np.log(t+1))))
        # Estimated means, upper confidence bounds (mean plus sqrt(beta) times
        # the V^-1 norm of the arm) and the resulting gap estimates.
        hmu=[theta.T.dot(i) for i in x]
        hUCB=[theta.T.dot(i)+(beta**0.5)*(i.T.dot(in_V.dot(i)))**0.5 for i in x]
        hgap=max(hUCB)-hmu
        # vz[i]: theta shifted along in_V.dot(x_greedy - x_i) so that the
        # greedy arm and arm i have equal estimated reward under vz[i].
        vz=[[] for i in range(k+1)]
        for i in range(k+1):
            if i!=greedy_index:
                vz[i]=theta-((theta.T.dot(x[greedy_index]-x[i]))[0][0]/((x[greedy_index]-x[i]).T.dot(in_V.dot(x[greedy_index]-x[i])))[0][0])*in_V.dot(x[greedy_index]-x[i])
        # mvalue[i]: half the squared V-norm distance between theta and vz[i]
        # (infinite for the greedy arm itself).
        mvalue=np.zeros(k+1)
        for i in range(k+1):
            if i==greedy_index:
                mvalue[i]=float('inf')
            else:
                mvalue[i]=(1/2)*((vz[i]-theta).T.dot(V.dot(vz[i]-theta)))
        ms=min(mvalue)
        # MS keeps the running maximum of ms; it is only referenced by the
        # commented-out Ns formula below.
        if ms>=MS:
            MS=ms
        #Ns=MS**(-0.5)*np.log(k+1)
        # NOTE(review): for s==1 the denominator is 0, so Ns evaluates to inf
        # (with a NumPy warning) - confirm this is acceptable.
        Ns=1/(0.1**2*(2*np.log(s)+d*np.log(1+np.log(s))))**0.5
        # qz[j]: exponential weight of alternative j, decaying in Ns times its
        # squared V-norm distance from theta (left at 0 for the greedy arm).
        qz=np.zeros(k+1)
        for i in range(k+1):
            if i!=greedy_index:
                qz[i]=np.exp(-Ns*((vz[i]-theta).T.dot(V.dot(vz[i]-theta))))
        # Is[i]: information term of arm i, accumulated over all non-greedy
        # alternatives j with weight qz[j].
        Is=np.zeros(k+1)
        for i in range(k+1):
            for j in range(k+1):
                if j!=greedy_index:
                    Is[i]=Is[i]+1/2*qz[j]*(abs((vz[j]-theta).T.dot(x[i]))+beta**0.5*(x[i].T.dot(in_V.dot(x[i])))**0.5)**2
        
                    
        # Greedy shortcut: when ms exceeds the threshold, play the greedy arm.
        # The observation is recorded but s is NOT advanced in this branch.
        # NOTE(review): at t==0 np.log(t) is -inf, the threshold becomes nan
        # and the comparison is False - confirm this is intended.
        if ms> (1/2)*(0.1*(2*np.log(1+t*np.log(t))+np.log(np.linalg.det(V)))**0.5+1)**2:
            p=greedy_index
            observed_value=theta_true.T.dot(x[greedy_index])+np.random.normal(0,0.1,1)[0]
            y.append(observed_value)
            X.append(x[p])
            GAP.append(gap[p])
                    
        else:
            # For every non-greedy arm i, evaluate a two-arm mixture of the
            # greedy arm and arm i: sol_p[i] holds the ratio
            # (mixed gap)^2 / (mixed information) and prob_greedy[i] the
            # probability that mixture assigns to the greedy arm.
            sol_p=np.zeros(k+1)
            prob_greedy=np.zeros(k+1)
            for i in range(k+1):
                if i==greedy_index:
                    sol_p[i]=float('inf')
                else:
                    # Order the pair so that (D1, I1, index_1) belongs to the
                    # arm with the smaller estimated gap.
                    if hgap[greedy_index]<=hgap[i]:
                        
                        D1=hgap[greedy_index]
                        D2=hgap[i]
                        I1=Is[greedy_index]
                        I2=Is[i]
                        index_1=greedy_index
                        index_2=i
                    else:
                        D2=hgap[greedy_index]
                        D1=hgap[i]
                        I2=Is[greedy_index]
                        I1=Is[i]
                        index_1=i
                        index_2=greedy_index
                    if I1>=I2:
                        # All probability goes to index_1 (p_opt = 0).
                        p_opt=0
                        sol_p[i]=D1**2/I1
                        if index_2==greedy_index:
                            prob_greedy[i]=0
                        else:
                            prob_greedy[i]=1
                    else:
                        # p_opt is the weight placed on index_2, clipped to
                        # the interval [0, 1].
                        # NOTE(review): D2 == D1 makes the first term divide
                        # by zero - confirm this cannot occur or is harmless.
                        p_opt=max(min((D1/(D2-D1)-2*I1/(I2-I1)),1),0)
                        sol_p[i]=((1-p_opt)*D1+p_opt*D2)**2/((1-p_opt)*I1+p_opt*I2)
                        if index_2==greedy_index:
                            prob_greedy[i]=p_opt
                        else:
                            prob_greedy[i]=1-p_opt
            # Pick the alternative with the smallest ratio, then randomise
            # between the greedy arm and that alternative.
            p=np.argmin(sol_p)
            probb=prob_greedy[p]
            if np.random.rand()<=probb:            
                observed_value=theta_true.T.dot(x[greedy_index])+np.random.normal(0,0.1,1)[0]
                y.append(observed_value)
                X.append(x[greedy_index])
                GAP.append(gap[greedy_index])
                s=s+1
                print(t,probb,p,greedy_index)
            else:
                observed_value=theta_true.T.dot(x[p])+np.random.normal(0,0.1,1)[0]
                y.append(observed_value)
                X.append(x[p])
                GAP.append(gap[p])
                s=s+1
        
    return GAP        
        

def LinTS(n,w):
    """Simulate linear Thompson sampling on a fixed three-arm instance.

    The instance has d=2 features and k+1=3 arms: (0, 1), (0.98, 0.04) and
    (1, 0).  The true parameter is (1, 0) and rewards carry Gaussian noise
    with standard deviation 0.1.

    NOTE: this module defines ``LinTS`` a second time further down with the
    same behaviour; the later definition is the one bound at call time.

    Args:
        n: number of rounds to simulate.
        w: multiplier applied to the confidence width ``beta`` that scales
            the sampling covariance.

    Returns:
        List of length ``n``; entry ``t`` is the gap (best mean reward minus
        the played arm's mean reward) of the arm chosen in round ``t``.
    """
    k = 2
    d = 2
    nam = 1  # ridge regularisation of the design matrix

    # The arm set, mean rewards and gaps never change, so build them once
    # instead of once per round.
    arms = [np.array([0.0] * (d - 1) + [1.0]).reshape(d, 1)]
    arms += [np.array([0.98] * (d - 1) + [0.04]).reshape(d, 1)
             for _ in range(k - 1)]
    arms.append(np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0.0]).reshape(d, 1))
    theta_true = np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0])
    rew = np.array([theta_true.dot(arm)[0] for arm in arms])
    gap = max(rew) - rew

    B = nam * np.eye(d)
    in_B = inv(B)          # kept in sync with B so it is inverted once per round
    hmu = np.zeros(d,)     # current least-squares estimate (flat vector)
    f = np.zeros([d, 1])   # running sum of arm * observed reward
    GAP = []
    for t in range(n):
        beta = (0.1 * (d * 3 * np.log(1 + t)) ** 0.5 + 2 ** 0.5) ** 2
        beta = beta * w
        # Sample a parameter around the estimate and act greedily on it.
        Hmu = np.random.multivariate_normal(hmu, beta * in_B, 1).T
        ind = np.argmax([np.dot(arm.T, Hmu)[0][0] for arm in arms])
        GAP.append(gap[ind])
        # Observe a noisy reward and refresh the estimate.
        y = rew[ind] + np.random.normal(0, 0.1, 1)[0]
        B = B + arms[ind].dot(arms[ind].T)
        f = f + arms[ind] * y
        in_B = inv(B)
        hmu = np.squeeze(in_B.dot(f))
    return GAP

def LinIMED(n,w):
    """Simulate the LinIMED-1 index policy on a fixed three-arm instance.

    The instance has d=2 features and k+1=3 arms: (0, 1), (0.98, 0.04) and
    (1, 0).  The true parameter is (1, 0) and rewards carry Gaussian noise
    with standard deviation 0.1.

    Each round plays the arm with the smallest index
    ``exp(gap_i**2 / (beta*q_i)) / (beta*q_i)``, where ``gap_i`` is the
    estimated gap of arm ``i`` and ``q_i = x_i^T V^{-1} x_i``.

    Args:
        n: number of rounds to simulate.  (The horizon used to be read from
            the module-level ``T``, silently ignoring this argument.)
        w: multiplier applied to the confidence width ``beta``.

    Returns:
        List of length ``n``; entry ``t`` is the gap (best mean reward minus
        the played arm's mean reward) of the arm chosen in round ``t``.
    """
    k = 2
    d = 2
    nam = 1  # ridge regularisation of the design matrix

    # The arm set, mean rewards and gaps never change, so build them once.
    arms = [np.array([0.0] * (d - 1) + [1.0]).reshape(d, 1)]
    arms += [np.array([0.98] * (d - 1) + [0.04]).reshape(d, 1)
             for _ in range(k - 1)]
    arms.append(np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0.0]).reshape(d, 1))
    theta_true = np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0])
    rew = np.array([theta_true.dot(arm)[0] for arm in arms])
    gap = max(rew) - rew

    V = nam * np.eye(d)
    gamma = np.zeros([d, 1])   # running sum of arm * observed reward
    weight = np.ones(k + 1)    # all indices tie before any data is seen
    GAP = []
    for t in range(n):
        # Play the arm with the smallest index from the previous round.
        p = int(np.argmin(weight))
        GAP.append(gap[p])
        V += arms[p].dot(arms[p].T)
        beta = (0.1 * (d * 3 * np.log(1 + t)) ** 0.5 + 2 ** 0.5) ** 2
        beta = beta * w
        y = rew[p] + np.random.normal(0, 0.1, 1)[0]
        gamma += arms[p] * y

        # One inversion per round serves both the estimate and every index.
        in_V = inv(V)
        htheta = in_V.dot(gamma)
        means = np.array([htheta.T.dot(arm)[0, 0] for arm in arms])
        hgap = means.max() - means

        weight = []
        for arm, g in zip(arms, hgap):
            scaled_var = arm.T.dot(in_V.dot(arm))[0, 0] * beta
            weight.append(np.exp(g ** 2 / scaled_var) / scaled_var)
    return GAP
    
def LinIMED2(n,w):
    """Simulate the LinIMED-2 index policy on a fixed three-arm instance.

    The instance has d=2 features and k+1=3 arms: (0, 1), (0.98, 0.04) and
    (1, 0).  The true parameter is (1, 0) and rewards carry Gaussian noise
    with standard deviation 0.1.

    Each arm gets the index ``exp(gap_i**2 / (beta*q_i)) / (beta*q_i)`` with
    ``q_i = x_i^T V^{-1} x_i``; the index of the empirically best arm is
    additionally capped at the horizon.  The arm with the smallest index is
    played in the next round.

    Args:
        n: number of rounds to simulate; also the cap applied to the index of
            the empirically best arm.  (Both used to be read from the
            module-level ``T``, silently ignoring this argument.)
        w: multiplier applied to the confidence width ``beta``.

    Returns:
        List of length ``n``; entry ``t`` is the gap (best mean reward minus
        the played arm's mean reward) of the arm chosen in round ``t``.
    """
    k = 2
    d = 2
    nam = 1  # ridge regularisation of the design matrix

    # The arm set, mean rewards and gaps never change, so build them once.
    arms = [np.array([0.0] * (d - 1) + [1.0]).reshape(d, 1)]
    arms += [np.array([0.98] * (d - 1) + [0.04]).reshape(d, 1)
             for _ in range(k - 1)]
    arms.append(np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0.0]).reshape(d, 1))
    theta_true = np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0])
    rew = np.array([theta_true.dot(arm)[0] for arm in arms])
    gap = max(rew) - rew

    V = nam * np.eye(d)
    gamma = np.zeros([d, 1])   # running sum of arm * observed reward
    weight = np.ones(k + 1)    # all indices tie before any data is seen
    GAP = []
    for t in range(n):
        # Play the arm with the smallest index from the previous round.
        p = int(np.argmin(weight))
        GAP.append(gap[p])
        V += arms[p].dot(arms[p].T)
        beta = (0.1 * (d * 3 * np.log(1 + t)) ** 0.5 + 2 ** 0.5) ** 2
        beta = beta * w
        y = rew[p] + np.random.normal(0, 0.1, 1)[0]
        gamma += arms[p] * y

        # One inversion per round serves both the estimate and every index.
        in_V = inv(V)
        htheta = in_V.dot(gamma)
        means = np.array([htheta.T.dot(arm)[0, 0] for arm in arms])
        hgap = means.max() - means
        inde = np.argmin(hgap)   # empirically best arm (estimated gap 0)

        weight = []
        for arm, g in zip(arms, hgap):
            scaled_var = arm.T.dot(in_V.dot(arm))[0, 0] * beta
            weight.append(np.exp(g ** 2 / scaled_var) / scaled_var)
        # Cap the empirically best arm's index at the horizon.
        weight[inde] = np.min([weight[inde], n])
    return GAP


def LinIMED3(n,w,c):
    """Simulate the LinIMED-3 index policy on a fixed three-arm instance.

    The instance has d=2 features and k+1=3 arms: (0, 1), (0.98, 0.04) and
    (1, 0).  The true parameter is (1, 0) and rewards carry Gaussian noise
    with standard deviation 0.1.

    Gaps are measured between upper confidence bounds
    (``mean_i + sqrt(beta*q_i)`` with ``q_i = x_i^T V^{-1} x_i``).  Each arm
    gets the index ``exp(gap_i**2 / (beta*q_i)) / (beta*q_i)``; the index of
    the arm with the highest bound is additionally capped at ``c`` divided by
    the squared largest gap.  The arm with the smallest index is played in
    the next round.

    Args:
        n: number of rounds to simulate.  (The horizon used to be read from
            the module-level ``T``, silently ignoring this argument.)
        w: multiplier applied to the confidence width ``beta``.
        c: constant in the cap on the leading arm's index.

    Returns:
        List of length ``n``; entry ``t`` is the gap (best mean reward minus
        the played arm's mean reward) of the arm chosen in round ``t``.
    """
    k = 2
    d = 2
    nam = 1  # ridge regularisation of the design matrix

    # The arm set, mean rewards and gaps never change, so build them once.
    arms = [np.array([0.0] * (d - 1) + [1.0]).reshape(d, 1)]
    arms += [np.array([0.98] * (d - 1) + [0.04]).reshape(d, 1)
             for _ in range(k - 1)]
    arms.append(np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0.0]).reshape(d, 1))
    theta_true = np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0])
    rew = np.array([theta_true.dot(arm)[0] for arm in arms])
    gap = max(rew) - rew

    V = nam * np.eye(d)
    gamma = np.zeros([d, 1])   # running sum of arm * observed reward
    weight = np.ones(k + 1)    # all indices tie before any data is seen
    GAP = []
    for t in range(n):
        # Play the arm with the smallest index from the previous round.
        p = int(np.argmin(weight))
        GAP.append(gap[p])
        V += arms[p].dot(arms[p].T)
        beta = (0.1 * (d * 3 * np.log(1 + t)) ** 0.5 + 2 ** 0.5) ** 2
        beta = beta * w
        y = rew[p] + np.random.normal(0, 0.1, 1)[0]
        gamma += arms[p] * y

        # One inversion per round serves the estimate, bounds and indices.
        in_V = inv(V)
        htheta = in_V.dot(gamma)
        scaled_vars = [arm.T.dot(in_V.dot(arm))[0, 0] * beta for arm in arms]
        ucbs = np.array([htheta.T.dot(arm)[0, 0] + sv ** 0.5
                         for arm, sv in zip(arms, scaled_vars)])
        hgap = ucbs.max() - ucbs
        inde = np.argmin(hgap)       # arm with the highest bound (gap 0)
        ruaindex = np.argmax(hgap)   # arm with the largest bound gap

        weight = [np.exp(g ** 2 / sv) / sv for g, sv in zip(hgap, scaled_vars)]
        # Cap the leading arm's index by c over the squared largest gap.
        weight[inde] = np.min([weight[inde], c / hgap[ruaindex] ** 2])
    return GAP
        
        
        
def linUCB(n,w):
    """Simulate LinUCB on a fixed three-arm instance.

    The instance has d=2 features and k+1=3 arms: (0, 1), (0.98, 0.04) and
    (1, 0).  The true parameter is (1, 0) and rewards carry Gaussian noise
    with standard deviation 0.1.

    Each round plays the arm maximising
    ``mean_i + sqrt(q_i) * sqrt(beta)`` with ``q_i = x_i^T V^{-1} x_i``.
    The former ``if t<0`` random-exploration branch could never execute
    (``t`` starts at 0) and has been removed.

    Args:
        n: number of rounds to simulate.
        w: multiplier applied to the confidence width ``beta``.

    Returns:
        List of length ``n``; entry ``t`` is the gap (best mean reward minus
        the played arm's mean reward) of the arm chosen in round ``t``.
    """
    k = 2
    d = 2
    nam = 1  # ridge regularisation of the design matrix

    # The arm set, mean rewards and gaps never change, so build them once.
    arms = [np.array([0.0] * (d - 1) + [1.0]).reshape(d, 1)]
    arms += [np.array([0.98] * (d - 1) + [0.04]).reshape(d, 1)
             for _ in range(k - 1)]
    arms.append(np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0.0]).reshape(d, 1))
    theta_true = np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0])
    rew = np.array([theta_true.dot(arm)[0] for arm in arms])
    gap = max(rew) - rew

    V = nam * np.eye(d)
    in_V = inv(V)              # kept in sync with V: one inversion per round
    gamma = np.zeros([d, 1])   # running sum of arm * observed reward
    htheta = np.zeros([d, 1])  # current least-squares estimate
    GAP = []
    for t in range(n):
        beta = (0.1 * (d * 3 * np.log(1 + t)) ** 0.5 + 2 ** 0.5) ** 2
        beta = beta * w
        # Upper confidence bound of every arm under the current estimate.
        ucbs = [
            (htheta.T.dot(arm)
             + (arm.T.dot(in_V.dot(arm))) ** 0.5 * beta ** 0.5)[0, 0]
            for arm in arms
        ]
        p = int(np.argmax(ucbs))
        GAP.append(gap[p])
        # Observe a noisy reward and refresh the estimate.
        V += arms[p].dot(arms[p].T)
        y = rew[p] + np.random.normal(0, 0.1, 1)[0]
        gamma += arms[p] * y
        in_V = inv(V)
        htheta = in_V.dot(gamma)
    return GAP


def LinTS(n,w):
    """Simulate linear Thompson sampling on a fixed three-arm instance.

    The instance has d=2 features and k+1=3 arms: (0, 1), (0.98, 0.04) and
    (1, 0).  The true parameter is (1, 0) and rewards carry Gaussian noise
    with standard deviation 0.1.

    NOTE: this rebinds the ``LinTS`` name that is already defined earlier in
    this module; this later definition is the one the experiments call.

    Args:
        n: number of rounds to simulate.
        w: multiplier applied to the confidence width ``beta`` that scales
            the sampling covariance.

    Returns:
        List of length ``n``; entry ``t`` is the gap (best mean reward minus
        the played arm's mean reward) of the arm chosen in round ``t``.
    """
    k = 2
    d = 2
    nam = 1  # ridge regularisation of the design matrix

    # The arm set, mean rewards and gaps never change, so build them once
    # instead of once per round.
    arms = [np.array([0.0] * (d - 1) + [1.0]).reshape(d, 1)]
    arms += [np.array([0.98] * (d - 1) + [0.04]).reshape(d, 1)
             for _ in range(k - 1)]
    arms.append(np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0.0]).reshape(d, 1))
    theta_true = np.array([1 / math.sqrt(d - 1)] * (d - 1) + [0])
    rew = np.array([theta_true.dot(arm)[0] for arm in arms])
    gap = max(rew) - rew

    B = nam * np.eye(d)
    in_B = inv(B)          # kept in sync with B so it is inverted once per round
    hmu = np.zeros(d,)     # current least-squares estimate (flat vector)
    f = np.zeros([d, 1])   # running sum of arm * observed reward
    GAP = []
    for t in range(n):
        beta = (0.1 * (d * 3 * np.log(1 + t)) ** 0.5 + 2 ** 0.5) ** 2
        beta = beta * w
        # Sample a parameter around the estimate and act greedily on it.
        Hmu = np.random.multivariate_normal(hmu, beta * in_B, 1).T
        ind = np.argmax([np.dot(arm.T, Hmu)[0][0] for arm in arms])
        GAP.append(gap[ind])
        # Observe a noisy reward and refresh the estimate.
        y = rew[ind] + np.random.normal(0, 0.1, 1)[0]
        B = B + arms[ind].dot(arms[ind].T)
        f = f + arms[ind] * y
        in_B = inv(B)
        hmu = np.squeeze(in_B.dot(f))
    return GAP









k=2
d=2

# Constant passed to LinIMED3 (cap on the leading arm's index).
c=30

#w=0.1**2
# Number of independent repetitions per algorithm.
runs=10


def _cumulative_regret(history):
    """Return the running sum of a run's per-round gaps as a float array."""
    return np.cumsum(np.asarray(history, dtype=float))


# Every alldataN below is a (runs, T) array: one cumulative-regret curve per
# repetition.  np.cumsum replaces the former element-by-element Python loops.
alldata1=np.array([_cumulative_regret(LinIMED(T,1**2)) for _ in range(runs)])
print("1")

alldata2=np.array([_cumulative_regret(linUCB(T,1**2)) for _ in range(runs)])
print("2")

alldata3=np.array([_cumulative_regret(LinTS(T,1**2)) for _ in range(runs)])
print("3")

alldata4=np.array([_cumulative_regret(LinIMED2(T,1**2)) for _ in range(runs)])
print("4")

alldata5=np.array([_cumulative_regret(LinIMED3(T,1**2,c)) for _ in range(runs)])
print("5")

# IDS with epsilon=0.02; prints the fraction of the first third completed.
alldata6=[]
for s in range(runs):
    print(((s+1))/runs/3)
    alldata6.append(_cumulative_regret(IDS(T,0.02)))
alldata6=np.array(alldata6)
print("1")

# IDS with epsilon=0.01.
# NOTE(review): alldata7 and alldata8 are not read by the plotting code in
# this file - confirm whether these runs are still needed.
alldata7=[]
for s in range(runs):
    print(((s+1))/runs/3+1/3)
    alldata7.append(_cumulative_regret(IDS(T,0.01)))
alldata7=np.array(alldata7)
print("2")

# IDS with epsilon=0.02 again; prints progress and elapsed wall-clock time.
alldata8=[]
for s in range(runs):
    end=time.time()
    print(s/runs,end-start)
    alldata8.append(_cumulative_regret(IDS(T,0.02)))
alldata8=np.array(alldata8)
print("3")











# Final mean / standard deviation of the cumulative regret per algorithm.
resultdata=[]
ax=fig.add_subplot(1,1,1)

# One entry per plotted curve:
# (label in the printed table, legend label, (runs, T) regret array,
#  index into the colour palette).
curves=[
    ("LinUCB   ", "LinUCB", alldata2, 0),
    ("LinTS    ", "LinTS", alldata3, 1),
    ("LinIMED-1", "LinIMED-1", alldata1, 2),
    ("LinIMED-2", "LinIMED-2", alldata4, 6),
    ("LinIMED-3", "LinIMED-3", alldata5, 3),
    ("IDS", "IDS", alldata6, 9),
]
for table_label, legend_label, regret_runs, color_index in curves:
    color=palette(color_index)
    avg=np.mean(regret_runs,axis=0)[:T]
    std=np.std(regret_runs,axis=0)[:T]
    resultdata.append([table_label, avg[T-1], std[T-1]])
    ax.plot(iters, avg, color=color,label=legend_label,linewidth=3.0)
    # Shaded band of one standard deviation around the mean, computed with
    # array arithmetic instead of a per-element map/lambda.
    ax.fill_between(iters, avg-std, avg+std, color=color, alpha=0.2)

ax.legend(loc='upper left',prop=font1)
ax.set_xlabel('Time',fontsize=24)
ax.set_ylabel('Regret',fontsize=24)

# The pyplot calls below act on the same axes, so this legend and the axis
# labels further down replace the ones set just above.
plt.legend(loc='upper left',fontsize=28)
#ax.legend().set_visible(False)
plt.grid(True)
# Derive the x-range and its six ticks from the horizon T rather than from a
# hard-coded 1000000, so the figure stays correct if T is changed.
plt.xlim((0,T))
plt.ylim((0,300))
plt.xticks(np.linspace(0,T,6), fontsize=22)
ax.xaxis.get_offset_text().set_fontsize(22)
plt.yticks([50,100,150,200,250,300], fontsize=22)
plt.xlabel('Time', fontsize=30)
plt.ylabel('Regret', fontsize=30)


#fig.savefig('./eps=0.005.png', format='png', dpi=300)
print(np.array(resultdata))


