"""
MovieLens_top10 movies versus other algorithms
"""
import pandas as pd
import numpy as np
import cupy as cp
import matplotlib.pyplot as plt
import math
import random
from numpy.linalg import inv
from matplotlib import pyplot
from scipy.io import loadmat
import time
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and the
# old names were removed afterwards.  Try the current name first and fall back
# to the legacy one so the script runs on both old and new Matplotlib.
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')
# Qualitative colour map; the plotting section picks one colour per algorithm.
palette = pyplot.get_cmap('Set1')
# Font properties for legend text.
font1 = {'family' : 'Times New Roman',
'weight' : 'normal',
'size'   : 30,
}
# Wall-clock time at start-up.
start=time.time()
# Number of rounds shown on the x axis.
T=1000
fig=plt.figure(figsize=(10,10))
# x coordinates for the curves: one point per round.
iters=list(range(T))
"""
actions--- movie ids
"""
# All input tables are tab-separated files read from the working directory.
# streaming_batch.csv has no header row; its single column is named 'user_id'.
streaming_batch = pd.read_csv('streaming_batch.csv', sep='\t', names=['user_id'], engine='c')
# user_feature.csv: first row is the header, first column becomes the index.
user_feature = pd.read_csv('user_feature.csv', sep='\t', header=0, index_col=0, engine='c')
# The 'movie_id' column of actions.csv as a plain Python list.
actions_id = list(pd.read_csv('actions.csv', sep='\t', header=0, engine='c')['movie_id'])
reward_list = pd.read_csv('reward_list.csv', sep='\t', header=0, engine='c')
action_context = pd.read_csv('action_context.csv', sep='\t', header=0, engine='c')

# First row of the user-feature table (not read again in the visible code).
kk=user_feature[0:1]

# The algorithms below read column 0 of Context as the movie id and
# columns 2..21 as the 20 arm features.
Context=action_context.to_numpy()
# Earlier variant that built the user stream directly from streaming_batch
# and shuffled part of it; replaced by the saved array loaded below.
#Steam=streaming_batch.to_numpy()

#np.random.shuffle(Steam[10000:30001])

# User stream replayed by the algorithms (column 0 is read as the user id).
# NOTE(review): allow_pickle=True lets np.load execute pickled objects, so
# only load a "Steam_100.npy" file that comes from a trusted source.
Steam=np.load("Steam_100.npy",allow_pickle=True)

User_feature=user_feature.to_numpy()

# The algorithms read column 0 of Reward as the user id and column 1 as the
# movie id; a (user, movie) row present here counts as reward 1.
Reward=reward_list.to_numpy()






# Spot check for one (movie, user) pair: movie id 150 and user id 11.
# The module-level names set here are not read again in the visible code.

# Row of Context holding movie 150, and its 20 features as a column vector.
index = np.where(Context[:, 0] == 150)[0][0]
zzz = Context[index, 2:22].reshape(20, 1)

# All Reward rows for movie 150; reward is 1 when user 11 is among them.
index_reward_m = np.where(Reward[:, 1] == 150)
watched_list = Reward[index_reward_m[0], :]
reward = int(11 in watched_list[:, 0])




"""
LinIMED-1
"""
def LinIMED1(n,w,startindex):
    """Run LinIMED-1 for ``n`` rounds and return the running click-through rate.

    Reads three module-level arrays: ``Context`` (one row per arm, movie id in
    column 0 and the 20 arm features in columns 2..21), ``Steam`` (user id in
    column 0) and ``Reward`` (user id in column 0, movie id in column 1).  The
    reward of a round is 1 when the (user, movie) pair occurs in ``Reward``
    and 0 otherwise.

    Args:
        n: Number of rounds to play.
        w: Scale of the confidence width; ``beta`` is multiplied by ``w**2``.
        startindex: Offset into ``Steam``; round ``t`` serves the user stored
            in row ``t + 1 + startindex``.

    Returns:
        A list of length ``n`` whose ``t``-th entry is the mean reward over
        rounds ``0..t``.
    """
    k = 99      # arms are rows 0..k of Context
    d = 20      # feature dimension
    nam = 20    # ridge regulariser: the inverse Gram matrix starts at I / nam

    # The arm features never change, so build the (d, 1) columns once.
    x = [Context[i, 2:22].astype(float).reshape(d, 1) for i in range(k + 1)]

    V_inv = (1 / nam) * np.eye(d)
    gamma = np.zeros([d, 1])    # running sum of reward * feature
    weight = np.ones(k + 1)     # per-arm index; the smallest one is played
    Click = []
    Sum = 0

    for t in range(n):
        userID = int(Steam[t + 1 + startindex, 0])

        p = int(np.argmin(weight))
        x_p = x[p]
        movieID = Context[p, 0]

        # max(t, 1) keeps the log argument >= 1: with a bare t the first
        # round evaluates log(0) and beta (hence every index) is non-finite.
        beta = (0.1 * (d * np.log((t + 1) * max(t, 1) ** 2)) ** 0.5 + 20 ** 0.5) ** 2
        beta = beta * w * w

        # Reward is 1 iff this user appears in Reward for the chosen movie.
        y = int(np.any((Reward[:, 1] == movieID) & (Reward[:, 0] == userID)))
        Sum = Sum + y
        Click.append(Sum / (t + 1))

        gamma = gamma + x_p * y
        # Rank-one (Sherman-Morrison) update of the inverse Gram matrix.
        V_inv = V_inv - np.dot(V_inv.dot(x_p.dot(x_p.T)), V_inv) / (1 + np.dot(x_p.T, np.dot(V_inv, x_p)))
        htheta = V_inv.dot(gamma)

        # Estimated reward of each arm and its gap to the best estimate.
        est = np.array([htheta.T.dot(x_i)[0, 0] for x_i in x])
        gap = est.max() - est

        # beta * x^T V_inv x for each arm, then the index minimised next round.
        width = np.array([x_i.T.dot(V_inv.dot(x_i))[0, 0] for x_i in x]) * beta
        weight = np.exp(gap ** 2 / width) / width

    return Click

"""LinUCB"""
def LinUCB(n,w,startindex):
    """Run LinUCB for ``n`` rounds and return the running click-through rate.

    Each round plays the arm maximising
    ``theta^T x + 0.2 * sqrt(x^T V_inv x) * sqrt(beta)``, where ``theta`` is
    the ridge-regression estimate and ``V_inv`` the inverse Gram matrix.

    Reads three module-level arrays: ``Context`` (one row per arm, movie id in
    column 0 and the 20 arm features in columns 2..21), ``Steam`` (user id in
    column 0) and ``Reward`` (user id in column 0, movie id in column 1).  The
    reward of a round is 1 when the (user, movie) pair occurs in ``Reward``
    and 0 otherwise.

    Args:
        n: Number of rounds to play.
        w: Scale of the confidence width; ``beta`` is multiplied by ``w**2``.
        startindex: Offset into ``Steam``; round ``t`` serves the user stored
            in row ``t + 1 + startindex``.

    Returns:
        A list of length ``n`` whose ``t``-th entry is the mean reward over
        rounds ``0..t``.
    """
    k = 99      # arms are rows 0..k of Context
    d = 20      # feature dimension
    nam = 20    # ridge regulariser: the inverse Gram matrix starts at I / nam

    # The arm features never change, so build the (d, 1) columns once.
    x = [Context[i, 2:22].astype(float).reshape(d, 1) for i in range(k + 1)]

    V_inv = (1 / nam) * np.eye(d)
    gamma = np.zeros([d, 1])    # running sum of reward * feature
    htheta = np.zeros([d, 1])   # current ridge estimate
    Click = []
    Sum = 0

    for t in range(n):
        userID = int(Steam[t + 1 + startindex, 0])

        # max(t, 1) keeps the log argument >= 1: with a bare t the first
        # round evaluates log(0), beta is non-finite and the first pull is
        # decided by an all-equal score vector instead of the bonus.
        beta = (0.1 * (d * np.log((t + 1) * max(t, 1) ** 2)) ** 0.5 + 20 ** 0.5) ** 2
        beta = beta * w * w

        # Upper confidence bound of every arm under the current estimate.
        scores = [
            float(htheta.T.dot(x_i)[0, 0]
                  + 0.2 * (x_i.T.dot(V_inv.dot(x_i))[0, 0] ** 0.5) * (beta ** 0.5))
            for x_i in x
        ]
        p = int(np.argmax(scores))
        x_p = x[p]
        movieID = Context[p, 0]

        # Reward is 1 iff this user appears in Reward for the chosen movie.
        y = int(np.any((Reward[:, 1] == movieID) & (Reward[:, 0] == userID)))
        Sum = Sum + y
        Click.append(Sum / (t + 1))

        gamma = gamma + x_p * y
        # Rank-one (Sherman-Morrison) update of the inverse Gram matrix.
        V_inv = V_inv - np.dot(V_inv.dot(x_p.dot(x_p.T)), V_inv) / (1 + np.dot(x_p.T, np.dot(V_inv, x_p)))
        htheta = V_inv.dot(gamma)

    return Click







"""LinTS"""
def LinTS(n,w,startindex):
    """Run linear Thompson sampling for ``n`` rounds; return the running CTR.

    Every round draws a parameter vector from a Gaussian centred on the
    current ridge estimate, plays the arm whose features score highest under
    that draw, and then updates the estimate with the observed reward.

    Reads three module-level arrays: ``Context`` (one row per arm, movie id in
    column 0 and the 20 arm features in columns 2..21), ``Steam`` (user id in
    column 0) and ``Reward`` (user id in column 0, movie id in column 1).
    Randomness comes from NumPy's global random state.

    Args:
        n: Number of rounds to play.
        w: Scale of the sampling covariance, which is multiplied by ``w**2``.
        startindex: Offset into ``Steam``; round ``t`` serves the user stored
            in row ``t + 1 + startindex``.

    Returns:
        A list of length ``n`` whose ``t``-th entry is the mean reward over
        rounds ``0..t``.
    """
    num_arms = 100
    dim = 20
    ridge = 20

    # One (dim, 1) feature column per arm, taken from Context rows 0..99.
    arms = []
    for row in range(num_arms):
        column = Context[row, 2:22].astype(float)
        column.shape = (dim, 1)
        arms.append(column)

    gram_inv = (1 / ridge) * np.eye(dim)    # inverse of ridge*I + sum x x^T
    reward_sum = np.zeros([dim, 1])         # running sum of reward * feature
    mean_estimate = np.zeros(dim,)          # centre of the sampling Gaussian
    ctr_curve = []
    clicks = 0

    for step in range(n):
        user = int(Steam[step + 1 + startindex, 0])

        # The covariance scale grows with log(step + 1); it is exactly zero
        # in the first round, so the first draw equals the mean estimate.
        scale = w*w*0.01*(27*dim*np.log(step+1))
        draw = np.random.multivariate_normal(mean_estimate, scale*(gram_inv), 1).T

        scores = [np.dot(arm.T, draw)[0][0] for arm in arms]
        chosen = np.argmax(scores)
        feat = arms[chosen]

        # Rank-one (Sherman-Morrison) update of the inverse Gram matrix.
        gram_inv = gram_inv - np.dot((gram_inv.dot(feat.dot(feat.T))), gram_inv)/(1+np.dot(feat.T, np.dot(gram_inv, feat)))

        # Reward is 1 iff this user appears in Reward for the chosen movie.
        movie = Context[chosen, 0]
        watched = Reward[np.where(Reward[:, 1] == movie)[0], :]
        clicked = 1 if user in watched[:, 0] else 0

        clicks = clicks + clicked
        ctr_curve.append(clicks/(step+1))

        reward_sum = reward_sum + feat*clicked
        mean_estimate = np.squeeze(gram_inv.dot(reward_sum))

    return ctr_curve



"""
LinIMED-2
"""
def LinIMED2(n,w,startindex):
    """Run LinIMED-2 for ``n`` rounds and return the running click-through rate.

    Same index as LinIMED-1, except that the index of the arm with the best
    current estimate is capped at ``n`` before the next arm is chosen.

    Reads three module-level arrays: ``Context`` (one row per arm, movie id in
    column 0 and the 20 arm features in columns 2..21), ``Steam`` (user id in
    column 0) and ``Reward`` (user id in column 0, movie id in column 1).  The
    reward of a round is 1 when the (user, movie) pair occurs in ``Reward``
    and 0 otherwise.

    Args:
        n: Number of rounds to play; also the cap on the best arm's index.
        w: Scale of the confidence width; ``beta`` is multiplied by ``w**2``.
        startindex: Offset into ``Steam``; round ``t`` serves the user stored
            in row ``t + 1 + startindex``.

    Returns:
        A list of length ``n`` whose ``t``-th entry is the mean reward over
        rounds ``0..t``.
    """
    k = 99      # arms are rows 0..k of Context
    d = 20      # feature dimension
    nam = 20    # ridge regulariser: the inverse Gram matrix starts at I / nam

    # The arm features never change, so build the (d, 1) columns once.
    x = [Context[i, 2:22].astype(float).reshape(d, 1) for i in range(k + 1)]

    V_inv = (1 / nam) * np.eye(d)
    gamma = np.zeros([d, 1])    # running sum of reward * feature
    weight = np.ones(k + 1)     # per-arm index; the smallest one is played
    Click = []
    Sum = 0

    for t in range(n):
        userID = int(Steam[t + 1 + startindex, 0])

        p = int(np.argmin(weight))
        x_p = x[p]
        movieID = Context[p, 0]

        # max(t, 1) keeps the log argument >= 1: with a bare t the first
        # round evaluates log(0) and beta (hence every index) is non-finite.
        beta = (0.1 * (d * np.log((t + 1) * max(t, 1) ** 2)) ** 0.5 + 20 ** 0.5) ** 2
        beta = beta * w * w

        # Reward is 1 iff this user appears in Reward for the chosen movie.
        y = int(np.any((Reward[:, 1] == movieID) & (Reward[:, 0] == userID)))
        Sum = Sum + y
        Click.append(Sum / (t + 1))

        gamma = gamma + x_p * y
        # Rank-one (Sherman-Morrison) update of the inverse Gram matrix.
        V_inv = V_inv - np.dot(V_inv.dot(x_p.dot(x_p.T)), V_inv) / (1 + np.dot(x_p.T, np.dot(V_inv, x_p)))
        htheta = V_inv.dot(gamma)

        # Estimated reward of each arm and its gap to the best estimate.
        est = np.array([htheta.T.dot(x_i)[0, 0] for x_i in x])
        gap = est.max() - est
        best = int(np.argmin(gap))

        # beta * x^T V_inv x for each arm, then the index minimised next round.
        width = np.array([x_i.T.dot(V_inv.dot(x_i))[0, 0] for x_i in x]) * beta
        weight = np.exp(gap ** 2 / width) / width
        # The empirically best arm's index is capped at the horizon n.
        weight[best] = min(weight[best], n)

    return Click





"""LinIMED-3"""
def LinIMED3(n,w,c,startindex):
    """Run LinIMED-3 for ``n`` rounds and return the running click-through rate.

    Gaps are measured between upper confidence bounds
    (``theta^T x + sqrt(beta * x^T V_inv x)``) rather than plain estimates,
    and the index of the arm with the highest bound is capped at
    ``c / (largest gap)**2``.

    Reads three module-level arrays: ``Context`` (one row per arm, movie id in
    column 0 and the 20 arm features in columns 2..21), ``Steam`` (user id in
    column 0) and ``Reward`` (user id in column 0, movie id in column 1).  The
    reward of a round is 1 when the (user, movie) pair occurs in ``Reward``
    and 0 otherwise.

    Args:
        n: Number of rounds to play.
        w: Scale of the confidence width; ``beta`` is multiplied by ``w**2``.
        c: Numerator of the cap applied to the best arm's index.
        startindex: Offset into ``Steam``; round ``t`` serves the user stored
            in row ``t + 1 + startindex``.

    Returns:
        A list of length ``n`` whose ``t``-th entry is the mean reward over
        rounds ``0..t``.
    """
    k = 99      # arms are rows 0..k of Context
    d = 20      # feature dimension
    nam = 20    # ridge regulariser: the inverse Gram matrix starts at I / nam

    # The arm features never change, so build the (d, 1) columns once.
    x = [Context[i, 2:22].astype(float).reshape(d, 1) for i in range(k + 1)]

    V_inv = (1 / nam) * np.eye(d)
    gamma = np.zeros([d, 1])    # running sum of reward * feature
    weight = np.ones(k + 1)     # per-arm index; the smallest one is played
    Click = []
    Sum = 0

    for t in range(n):
        userID = int(Steam[t + 1 + startindex, 0])

        p = int(np.argmin(weight))
        x_p = x[p]
        movieID = Context[p, 0]

        # max(t, 1) keeps the log argument >= 1: with a bare t the first
        # round evaluates log(0) and beta (hence every index) is non-finite.
        beta = (0.1 * (d * np.log((t + 1) * max(t, 1) ** 2)) ** 0.5 + 20 ** 0.5) ** 2
        beta = beta * w * w

        # Reward is 1 iff this user appears in Reward for the chosen movie.
        y = int(np.any((Reward[:, 1] == movieID) & (Reward[:, 0] == userID)))
        Sum = Sum + y
        Click.append(Sum / (t + 1))

        gamma = gamma + x_p * y
        # Rank-one (Sherman-Morrison) update of the inverse Gram matrix.  The
        # maintained inverse is reused below instead of re-inverting the Gram
        # matrix for every arm in every round.
        V_inv = V_inv - np.dot(V_inv.dot(x_p.dot(x_p.T)), V_inv) / (1 + np.dot(x_p.T, np.dot(V_inv, x_p)))
        htheta = V_inv.dot(gamma)

        # beta * x^T V_inv x for each arm.
        width = np.array([x_i.T.dot(V_inv.dot(x_i))[0, 0] for x_i in x]) * beta

        # Upper confidence bound of each arm and its gap to the largest bound.
        ucb = np.array([htheta.T.dot(x_i)[0, 0] for x_i in x]) + width ** 0.5
        gap = ucb.max() - ucb
        best = int(np.argmin(gap))
        worst = int(np.argmax(gap))

        weight = np.exp(gap ** 2 / width) / width
        # Cap the best arm's index by c over the squared largest gap.
        weight[best] = min(weight[best], c / gap[worst] ** 2)

    return Click


T=1000      # rounds per run

c=30        # cap constant handed to LinIMED3
st=1000     # offset into Steam: round t serves the user in row st + 1 + t
runs=5      # independent repetitions, each with a reshuffled user order

# Per-algorithm list of CTR curves, one curve of length T per run.
alldata1=[]     # LinIMED-1
alldata2=[]     # LinUCB
alldata3=[]     # LinTS
alldata4=[]     # LinIMED-2
alldata5=[]     # LinIMED-3
for s in range(runs):
    # Shuffle exactly the rows the algorithms replay (st+1 .. st+T) so the
    # window stays correct if st or T is changed.  All five algorithms see
    # the same shuffled order within a run.
    np.random.shuffle(Steam[st+1:st+T+1])

    reward1=np.array(LinIMED1(T,0.15,st))
    alldata1.append(reward1)

    reward2=np.array(LinUCB(T,0.7,st))
    alldata2.append(reward2)

    reward3=np.array(LinTS(T,0.1,st))
    alldata3.append(reward3)

    reward4=np.array(LinIMED2(T,0.15,st))
    alldata4.append(reward4)

    reward5=np.array(LinIMED3(T,0.2,c,st))
    alldata5.append(reward5)

# Stack each list of curves into a (runs, T) array.
alldata1=np.array(alldata1)
print("1")


alldata2=np.array(alldata2)
print("2")


alldata3=np.array(alldata3)
print("3")

alldata4=np.array(alldata4)



alldata5=np.array(alldata5)



# plt.plot(iters, Click_2, label="LinUCB",linewidth=1.0)
# plt.plot(iters, Click_3, label="LinTS",linewidth=1.0)
# plt.plot(iters, Click_1, label="LinIMED-1",linewidth=1.0)
# plt.plot(iters, Click_4, label="LinIMED-2",linewidth=1.0)
# plt.plot(iters, Click_5, label="LinIMED-3",linewidth=1.0)


# One row per algorithm: [name, mean final CTR, std of final CTR].
resultdata=[]
ax=fig.add_subplot(1,1,1)

# (name stored in resultdata, legend label, per-run curves, Set1 colour index)
# in the order the curves are drawn.
series=[
    ("LinUCB   ", "LinUCB", alldata2, 0),
    ("LinTS    ", "LinTS", alldata3, 1),
    ("LinIMED-1", "LinIMED-1", alldata1, 2),
    ("LinIMED-2", "LinIMED-2", alldata4, 6),
    ("LinIMED-3", "LinIMED-3", alldata5, 3),
]
for row_name, legend_name, curves, color_index in series:
    color=palette(color_index)
    # Mean and standard deviation across runs at every round.
    avg=np.mean(curves,axis=0)
    std=np.std(curves,axis=0)
    resultdata.append([row_name, avg[T-1], std[T-1]])
    ax.plot(iters, avg, color=color,label=legend_name,linewidth=3.0)
    # Shaded band of one standard deviation around the mean curve.
    ax.fill_between(iters, avg-std, avg+std, color=color, alpha=0.2)

print(np.array(resultdata))
# Legend and axis labels are set once here; the curves are running
# click-through rates, so the y axis is labelled as CTR.
plt.legend(loc='upper left',fontsize=28)
plt.grid(True)
#plt.xlim((0,1000))
plt.ylim((0,1))

#plt.xticks([0,2000,4000,6000,8000,10000], fontsize=22)
plt.yticks([0,0.2,0.4,0.6,0.8,1.0], fontsize=22)

plt.xlabel('Time', fontsize=30)
plt.ylabel('Click-Through Rate (CTR)', fontsize=30)
#np.savetxt("MovieLensK=100_tuning_w="+str(w)+".txt", np.array(resultdata),fmt="%s")
#fig.savefig('MovieLens_50.png', format='png', dpi=300)
