# Simulation setting 1: designed for Case (A)
# Noise distribution: Uniform Mixture of 3/4*Unif(-15,0) + 1/4*Unif(0,15)
# The noise distribution satisfies both the Lipschitz and 2nd-order smoothness assumptions
# Apply ExUCB with \beta = 2/3 and \gamma = 1/6
# The constants are set as p_{\max} = 50, B = 50, C_{1} = 1, C_{2} = 20, \lambda = 0.1


# Code starts here


# Noise CDF F of the mixture 3/4*Unif(-trun,0) + 1/4*Unif(0,trun)
# trun = half-width of the noise support
# x = a single evaluation point (the branches use scalar `if` tests)
# Returns F(x), a value in [0,1]
trun <- 15
cdf <- function(x) {
  # At or below the left end of the support: no mass accumulated
  if (x + trun <= 0) {
    return(0)
  }
  # Negative half of the support rises linearly from 0 to 3/4
  if (x <= 0) {
    return(3/4 + 3*x/4/trun)
  }
  # Positive half of the support rises linearly from 3/4 to 1
  if (x - trun <= 0) {
    return(3/4 + 1*x/4/trun)
  }
  # Beyond the right end of the support
  1
}


# Optimal price function g: maps the index p = x^{\top}\theta_{0} to the
# price that maximizes price * (1 - cdf(price - p)) over [0, 50]
# cdf = noise CDF, evaluated on scalars; p = index value (scalar)
# Returns 0 without optimizing when p + trun < 0 (trun is read from the
# enclosing environment)
maxi <- function(cdf, p) {
  if (p + trun < 0) {
    return(0)
  }
  price_cap <- 50
  # Expected revenue at a given price; the sale probability is clipped to [0,1]
  expected_revenue <- function(price) {
    sale_prob <- 1 - cdf(price - p)
    sale_prob <- min(max(0, sale_prob), 1)
    price * sale_prob
  }
  # fnscale = -1 turns optim's minimization into a maximization
  fit <- optim(
    par = 0, fn = expected_revenue, method = "L-BFGS-B",
    lower = 0, upper = price_cap, control = list(fnscale = -1)
  )
  fit$par
}


# Specify Constants


# dm     : covariate dimension
# theta0 : true \theta_{0}; every coordinate equals 30/dm
# lam    : regularization lambda in the Inner UCB Algorithm
# C1     : exploration phase constant C_{1}
# C2     : discretization constant C_{2}
# CU     : UCB constant
# nk     : number of replications
dm <- 1
theta0 <- rep(30 / dm, times = dm)
lam <- 0.1
C1 <- 1
C2 <- 20
CU <- 1 / 40
nk <- 100


# epi : number of episodes
# ini : \alpha_{1}, length of the first episode
# T   : horizon length, the sum of the doubling episode lengths
# exl : exploration phase length of each episode (filled during the run)
# dis : discretization number of each episode (filled during the run)
epi <- 10
ini <- 2^9
T <- ini * (2^epi - 1)
exl <- numeric(epi)
dis <- numeric(epi)


# pmax : p_{\max}, the optimal price upper bound
# B    : the valuation upper bound
pmax <- 50
B <- 50


# Recorders shared across replications
# lsber: running sum (later average) of accumulative optimal revenues
# lser: running sum (later average) of ExUCB accumulative revenues
# llsber: accumulative optimal revenues in each replication (nk x T)
# llser: accumulative ExUCB revenues in each replication (nk x T)
# time_cost: time consumption of each replication
# thetahat.set: L1 estimation error sum(|thetahat - theta0|) of each
#   replication (row) and episode (column); the replication loop writes to
#   thetahat.set[ik,j], so it has to be allocated before the loop starts
lsber <- rep(0, T)
lser <- rep(0, T)
llsber <- matrix(0, nk, T)
llser <- matrix(0, nk, T)
time_cost <- rep(0, nk)
thetahat.set <- matrix(0, nk, epi)


# Replications begin
# ik = current iteration; jk = current seed
# Each pass of the while loop is one replication: it draws covariates for the
# whole horizon, runs the episodes (exploration phase, estimation, UCB phase)
# and stores the accumulative revenue paths of ExUCB and of the optimal price.
ik = 1
jk = 1
while (ik <= nk){
  t1 = Sys.time()
  # Replication ik is seeded with jk, so the seeds used are 1, 2, ..., nk
  set.seed(jk)
  jk = jk + 1
  # print current iteration
  print(paste("Current iteration:", ik))
  
  # bS: record optimal prices; 
  # ber: record one-period optimal revenues;
  # sber: record accumulative optimal revenues;
  bS = rep(0,T)
  ber = rep(0,T)
  sber = rep(0,T)
  
  # S: record prices set by ExUCB policy;
  # y: record binary outcomes of ExUCB policy;
  # r: record realized revenues of ExUCB policy;
  # er: record one-period revenues of ExUCB policy;
  # ser: record accumulative revenues of ExUCB policy;
  S = rep(0,T)
  y = rep(0,T)
  r = rep(0,T)
  er = rep(0,T)
  ser = rep(0,T)
  
  # x1: covariates; tthx: record true x_{t}^{\top}\theta_{0};
  # thx: record estimated x_{t}^{\top}\theta_{0};
  # Covariates are drawn i.i.d. uniform on [1/2, 1], one row per period
  x1 = matrix(runif(dm*T,1/2,1),T,dm)
  tthx = as.vector(x1%*%theta0)
  thx = rep(0,T)
  
  
  # cur_end: last period of the previous episode (offset into the length-T vectors)
  cur_end = 0
  # Episode iteration begins
  # subT: episode length
  for (j in 1:epi){
    # Episode lengths double: ini, 2*ini, 4*ini, ...
    subT = ini*2^(j-1)
    
    # Exploration Phase
    # The first ceiling(C1*subT^(2/3)) periods of the episode use random prices
    explore_l = ceiling(C1*subT^(2/3))
    exl[j] = explore_l
    for (i in 1:explore_l){
      # Price drawn uniformly on [0, B]
      S[i+cur_end] = runif(1,0,B)
      # Benchmark price computed from the true index
      bS[i+cur_end] = maxi(cdf,tthx[i+cur_end])
      # Binary outcome drawn with success probability 1 - F(price - true index)
      y[i+cur_end] = rbinom(1,1,1-cdf(S[i+cur_end]-tthx[i+cur_end]))
      r[i+cur_end] = S[i+cur_end]*y[i+cur_end]
      # Expected (not realized) one-period revenues of the policy and the benchmark
      er[i+cur_end] = S[i+cur_end]*(1-cdf(S[i+cur_end]-tthx[i+cur_end]))
      ber[i+cur_end] = bS[i+cur_end]*(1-cdf(bS[i+cur_end]-tthx[i+cur_end]))
      # Running sums; the very first period of the horizon has no predecessor
      if (i+cur_end == 1){
        ser[i+cur_end] = er[i+cur_end]
        sber[i+cur_end] = ber[i+cur_end]
      }
      else {
        ser[i+cur_end] = ser[i-1+cur_end]+er[i+cur_end]
        sber[i+cur_end] = sber[i-1+cur_end]+ber[i+cur_end]
      }
    }
    
    # Estimating true theta0
    # Regress B*y on the exploration-phase covariates; glm is called with its
    # default family, and the fitted intercept is dropped to obtain thetahat
    dat = data.frame(X = x1[(cur_end+1):(cur_end+explore_l),], y = B*y[(cur_end+1):(cur_end+explore_l)])
    glmfit = glm(y~.,data = dat)
    thetahat2 = coef(glmfit)
    thetahat = thetahat2[-1]
    # L1 estimation error of this replication and episode
    # NOTE(review): thetahat.set is not created inside this loop; it must
    # already exist (e.g. as an nk x epi matrix) or this assignment fails
    thetahat.set[ik,j] = sum(abs(thetahat-theta0))
    
    
    # UCB Phase
    # exploit_l: remaining periods of the episode; intv: number of bins
    exploit_l = subT - explore_l
    intv = ceiling(C2*exploit_l^(1/6))
    dis[j] = intv
    # Estimated index thetahat^{\top}x_{t} for every UCB-phase period
    for (i in (explore_l+1):subT){
      thx[i+cur_end] = sum(thetahat*x1[i+cur_end,])
    }
    
    # Inner UCB Algorithm
    # me0: per-bin estimate used in the index; ti0: per-bin sum of squared prices
    # Both are reset at the start of every episode
    me0 = rep(0,intv)
    ti0 = rep(0,intv)
    # The grid of width u = pmax + 2*||thetahat||_1 is split into intv bins of width ku
    u1 = pmax
    u2 = sum(abs(thetahat))
    u = u1 + 2*u2
    ku = u/intv
    
    for (i in (explore_l+1):subT){
      # Confidence radius for within-episode period i
      beta_t = CU*max(1,((lam*intv)^(1/2)/pmax+sqrt(2*log(exploit_l)+intv*log((lam*intv+(i-1)*pmax^(2))/(lam*intv))))^(2))
      cx = thx[i+cur_end]
      # dex1..dex2: range of bin indices considered in this period;
      # num: how many there are; rma: price attached to bin dex1
      # (bin k in the range corresponds to price (k-dex1)*ku + rma)
      dex1 = (-cx+u2+ku/2)%/%ku+1
      dex2 = (u1-cx+u2+ku/2)%/%ku
      num = dex2 - dex1 + 1
      rma = (2*dex1-1)*ku/2 - u2 + cx
      # NOTE(review): i starts at explore_l+1, so this branch looks unreachable
      # whenever explore_l >= 1 - confirm before relying on it
      if (i == 1){
        bc = round(runif(1,0.5,num+0.5))
      }
      if (i > 1){
        me = me0[dex1:dex2]
        ti = ti0[dex1:dex2]
        # If some candidate bin has never been played, pick one of them at random
        if (sum(ti==0)>0){
          bc = round(runif(1,0.5,sum(ti==0)+0.5))
          bc = which(ti == 0)[bc]
        }
        # Otherwise play the bin with the largest index
        # price * (estimate + sqrt(beta_t/(lam + ti))), ties broken at random
        if (sum(ti==0)==0){
          inde = rep(0,num)
          for (i1 in 1:num){
            inde[i1] = ((i1-1)*ku+rma)*(me[i1]+sqrt(beta_t/(lam+ti[i1])))
          }
          bc = which(inde == max(inde))
          bc = bc[sample(length(bc))[1]]
        }
      }
      # bc is relative to dex1; convert the chosen bin into a price
      S[i+cur_end] = (bc-1)*ku+rma
      bS[i+cur_end] = maxi(cdf,tthx[i+cur_end])
      y[i+cur_end] = rbinom(1,1,1-cdf(S[i+cur_end]-tthx[i+cur_end]))
      r[i+cur_end] = S[i+cur_end]*y[i+cur_end]
      # Update the played bin: the estimate is refreshed with the new (price, revenue)
      # pair first, then the squared price is added to the bin's accumulated weight
      me0[dex1-1+bc] = (me0[dex1-1+bc]*(lam+ti0[dex1-1+bc])+S[i+cur_end]*r[i+cur_end])/(lam+ti0[dex1-1+bc]+(S[i+cur_end])^(2))
      ti0[dex1-1+bc] = ti0[dex1-1+bc] + (S[i+cur_end])^(2)
      er[i+cur_end] = S[i+cur_end]*(1-cdf(S[i+cur_end]-tthx[i+cur_end]))
      ber[i+cur_end] = bS[i+cur_end]*(1-cdf(bS[i+cur_end]-tthx[i+cur_end]))
      ser[i+cur_end] = ser[i+cur_end-1]+er[i+cur_end]
      sber[i+cur_end] = sber[i+cur_end-1]+ber[i+cur_end]
    }
    # Move the offset to the end of the episode just finished
    cur_end = cur_end + subT
  }
  # ik is advanced first, so the replication just finished is stored at index ik-1
  ik = ik + 1
  lser = lser + ser
  lsber = lsber + sber
  llser[ik-1,] = ser
  llsber[ik-1,] = sber
  t2 = Sys.time()
  time_cost[ik-1] = difftime(t2, t1, units = "mins")
  # print time consumption of current iteration
  print(paste("Time consumption in minutes:", time_cost[ik-1]))
}
# Turn the running sums over replications into averages
lsber <- lsber / nk
lser <- lser / nk
# Accumulative regret path of every replication (one row per replication)
llreg <- llsber - llser
# Average accumulative regret: average optimal revenue minus average ExUCB revenue
reg <- lsber - lser
print(time_cost)





# Pointwise confidence band of the accumulative regret: for every period,
# take the (1-ci)/2 and (1+ci)/2 empirical quantiles across replications
ci <- 0.95
lreg_l <- apply(llreg, 2, quantile, probs = (1 - ci) / 2, names = FALSE)
lreg_u <- apply(llreg, 2, quantile, probs = (1 + ci) / 2, names = FALSE)


# Construct ending time period for each episode
# Episode i lasts ini*2^(i-1) periods, so the end points are the running
# totals of those lengths. Deriving them from ini and epi (instead of the
# literal exponent 2^(i+8)) keeps them correct if ini is changed.
Time <- cumsum(ini * 2^(seq_len(epi) - 1))
# cur held the last end point after the original loop; kept for compatibility
cur <- Time[epi]


# Build a grid of time points that is evenly spaced on the log2 scale,
# running from just after the end of episode start_epi to the horizon T
num <- 300
start_epi <- 2
start <- log2(Time[start_epi] + 1)
end <- log2(T)
step <- (end - start) / num
Time <- round(2^seq(from = start, to = end, by = step))


# Linear fit of log2(average regret) on log2(time) over that grid;
# the slope co[2] is the estimated growth exponent of the regret
x <- log2(Time)
y <- log2(reg[Time])
fit <- lm(y ~ x)
co <- coef(fit)
print(co)


# Plot the average regret, its confidence band and the linear fit on log2 axes
log_time <- log2(Time)
log_reg <- log2(reg[Time])
log_lower <- log2(lreg_l[Time])
log_upper <- log2(lreg_u[Time])
# One shared y-range for all curves; finite = TRUE ignores NaN/-Inf that log2
# produces if a band value is non-positive, which would otherwise break ylim
y_range <- range(log_reg, log_lower, log_upper, finite = TRUE)
plot(log_time, log_reg, ann = FALSE, cex.lab = 2, cex.axis = 2, xaxt = "n", yaxt = "n", xlab = "Time", ylab = "Cumulative Regret", type = "l", lty = 1, ylim = y_range, lwd = 2, col = "red")
# The band is drawn onto the existing axes instead of re-plotting with par(new = TRUE)
lines(log_time, log_lower, lty = 3, lwd = 1, col = "blue")
lines(log_time, log_upper, lty = 3, lwd = 1, col = "blue")
abline(co[1], co[2], lty = 4, col = "green")

mtext(side = 1, text = "Time", line = 2.5, cex = 2)
mtext(side = 2, text = "Regret", line = 2.3, cex = 2)
axis(1, padj = -0.1, cex.axis = 1.3, at = seq(11,19,2), labels = c(expression(2^11),expression(2^13),expression(2^15),expression(2^17),expression(2^19)))
axis(2, padj = 0.5, cex.axis = 1.3, at = seq(11,17,1), labels = c(expression(2^11),expression(2^12),expression(2^13),expression(2^14),expression(2^15),expression(2^16),expression(2^17)))
# The legend reports the slope actually fitted above rather than a fixed number
slope_label <- sprintf("Linear fit: Slope = %.3f", co[2])
legend("topleft", legend = c("ExUCB: Mean", "ExUCB: 95% CI", slope_label), lty = c(1,3,4), lwd = c(2,1,1), col = c("red","blue","green"), cex = 1.5)


