library(RColorBrewer)

# fig1
xx <- seq(0, 1, 0.01)
## harm surface
tox.seq <- seq(min(eff_tox), max(eff_tox), by = 0.01)
# Evaluate the true harm model on the full (x, u) grid. An earlier per-dose
# evaluation was dropped: its result was overwritten before ever being used.
grid <- as.matrix(expand.grid(xx, tox.seq))
pp <- vapply(seq_len(nrow(grid)), function(i) {
  tox_model_true(x = grid[i, 1], dose = grid[i, 2])
}, numeric(1))
# Map the surface height linearly onto hues 0..350 so colour tracks harm.
pp_range <- range(pp)
rgl::plot3d(x = grid[, 1], y = grid[, 2], z = pp,
            xlab = "x", ylab = "u", zlab = "harm",
            col = hcl(350 * (pp - pp_range[1]) / diff(pp_range)))
rgl::rglwidget()
## reward surface
dose.seq <- seq(min(eff_dose), max(eff_dose), by = 0.01)
# Evaluate the true reward model on the full (x, u) grid. An earlier per-dose
# evaluation was dropped: its result was overwritten before ever being used.
grid <- as.matrix(expand.grid(xx, dose.seq))
pq <- vapply(seq_len(nrow(grid)), function(i) {
  eff_model_true(x = grid[i, 1], dose = grid[i, 2])
}, numeric(1))
# Map the surface height linearly onto hues 0..350 so colour tracks reward.
pq_range <- range(pq)
rgl::plot3d(x = grid[, 1], y = grid[, 2], z = pq,
            xlab = "x", ylab = "u", zlab = "reward",
            col = hcl(350 * (pq - pq_range[1]) / diff(pq_range)))
rgl::rglwidget()



#' Cumulative regret of one policy relative to the policy in column 1
#'
#' For every round and trial the instantaneous regret is
#' `Eff[, 1, ] - Eff[, j, ] + lambda * (over[, j, ] - over[, 1, ])`.
#' It is averaged over the third dimension and then accumulated over the
#' first dimension.
#'
#' @param Eff_of_choice 3-d numeric array indexed as (round, policy, trial).
#' @param over_theta 3-d numeric array with the same layout as
#'   `Eff_of_choice`, holding the penalised excess for each cell.
#' @param lambda Numeric weight applied to the `over_theta` difference.
#' @param j Index (second dimension) of the policy to compare with column 1.
#' @return Numeric vector with one entry per round: the running sum of the
#'   trial-averaged regret.
regret_cal <- function(Eff_of_choice, over_theta, lambda, j) {
  stopifnot(
    length(dim(Eff_of_choice)) == 3L,
    length(dim(over_theta)) == 3L
  )
  # drop = FALSE keeps all three dimensions, so a single trial or a single
  # round does not collapse the slices into plain vectors.
  regret <- Eff_of_choice[, 1, , drop = FALSE] -
    Eff_of_choice[, j, , drop = FALSE] +
    lambda * over_theta[, j, , drop = FALSE] -
    lambda * over_theta[, 1, , drop = FALSE]
  # Remove the (length-one) policy dimension: rounds x trials.
  dim(regret) <- dim(regret)[c(1L, 3L)]
  cumsum(rowMeans(regret))
}

# ---- Settings: these have to agree with the run that produced the .Rdata ----
lambda <- 1  # change value
tolerate <- 0.33  # harm threshold; only the excess above it is penalised
Tt <- 5000  # rows used from each result matrix (the "rounds" axis of the plots)
nm <- 7  # number of columns read from each `Choice` matrix
tr <- 100  # number of elements of `res` that are processed
K <- 7  # NOTE(review): not referenced in the visible part of this script
d <- 1  # NOTE(review): not referenced in the visible part of this script

# Restore the saved objects for this lambda; the file must provide `res1`.
filename <- paste0("binary_example", "_lam_", lambda, ".Rdata")
load(file = filename)
res <- res1

# Pre-allocate one (round x column x replication) array per extracted quantity.
Choice.res <- array(0, dim = c(Tt, nm, tr))
Tox_of_choice <- array(0, dim = c(Tt, nm, tr))
Eff_of_choice <- array(0, dim = c(Tt, nm, tr))
# NOTE(review): the 10 columns must match the shape of Par.g / Par.o - confirm.
g.res <- array(0, dim = c(Tt, 10, tr))
o.res <- array(0, dim = c(Tt, 10, tr))

for (i in seq_len(tr)) {
  resi <- res[[i]]
  Choice <- resi$Choice

  # Diagnostic: print (replication, column) for every column after the first
  # that contains an Inf choice. NA entries are ignored by the check.
  for (j in seq_len(nm)[-1]) {
    if (any(Choice[, j] == Inf, na.rm = TRUE)) {
      print(c(i, j))
    }
  }
  Choice.res[, , i] <- Choice

  # Two-column (row, column) index: for each round k and policy j it selects
  # element [k, Choice[k, j]]. Rows are stacked policy by policy, so the
  # looked-up vector refills a Tt x nm matrix in column-major order.
  # Assumes Choice is a numeric matrix with at least Tt rows and nm columns.
  chosen <- cbind(
    rep(seq_len(Tt), times = nm),
    as.vector(Choice[seq_len(Tt), seq_len(nm)])
  )

  Tox <- resi$Tox_pb
  tox_of_choice <- matrix(Tox[chosen], nrow = Tt, ncol = nm)
  Tox_of_choice[, , i] <- tox_of_choice

  Eff <- resi$Eff_pb
  eff_of_choice <- matrix(Eff[chosen], nrow = Tt, ncol = nm)
  Eff_of_choice[, , i] <- eff_of_choice

  g <- resi$Par.g
  g.res[, , i] <- g

  o <- resi$Par.o
  o.res[, , i] <- o
}


## regret plots
# Excess of the chosen harm probability over the tolerated level, floored at 0.
# pmax() is element-wise and keeps the array dimensions of its first argument.
over_theta <- pmax(Tox_of_choice - tolerate, 0)
# One cumulative-regret curve per policy column, each relative to column 1.
# Suffixes follow the plot legends in this script: g = varying coefficient
# model, o = optimal design, i = K separate, b = binned context,
# c = ignore context, h = ignore harm.
regret.g <- regret_cal(Eff_of_choice, over_theta, lambda, 2)
regret.o <- regret_cal(Eff_of_choice, over_theta, lambda, 3)
regret.i <- regret_cal(Eff_of_choice, over_theta, lambda, 4)
regret.b <- regret_cal(Eff_of_choice, over_theta, lambda, 5)
regret.c <- regret_cal(Eff_of_choice, over_theta, lambda, 6)
regret.h <- regret_cal(Eff_of_choice, over_theta, lambda, 7)

# plot in section 5
par(mfrow = c(1, 1))
par(mar = c(4, 4, 2, 1))
# matplot() derives the axis limits from all curves, so none can be clipped.
# Columns are listed in drawing order (the first column ends up underneath).
regret_sec5 <- cbind(regret.h, regret.g, regret.i, regret.b, regret.c)
matplot(seq_len(Tt), regret_sec5[seq_len(Tt), , drop = FALSE],
        type = "l", lty = 1,
        col = c("#9ecae1", "red", "green", "orange", "#BC80BD"),
        main = "Average regret", xlab = "rounds", ylab = "regret")
legend("topleft", legend = c("Varying coefficient model", "K separate", "Binned context",
                            "Ignore context", "Ignore harm"), 
       col = c("red", "green", "orange", "#BC80BD", "#9ecae1"), lty = 1)

# plots in supp
par(mfrow = c(1, 1))
par(mar = c(4, 4, 2, 1))
# matplot() derives the axis limits from all curves, so none can be clipped.
# Columns are listed in drawing order (the first column ends up underneath).
regret_supp <- cbind(regret.c, regret.g, regret.o, regret.i, regret.b)
matplot(seq_len(Tt), regret_supp[seq_len(Tt), , drop = FALSE],
        type = "l", lty = 1,
        col = c("#BC80BD", "red", "blue", "green", "orange"),
        main = "Average regret", xlab = "rounds", ylab = "regret")
legend("topleft", legend = c("Varying coefficient model", "Optimal design",
                             "K separate", "Binned context",
                             "Ignore context"), 
       col = c("red", "blue", "green", "orange", "#BC80BD"), lty = 1)


## Cum counts of being above tolerate plot
# 1 where the chosen harm probability exceeds the tolerated level, else 0
# (the comparison is element-wise and keeps the array dimensions).
is_above_theta <- (Tox_of_choice > tolerate) * 1
# Average over the third (replication) dimension -> rounds x policies matrix.
counts_above_theta <- rowMeans(is_above_theta, dims = 2)
# Running total over rounds, separately for each policy column.
counts_above_theta <- apply(counts_above_theta, 2, cumsum)
# section 5 
# matplot() derives the axis limits from all curves, so none can be clipped.
# Columns are selected in drawing order (the first one ends up underneath).
matplot(seq_len(Tt),
        counts_above_theta[seq_len(Tt), c(7, 1, 2, 4, 5, 6), drop = FALSE],
        type = "l", lty = 1,
        col = c("#9ecae1", "black", "red", "green", "orange", "#BC80BD"),
        main = expression(paste("Total count of p>", theta, " up to round t, averaged over trials")), xlab = "rounds", ylab = "Counts")
legend("topleft", legend = c("Oracle","Varying coefficient model",
                            "K separate", "Binned context",
                            "Ignore context", "Ignore harm"),
       col = c("black", "red", "green", "orange",
               "#BC80BD", "#9ecae1"), lty = 1)
# supp
# matplot() derives the axis limits from all curves, so none can be clipped.
# Columns are selected in drawing order (the first one ends up underneath).
matplot(seq_len(Tt),
        counts_above_theta[seq_len(Tt), c(6, 1, 2, 3, 4, 5), drop = FALSE],
        type = "l", lty = 1,
        col = c("#BC80BD", "black", "red", "blue", "green", "orange"),
        main = expression(paste("Total count of p>", theta, " up to round t, averaged over trials")), xlab = "rounds", ylab = "Counts")
legend("topleft", legend = c("Oracle","Varying coefficient model",
                             "Optimal design",
                             "K separate", "Binned context",
                             "Ignore context"),
       col = c("black", "red", "blue", "green", "orange",
               "#BC80BD"), lty = 1)





