# -*- coding: utf-8 -*-
"""misspecification

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/13Dc4aKlUdZYzjnXcTtASVlVujrP-i2gC
"""

# Commented out IPython magic to ensure Python compatibility.
# %reset -f
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import normalize
import math

# Problem instance: N arms with d-dimensional features whose rewards are
# linear in the features up to a perturbation of magnitude eps.
d = 16
N = 100
eps = 0.02
np.random.seed(seed=41)

# Ground-truth parameter, rescaled to unit Euclidean norm.
theta_star = np.random.normal(size=(d,))
theta_star /= np.linalg.norm(theta_star)

# Feature matrix with one column per arm, normalised along axis 0.
X = normalize(np.random.normal(size=(d, N)), axis=0)

# Each arm's reward is shifted by +eps or -eps with equal probability.
misspec_sign = np.random.binomial(1, 0.5, size=(N,)) - 0.5
r = np.dot(theta_star, X) + 2 * eps * misspec_sign

# Best reward and the smallest gap to any arm that does not attain it.
r_optimal = r.max()
gaps = r_optimal - r
Delta = gaps[r != r_optimal].min()
print(r_optimal, Delta)

import time
# Fixed seed so that the simulated observation noise below is reproducible.
# To seed from the wall clock instead, replace the call with:
#   t = 1000 * time.time()  # current time in milliseconds
#   np.random.seed(int(t) % 2**32)
np.random.seed(0)
import matplotlib.patheffects as path_effects

# Cumulative-regret experiment for the Gamma-thresholded linear UCB learner.
#
# For every threshold Gamma the learner is run for M independent trials of K
# rounds each. In each round it pulls the arm with the largest optimistic
# estimate and only folds the observation into its ridge-regression state when
# the pulled arm's uncertainty exceeds Gamma. Gamma == 0 is labelled OFUL in
# the plot; Gamma == -1 additionally adds an eps-scaled L1 bonus and is
# labelled "(Lattimore et al.)".
K = 2000  # rounds per trial
M = 32  # independent trials averaged per Gamma
beta = 3  # multiplier applied to the uncertainty term
for Gamma, c in [(0, 'black'), (0.02, 'orange'), (0.08, 'red'), (0.2, 'green'), (1, 'blue'), (0.05, 'purple')]:
  ave = np.zeros((K + 1,))  # running sum of cumulative regret over trials
  ave2 = np.zeros((K + 1,))  # running sum of squared cumulative regret
  t0 = time.time()
  for _ in range(M):
    # Fresh regression state for each trial.
    U = np.eye(d)
    Uinv = np.eye(d)
    b = np.zeros(d)
    X_sel = np.zeros((d, K))  # features of the arms pulled so far (zero-padded)
    theta = np.zeros(d)
    reg = [0]
    for i in range(K):
      # Per-arm uncertainty sqrt(x^T U^{-1} x), computed for all arms at once.
      UCB = np.sqrt(np.einsum('in, ij, jn -> n', X, Uinv, X))
      est_r = beta * UCB + np.dot(theta, X)
      if Gamma == -1:
        est_r += eps * np.linalg.norm(np.einsum('da, dp, pk -> ak', X, Uinv, X_sel), ord=1, axis=1)
      idx = np.argmax(est_r)
      reg.append(reg[-1] + r_optimal - r[idx])
      X_sel[:, i] = X[:, idx]
      # Skip the update when the pulled arm's uncertainty is at most Gamma.
      if UCB[idx] > Gamma:
        U += np.outer(X[:, idx], X[:, idx])
        b += (r[idx] + np.random.normal()) * X[:, idx]
        Uinv = np.linalg.inv(U)
        theta = np.matmul(Uinv, b)
    ave += np.array(reg)
    ave2 += np.array(reg) ** 2
  ave = ave / M
  # Square root of the empirical variance across trials (printed under the
  # 'Variance' label); abs() guards against tiny negative rounding errors.
  var = np.sqrt(np.abs(ave2 / M - ave ** 2))
  print('Gamma = {}, Regret = {}, Variance = {}, Time = {}'.format(Gamma, ave[-1], var[-1], time.time() - t0))
  # Raw strings keep the TeX backslash literal; '\G' is not a valid escape.
  if Gamma == -1:
    plt.plot(ave, color=c, label='(Lattimore et al.)')
  elif Gamma != 0:
    plt.plot(ave, color=c, label=r'$\Gamma = {}$'.format(Gamma))
  else:
    plt.plot(ave, color=c, label=r'$\Gamma = {}$ (OFUL)'.format(Gamma))

# Cumulative-regret experiment for the learner plotted as 'RLB'.
#
# Each trial has three stages:
#   1. rounds [0, warmup * d): cycle over arms 0..d-1 and fit a ridge
#      regression estimate theta from the noisy rewards;
#   2. rounds [warmup * d, warmup * (d + 1)): pull arm d repeatedly and
#      average its noisy rewards;
#   3. at round warmup * (d + 1): compare the linear prediction for arm d with
#      that average and commit to linear UCB ("OFUL") if they agree within the
#      combined confidence radii, otherwise to tabular UCB.
warmup = 10  # pulls per arm during the two warm-up stages
beta = 3  # multiplier applied to the linear confidence width
lin_warmup_end = warmup * d  # first round after the linear warm-up
test_round = warmup * (d + 1)  # round at which the hypothesis test runs
# sample first k arm each
ghosh_ave = np.zeros((K + 1,))  # running sum of cumulative regret over trials
ghosh_ave2 = np.zeros((K + 1,))  # running sum of squared cumulative regret
t0 = time.time()
for _ in range(M):
  reg = [0]
  # linear UCB configuration: start every trial from a fresh regression state
  # so that nothing leaks in from earlier trials or earlier experiments.
  U = np.eye(d)
  Uinv = np.eye(d)
  b = np.zeros(d)
  theta = np.zeros(d)
  indR = 0  # sum (later mean) of the noisy rewards observed for arm d
  for k in range(K):
    if k < lin_warmup_end:
      # Stage 1: round-robin over the first d arms.
      idx = k % d
      U += np.outer(X[:, idx], X[:, idx])
      obs_R = r[idx] + np.random.normal()
      b += obs_R * X[:, idx]
      Uinv = np.linalg.inv(U)
      theta = np.matmul(Uinv, b)
      reg.append(reg[-1] + r_optimal - r[idx])
      continue
    if k < test_round:
      # Stage 2: collect direct samples of the held-out arm d.
      idx = d
      obs_R = r[idx] + np.random.normal()
      indR += obs_R
      reg.append(reg[-1] + r_optimal - r[idx])
      continue

    # Hypothesis test
    if k == test_round:
      indR /= warmup  # estimated reward function
      rad_tab = 1 / math.sqrt(warmup)
      linR = theta @ X[:, d]
      rad_lin = beta * np.sqrt(X[:, d].T @ Uinv @ X[:, d])
      method = "OFUL" if abs(linR - indR) < rad_lin + rad_tab else "UCB"
      # Restart the regression statistics for the main phase. theta is not
      # reset here, so the first OFUL round still uses the warm-up estimate.
      U = np.eye(d)
      Uinv = np.eye(d)
      b = np.zeros(d)
      # tabular UCB configuration
      estR = np.zeros(N,)
      cnt = np.ones(N,)

    if method == "OFUL":
      UCB = np.sqrt(np.einsum('in, ij, jn -> n', X, Uinv, X))
      est_r = beta * UCB + np.dot(theta, X)
      idx = np.argmax(est_r)
      reg.append(reg[-1] + r_optimal - r[idx])
      U += np.outer(X[:, idx], X[:, idx])
      b += (r[idx] + np.random.normal()) * X[:, idx]
      Uinv = np.linalg.inv(U)
      theta = np.matmul(Uinv, b)
    else:
      # Tabular UCB: reward sum over count plus a 1/sqrt(count) bonus.
      est_r = estR / cnt + 1 / np.sqrt(cnt)
      idx = np.argmax(est_r)
      reg.append(reg[-1] + r_optimal - r[idx])
      estR[idx] += r[idx] + np.random.normal()
      cnt[idx] += 1
  ghosh_ave += np.array(reg)
  ghosh_ave2 += np.array(reg) ** 2
ghosh_ave = ghosh_ave / M
# Square root of the empirical variance across trials (printed under the
# 'Variance' label); abs() guards against tiny negative rounding errors.
ghosh_var = np.sqrt(np.abs(ghosh_ave2 / M - ghosh_ave ** 2))
print('Regret = {}, Variance = {}, Time = {}'.format(ghosh_ave[-1], ghosh_var[-1], time.time() - t0))
plt.plot(ghosh_ave, label='RLB')


# Finish the regret figure (legend and axis labels) and write it to disk.
plt.legend(loc='lower right', ncol=2)
# plt.title('Cumulative Regret')
plt.xlabel('# of rounds')  # was '# or rounds' (typo)
plt.ylabel('Cumulative Regret')
plt.savefig('syn-nfill.pdf')

