# regret comparison script -- plot03
import numpy as np
import matplotlib.pyplot as plt

from dynamics import *


# Plot (3): comparison of FP and OFP
#
#  - all variants use lexicographical tie-breaking
#  - all variants use the same fixed initialization
#   
#  (a) 25x25 Identity (diagonal) matrix
#  (b) 25x25 RPS
#  (c) 25x25 random matrix with [0, 1] entries

# One row of three panels sharing a y-axis; each panel below draws into one
# entry of `ax`.
fig, ax = plt.subplots(
    nrows=1,
    ncols=3,
    sharey=True,
    figsize=(9, 3),
    layout="tight",
    dpi=300,
)

# Line colours reused by every panel so FP and OFP look the same throughout.
fp_col, ofp_col = "red", "blue"

# Settings shared by all runs. T is handed to FTRL and is also the upper bound
# of the plotted index range; Q is handed to FTRL unchanged (per the file
# header, this is the lexicographical tie-breaking rule).
T = 10000
Q = best_response_lex

# Panel (a): 25x25 identity payoff matrix.
n = 25
A = np.identity(n)

# Fixed starting vectors: x01 has all its mass on index 0, x02 on the last
# index.
x01 = np.zeros(n)
x01[0] = 1
x02 = np.zeros(n)
x02[-1] = 1

eta = 1
steps = np.arange(2, T)  # indices 0 and 1 are left out of the curve

# alpha = 0 is the standard FP run, alpha = 1 the optimistic (OFP) run; both
# go through the same FTRL call and are drawn in that order.
for alpha, label, color in ((0, "FP", fp_col), (1, "OFP", ofp_col)):
    x1_t, x2_t, y1_t, y2_t = FTRL(A, n, x01, x02, T, alpha, Q, eta)

    # Plotted quantity: largest entry of y1_t[t] plus largest entry of
    # y2_t[t]. The curve is drawn against its list position, so x = 0 on the
    # axis corresponds to index t = 2.
    reg_t = [np.max(y1_t[t]) + np.max(y2_t[t]) for t in steps]
    ax[0].plot(reg_t, label=label, color=color)

panel = ax[0]
panel.legend(loc="upper left")
panel.set_title("25x25 Identity")
panel.set_xlabel("T")
# Only the first panel gets a y-label; the figure was created with a shared
# y-axis.
panel.set_ylabel("Total Regret")

# Panel (b): 25x25 RPS-style cyclic payoff matrix.
n = 25

# For every index i (wrapping around at the end) set A[i, i+1] = -1 and
# A[i+1, i] = 1, which yields a skew-symmetric cyclic matrix; every entry is
# then scaled by 2/3.
idx = np.arange(n)
nxt = np.roll(idx, -1)  # i -> (i + 1) mod n
A = np.zeros((n, n))
A[idx, nxt] = -1
A[nxt, idx] = 1
A *= 2/3

# Fixed starting vectors: x01 has all its mass on index 0, x02 on the last
# index.
x01 = np.zeros(n)
x01[0] = 1
x02 = np.zeros(n)
x02[-1] = 1

eta = 1
steps = np.arange(2, T)  # indices 0 and 1 are left out of the curve

# alpha = 0 is the standard FP run, alpha = 1 the optimistic (OFP) run; both
# go through the same FTRL call and are drawn in that order.
for alpha, label, color in ((0, "FP", fp_col), (1, "OFP", ofp_col)):
    x1_t, x2_t, y1_t, y2_t = FTRL(A, n, x01, x02, T, alpha, Q, eta)

    # Plotted quantity: largest entry of y1_t[t] plus largest entry of
    # y2_t[t], drawn against list position (x = 0 is index t = 2).
    reg_t = [np.max(y1_t[t]) + np.max(y2_t[t]) for t in steps]
    ax[1].plot(reg_t, label=label, color=color)

panel = ax[1]
panel.legend(loc="upper left")
panel.set_title("25x25 RPS")
panel.set_xlabel("T")


# Panel (c): 25x25 random payoff matrix with entries drawn uniformly from
# [0, 1).
n = 25

# Draw the matrix from a locally seeded generator so that re-running the
# script regenerates the same figure; the global NumPy RNG state is left
# untouched. Change the seed to sample a different random instance.
rng = np.random.default_rng(0)
A = rng.uniform(low=0.0, high=1.0, size=(n, n))

# Fixed starting vectors: x01 has all its mass on index 0, x02 on the last
# index.
x01 = np.zeros(n)
x01[0] = 1
x02 = np.zeros(n)
x02[-1] = 1

eta = 1
steps = np.arange(2, T)  # indices 0 and 1 are left out of the curve

# alpha = 0 is the standard FP run, alpha = 1 the optimistic (OFP) run; both
# go through the same FTRL call on the same matrix and are drawn in that order.
for alpha, label, color in ((0, "FP", fp_col), (1, "OFP", ofp_col)):
    x1_t, x2_t, y1_t, y2_t = FTRL(A, n, x01, x02, T, alpha, Q, eta)

    # Plotted quantity: largest entry of y1_t[t] plus largest entry of
    # y2_t[t], drawn against list position (x = 0 is index t = 2).
    reg_t = [np.max(y1_t[t]) + np.max(y2_t[t]) for t in steps]
    ax[2].plot(reg_t, label=label, color=color)

panel = ax[2]
panel.legend(loc="upper left")
panel.set_title("25x25 Random [0,1]")
panel.set_xlabel("T")


# Write the finished figure to disk. For interactive viewing, call plt.show()
# here instead.
output_file = "regret-comparison-plot03.png"
plt.savefig(output_file)
