# regret comparison script -- plot01
import numpy as np
import matplotlib.pyplot as plt

from dynamics import *


# Plot 1 (intro): comparison of FP, OFP, and AFP
#
#  - all variants use lexicographic tie-breaking
#  - all variants use the same fixed initialization
#
#  (a) 2x2 Matching Pennies
#  (b) 15x15 Identity (diagonal)
#  (c) 15x15 RPS

# One row of three panels (one per game) that share a common y-axis.
fig, ax = plt.subplots(
    nrows=1,
    ncols=3,
    sharey=True,
    figsize=(9, 3),
    layout="tight",
    dpi=300,
)

# Line colours for the FP, OFP and AFP curves, respectively.
fp_col, ofp_col, afp_col = "red", "blue", "magenta"

# Settings shared by every run in this script.
T = 10000              # passed as the T argument of each FTRL/AFTRL call
Q = best_response_lex  # provided by dynamics; passed as Q to each call

# Matching Pennies (left panel, ax[0])
n = 2
A = np.array([
    [1, -1],
    [-1, 1]
])

# Fixed starting vectors, each normalized to sum to 1:
# x01 = (1/3, 2/3) and x02 = (2/3, 1/3).
x01 = np.ones(n)
x01[1] = 2
x01 = x01 / np.sum(x01)

x02 = np.ones(n)
x02[0] = 2
# Normalize by x02's own sum.  Dividing by np.sum(x01) -- which is already 1
# after the normalization above -- would leave x02 = (2, 1) un-normalized.
x02 = x02 / np.sum(x02)

eta = 1
# (legend label, colour, alpha, use AFTRL?) for each curve, in plotting order.
for label, colour, alpha, alternating in (
    ("FP", fp_col, 0, False),    # standard FP
    ("OFP", ofp_col, 1, False),  # Optimistic FP
    ("AFP", afp_col, 0, True),   # Alternating FP
):
    if alternating:
        # AFTRL takes only x01 as a starting vector (no x02 argument).
        x1_t, x2_t, y1_t, y2_t = AFTRL(
            A, n, x01, T, alpha,
            Q, eta
        )
    else:
        x1_t, x2_t, y1_t, y2_t = FTRL(
            A, n, x01, x02, T, alpha,
            Q, eta
        )

    # Quantity plotted as "Total Regret": the sum of the largest entries of
    # y1_t[t] and y2_t[t], for t = 2, ..., T-1.
    reg_t = [
        np.max(y1_t[t]) + np.max(y2_t[t])
        for t in np.arange(2, T)
    ]
    ax[0].plot(reg_t, label=label, color=colour)

ax[0].legend()

# Only this panel carries the y-label; the y-axis is shared across panels.
ax[0].set_ylabel("Total Regret")
ax[0].set_xlabel("T")
ax[0].set_title("2x2 Matching Pennies")


# 15x15 identity game (middle panel, ax[1])
n = 15
A = np.eye(n)

# One-hot starting vectors: x01 on the first coordinate, x02 on the last.
x01 = np.zeros(n)
x01[0] = 1
x02 = np.zeros(n)
x02[-1] = 1

eta = 1
# (legend label, colour, alpha, use AFTRL?) for each curve, in plotting order.
for label, colour, alpha, alternating in (
    ("FP", fp_col, 0, False),    # standard FP
    ("OFP", ofp_col, 1, False),  # Optimistic FP
    ("AFP", afp_col, 0, True),   # Alternating FP
):
    if alternating:
        # AFTRL takes only x01 as a starting vector (no x02 argument).
        x1_t, x2_t, y1_t, y2_t = AFTRL(A, n, x01, T, alpha, Q, eta)
    else:
        x1_t, x2_t, y1_t, y2_t = FTRL(A, n, x01, x02, T, alpha, Q, eta)

    # Quantity plotted as regret: the sum of the largest entries of
    # y1_t[step] and y2_t[step], for step = 2, ..., T-1.
    reg_t = [
        np.max(y1_t[step]) + np.max(y2_t[step])
        for step in np.arange(2, T)
    ]
    ax[1].plot(reg_t, label=label, color=colour)

ax[1].legend()

ax[1].set_xlabel("T")
ax[1].set_title("15x15 Identity")


# 15x15 RPS (right panel, ax[2])
n = 15

# Cyclic payoff matrix: row k has -1 in column k+1 and +1 in column k-1
# (indices taken modulo n); every other entry is 0.  All entries are then
# scaled by 2/3.
A = np.zeros((n, n))
row_idx = np.arange(n)
next_idx = (row_idx + 1) % n
A[row_idx, next_idx] = -1
A[next_idx, row_idx] = 1
A *= 2/3

# One-hot starting vectors: x01 on the first coordinate, x02 on the last.
x01 = np.zeros(n)
x01[0] = 1
x02 = np.zeros(n)
x02[-1] = 1

eta = 1
# (legend label, colour, alpha, use AFTRL?) for each curve, in plotting order.
for label, colour, alpha, alternating in (
    ("FP", fp_col, 0, False),    # standard FP
    ("OFP", ofp_col, 1, False),  # Optimistic FP
    ("AFP", afp_col, 0, True),   # Alternating FP
):
    if alternating:
        # AFTRL takes only x01 as a starting vector (no x02 argument).
        x1_t, x2_t, y1_t, y2_t = AFTRL(A, n, x01, T, alpha, Q, eta)
    else:
        x1_t, x2_t, y1_t, y2_t = FTRL(A, n, x01, x02, T, alpha, Q, eta)

    # Quantity plotted as regret: the sum of the largest entries of
    # y1_t[step] and y2_t[step], for step = 2, ..., T-1.
    reg_t = [
        np.max(y1_t[step]) + np.max(y2_t[step])
        for step in np.arange(2, T)
    ]
    ax[2].plot(reg_t, label=label, color=colour)

ax[2].legend()

ax[2].set_xlabel("T")
ax[2].set_title("15x15 RPS")

# Write the finished three-panel figure to a PNG; the path is relative, so
# the file lands in the current working directory.
output_path = "regret-comparison-plot01.png"
plt.savefig(output_path)





