# regret comparison script -- table 01
import numpy as np
import matplotlib.pyplot as plt

from dynamics import *


# Table 01: comparison of FP vs. OFP

#  - all variants lexicographical tiebreaking
#  - all variants using same random initialization (x100)
#   
#  (a) n=5, 15, 25, 50 diagonal
#  (b) n=5, 15, 25, 50 scaled RPS
#  (c) n=5, 15, 25, 50 [0,1] random

# Horizon passed to FTRL; also the index at which y1_t / y2_t are read out.
T = 10000
# Tiebreaking rule handed to FTRL (comes from `dynamics` via the star import).
Q = best_response_lex

# Number of random initializations per (matrix family, n) pair.
trials = 100
n_list = [5, 15, 25, 50]


def _random_simplex_point(n):
    """Return a length-n uniform random vector rescaled to sum to 1."""
    x = np.random.uniform(size=n)
    return x / np.sum(x)


def _final_regret(A, n, x01, x02, alpha):
    """Run FTRL once and return the quantity this script reports as regret.

    The value is max(y1_t[T]) + max(y2_t[T]), where y1_t and y2_t are the
    third and fourth values returned by FTRL. eta is fixed to 1 for every
    run in this table.

    NOTE(review): the exact meaning of y1_t / y2_t is defined in
    `dynamics.FTRL`, which is not visible from this script.
    """
    eta = 1
    _, _, y1_t, y2_t = FTRL(
        A, n, x01, x02, T, alpha,
        Q, eta
    )
    return np.max(y1_t[T]) + np.max(y2_t[T])


def _identity_matrix(n):
    """Return the n x n identity matrix."""
    return np.eye(n)


def _rps_matrix(n):
    """Return the n x n cyclic rock-paper-scissors style matrix.

    Entries are -1 on the superdiagonal, +1 on the subdiagonal, plus the
    wrap-around corners A[0][-1] = 1 and A[-1][0] = -1; everything else
    is 0. Intended for n >= 3 (for smaller n the corner entries coincide
    with the off-diagonal ones).
    """
    A = np.zeros((n, n))
    A[0][-1] = 1
    for i in range(n - 1):
        A[i][i + 1] = -1
        A[i + 1][i] = 1
    # Written after the loop so the wrap-around entry wins any overlap.
    A[-1][0] = -1
    return A


def _uniform_matrix(n):
    """Return an n x n matrix with entries drawn uniformly from [0, 1)."""
    return np.random.uniform(low=0.0, high=1.0, size=(n, n))


def _run_experiment(label, make_matrix):
    """Print FP vs. OFP regret mean/std for every n in n_list.

    label       -- text printed before " -- n=<n>" in each report header.
    make_matrix -- callable taking n and returning the n x n matrix A.

    For each n the matrix is built once and reused for all trials. Each
    trial draws one pair of random initial points and feeds that same
    pair to both variants (alpha = 0 for standard FP, alpha = 1 for
    optimistic FP), so the two columns are directly comparable.
    """
    for n in n_list:
        # Build A before any trial so the random draws happen in a fixed order.
        A = make_matrix(n)

        fp_regrets = []
        ofp_regrets = []
        for _ in range(trials):
            # random initialization, shared by both variants
            x01 = _random_simplex_point(n)
            x02 = _random_simplex_point(n)

            # standard FP
            fp_regrets.append(_final_regret(A, n, x01, x02, 0))

            # optimistic FP
            ofp_regrets.append(_final_regret(A, n, x01, x02, 1))

        fp_reg_mean = np.round(np.mean(fp_regrets), 1)
        fp_reg_std = np.round(np.std(fp_regrets), 1)

        ofp_reg_mean = np.round(np.mean(ofp_regrets), 1)
        ofp_reg_std = np.round(np.std(ofp_regrets), 1)

        print("{} -- n={}".format(label, n))
        print("FP reg mean/std", fp_reg_mean, fp_reg_std)
        print("OFP reg mean/std", ofp_reg_mean, ofp_reg_std)
        print("")


print("Table 01 data")
print("T={}".format(T))
print("lexicographical tiebreaking")
print("trials = {}".format(trials))
print("")

# -------------------------------------
# diagonal identity matrix
_run_experiment("Diagonal Identity A", _identity_matrix)


# -------------------------------------
# general RPS matrix
_run_experiment("RPS A", _rps_matrix)


# -------------------------------------
# random [0, 1] matrix
_run_experiment("Random [0, 1] matrix A", _uniform_matrix)


