import numpy as np
# Seed NumPy's global RNG so the np.random draws in this script are reproducible.
np.random.seed(2021)
from utils import compute_qlme_mosek
from mosek.fusion import *

############################################################
# Random market instance.  Item supplies are normalized to sum to 1
# (so a uniform supply would be s[j] == 1/m, NOT s[j] == 1).

n, m = 4, 8  # n buyers (rows of v), m items (columns of v)

# Valuations: one exponential draw plus one uniform draw per (buyer, item)
# entry, scaled by 2.  The exponential draw is taken first.
v_exp = np.random.exponential(size=(n, m))
v_unif = np.random.uniform(size=(n, m))
v = 2 * (v_exp + v_unif)

# One value per buyer in [0.6, 3.6) -- presumably the budgets handed to the
# solver further down.
B = 3 * (np.random.uniform(0, 1, n) + 0.2)

# Supplies: exponential draws rescaled to sum to 1.
# (The uniform alternative would be np.ones(m) / m.)
raw_supply = np.random.exponential(size=m)
s = raw_supply / raw_supply.sum()

# Solve the market with the project's MOSEK helper.  The five outputs are
# presumably allocations, utilities, leftover budgets, pacing multipliers and
# prices -- TODO confirm against compute_qlme_mosek.
x_opt, u_opt, delta_opt, beta_opt, p_opt = compute_qlme_mosek(v, s, B)
print('max & min beta_opt = {}, {}'.format(np.max(beta_opt), np.min(beta_opt)))

# Primal and dual objective values; C is a constant that depends only on B
# and is added to the dual value before taking the gap.
C = (B * np.log(B)).sum() - B.sum()
pobj = (B * np.log(u_opt)).sum() - np.sum(delta_opt)
dobj = p_opt @ s - (B * np.log(beta_opt)).sum()
print('duality gap = {:.6f}'.format(dobj + C - pobj))

# Regret check: compare each buyer's realized surplus with the best
# (value - price) / price ratio available to them.
# NOTE(review): u_max is a per-price ratio while u_actual is a utility total;
# confirm that these two quantities are meant to be compared directly.
surplus = v - p_opt
u_actual = (surplus * x_opt).sum(axis=1)
u_max = np.max(surplus / p_opt, axis=1)
print(f'sum of regrets of all buyers = {np.sum(np.maximum(u_max - u_actual, 0))}')
print(f'B/u_opt - beta_opt = {B/u_opt - beta_opt}')

# Quasi-linear utilities at the solution: value received minus money spent
# (algebraically the same quantity as u_actual, computed term by term).
gross_utilities_qlme = (v * x_opt).sum(axis=1)
spendings_qlme = np.sum(p_opt * x_opt, axis=1)
u_qlme = gross_utilities_qlme - spendings_qlme
print(f'QLME utilities = {u_qlme}')

# Dual averaging over T sampled item arrivals.
T = 50000
# Draw the whole arrival sequence up front: item indices sampled with
# probabilities s.
all_sampled_items = np.random.choice(m, p=s, size=T)
# Lower clipping level for beta, intended to sit strictly below beta_opt.
# NOTE(review): the "/ m" averages each row of v with uniform weights, while
# s is not uniform in this script -- confirm the bound still holds.
beta_min = B / (np.linalg.norm(v, ord=1, axis=1) / m + 2 * B)
beta = (beta_min + 1) / 2  # start halfway between the lower level and 1
beta_ave = np.zeros(n)  # running average of the beta iterates
g_ave = np.zeros(n)  # running average of the per-step vector g
winners_all_t = np.zeros(T, dtype=np.int32)
spending = np.zeros(n, dtype=np.float64)
for t, j in enumerate(all_sampled_items, start=1):
    # The buyer with the largest beta-scaled value wins item j; np.argmax
    # breaks ties in favour of the smallest index.
    bids = beta * v[:, j]
    winner = np.argmax(bids)
    winners_all_t[t - 1] = winner
    # The winner is charged with the current beta(t), i.e. before the update.
    spending[winner] += bids[winner]
    # Running average of g.  The first step starts from 1/n in every entry
    # (so B / g_ave below never divides by zero); afterwards the previous
    # average is rescaled by (t-1)/t.
    if t == 1:
        g_ave = np.ones(n) / n
    else:
        g_ave = (t - 1) * g_ave / t
    # Only the winner's entry receives a new contribution this step.
    g_ave[winner] += v[winner, j] / t
    # New beta: B / g_ave clipped into [beta_min, 1].
    capped = np.minimum(1, B / g_ave)
    beta = np.maximum(beta_min, capped)
    beta_ave = (t - 1) * beta_ave / t + beta / t

# Check convergence: report the dual-averaging iterates next to the values
# returned by the solver.  Every line is labelled, like the prints earlier in
# the script, so the output can be read without consulting the source.
print(f'beta (last iterate) = {beta}')
print(f'beta_ave (running average) = {beta_ave}')
print(f'beta_opt = {beta_opt}')
print(f'g_ave = {g_ave}')
# Average amount charged per time step to each buyer.
print(f'spending / T = {spending / T}')
print(f'u_opt - delta_opt = {u_opt - delta_opt}')
print(f'B - delta_opt = {B - delta_opt}')