import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import matplotlib.animation as animation
from scipy.integrate import odeint, ode, quad, trapz
from scipy import optimize
from scipy.spatial import distance
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
plt.style.use('seaborn')
from PIL import Image
from scipy.stats import entropy
from IPython.display import HTML
from tqdm import tqdm
import math
from algorithms import *
import scipy.sparse as sparse
import scipy.stats as stats
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
We first test OGDA on simple two-player zero-sum games (Matching Pennies is a simple example) before extending to the multiplayer case.
# 2x2 payoff matrices used throughout the notebook; G2 is exactly 2 * G1.
G1 = np.array([[1, -1],[-1, 1]])
G2 = np.array([[2, -2],[-2, 2]])
# Starting values for the x and y players: two rows each, every row summing to 1
# (presumably two consecutive initial iterates of a mixed strategy --
# confirm against runGDA2Player).
x_1 = np.array([[0.4, 0.6], [0.41, 0.59]])
y_1 = np.array([[0.6, 0.4], [0.59, 0.41]])
# The trailing positional arguments are 5000 and 0.1 (presumably the number of
# steps and the step size eta -- confirm against runGDA2Player's signature).
data1 = runGDA2Player(G1,x_1,y_1, 5000, 0.1)
# Plot the first coordinate of data1['x'] against the first coordinate of
# data1['y']; the trailing semicolon suppresses the notebook's echo of the
# returned artist list.
plt.plot(data1['x'][:,0], data1['y'][:,0]);
100%|██████████| 5000/5000 [00:00<00:00, 24132.74it/s]
Time average x: [0.49987005 0.50012995] Time average y: [0.49995002 0.50004998]
# Reference point used as the equilibrium for Matching Pennies: the uniform mix.
nash1 = [0.5, 0.5]
# Log of the norm of (iterate - nash1) for every entry of data1['x'].
# The 1e-6 offset keeps log() finite when an iterate coincides with nash1.
dist = np.log([np.linalg.norm(x_t - nash1) + 1e-6 for x_t in data1['x']])
# Only the first 1000 entries are plotted.
plt.plot(dist[:1000]);
# Antisymmetric 4x4 matrix whose only nonzero entries link consecutive indices
# (0-1, 1-2, 2-3): +1 above the diagonal, -1 below. It is passed as `graph`
# to runGDANPlayer; how the sign is interpreted is defined there.
graph1 = np.array([
    [0, 1, 0, 0],
    [-1, 0, 1, 0],
    [0, -1, 0, 1],
    [0, 0, -1, 0],
])

# Alternating game matrices [G1, G2, G1, G2].
# NOTE(review): the original loop lost its indentation; this assumes both
# appends were inside `for i in range(2)`, giving 4 entries to match N=4 --
# confirm.
G_list1 = [G1, G2] * 2

# One entry per player, each holding two rows that sum to 1 (presumably the two
# initial iterates required by the optimistic update -- confirm).
vals1 = np.array([
    [[0.4, 0.6], [0.41, 0.59]],
    [[0.6, 0.4], [0.59, 0.41]],
    [[0.3, 0.7], [0.31, 0.69]],
    [[0.8, 0.2], [0.79, 0.21]],
])

data2 = runGDANPlayer(G_list1, vals1, numsteps=2000, eta=0.1, graph=graph1, N=4, optimistic=True)
100%|██████████| 1998/1998 [00:00<00:00, 2914.10it/s]
Time average values: [[0.49895 0.50105 ] [0.4999875 0.5000125] [0.499075 0.500925 ] [0.49995 0.50005 ]]
# Plot the entry at index 1 of data2['vals'] (presumably player 1's trajectory
# -- confirm against runGDANPlayer's return layout).
plt.plot(data2['vals'][1]);
# Distance of the 4 players' iterates to the reference point; every player is
# given the same nash1 reference.
dist_4player_MP = PlotDist(data2['vals'], 4, nash=[nash1] * 4)
100%|██████████| 4/4 [00:00<00:00, 45.94it/s]
# Time-average plot for player index 3; the second positional argument (2) is
# the one named `dimension` in the keyword-style PlotTimeAvg call later in this
# notebook.
PlotTimeAvg(data2, 2, player=3);
# Random starting point for each of the 20 players: draw a 2-vector and
# normalise it so its entries sum to 1.
vals2 = np.random.rand(20, 1, 2)
vals2 = vals2 / vals2.sum(axis=(1, 2), keepdims=True)
# Duplicate each starting vector along axis 1 so every player has two identical
# initial rows; the resulting shape is (20, 2, 2).
vals2 = np.repeat(vals2, 2, axis=1)

# Antisymmetric 20x20 matrix linking consecutive indices: +1 on the first
# superdiagonal, -1 on the first subdiagonal, 0 elsewhere.
graph2 = np.eye(20, k=1) - np.eye(20, k=-1)
# Alternating game matrices G1, G2, G1, G2, ... (20 entries in total).
# NOTE(review): the original loop lost its indentation; this assumes both
# appends were inside `for i in range(10)`, giving 20 entries to match N=20 --
# confirm.
G_list2 = [G1, G2] * 10

# 20000 steps with eta=0.1. `optimistic` is not passed here, so
# runGDANPlayer's default applies.
data3 = runGDANPlayer(G_list2, vals2, numsteps=20000, eta=0.1, graph=graph2, N=20)
100%|██████████| 19998/19998 [02:07<00:00, 156.61it/s]
Time average values: [[0.50011028 0.49988972] [0.49999101 0.50000899] [0.50007599 0.49992401] [0.49996203 0.50003797] [0.50015622 0.49984378] [0.49998569 0.50001431] [0.50007353 0.49992647] [0.49994139 0.50005861] [0.50000627 0.49999373] [0.50003308 0.49996692] [0.49991541 0.50008459] [0.50004794 0.49995206] [0.4999105 0.5000895 ] [0.5000994 0.4999006 ] [0.49992835 0.50007165] [0.50013763 0.49986237] [0.49992967 0.50007033] [0.50017637 0.49982363] [0.49995371 0.50004629] [0.50016978 0.49983022]]
# Distance of all 20 players' iterates to the same nash1 reference point.
dist_20player_MP = PlotDist(data3['vals'], 20, nash=[nash1] * 20)
100%|██████████| 20/20 [00:04<00:00, 4.57it/s]
# Time-average plot for player index 0 of the 20-player run; 2 is passed as the
# second positional argument (named `dimension` in the keyword-style call later
# in this notebook).
PlotTimeAvg(data3, 2, player=0);
# Antisymmetric 3x3 matrix with every off-diagonal entry nonzero: +1 above the
# diagonal, -1 below, so every pair of the 3 indices is linked.
graph3 = np.array([[0,1,1],
[-1,0,1],
[-1,-1,0]])
We run the following function for each seed to find the best $\eta$ value.
# Eta search for seed 6 on the 3-player graph3: 10 candidate values, 2000 steps
# requested per run (the recorded output shows ten progress bars). The scoring
# rule for "best" lives in FindBestEta.
test_eta1 = FindBestEta(num_etas=10, dimension=3, graph=graph3, players=3, numsteps=2000, seed=6)
100%|██████████| 1998/1998 [00:00<00:00, 4588.93it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4631.51it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4567.92it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4583.60it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4572.99it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4599.45it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4620.60it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4567.93it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4283.66it/s] 100%|██████████| 1998/1998 [00:00<00:00, 4653.10it/s]
# Five simulations on graph3. `eta` and `seeds` both have five entries,
# presumably paired by position (one tuned eta per seed -- confirm against
# RandomGameSimulation). rand=False presumably selects the listed seeds instead
# of fresh random ones -- confirm.
rand_data_3players = RandomGameSimulation(num_simulations=5, dimension=3, graph=graph3, players=3, numsteps=1000, eta=[0.2,0.3,0.9,0.24,0.7], rand=False, seeds=[0,1,3,5,6])
100%|██████████| 998/998 [00:00<00:00, 4748.02it/s] 95%|█████████▌| 950/998 [00:00<00:00, 4717.88it/s]
Time average values: [[ 0.4253981 0.41883527 -0.01760276] [-0.04156462 0.4771486 0.01606194] [ 0.36621047 0.27424501 -0.01544017]]
100%|██████████| 998/998 [00:00<00:00, 4681.12it/s] 94%|█████████▍| 937/998 [00:00<00:00, 4694.00it/s]
Time average values: [[ 0.02025194 0.33601617 0.20208312] [-0.01899976 0.18601476 -0.19887019] [ 0.02011121 0.19013487 0.1995475 ]]
100%|██████████| 998/998 [00:00<00:00, 4637.65it/s] 96%|█████████▌| 958/998 [00:00<00:00, 4730.04it/s]
Time average values: [[-0.07404529 0.64880245 0.1211007 ] [ 0.07731149 0.11239675 -0.11830729] [-0.07655186 0.33320692 0.11888803]]
100%|██████████| 998/998 [00:00<00:00, 4725.57it/s] 45%|████▌ | 451/998 [00:00<00:00, 4505.80it/s]
Time average values: [[ 0.18113824 0.0206537 0.13594356] [-0.17967438 -0.01853193 -0.13229736] [ 0.1807426 0.01822216 0.13279607]]
100%|██████████| 998/998 [00:00<00:00, 4207.11it/s]
Time average values: [[-0.05518479 0.1292376 0.2606788 ] [ 0.05779648 -0.1276576 -0.26100289] [-0.05699322 0.12784512 0.26355249]]
# Time-average plot for the first of the five 3-player simulations; 3 matches
# the dimension=3 used to generate them.
PlotTimeAvg(rand_data_3players[0], 3);
# Antisymmetric 4x4 matrix with every off-diagonal entry nonzero: +1 above the
# diagonal, -1 below, so every pair of the 4 indices is linked.
# NOTE: the name graph4 is rebound to a different matrix further down in this
# notebook, so cells that use it depend on execution order.
graph4 = np.array([[0,1,1,1],
[-1,0,1,1],
[-1,-1,0,1],
[-1,-1,-1,0]])
We run the following function for each seed to find the best $\eta$ value.
# Eta search for seed 3 on the 4-player graph4: 20 candidate values with
# endpoints (0.0001, 1.5), 5000 steps requested per run (the recorded output
# shows twenty progress bars).
test_eta2 = FindBestEta(num_etas=20, dimension=3, graph=graph4, players=4, numsteps=5000, seed=3, endpoints=(0.0001, 1.5))
100%|██████████| 4998/4998 [00:01<00:00, 2979.38it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2968.73it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2928.71it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2952.96it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2966.99it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2990.08it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2988.29it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2967.00it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2933.85it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2985.89it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2952.95it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2980.49it/s] 100%|██████████| 4998/4998 [00:01<00:00, 3016.46it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2954.69it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2912.69it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2971.24it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2961.72it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2927.81it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2960.57it/s] 100%|██████████| 4998/4998 [00:01<00:00, 2878.49it/s]
# Five simulations on the 4-player graph4. `eta` and `seeds` both have five
# entries, presumably paired by position (one tuned eta per seed -- confirm
# against RandomGameSimulation).
# NOTE(review): eta=1.685 is above the 1.5 upper endpoint used in the seed-3
# FindBestEta call shown in this notebook; presumably it came from a search
# with different endpoints -- confirm.
rand_data_4players = RandomGameSimulation(num_simulations=5, dimension=3, graph=graph4, players=4, numsteps=10000, eta=[0.19,1.184,0.106,1.685,0.369], rand=False, seeds=[0,3,7,8,14])
100%|██████████| 9998/9998 [00:03<00:00, 2962.98it/s] 3%|▎ | 320/9998 [00:00<00:03, 3197.06it/s]
Time average values: [[ 2.16986880e-01 5.07992575e-01 3.65164834e-04] [ 1.66421887e-01 3.88172990e-01 -3.09251702e-04] [ 1.55805979e-01 3.64255022e-01 1.47616268e-04] [ 1.86465073e-01 4.36032354e-01 4.30031252e-04]]
100%|██████████| 9998/9998 [00:03<00:00, 2959.91it/s] 3%|▎ | 301/9998 [00:00<00:03, 3007.25it/s]
Time average values: [[ 1.32040118e-04 6.48802451e-01 3.45365510e-04] [ 4.27692404e-05 1.12396749e-01 -3.81038115e-05] [ 1.54822144e-04 3.33206922e-01 5.49381515e-05] [-8.76533576e-05 2.88688661e-01 1.72960260e-04]]
100%|██████████| 9998/9998 [00:03<00:00, 2824.14it/s] 2%|▏ | 203/9998 [00:00<00:04, 2027.83it/s]
Time average values: [[ 7.63758249e-02 -2.19856794e-03 -1.75113669e-04] [ 3.51573065e-01 -9.87443935e-03 2.64343918e-04] [ 4.54875036e-02 -1.85198127e-03 4.55133720e-05] [ 1.25313116e-01 -2.93551763e-03 1.81217196e-04]]
100%|██████████| 9998/9998 [00:03<00:00, 2853.14it/s] 3%|▎ | 279/9998 [00:00<00:03, 2787.48it/s]
Time average values: [[ 2.86693921e-01 2.47227027e-04 3.20796224e-04] [ 2.98636860e-01 -8.29163030e-05 -2.07662091e-04] [ 4.05184544e-01 1.35182494e-04 3.65662198e-04] [ 1.81044809e-01 4.19824729e-05 -1.57058192e-04]]
100%|██████████| 9998/9998 [00:03<00:00, 2766.11it/s]
Time average values: [[-1.28034264e-01 2.32677939e-04 1.97620126e-01] [-1.61791598e-01 -8.11426008e-05 2.48792680e-01] [-1.34398109e-01 1.22356851e-04 2.07719522e-01] [-8.09462867e-04 -7.06203596e-07 9.33495601e-04]]
# Time-average plot for player index 3 of the first 4-player simulation;
# num_to_plot=2000 is passed while the run itself used numsteps=10000
# (presumably it limits how many steps are drawn -- confirm against
# PlotTimeAvg).
PlotTimeAvg(data=rand_data_4players[0], dimension=3, player=3, num_to_plot=2000);
First, we convert Kuhn poker to sequence form. This results in the following game matrix:
# 13x13 game matrix used for the poker experiments below (the accompanying
# notebook text describes it as the sequence-form game). The first row and
# first column are all zeros.
# NOTE(review): all nonzero entries have magnitude 1/6 or 1/3 except the three
# 2/3 entries in column index 10 -- confirm these are intended.
G3 = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, -1/6, 0, 0, 0, -1/6, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1/6, -1/3, 0, 0, 1/6, -1/3, 0, 0, 0, 0],
[0, 0, 0, 0, 0, -1/6, 0, 0, 0, 1/6, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 1/6, -1/3, 0, 0, 1/6, 1/3],
[0, 1/6, 0, 0, 0, 0, 0, 0, 0, 1/6, 0, 0, 0],
[0, 0, 0, 1/6, 1/3, 0, 0, 0, 0, 0, 0, 1/6, 1/3],
[0, 0, -1/6, 0, 0, 0, -1/6, 0, 0, 0, 0, 0, 0],
[0, 0, -1/3, 0, 0, 0, -1/3, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, -1/6, 0, 0, 0, -1/6, 0, 0],
[0, 0, 0, 0, 0, 0, -1/3, 0, 0, 0, 2/3, 0, 0],
[0, 0, -1/6, 0, 0, 0, 0, 0, 0, 0, -1/6, 0, 0],
[0, 0, 1/3, 0, 0, 0, 0, 0, 0, 0, 2/3, 0, 0]])
We run experiments on Kuhn poker games with up to 5 players. Empirically, we found that having too many players leads to numerical errors because of the large number of matrix multiplications performed at every iteration. An interesting future direction is to run truly large-scale simulations on higher-dimensional extensive-form games.
# Shared random starting vector: 13 uniform draws (13 matches G3's size),
# normalised so the entries sum to 1.
x = np.random.rand(13)
x_norm = x / x.sum()

# Antisymmetric 4x4 matrix linking 0-1, 1-2, 2-3 and 0-3 (a 4-cycle).
# NOTE: this rebinds graph4, replacing the fully linked matrix defined earlier
# in the notebook.
graph4 = np.array([
    [0, 1, 0, 1],
    [-1, 0, 1, 0],
    [0, -1, 0, 1],
    [-1, 0, -1, 0],
])

# Every player starts from the same pair of identical rows [x_norm, x_norm].
vals_4player = np.array([[x_norm, x_norm]] * 4, dtype=object)
vals_5player = np.array([[x_norm, x_norm]] * 5, dtype=object)

# Every player uses the same game matrix G3.
game_list_4players = [G3] * 4
game_list_5players = [G3] * 5
game_list_10players = [G3] * 10

data_poker_4player = runGDANPlayer(game_list_4players, vals_4player, numsteps=50000, eta=0.33, graph=graph4, N=4)
100%|██████████| 49998/49998 [00:17<00:00, 2858.92it/s]
Time average values: [[ 1.65623266e-01 -8.66335970e-03 -8.02642647e-07 8.66851953e-03 8.68268890e-06 -8.69367392e-03 -3.89868891e-05 -1.47210999e-02 -2.43789428e-02 5.42265045e-02 4.61898118e-03 1.62783359e-01 4.96149775e-02] [ 1.65623266e-01 -8.18262434e-07 1.74739247e-05 -1.28141814e-05 -9.59387754e-06 -1.31997310e-05 -2.05384195e-05 1.50206209e-05 -1.48039912e-05 -1.02095076e-05 7.01182781e-06 6.03764166e-05 -1.28912086e-05] [ 1.65623266e-01 1.24947071e-05 -9.34400098e-06 4.22159475e-06 2.46271532e-05 1.81735738e-05 3.45493370e-06 1.10966956e-05 1.44817341e-05 2.71429822e-05 -1.17730604e-05 -3.94219762e-05 2.14841883e-05] [ 1.65623266e-01 -6.45224247e-02 -9.85971135e-06 3.60019330e-06 2.22362895e-02 6.45351602e-02 3.97064482e-06 4.28466159e-02 -7.81154955e-04 6.45808547e-02 -1.14292536e-05 1.22733615e-01 -8.35782588e-02]]
# The third positional argument is the run's own 'timeavg' entry; in the
# Matching Pennies PlotDist calls in this notebook that slot is passed as
# `nash=`, so distances here are presumably measured to the time average
# rather than to a known equilibrium -- confirm against PlotDist.
dist_poker_4player = PlotDist(data_poker_4player['vals'], 4, data_poker_4player['timeavg'])
100%|██████████| 4/4 [00:01<00:00, 2.55it/s]
# Time-average plot for player index 3 of the 4-player poker run.
# NOTE(review): the second positional argument is 4 here, while the 5-player
# poker call in this notebook passes 13 (the size of G3) in the same slot --
# confirm whether 4 is intended or whether the player count was passed by
# mistake.
PlotTimeAvg(data_poker_4player, 4, player=3, );
# Antisymmetric 5x5 matrix linking 0-1, 1-2, 2-3, 3-4 and 0-4 (a 5-cycle):
# +1 above the diagonal, -1 below.
graph5 = np.array([[0,1,0,0,1],
[-1,0,1,0,0],
[0,-1,0,1,0],
[0,0,-1,0,1],
[-1,0,0,-1,0]])
# 5-player poker run: 100000 steps with eta=0.32 (the 4-player run used 50000
# steps and eta=0.33).
data_poker_5player = runGDANPlayer(game_list_5players, vals_5player, numsteps=100000, eta=0.32, graph=graph5, N=5)
100%|██████████| 99998/99998 [00:48<00:00, 2050.38it/s]
Time average values: [[ 1.65623266e-01 -8.66910456e-03 1.76000119e-06 8.68262951e-03 2.18721320e-06 -8.67381907e-03 -2.23050739e-05 -1.47206834e-02 -2.43781099e-02 5.42304337e-02 4.62300754e-03 1.62781123e-01 4.96181710e-02] [ 1.65623266e-01 1.38512473e-07 8.67059115e-06 -8.86964719e-06 -4.72337563e-06 -7.89574985e-06 -1.02793202e-05 8.44919501e-06 -2.99701404e-06 -4.91702828e-06 3.81143156e-06 3.66276293e-05 -5.03691961e-06] [ 1.65623266e-01 5.83670045e-06 8.31693930e-06 -1.19668418e-05 -4.36972220e-06 -1.05298094e-06 -1.13820879e-05 -1.55036831e-05 2.16104670e-05 3.55234050e-06 -1.48574187e-05 2.00466848e-05 1.12294794e-05] [ 1.65623266e-01 8.09059908e-06 -7.02856483e-06 -1.05289898e-05 1.47930073e-05 1.28690733e-08 3.96341761e-06 -1.14960532e-06 9.82983370e-07 2.35545283e-05 -4.62708415e-06 -2.76784023e-05 1.24026644e-05] [ 1.65623266e-01 -6.45341455e-02 -4.78121581e-06 3.94908020e-06 2.22248802e-02 6.45411044e-02 1.71607010e-06 4.28501172e-02 -7.88157563e-04 6.45631896e-02 -6.12531558e-06 1.22731412e-01 -8.35826655e-02]]
# As in the 4-player poker case, the run's own 'timeavg' entry is passed in the
# third positional slot (presumably the reference point distances are measured
# to -- confirm against PlotDist).
dist_poker_5player = PlotDist(data_poker_5player['vals'], 5, data_poker_5player['timeavg'])
100%|██████████| 5/5 [00:04<00:00, 1.23it/s]
# Time-average plot for player index 3 of the 5-player poker run; 13 (the size
# of G3) is passed as the second positional argument and num_to_plot=3000 is
# passed while the run used numsteps=100000.
PlotTimeAvg(data_poker_5player, 13, player=3, num_to_plot=3000);