import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle as pkl
import os
from collections import Counter
from tqdm import tqdm_notebook as tqdm
import scipy.stats as sc
import plotly.graph_objects as go
import copy
from MAB import GenericMAB as GM
from MAB.BernoulliMAB import BetaBernoulliMAB as BM
from MAB.GaussianMAB import GaussianMAB as GaM
from MAB.BetaMAB import BetaMAB as BeM
from MAB.RademacherMAB import RadeMAB as RadeM
from MAB import xp_helpers as xph
import warnings
from tqdm import tqdm_notebook as tqdm
# Install a global 'ignore' filter: every Python warning is silenced from here on.
# NOTE(review): this also hides deprecation warnings from the plotting libraries.
warnings. filterwarnings('ignore')
%load_ext autoreload
%autoreload 2
# Directory name passed as `pickle_path` to the experiment helpers below.
pth = 'results'
# One (initially empty) keyword-argument dict per policy name.
param_dic = {policy: {} for policy in ('TS', 'TS_star', 'MED', 'NPTS', 'NPTS_star')}
# Container for accumulated results.
all_res = dict()
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
def paper_plots2(traj, style, step, markevery=None):
    """Plot the mean curve of one policy together with its 10%-90% quantile band.

    Input = a unique set of trajectories from a policy.

    Parameters:
        traj: 2-D array; axis 0 indexes the recorded time points and axis 1
            the individual trajectories (mean and quantiles are taken along
            axis 1).
        style: dict providing the keys 'color', 'linestyle', 'marker', 'lgd'
            (legend label of the mean curve) and 'fill' (colour of the band).
        step: spacing between two consecutive rows of `traj`; row i is drawn
            at abscissa i * step + 1.
        markevery: marker spacing forwarded to matplotlib. When None (the
            default) the module-level `mkevery` is used, which is what the
            function always relied on before this parameter existed.

    The curves are drawn on the current matplotlib figure; nothing is returned.
    """
    if markevery is None:
        # Backward-compatible fallback on the notebook-level setting.
        markevery = mkevery

    time_grid = np.arange(traj.shape[0]) * step + 1
    avg = traj.mean(axis=1)
    q10, q90 = np.quantile(traj, [0.10, 0.90], axis=1)

    # Settings shared by the mean curve and by the two quantile curves.
    common = dict(color=style['color'], linestyle=style['linestyle'],
                  marker=style['marker'], markevery=markevery)

    # Thick, labelled mean curve.
    plt.plot(time_grid, avg, linewidth=3, markersize=14, label=style['lgd'],
             **common)
    # Thin, unlabelled quantile curves delimiting the band.
    for quantile_curve in (q10, q90):
        plt.plot(time_grid, quantile_curve, linewidth=0.5, markersize=4,
                 **common)
    plt.fill_between(time_grid, q10, q90, color=style['fill'])
# Marker spacing used by paper_plots2 (forwarded to matplotlib's `markevery`).
mkevery = 0.15
step = 100
# One style per learning rate. The legend labels are raw strings so that the
# LaTeX backslash in "\eta" is not parsed as an (invalid) escape sequence.
style1 = {'name': 'SGB', 'lgd': r'SGB $\eta_1$', 'linestyle': '--', 'marker': '^', 'color': 'blue', 'fill': 'aliceblue'}
style2 = {'name': 'SGB', 'lgd': r'SGB $\eta_2$', 'linestyle': '--', 'marker': '*', 'color': 'orange', 'fill': 'lemonchiffon'}
style3 = {'name': 'SGB', 'lgd': r'SGB $\eta_3$', 'linestyle': '--', 'marker': 'o', 'color': 'forestgreen', 'fill': 'honeydew'}
style4 = {'name': 'SGB', 'lgd': r'SGB $\eta_4$', 'linestyle': '--', 'marker': '^', 'color': 'red', 'fill': 'seashell'}
style5 = {'name': 'SGB', 'lgd': r'SGB $\eta_5$', 'linestyle': '--', 'marker': '*', 'color': 'purple', 'fill': 'lavenderblush'}
This is the companion notebook for the NeurIPS 2025 submission "Does stochastic gradient really succeed for bandits?". It contains the code needed to run all the experiments from Section 4 and Appendix G of the paper.
Focus on the asymptotic scaling of the regret (Appendix G.3)
In this section we provide empirical evidence supporting the conjecture that the asymptotic scaling of the regret of SGB is $\frac{K-1}{K}\frac{\log(T)}{\eta}$, for any $K$-armed bandit problem, if $\eta$ is smaller than the critical threshold $\frac{2\Delta}{K}$. We recall that this result is formally proved in Theorem 1 of the paper for $K=2$.
# Asymptotic regret of SGB on a two-armed instance, for three constant
# learning rates, compared with the reference curves t -> log(t) / (2 * eta).
T, N = 100000, 1000  # Horizon T, number of trajectories N
gap = 0.25
model = RadeM([0., -gap])
u = model.MC_regret('SGB', N, T, {'eta': 0.2})
v = model.MC_regret('SGB', N, T, {'eta': 0.1})
w = model.MC_regret('SGB', N, T, {'eta': 0.05})
# To reload previously saved results instead of re-running the simulations:
# with open('asymptot_exp.pkl', 'rb') as f:
#     res = pkl.load(f)
# u = res['eta20']
# v = res['eta10']
# w = res['eta5']

log_t = np.log(np.arange(T) + 1)
# (regret curve, eta, colour, legend of the curve, legend of its reference).
# Raw strings keep the LaTeX backslashes free of invalid escape sequences.
curves = (
    (u, 0.2, 'b', r'SGB, $\eta_1=0.2$', r'$t \mapsto \frac{\log(t)}{2\eta_1}+a_1$'),
    (v, 0.1, 'orange', r'SGB, $\eta_2=0.1$', r'$t \mapsto \frac{\log(t)}{2\eta_2}+a_2$'),
    (w, 0.05, 'r', r'SGB, $\eta_3=0.05$', r'$t \mapsto \frac{\log(t)}{2\eta_3}+a_3$'),
)
for regret, eta, color, curve_lgd, ref_lgd in curves:
    plt.plot(regret, label=curve_lgd, color=color)
    # Reference curve, shifted vertically so that its last value matches the
    # final empirical regret (up to the tiny log(T+1) - log(T) term).
    plt.plot(log_t / 2 / eta - np.log(T + 1) / (2 * eta) + regret[-1],
             label=ref_lgd, color=color, linestyle='--')

plt.xscale('log')
plt.xlabel("Time (logarithmic scale)", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=15)
plt.savefig('xp_asymp.pdf', bbox_inches='tight')
# Called after savefig: only the figure shown inline gets this default legend,
# the saved PDF keeps the legend configured above.
plt.legend()
res = {'eta20': u, 'eta10': v, 'eta5': w}
# Context manager so that the file is flushed and closed deterministically.
with open('asymptot_exp.pkl', 'wb') as f:
    pkl.dump(res, f)
# Asymptotic regret of SGB on a four-armed instance (one best arm, three arms
# at distance `gap`), compared with the conjectured (K-1)/K * log(t) / eta.
T, N = 100000, 1000  # Horizon T, number of trajectories N
gap = 0.4
model = RadeM([0., -gap, -gap, -gap])
u = model.MC_regret('SGB', N, T, {'eta': 0.2})
v = model.MC_regret('SGB', N, T, {'eta': 0.1})
w = model.MC_regret('SGB', N, T, {'eta': 0.05})
# To reload previously saved results instead of re-running the simulations:
# with open('asymptot_expK.pkl', 'rb') as f:
#     res = pkl.load(f)
# u = res['eta20']
# v = res['eta10']
# w = res['eta5']
KK = 4/3  # K / (K - 1) for the K = 4 arms of `model`

log_t = np.log(np.arange(T) + 1)
# (regret curve, eta, colour, legend of the curve, legend of its reference).
# Raw strings keep the LaTeX backslashes free of invalid escape sequences.
curves = (
    (u, 0.2, 'b', r'SGB, $\eta_1=0.2$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_1}+b_1$'),
    (v, 0.1, 'orange', r'SGB, $\eta_2=0.1$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_2}+b_2$'),
    (w, 0.05, 'r', r'SGB, $\eta_3=0.05$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_3}+b_3$'),
)
for regret, eta, color, curve_lgd, ref_lgd in curves:
    plt.plot(regret, label=curve_lgd, color=color)
    # Reference curve, shifted vertically so that its last value matches the
    # final empirical regret (up to the tiny log(T+1) - log(T) term).
    plt.plot(log_t / KK / eta - np.log(T + 1) / KK / eta + regret[-1],
             label=ref_lgd, color=color, linestyle='--')

plt.xscale('log')
plt.xlabel("Time (logarithmic scale)", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=15)
plt.savefig('xp_asympK.pdf', bbox_inches='tight')
# Called after savefig: only the figure shown inline gets this default legend,
# the saved PDF keeps the legend configured above.
plt.legend()
res = {'eta20': u, 'eta10': v, 'eta5': w}
# Context manager so that the file is flushed and closed deterministically.
with open('asymptot_expK.pkl', 'wb') as f:
    pkl.dump(res, f)
# Asymptotic regret of SGB on a five-armed instance with non-identical gaps,
# for eta in {2*gap/K, gap/K, gap/(2K)}.
T, N = 100000, 1000  # Horizon T, number of trajectories N
gap = 0.25
K = 5
model = RadeM([0.9, 0.9-gap] + [-1]*3)
eta1 = 2 * gap / K
eta2 = gap / K
eta3 = gap / 2 / K
KK = K/(K-1)
u = model.MC_regret('SGB', N, T, {'eta': eta1})
v = model.MC_regret('SGB', N, T, {'eta': eta2})
w = model.MC_regret('SGB', N, T, {'eta': eta3})
# To reload previously saved results instead of re-running the simulations:
# with open('asymptot_exp_Knon.pkl', 'rb') as f:
#     res = pkl.load(f)
# u = res['eta1']
# v = res['eta2']
# w = res['eta3']

log_t = np.log(np.arange(T) + 1)
# (regret curve, eta, colour, legend of the curve, legend of its reference).
# Raw strings keep the LaTeX backslashes free of invalid escape sequences.
curves = (
    (u, eta1, 'b', r'SGB, $\eta_1=0.1$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_1}+c_1$'),
    (v, eta2, 'orange', r'SGB, $\eta_2=0.05$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_2}+c_2$'),
    (w, eta3, 'r', r'SGB, $\eta_3=0.025$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_3}+c_3$'),
)
for regret, eta, color, curve_lgd, ref_lgd in curves:
    plt.plot(regret, label=curve_lgd, color=color)
    # Reference curve, shifted vertically so that its last value matches the
    # final empirical regret (up to the tiny log(T+1) - log(T) term).
    plt.plot(log_t / KK / eta - np.log(T + 1) / KK / eta + regret[-1],
             label=ref_lgd, color=color, linestyle='--')

plt.xscale('log')
plt.xlabel("Time (logarithmic scale)", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=15)
# This experiment used to write to 'xp_asympK_non2.pdf' and
# 'asymptot_exp_Knon2.pkl', the very files the K = 9 experiment writes, so its
# outputs were overwritten. It now uses the un-numbered names, matching the
# pickle name of the reload snippet above.
plt.savefig('xp_asympK_non.pdf', bbox_inches='tight')
# Called after savefig: only the figure shown inline gets this default legend,
# the saved PDF keeps the legend configured above.
plt.legend()
res = {'eta1': u, 'eta2': v, 'eta3': w}
# Context manager so that the file is flushed and closed deterministically.
with open('asymptot_exp_Knon.pkl', 'wb') as f:
    pkl.dump(res, f)
# Asymptotic regret of SGB on a nine-armed instance with several gap levels,
# for eta in {2*gap/K, gap/K, gap/(2K)}.
T, N = 100000, 1000  # Horizon T, number of trajectories N
K = 9
model = RadeM([0.5, 0.2, 0.2] + [0] * 3 + [-0.2] * 3)
gap = 0.3  # difference between the two largest parameters (0.5 and 0.2)
eta1 = 2 * gap / K
eta2 = gap / K
eta3 = gap / 2 / K
KK = K/(K-1)
print(eta1, eta2, eta3)
u = model.MC_regret('SGB', N, T, {'eta': eta1})
v = model.MC_regret('SGB', N, T, {'eta': eta2})
w = model.MC_regret('SGB', N, T, {'eta': eta3})
# To reload previously saved results instead of re-running the simulations
# (file name aligned with the dump at the end of this cell):
# with open('asymptot_exp_Knon2.pkl', 'rb') as f:
#     res = pkl.load(f)
# u = res['eta1']
# v = res['eta2']
# w = res['eta3']

log_t = np.log(np.arange(T) + 1)
# (regret curve, eta, colour, legend of the curve, legend of its reference).
# Raw strings keep the LaTeX backslashes free of invalid escape sequences.
curves = (
    (u, eta1, 'b', r'SGB, $\eta_1=0.066$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_1}+d_1$'),
    (v, eta2, 'orange', r'SGB, $\eta_2=0.033$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_2}+d_2$'),
    (w, eta3, 'r', r'SGB, $\eta_3=0.016$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_3}+d_3$'),
)
for regret, eta, color, curve_lgd, ref_lgd in curves:
    plt.plot(regret, label=curve_lgd, color=color)
    # Reference curve, shifted vertically so that its last value matches the
    # final empirical regret (up to the tiny log(T+1) - log(T) term).
    plt.plot(log_t / KK / eta - np.log(T + 1) / KK / eta + regret[-1],
             label=ref_lgd, color=color, linestyle='--')

plt.xscale('log')
plt.xlabel("Time (logarithmic scale)", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=15)
plt.savefig('xp_asympK_non2.pdf', bbox_inches='tight')
# Called after savefig: only the figure shown inline gets this default legend,
# the saved PDF keeps the legend configured above.
plt.legend()
res = {'eta1': u, 'eta2': v, 'eta3': w}
# Context manager so that the file is flushed and closed deterministically.
with open('asymptot_exp_Knon2.pkl', 'wb') as f:
    pkl.dump(res, f)
0.06666666666666667 0.03333333333333333 0.016666666666666666
Computing 1000 simulations: 100%|███████████| 1000/1000 [27:10<00:00, 1.63s/it] Computing 1000 simulations: 100%|███████████| 1000/1000 [27:07<00:00, 1.63s/it] Computing 1000 simulations: 100%|███████████| 1000/1000 [27:19<00:00, 1.64s/it]
# Asymptotic regret of SGB on a twelve-armed instance with several gap levels,
# for eta in {2*gap/K, gap/K, gap/(2K)}.
T, N = 100000, 1000  # Horizon T, number of trajectories N
K = 12
model = RadeM([0.] + [-0.4] * 3 + [-0.5] * 3 + [-0.6] * 2 + [-0.8] * 3)
gap = 0.4  # difference between the two largest parameters (0 and -0.4)
eta1 = 2 * gap / K
eta2 = gap / K
eta3 = gap / 2 / K
KK = K/(K-1)
u = model.MC_regret('SGB', N, T, {'eta': eta1})
v = model.MC_regret('SGB', N, T, {'eta': eta2})
w = model.MC_regret('SGB', N, T, {'eta': eta3})
print(eta1, eta2, eta3)
# To reload previously saved results instead of re-running the simulations:
# with open('asymptot_exp_Knon3.pkl', 'rb') as f:
#     res = pkl.load(f)
# u = res['eta1']
# v = res['eta2']
# w = res['eta3']

log_t = np.log(np.arange(T) + 1)
# (regret curve, eta, colour, legend of the curve, legend of its reference).
# Raw strings keep the LaTeX backslashes free of invalid escape sequences.
curves = (
    (u, eta1, 'b', r'SGB, $\eta_1=0.066$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_1}+e_1$'),
    (v, eta2, 'orange', r'SGB, $\eta_2=0.033$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_2}+e_2$'),
    (w, eta3, 'r', r'SGB, $\eta_3=0.016$',
     r'$t \mapsto\frac{K-1}{K}\times\frac{\log(t)}{\eta_3}+e_3$'),
)
for regret, eta, color, curve_lgd, ref_lgd in curves:
    plt.plot(regret, label=curve_lgd, color=color)
    # Reference curve, shifted vertically so that its last value matches the
    # final empirical regret (up to the tiny log(T+1) - log(T) term).
    plt.plot(log_t / KK / eta - np.log(T + 1) / KK / eta + regret[-1],
             label=ref_lgd, color=color, linestyle='--')

plt.xscale('log')
plt.xlabel("Time (logarithmic scale)", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=15)
plt.savefig('xp_asympK_non3.pdf', bbox_inches='tight')
# Called after savefig: only the figure shown inline gets this default legend,
# the saved PDF keeps the legend configured above.
plt.legend()
res = {'eta1': u, 'eta2': v, 'eta3': w}
# Context manager so that the file is flushed and closed deterministically.
with open('asymptot_exp_Knon3.pkl', 'wb') as f:
    pkl.dump(res, f)
Computing 1000 simulations: 100%|███████████| 1000/1000 [28:24<00:00, 1.70s/it] Computing 1000 simulations: 100%|███████████| 1000/1000 [28:19<00:00, 1.70s/it] Computing 1000 simulations: 100%|███████████| 1000/1000 [28:20<00:00, 1.70s/it]
0.06666666666666667 0.03333333333333333 0.016666666666666666
Experiments of Section 4
We start with the experiment defined for $K=2$, which aims to illustrate Theorems 1 and 2.
### The following function computes the theoretical upper bound of Theorem 1 ###
def bound_2arms(T, eta, Delta):
    """Evaluate the two-armed regret upper bound of Theorem 1.

    Parameters:
        T: horizon (a scalar, or a numpy array for element-wise evaluation).
        eta: learning rate.
        Delta: gap between the two arms.

    Returns the sum of a logarithmic term in T and of a term that does not
    depend on T. The latter divides by (Delta - eta * exp(2 * eta)) and is
    therefore only meaningful when that quantity is positive.
    """
    inflated_eta = eta * np.exp(2 * eta)
    log_part = np.log(1 + 2 * (eta * Delta + inflated_eta) * T) / (2 * eta)
    constant_part = Delta / (2 * eta * (Delta - inflated_eta))
    return log_part + constant_part
# Two-armed experiment: the same instance is simulated with SGB for four
# learning rates expressed as multiples of the gap.
T, N = 20000, 10000  # Horizon T, number of trajectories N
distribs = ['Rade'] * 2
dist_params = [0.1, 0]
gap = 0.1

runs = []
for eta in (gap/2, gap, 2 * gap, 5 * gap):
    # Tuple layout expected by multiprocess_MC2; the trailing 100 is presumably
    # the recording step (the plots below also use a step of 100) -- TODO confirm.
    params = (distribs, dist_params, T, N, ['SGB'], {'SGB': {'eta': eta}}, 100)
    runs.append(xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB'))

# Same names as before: (resK, trajK) is the output for the K-th learning rate.
(res1, traj1), (res2, traj2), (res3, traj3), (res4, traj4) = runs
Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [08:07<00:00, 1.03it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:15<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:20<00:00, 1.00it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:21<00:00, 1.00s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:27<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:30<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:31<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:33<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:33<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:37<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:37<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:40<00:00, 1.04s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:40<00:00, 1.04s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:41<00:00, 1.04s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:42<00:00, 1.04s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:42<00:00, 1.04s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:43<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:47<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:48<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:48<00:00, 1.06s/it] Computing 501 simulations: 0%| | 0/501 [00:00<?, ?it/s]
Execution time: 531 seconds Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [08:04<00:00, 1.03it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:11<00:00, 1.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:15<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:24<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:25<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:26<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:28<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:29<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:31<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:37<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:37<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:41<00:00, 1.04s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:44<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:45<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:46<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:48<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:48<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:50<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:50<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:57<00:00, 1.07s/it] Computing 501 simulations: 0%| | 0/501 [00:00<?, ?it/s]
Execution time: 537 seconds Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [08:05<00:00, 1.03it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:11<00:00, 1.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:13<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:15<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:22<00:00, 1.00s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:24<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:27<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:28<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:32<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:34<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:35<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:36<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:38<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:40<00:00, 1.04s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:44<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:44<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:45<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:49<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:53<00:00, 1.07s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:55<00:00, 1.07s/it] Computing 501 simulations: 0%| | 0/501 [00:00<?, ?it/s]
Execution time: 536 seconds Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [07:46<00:00, 1.07it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:47<00:00, 1.07it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:51<00:00, 1.06it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:55<00:00, 1.05it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:58<00:00, 1.05it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:09<00:00, 1.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:10<00:00, 1.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:11<00:00, 1.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:13<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:17<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:21<00:00, 1.00s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:26<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:28<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:28<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:29<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:34<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:35<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:36<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:38<00:00, 1.03s/it] Computing 501 simulations: 100%|█████████▉| 500/501 [08:43<00:00, 3.60it/s]
Execution time: 524 seconds
Computing 501 simulations: 100%|██████████| 501/501 [08:43<00:00, 1.04s/it]
# Regret curves of the two-armed experiment, with the bound of Theorem 1.
plt.figure(figsize=(12,7))
# Legends restated in terms of the gap Delta (raw strings: LaTeX backslashes).
style1['lgd'] = r'SGB, $\eta_1=\frac{\Delta}{2}$'
style2['lgd'] = r'SGB, $\eta_2=\Delta$'
style3['lgd'] = r'SGB, $\eta_3=2\Delta$'
style4['lgd'] = r'SGB, $\eta_4=5\Delta$'
# Largest learning rate first, so that the later curves are drawn on top.
paper_plots2(traj4['SGB'], style4, 100)
paper_plots2(traj3['SGB'], style3, 100)
paper_plots2(traj2['SGB'], style2, 100)
paper_plots2(traj1['SGB'], style1, 100)
plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
Ts = np.arange(1, 19999, 100)
# Bound evaluated for eta = 0.05 and Delta = 0.1. The loop variable is named
# `t` so that it cannot be confused with the notebook-level horizon `T`.
regret_bound = [bound_2arms(t, 0.05, 0.1) for t in Ts]
plt.plot(Ts, regret_bound, color='purple', linestyle='--', marker='*', markevery=0.15, label=r'Bound of Th.1 for $\eta_1$')
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.ylim(0,330)
plt.xlim(0,19999)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_2arms_rad.pdf', bbox_inches='tight')
We show the distribution of the empirical regret at horizon $T$ below. Since this figure is hard to read, we chose to report well-chosen statistics instead.
# Histograms of the regret at the last recorded time, one per learning rate.
plt.figure(figsize=(12,7))
plt.ticklabel_format(style='sci', axis='x', scilimits=(0,0))
plt.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
b = 100  # number of histogram bins
# (trajectories, colour, legend); raw strings keep the LaTeX backslash in
# "\eta" from being parsed as an invalid escape sequence.
histograms = (
    (traj1, 'blue', r'SGB $\eta_1$'),
    (traj2, 'orange', r'SGB $\eta_2$'),
    (traj3, 'green', r'SGB $\eta_3$'),
    (traj4, 'red', r'SGB $\eta_4$'),
)
for traj, color, lgd in histograms:
    # Last row of the trajectory array = regret of every run at the final
    # recorded time.
    sns.distplot(traj['SGB'][-1], bins=b, color=color, label=lgd, kde=False)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.xlim(0)
plt.legend(loc="upper right", fontsize=20)
plt.savefig('density_RT_2armsRad.pdf', bbox_inches='tight')
We now run the experiment for $K=10$, which aims to illustrate Theorem 3 and the conjecture that the critical learning rate is $\eta=\frac{2\Delta}{K}$ in general.
# Ten-armed experiment: SGB is simulated for five learning rates, indexed in
# increasing order (eta_1 < eta_2 < ... < eta_5).
T, N = 20000, 20000  # Horizon T, number of trajectories N
K = 10
distribs = ['Dirac', 'Rade'] + ['Dirac'] * (K-2)
dist_params = [0, 0.1] + [-1] * (K-2)
gap = 0.1
# The first two runs used to be in the opposite order (2*gap/K first), which
# contradicted both the legends of the regret plot (eta_1 = Delta/10,
# eta_2 = Delta/5 = 2*Delta/K) and the reference lines of the density plot,
# which are computed from traj1 as if eta_1 were the smallest learning rate.
# eta_1 = Delta / K
params = (distribs, dist_params, T, N, ['SGB'], {'SGB': {'eta': gap/K}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# eta_2 = 2 * Delta / K
params = (distribs, dist_params, T, N, ['SGB'], {'SGB': {'eta': 2*gap/K}}, 100)
res2, traj2 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# eta_3 = Delta / 2
params = (distribs, dist_params, T, N, ['SGB'], {'SGB': {'eta': gap/2}}, 100)
res3, traj3 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# eta_4 = Delta
params = (distribs, dist_params, T, N, ['SGB'], {'SGB': {'eta': gap}}, 100)
res4, traj4 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# eta_5 = 0.5 (= 5 * Delta)
params = (distribs, dist_params, T, N, ['SGB'], {'SGB': {'eta': 0.5}}, 100)
res5, traj5 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
res_sum = {'eta1': traj1, 'eta2': traj2, 'eta3': traj3, 'eta4': traj4, 'eta5': traj5}
#pkl.dump(res_sum, open('xp_worst_instance.pkl', 'wb'))
#pkl.load(open('xp_worst_instance.pkl', 'rb'))
Running on 20 cpu
Computing 1001 simulations: 100%|██████████| 1001/1001 [18:36<00:00, 1.11s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [18:37<00:00, 1.12s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:19<00:00, 1.16s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:28<00:00, 1.17s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:35<00:00, 1.17s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:35<00:00, 1.17s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:41<00:00, 1.18s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:49<00:00, 1.19s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:54<00:00, 1.19s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:58<00:00, 1.20s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [19:59<00:00, 1.20s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [20:00<00:00, 1.20s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [20:02<00:00, 1.20s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [20:05<00:00, 1.20s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [20:09<00:00, 1.21s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [20:10<00:00, 1.21s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [20:12<00:00, 1.21s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [20:20<00:00, 1.22s/it] Computing 1001 simulations: 100%|██████████| 1001/1001 [20:21<00:00, 1.22s/it] Computing 1001 simulations: 100%|█████████▉| 1000/1001 [20:30<00:00, 3.08it/s]
Execution time: 1233 seconds
Computing 1001 simulations: 100%|██████████| 1001/1001 [20:30<00:00, 1.23s/it]
# Plot the results
plt.figure(figsize=(12,7))

# Legend of each style, restated in terms of the gap Delta.
legend_by_style = (
    (style1, 'SGB, $\\eta_1=\\frac{\\Delta}{10}$'),
    (style2, 'SGB, $\\eta_2=\\frac{\\Delta}{5}=\\frac{2\\Delta}{K}$'),
    (style3, 'SGB, $\\eta_3=\\frac{\\Delta}{2}$'),
    (style4, 'SGB, $\\eta_4=\\Delta$'),
    (style5, 'SGB, $\\eta_5=5\\Delta$'),
)
for style, legend_text in legend_by_style:
    style['lgd'] = legend_text

# Drawn from the fifth learning rate down to the first one.
for traj, style in ((traj5, style5), (traj4, style4), (traj3, style3),
                    (traj2, style2), (traj1, style1)):
    paper_plots2(traj['SGB'], style, 100)

plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.xlim(0, 19999)
ax = plt.gca()
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_rad_dirac_bad.pdf', bbox_inches='tight')
We again plot the distribution of the empirical regret at horizon $T$. This time, the different learning rates have clearly separated modes, which makes the figure clear and easy to interpret.
# Histograms of the regret at the last recorded time, one per learning rate.
plt.figure(figsize=(12,7))
plt.xlabel("Empirical regret at $T=20000$", fontsize=18)
plt.ylabel("Density", fontsize=18)
b = 100  # number of histogram bins
# (trajectories, colour, legend); raw strings keep the LaTeX backslash in
# "\eta" from being parsed as an invalid escape sequence.
histograms = (
    (traj1, 'blue', r'SGB $\eta_1$'),
    (traj2, 'orange', r'SGB $\eta_2$'),
    (traj3, 'green', r'SGB $\eta_3$'),
    (traj4, 'red', r'SGB $\eta_4$'),
    (traj5, 'purple', r'SGB $\eta_5$'),
)
for traj, color, lgd in histograms:
    sns.distplot(traj['SGB'][-1], bins=b, color=color, label=lgd, kde=False)
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
# Dashed reference lines derived from the mean final regret of the first run:
# mean/50, mean/10, mean/5, mean/2 and the mean itself, each drawn in the
# colour of the learning rate it is compared with.
scaler = traj1['SGB'][-1].mean()/2
plt.axvline(scaler/25, linestyle='--', color='purple')
plt.axvline(scaler/5, linestyle='--', color='red')
plt.axvline(scaler/2.5, linestyle='--', color='green')
plt.axvline(scaler, linestyle='--', color='orange')
plt.axvline(scaler*2, linestyle='--', color='blue')
plt.xlim(0,2800)
plt.legend(loc="upper right", fontsize=20)
plt.savefig('density_RT_worst.pdf', bbox_inches='tight')
# Save the trajectories of the first four learning rates.
# NOTE(review): at this point traj1..traj4 hold the K = 10 results, yet the
# file name mentions the 2-arms experiment and traj5 is left out -- confirm
# that this is the intended file name and content.
res_sum = {'eta1': traj1, 'eta2': traj2, 'eta3': traj3, 'eta4': traj4}
# Context manager so that the file is flushed and closed deterministically.
with open('xp_2arms_main2.pkl', 'wb') as f:
    pkl.dump(res_sum, f)
Experiments with gap-free learning rates (Appendix G.1)
We start with an experiment where the gap is initially much smaller than the learning rate
# Two-armed instance used for the gap-free learning-rate experiment.
K = 2
T = 10000  # Horizon
N = 5000  # Number of trajectories
gap = 0.05
distribs = ['Rade'] * K
dist_params = [gap, 0]
def decay(t):
    """Return the learning rate sqrt(log(e + t) / (1 + t)), capped at 0.5."""
    uncapped = np.sqrt(np.log(np.exp(1) + t) / (1 + t))
    return min(0.5, uncapped)
def decay2(t):
    """Return the learning rate at step t: sqrt(log(e + t) / (1 + t)), capped at 0.1."""
    uncapped = np.sqrt(np.log(np.exp(1) + t) / (1 + t))
    return min(0.1, uncapped)
# Pieces shared by the four Monte-Carlo runs below. The trailing 100 in each
# `params` tuple is presumably the trajectory recording step (it matches the
# step later passed to paper_plots2) -- confirm against xph.multiprocess_MC2.
_setup = (distribs, dist_params, T, N)
_mc_opts = {'plot': False, 'pickle_path': pth, 'caption': 'SGB'}

# Run 1: SGB with the constant rate eta = 2 * gap / K.
params = _setup + (['SGB'], {'SGB': {'eta': 2 * gap / K}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, **_mc_opts)

# Run 2: SGB_decay, given the `decay` schedule itself (a callable of t).
params = _setup + (['SGB_decay'], {'SGB_decay': {'eta': decay}}, 100)
res2, traj2 = xph.multiprocess_MC2(params, **_mc_opts)

# Run 3: SGB with the constant rate decay(T), i.e. the schedule's final value.
params = _setup + (['SGB'], {'SGB': {'eta': decay(T)}}, 100)
res3, traj3 = xph.multiprocess_MC2(params, **_mc_opts)

# Run 4: SGB_decay, given the `decay2` schedule.
params = _setup + (['SGB_decay'], {'SGB_decay': {'eta': decay2}}, 100)
res4, traj4 = xph.multiprocess_MC2(params, **_mc_opts)
Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [01:40<00:00, 2.50it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:40<00:00, 2.49it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:42<00:00, 2.44it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:49<00:00, 2.29it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:49<00:00, 2.28it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:49<00:00, 2.28it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:50<00:00, 2.28it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:51<00:00, 2.25it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:53<00:00, 2.21it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:53<00:00, 2.21it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:54<00:00, 2.19it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:59<00:00, 2.09it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:59<00:00, 2.09it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:00<00:00, 2.08it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:00<00:00, 2.08it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:04<00:00, 2.02it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.99it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.98it/s] Computing 251 simulations: 0%| | 0/251 [00:00<?, ?it/s]
Execution time: 127 seconds Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.86it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:18<00:00, 1.81it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:28<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:29<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:31<00:00, 1.66it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:32<00:00, 1.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:33<00:00, 1.64it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:36<00:00, 1.60it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:37<00:00, 1.59it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:39<00:00, 1.57it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:40<00:00, 1.57it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:40<00:00, 1.56it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:41<00:00, 1.56it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:43<00:00, 1.53it/s]
Execution time: 164 seconds Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [01:46<00:00, 2.35it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:47<00:00, 2.33it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:48<00:00, 2.31it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:51<00:00, 2.24it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:52<00:00, 2.23it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:55<00:00, 2.17it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:57<00:00, 2.14it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:58<00:00, 2.11it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:58<00:00, 2.11it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:59<00:00, 2.11it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:01<00:00, 2.06it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.06it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:05<00:00, 2.00it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.99it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:12<00:00, 1.89it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:13<00:00, 1.88it/s]
Execution time: 133 seconds Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [02:23<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:25<00:00, 1.73it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:26<00:00, 1.71it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:26<00:00, 1.71it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:26<00:00, 1.71it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.70it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.70it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:31<00:00, 1.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:32<00:00, 1.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:33<00:00, 1.63it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:34<00:00, 1.63it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:36<00:00, 1.61it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:37<00:00, 1.59it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:40<00:00, 1.56it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:43<00:00, 1.54it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:43<00:00, 1.53it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:44<00:00, 1.53it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:47<00:00, 1.50it/s]
Execution time: 168 seconds
# Regret curves of the four runs of the small-gap experiment, saved to
# 'xp_decay1.pdf'.
plt.figure(figsize=(12, 7))

# Legend entries, written to the shared style dictionaries. Raw strings yield
# exactly the same text as before but contain no invalid escape sequences
# (the plain-string version relied on unrecognised backslash escapes).
style1['lgd'] = r'$\eta=\Delta=0.05$'
style2['lgd'] = r'$\eta=\sqrt{\frac{\log(T)}{T}}\approx 0.03$'
style3['lgd'] = r'$\eta_t=0.5 \wedge \sqrt{\frac{\log(e+t)}{t}}$'
style4['lgd'] = r'$\eta_t=0.1 \wedge \sqrt{\frac{\log(e+t)}{t}}$'

# Note the pairing: traj3 (constant rate) uses style2, traj2 (decaying rate)
# uses style3. The drawing order is the legend order.
for _traj, _style in (
    (traj1['SGB'], style1),
    (traj3['SGB'], style2),
    (traj2['SGB_decay'], style3),
    (traj4['SGB_decay'], style4),
):
    paper_plots2(_traj, _style, 100)

plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

# Remove the top and right spines
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.ylim(0, 500)
plt.xlim(0, 9999)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_decay1.pdf', bbox_inches='tight')
We then consider an experiment where, on the contrary, the gap is relatively large compared to the learning rate, even from the beginning of the experiment.
# Two-armed instance: the first arm has parameter `gap`, the other one 0.
T = 10000  # horizon
N = 5000  # number of trajectories
K = 2
gap = 0.5
distribs = ['Rade' for _ in range(K)]
dist_params = [gap] + [0] * (K - 1)
def decay(t):
    """Return the uncapped learning rate at step t: sqrt(log(e + t) / (1 + t))."""
    log_term = np.log(np.exp(1) + t)
    return np.sqrt(log_term / (1 + t))
def decay2(t):
    """Return the learning rate at step t: sqrt(log(e + t) / (1 + t)), capped at 0.1.

    Not referenced by the runs that immediately follow this definition.
    """
    uncapped = np.sqrt(np.log(np.exp(1) + t) / (1 + t))
    return min(0.1, uncapped)
# Pieces shared by the three Monte-Carlo runs below. The trailing 100 in each
# `params` tuple is presumably the trajectory recording step -- confirm
# against xph.multiprocess_MC2.
_setup = (distribs, dist_params, T, N)
_mc_opts = {'plot': False, 'pickle_path': pth, 'caption': 'SGB'}

# Run 1: SGB with the constant rate eta = 2 * gap / K.
params = _setup + (['SGB'], {'SGB': {'eta': 2 * gap / K}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, **_mc_opts)

# Run 2: SGB_decay, given the `decay` schedule itself (a callable of t).
params = _setup + (['SGB_decay'], {'SGB_decay': {'eta': decay}}, 100)
res2, traj2 = xph.multiprocess_MC2(params, **_mc_opts)

# Run 3: SGB with the constant rate decay(T), i.e. the schedule's final value.
params = _setup + (['SGB'], {'SGB': {'eta': decay(T)}}, 100)
res3, traj3 = xph.multiprocess_MC2(params, **_mc_opts)
Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [01:45<00:00, 2.38it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:46<00:00, 2.36it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:50<00:00, 2.27it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:53<00:00, 2.22it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:54<00:00, 2.18it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:56<00:00, 2.15it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:58<00:00, 2.11it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:59<00:00, 2.11it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:00<00:00, 2.08it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.06it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.05it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:04<00:00, 2.02it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:05<00:00, 2.00it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.96it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.96it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.96it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.85it/s] Computing 251 simulations: 0%| | 0/251 [00:00<?, ?it/s]
Execution time: 136 seconds Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [02:11<00:00, 1.90it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:17<00:00, 1.83it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:19<00:00, 1.80it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:20<00:00, 1.79it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:23<00:00, 1.75it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:25<00:00, 1.73it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:28<00:00, 1.69it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:29<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:29<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:31<00:00, 1.66it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:35<00:00, 1.61it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:36<00:00, 1.61it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:36<00:00, 1.60it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:37<00:00, 1.59it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:38<00:00, 1.58it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:40<00:00, 1.57it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:40<00:00, 1.57it/s]
Execution time: 160 seconds Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [01:47<00:00, 2.33it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:48<00:00, 2.32it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:51<00:00, 2.26it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:51<00:00, 2.25it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:53<00:00, 2.21it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:55<00:00, 2.17it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:56<00:00, 2.16it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:57<00:00, 2.13it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:59<00:00, 2.09it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:04<00:00, 2.02it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:05<00:00, 2.00it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.98it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.98it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:09<00:00, 1.94it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:12<00:00, 1.89it/s]
Execution time: 133 seconds
### Case where gap is actually rather large ###
# Regret curves of the three runs of the large-gap experiment, saved to
# 'xp_decay2.pdf'.
plt.figure(figsize=(12, 7))

# Legend entries, written to the shared style dictionaries. Raw strings yield
# exactly the same text as before but no longer rely on the unrecognised
# backslash-l escape inside "\log".
style1['lgd'] = r'$\eta=\Delta=0.5$'
style2['lgd'] = r'$\eta=\sqrt{\frac{\log(T)}{T}}\approx 0.03$'
style3['lgd'] = r'$\eta_t=\sqrt{\frac{\log(e+t)}{t}}$'

# Note the pairing: traj3 (constant rate) uses style2, traj2 (decaying rate)
# uses style3. The drawing order is the legend order.
for _traj, _style in (
    (traj1['SGB'], style1),
    (traj3['SGB'], style2),
    (traj2['SGB_decay'], style3),
):
    paper_plots2(_traj, _style, 100)

plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

# Remove the top and right spines
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.ylim(0, 160)
plt.xlim(0, 9999)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_decay2.pdf', bbox_inches='tight')
We finally run the last experiment of Appendix G.1.
# Five-armed instance: the first arm has parameter `gap`, all others 0.
T = 10000  # horizon
N = 5000  # number of trajectories
K = 5
gap = 0.25
distribs = ['Rade' for _ in range(K)]
dist_params = [gap] + [0] * (K - 1)


def decay(t):
    """Return the learning rate at step t: sqrt(K * log(e + t) / (1 + t)), capped at 0.5.

    K is read from the module-level variable at call time.
    """
    uncapped = np.sqrt(K * np.log(np.exp(1) + t) / (1 + t))
    return min(0.5, uncapped)
# Pieces shared by the three Monte-Carlo runs below. The trailing 100 in each
# `params` tuple is presumably the trajectory recording step -- confirm
# against xph.multiprocess_MC2.
_setup = (distribs, dist_params, T, N)
_mc_opts = {'plot': False, 'pickle_path': pth, 'caption': ' test SGB'}

# Run 1: SGB with the constant rate eta = 2 * gap / K.
params = _setup + (['SGB'], {'SGB': {'eta': 2 * gap / K}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, **_mc_opts)

# Run 2: SGB_decay, given the `decay` schedule itself (a callable of t).
params = _setup + (['SGB_decay'], {'SGB_decay': {'eta': decay}}, 100)
res2, traj2 = xph.multiprocess_MC2(params, **_mc_opts)

# Run 3: SGB with the constant rate decay(T), i.e. the schedule's final value.
params = _setup + (['SGB'], {'SGB': {'eta': decay(T)}}, 100)
res3, traj3 = xph.multiprocess_MC2(params, **_mc_opts)
Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [01:55<00:00, 2.17it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:55<00:00, 2.17it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.05it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.98it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.96it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:09<00:00, 1.94it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:09<00:00, 1.94it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:10<00:00, 1.92it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:11<00:00, 1.91it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:11<00:00, 1.91it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:12<00:00, 1.89it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:13<00:00, 1.89it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:13<00:00, 1.88it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.86it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:20<00:00, 1.79it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s]
Execution time: 143 seconds Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [03:06<00:00, 1.34it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:09<00:00, 1.32it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:11<00:00, 1.31it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:11<00:00, 1.31it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:13<00:00, 1.30it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:15<00:00, 1.29it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:17<00:00, 1.27it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:18<00:00, 1.27it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:20<00:00, 1.25it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:24<00:00, 1.23it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:26<00:00, 1.21it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:27<00:00, 1.21it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:27<00:00, 1.21it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:28<00:00, 1.20it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:29<00:00, 1.20it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:31<00:00, 1.19it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:33<00:00, 1.17it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:37<00:00, 1.15it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:37<00:00, 1.15it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:39<00:00, 1.14it/s] Computing 251 simulations: 0%| | 0/251 [00:00<?, ?it/s]
Execution time: 220 seconds Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [02:00<00:00, 2.08it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.99it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.98it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.98it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.96it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.96it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:10<00:00, 1.92it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:11<00:00, 1.91it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:11<00:00, 1.91it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:13<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:17<00:00, 1.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:20<00:00, 1.78it/s] Computing 251 simulations: 100%|█████████▉| 250/251 [02:22<00:00, 6.41it/s]
Execution time: 143 seconds
Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s]
# Regret curves of the three runs of the five-armed experiment, saved to
# 'xp_decay3.pdf'.
plt.figure(figsize=(12, 7))

# Legend entries, written to the shared style dictionaries. Raw strings yield
# exactly the same text as before but contain no invalid escape sequences
# (the plain-string version relied on unrecognised backslash escapes).
# NOTE(review): the style3 legend reads K*log(t)/t whereas the `decay`
# schedule of this experiment uses K*log(e+t)/(1+t) -- confirm the legend
# wording is the intended simplification.
style1['lgd'] = r'$\eta=\frac{2\Delta}{K}=0.1$'
style2['lgd'] = r'$\eta=\sqrt{\frac{K\log(T)}{T}}\approx 0.07$'
style3['lgd'] = r'$ \eta_t=0.5 \wedge\sqrt{\frac{K\log(t)}{t}}$'

# The decaying-rate run is drawn first, so it also comes first in the legend.
for _traj, _style in (
    (traj2['SGB_decay'], style3),
    (traj1['SGB'], style1),
    (traj3['SGB'], style2),
):
    paper_plots2(_traj, _style, 100)

plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

# Remove the top and right spines.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.xlim(0, 9999)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_decay3.pdf', bbox_inches='tight')
Benchmarking SGB with standard bandit policies (Appendix G.2)
In this section we run the four experiments presented in Appendix G.2, where SGB is compared with the following policies on problems with different characteristics: UCB, MED, TS, and SAMBA.
# Five-armed instance: the first arm has parameter `gap`, all others 0.
T = 10000  # horizon
N = 5000  # number of trajectories
K = 5
gap = 0.1
distribs = ['Rade' for _ in range(K)]
dist_params = [gap, *([0] * (K - 1))]
# SAMBA parameter, derived from the gap: (gap / 2) / (1 - gap / 2).
alpha = (gap / 2) / (1 - gap / 2)
print(alpha)
# Per-policy keyword arguments; the insertion order of this dict is also the
# order in which the policy names are listed in `params`.
_policy_args = {
    'SGB': {'eta': 2 * gap / K},
    'UCB1': {},
    'MED': {},
    'TS': {},
    'SAMBA': {'alpha': alpha},
}
params = (distribs, dist_params, T, N, list(_policy_args), _policy_args, 100)
res1, traj1 = xph.multiprocess_MCRad(
    params,
    plot=False,
    pickle_path=pth,
    caption=' test SGB',
)
0.052631578947368425 Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:23<00:00, 1.75it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.73it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:26<00:00, 1.72it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:26<00:00, 1.71it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.71it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.70it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.70it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.70it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:28<00:00, 1.69it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:29<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:29<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:34<00:00, 1.62it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:35<00:00, 1.61it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:36<00:00, 1.61it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:41<00:00, 1.55it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:42<00:00, 1.54it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:51<00:00, 1.46it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:16<00:00, 3.29it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:02<00:00, 4.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:06<00:00, 3.77it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:05<00:00, 3.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:13<00:00, 3.43it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:18<00:00, 3.19it/s] Computing 251 
simulations: 100%|██████████| 251/251 [01:11<00:00, 3.51it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:12<00:00, 3.46it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:06<00:00, 3.79it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:07<00:00, 3.72it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:16<00:00, 3.29it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:17<00:00, 3.26it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:20<00:00, 3.12it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:08<00:00, 3.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:17<00:00, 3.24it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:30<00:00, 2.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:19<00:00, 3.15it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:32<00:00, 2.70it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:38<00:00, 2.56it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:13<00:00, 3.42it/s] Computing 251 simulations: 100%|██████████| 251/251 [05:07<00:00, 1.22s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:20<00:00, 1.28s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:11<00:00, 1.24s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:34<00:00, 1.33s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:10<00:00, 1.24s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:27<00:00, 1.30s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:26<00:00, 1.30s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:28<00:00, 1.31s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:17<00:00, 1.27s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:32<00:00, 1.33s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:27<00:00, 1.30s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:17<00:00, 1.26s/it] Computing 251 simulations: 
100%|██████████| 251/251 [05:49<00:00, 1.39s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:38<00:00, 1.35s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:18<00:00, 1.51s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:44<00:00, 1.37s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:45<00:00, 1.37s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:14<00:00, 1.49s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:17<00:00, 1.50s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:00<00:00, 1.44s/it] Computing 251 simulations: 100%|██████████| 251/251 [02:16<00:00, 1.83it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:04<00:00, 2.02it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.05it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.86it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:13<00:00, 1.88it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.98it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:10<00:00, 1.93it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:17<00:00, 1.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:25<00:00, 1.73it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.66it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.05it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:34<00:00, 1.63it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 
251/251 [02:25<00:00, 1.72it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:21<00:00, 1.77it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:13<00:00, 1.89it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:18<00:00, 1.81it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:12<00:00, 1.89it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:11<00:00, 1.91it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.05it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:18<00:00, 1.81it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:12<00:00, 1.90it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:32<00:00, 2.70it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:39<00:00, 2.52it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:09<00:00, 1.94it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:16<00:00, 1.84it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:40<00:00, 2.51it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:52<00:00, 2.24it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:39<00:00, 2.51it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:44<00:00, 2.39it/s]
Execution time: 857 seconds
# Regret curves of the five benchmarked policies, saved to 'xp_all1.pdf'.
plt.figure(figsize=(12, 7))

# Each policy is paired with a style dictionary and the legend text stored
# in it. The tuple order is the drawing order.
_curves = (
    ('SGB', style1, r'SGB, $\eta=\frac{2\Delta}{K}=0.04$'),
    ('TS', style2, 'TS'),
    ('MED', style3, 'MED'),
    ('UCB1', style4, 'UCB'),
    ('SAMBA', style5, r'SAMBA, $\alpha \approx \frac{\Delta}{2}\approx 0.05$'),
)
for _policy, _style, _legend in _curves:
    _style['lgd'] = _legend
for _policy, _style, _legend in _curves:
    paper_plots2(traj1[_policy], _style, 100)

plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)

# Remove the top and right spines.
ax = plt.gca()
for _side in ('top', 'right'):
    ax.spines[_side].set_visible(False)

plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_all1.pdf', bbox_inches='tight')
# Ten-armed instance. The two largest parameters are 0.5 and 0.3, which is
# where gap = 0.2 comes from; the remaining arms sit at -0.5 or 0.
T = 10000  # horizon
N = 5000  # number of trajectories
K = 10
gap = 0.2
distribs = ['Rade' for _ in range(K)]
dist_params = [-0.5, -0.5, 0.3, 0.5] + [0] * 6
# SAMBA parameter, derived from the gap: (gap / 2) / (1 - gap / 2).
alpha = (gap / 2) / (1 - gap / 2)
print(alpha)
# Per-policy keyword arguments; the insertion order of this dict is also the
# order in which the policy names are listed in `params`.
_policy_args = {
    'SGB': {'eta': 2 * gap / K},
    'UCB1': {},
    'MED': {},
    'TS': {},
    'SAMBA': {'alpha': alpha},
}
params = (distribs, dist_params, T, N, list(_policy_args), _policy_args, 100)
res1, traj1 = xph.multiprocess_MCRad(
    params,
    plot=False,
    pickle_path=pth,
    caption=' test SGB',
)
0.11111111111111112 Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:31<00:00, 1.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:31<00:00, 1.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:32<00:00, 1.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:32<00:00, 1.64it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:34<00:00, 1.62it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:39<00:00, 1.58it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:39<00:00, 1.57it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:40<00:00, 1.56it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:42<00:00, 1.54it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:43<00:00, 1.54it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:43<00:00, 1.54it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:43<00:00, 1.53it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:48<00:00, 1.49it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:48<00:00, 1.49it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:49<00:00, 1.48it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:52<00:00, 1.46it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:59<00:00, 1.40it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:03<00:00, 1.37it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:04<00:00, 1.36it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:01<00:00, 4.05it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:08<00:00, 3.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:02<00:00, 4.00it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:13<00:00, 3.41it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:04<00:00, 3.88it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:16<00:00, 3.27it/s] Computing 251 
simulations: 100%|██████████| 251/251 [01:10<00:00, 3.54it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:10<00:00, 3.57it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:20<00:00, 3.13it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:16<00:00, 3.28it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:15<00:00, 3.30it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:09<00:00, 3.62it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:13<00:00, 3.42it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:21<00:00, 3.09it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:33<00:00, 2.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:09<00:00, 3.64it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:21<00:00, 3.06it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:12<00:00, 3.48it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:26<00:00, 2.89it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:34<00:00, 2.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [07:55<00:00, 1.89s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:28<00:00, 1.79s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:02<00:00, 1.92s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:16<00:00, 1.98s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:04<00:00, 1.93s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:18<00:00, 1.99s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:03<00:00, 1.93s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:19<00:00, 1.99s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:06<00:00, 1.94s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:20<00:00, 1.99s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:29<00:00, 2.03s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:38<00:00, 2.07s/it] Computing 251 simulations: 
100%|██████████| 251/251 [08:37<00:00, 2.06s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:40<00:00, 2.07s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:41<00:00, 2.08s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:34<00:00, 2.05s/it] Computing 251 simulations: 100%|██████████| 251/251 [08:16<00:00, 1.98s/it] Computing 251 simulations: 100%|██████████| 251/251 [09:08<00:00, 2.18s/it] Computing 251 simulations: 100%|██████████| 251/251 [09:37<00:00, 2.30s/it] Computing 251 simulations: 100%|██████████| 251/251 [09:22<00:00, 2.24s/it] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:18<00:00, 1.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:19<00:00, 1.80it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:56<00:00, 2.15it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:18<00:00, 1.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:37<00:00, 1.59it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.71it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:10<00:00, 1.92it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:20<00:00, 1.79it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:25<00:00, 1.72it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:16<00:00, 1.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:29<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [03:09<00:00, 1.33it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 
251/251 [02:21<00:00, 1.77it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:56<00:00, 1.42it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:07<00:00, 1.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:11<00:00, 1.90it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:19<00:00, 1.80it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:05<00:00, 2.01it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:10<00:00, 1.92it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:16<00:00, 1.84it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:56<00:00, 2.15it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:34<00:00, 1.63it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:04<00:00, 2.01it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:58<00:00, 2.13it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:00<00:00, 2.09it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:52<00:00, 1.45it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:10<00:00, 1.92it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:47<00:00, 2.34it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:52<00:00, 2.22it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:37<00:00, 2.58it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:06<00:00, 3.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:45<00:00, 2.38it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:24<00:00, 2.98it/s]
Execution time: 1044 seconds
# Regret figure for the run stored in traj1: one curve per policy (mean plus
# the 10%/90% quantile band drawn by paper_plots2), written to xp_all2.pdf.
plotted = [('SGB', style1), ('TS', style2), ('MED', style3),
           ('UCB1', style4), ('SAMBA', style5)]
legend_text = [
    'SGB, $\\eta=\\frac{2\\Delta}{K}=0.04$',
    'TS',
    'MED',
    'UCB',
    'SAMBA, $\\alpha \\approx \\frac{\\Delta}{2}\\approx 0.1$',
]

plt.figure(figsize=(12, 7))

# Refresh all legend entries first, then draw the curves in legend order.
for (policy_name, policy_style), policy_label in zip(plotted, legend_text):
    policy_style['lgd'] = policy_label
for policy_name, policy_style in plotted:
    paper_plots2(traj1[policy_name], policy_style, 100)

# Axis cosmetics: labelled axes, larger ticks, no top/right frame.
label_size, tick_size = 18, 15
plt.xlabel("Time", fontsize=label_size)
plt.ylabel("Empirical Regret", fontsize=label_size)
plt.xticks(fontsize=tick_size)
plt.yticks(fontsize=tick_size)
ax = plt.gca()
for spine_side in ('top', 'right'):
    ax.spines[spine_side].set_visible(False)

plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_all2.pdf', bbox_inches='tight')
# Monte Carlo comparison of five policies on a 5-armed 'Rade' bandit.
# The two largest entries of dist_params (-0.8 and -0.85) differ by `gap`.
T, N = 10000, 5000  # Horizon T, number of trajectories N
K = 5
gap = 0.05
distribs = K * ['Rade']
dist_params = [-0.95, -0.9, -0.9, -0.85, -0.8]

# SAMBA learning rate: (gap/2) / (1 - gap/2); printed for reference.
half_gap = gap/2
alpha = half_gap / (1-half_gap)
print(alpha)

policy_names = ['SGB', 'UCB1', 'MED', 'TS', 'SAMBA']
policy_kwargs = {
    'SGB': {'eta': 2*gap/K},  # SGB learning rate 2*gap/K
    'UCB1': {},
    'MED': {},
    'TS': {},
    'SAMBA': {'alpha': alpha},
}
# Trailing 100 is presumably the recording step (the plots use step=100) -- TODO confirm.
params = (distribs, dist_params, T, N, policy_names, policy_kwargs, 100)
res1, traj1 = xph.multiprocess_MCRad(
    params,
    plot=False,
    pickle_path=pth,
    caption=' test SGB',
)
0.025641025641025644 Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:25<00:00, 1.73it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:29<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:30<00:00, 1.66it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:32<00:00, 1.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:32<00:00, 1.64it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:34<00:00, 1.62it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:38<00:00, 1.58it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:38<00:00, 1.58it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:39<00:00, 1.57it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:40<00:00, 1.56it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:43<00:00, 1.54it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:47<00:00, 1.50it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:50<00:00, 1.47it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:52<00:00, 1.45it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:53<00:00, 1.45it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:55<00:00, 1.43it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:57<00:00, 1.41it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:59<00:00, 1.40it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:12<00:00, 3.48it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:13<00:00, 3.41it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:06<00:00, 3.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:13<00:00, 3.42it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:15<00:00, 3.32it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:19<00:00, 3.16it/s] Computing 251 
simulations: 100%|██████████| 251/251 [01:31<00:00, 2.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:03<00:00, 3.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:29<00:00, 2.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:11<00:00, 3.51it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:21<00:00, 3.08it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:29<00:00, 2.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:20<00:00, 3.11it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:08<00:00, 3.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:24<00:00, 2.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:09<00:00, 3.60it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:19<00:00, 3.15it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:15<00:00, 3.34it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:12<00:00, 3.44it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:18<00:00, 3.20it/s] Computing 251 simulations: 100%|██████████| 251/251 [05:36<00:00, 1.34s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:38<00:00, 1.35s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:31<00:00, 1.32s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:43<00:00, 1.37s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:35<00:00, 1.34s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:36<00:00, 1.34s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:46<00:00, 1.38s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:41<00:00, 1.36s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:44<00:00, 1.37s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:31<00:00, 1.32s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:46<00:00, 1.38s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:40<00:00, 1.36s/it] Computing 251 simulations: 
100%|██████████| 251/251 [05:52<00:00, 1.40s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:42<00:00, 1.36s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:01<00:00, 1.44s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:17<00:00, 1.50s/it] Computing 251 simulations: 100%|██████████| 251/251 [05:54<00:00, 1.41s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:12<00:00, 1.48s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:12<00:00, 1.48s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:15<00:00, 1.50s/it] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.95it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:09<00:00, 1.93it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:04<00:00, 2.02it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.73it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:19<00:00, 1.80it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.85it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:05<00:00, 2.00it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:37<00:00, 1.59it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:18<00:00, 1.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:17<00:00, 1.83it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:17<00:00, 1.83it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.71it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:23<00:00, 1.75it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:25<00:00, 1.72it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:12<00:00, 1.90it/s] Computing 251 simulations: 100%|██████████| 
251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:37<00:00, 1.59it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:59<00:00, 2.10it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:21<00:00, 1.78it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:04<00:00, 2.01it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:19<00:00, 1.80it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:21<00:00, 1.77it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:10<00:00, 1.92it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:58<00:00, 2.11it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.86it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:49<00:00, 2.30it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:54<00:00, 2.19it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:51<00:00, 2.24it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:43<00:00, 2.42it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:47<00:00, 2.34it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:54<00:00, 2.19it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:28<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:41<00:00, 2.48it/s] Computing 251 simulations: 100%|█████████▉| 250/251 [02:15<00:00, 6.73it/s]
Execution time: 869 seconds
Computing 251 simulations: 100%|██████████| 251/251 [02:16<00:00, 1.84it/s]
# Regret figure for the K=5 'Rade' run held in traj1: one curve per policy
# (mean plus the 10%/90% quantile band from paper_plots2), saved as xp_all3.pdf.
plotted = [('SGB', style1), ('TS', style2), ('MED', style3),
           ('UCB1', style4), ('SAMBA', style5)]
legend_text = [
    'SGB, $\\eta=\\frac{2\\Delta}{K}=0.02$',
    'TS',
    'MED',
    'UCB',
    'SAMBA, $\\alpha \\approx \\frac{\\Delta}{2}\\approx 0.025$',
]

plt.figure(figsize=(12, 7))

# Refresh all legend entries first, then draw the curves in legend order.
for (policy_name, policy_style), policy_label in zip(plotted, legend_text):
    policy_style['lgd'] = policy_label
for policy_name, policy_style in plotted:
    paper_plots2(traj1[policy_name], policy_style, 100)

# Axis cosmetics: labelled axes, larger ticks, no top/right frame.
label_size, tick_size = 18, 15
plt.xlabel("Time", fontsize=label_size)
plt.ylabel("Empirical Regret", fontsize=label_size)
plt.xticks(fontsize=tick_size)
plt.yticks(fontsize=tick_size)
ax = plt.gca()
for spine_side in ('top', 'right'):
    ax.spines[spine_side].set_visible(False)

plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_all3.pdf', bbox_inches='tight')
# Monte Carlo comparison of five policies on a 4-armed 'Rade' bandit.
# The two largest entries of dist_params (0.96 and 0.92) differ by `gap`.
T, N = 10000, 5000  # Horizon T, number of trajectories N
K = 4
gap = 0.04
distribs = K * ['Rade']
dist_params = [0.9, 0.96, 0.92, 0.92]

# SAMBA learning rate: (gap/2) / (1 - gap/2); printed for reference.
half_gap = gap/2
alpha = half_gap / (1-half_gap)
print(alpha)

policy_names = ['SGB', 'UCB1', 'MED', 'TS', 'SAMBA']
policy_kwargs = {
    'SGB': {'eta': 2*gap/K},  # SGB learning rate 2*gap/K
    'UCB1': {},
    'MED': {},
    'TS': {},
    'SAMBA': {'alpha': alpha},
}
# Trailing 100 is presumably the recording step (the plots use step=100) -- TODO confirm.
params = (distribs, dist_params, T, N, policy_names, policy_kwargs, 100)
res1, traj1 = xph.multiprocess_MCRad(
    params,
    plot=False,
    pickle_path=pth,
    caption=' test SGB',
)
0.020408163265306124 Running on 20 cpu
Computing 251 simulations: 100%|██████████| 251/251 [02:13<00:00, 1.89it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:15<00:00, 1.86it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:17<00:00, 1.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:18<00:00, 1.81it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:21<00:00, 1.78it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:25<00:00, 1.72it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:29<00:00, 1.68it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:31<00:00, 1.65it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:32<00:00, 1.64it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:33<00:00, 1.63it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:34<00:00, 1.62it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:41<00:00, 1.56it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:42<00:00, 1.55it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:43<00:00, 1.53it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:46<00:00, 1.51it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:47<00:00, 1.50it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:54<00:00, 1.44it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:05<00:00, 3.83it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:09<00:00, 3.60it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:09<00:00, 3.63it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:11<00:00, 3.49it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:00<00:00, 4.15it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:07<00:00, 3.70it/s] Computing 251 
simulations: 100%|██████████| 251/251 [01:12<00:00, 3.47it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:24<00:00, 2.97it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:09<00:00, 3.62it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:17<00:00, 3.25it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:27<00:00, 2.86it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:08<00:00, 3.67it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:14<00:00, 3.38it/s] Computing 251 simulations: 100%|██████████| 251/251 [00:59<00:00, 4.18it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:13<00:00, 3.44it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:12<00:00, 3.46it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:26<00:00, 2.89it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:25<00:00, 2.94it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:28<00:00, 2.83it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:39<00:00, 2.52it/s] Computing 251 simulations: 100%|██████████| 251/251 [06:46<00:00, 1.62s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:50<00:00, 1.63s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:53<00:00, 1.65s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:59<00:00, 1.67s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:48<00:00, 1.63s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:56<00:00, 1.66s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:55<00:00, 1.65s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:56<00:00, 1.66s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:12<00:00, 1.72s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:12<00:00, 1.72s/it] Computing 251 simulations: 100%|██████████| 251/251 [06:54<00:00, 1.65s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:27<00:00, 1.78s/it] Computing 251 simulations: 
100%|██████████| 251/251 [07:06<00:00, 1.70s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:20<00:00, 1.76s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:04<00:00, 1.69s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:04<00:00, 1.69s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:37<00:00, 1.82s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:26<00:00, 1.78s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:23<00:00, 1.77s/it] Computing 251 simulations: 100%|██████████| 251/251 [07:22<00:00, 1.76s/it] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.99it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:21<00:00, 1.77it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.73it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:44<00:00, 1.52it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:08<00:00, 1.96it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:17<00:00, 1.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:01<00:00, 2.06it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.03it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.05it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:46<00:00, 1.50it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:03<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:51<00:00, 1.46it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:54<00:00, 1.44it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:51<00:00, 1.47it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:06<00:00, 1.98it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:04<00:00, 2.02it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:38<00:00, 1.59it/s] Computing 251 simulations: 100%|██████████| 
251/251 [01:58<00:00, 2.11it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:27<00:00, 1.71it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:58<00:00, 2.12it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:22<00:00, 1.76it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:01<00:00, 2.06it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:02<00:00, 2.04it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:12<00:00, 1.90it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:17<00:00, 1.82it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:24<00:00, 1.74it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:21<00:00, 1.77it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:43<00:00, 2.41it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:50<00:00, 2.26it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:55<00:00, 2.17it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:57<00:00, 2.13it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:18<00:00, 1.81it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:23<00:00, 1.75it/s] Computing 251 simulations: 100%|██████████| 251/251 [01:43<00:00, 2.43it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:14<00:00, 1.87it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:00<00:00, 2.08it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:01<00:00, 2.06it/s] Computing 251 simulations: 100%|██████████| 251/251 [02:12<00:00, 1.89it/s] Computing 251 simulations: 100%|█████████▉| 250/251 [01:42<00:00, 6.93it/s]
Execution time: 940 seconds
Computing 251 simulations: 100%|██████████| 251/251 [01:42<00:00, 2.44it/s]
# Regret figure for the K=4 'Rade' run held in traj1: one curve per policy
# (mean plus the 10%/90% quantile band from paper_plots2), saved as xp_all4.pdf.
plotted = [('SGB', style1), ('TS', style2), ('MED', style3),
           ('UCB1', style4), ('SAMBA', style5)]
legend_text = [
    'SGB, $\\eta=\\frac{2\\Delta}{K}=0.02$',
    'TS',
    'MED',
    'UCB',
    'SAMBA, $\\alpha \\approx \\frac{\\Delta}{2}\\approx 0.02$',
]

plt.figure(figsize=(12, 7))

# Refresh all legend entries first, then draw the curves in legend order.
for (policy_name, policy_style), policy_label in zip(plotted, legend_text):
    policy_style['lgd'] = policy_label
for policy_name, policy_style in plotted:
    paper_plots2(traj1[policy_name], policy_style, 100)

# Axis cosmetics: labelled axes, larger ticks, no top/right frame.
label_size, tick_size = 18, 15
plt.xlabel("Time", fontsize=label_size)
plt.ylabel("Empirical Regret", fontsize=label_size)
plt.xticks(fontsize=tick_size)
plt.yticks(fontsize=tick_size)
ax = plt.gca()
for spine_side in ('top', 'right'):
    ax.spines[spine_side].set_visible(False)

plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_all4.pdf', bbox_inches='tight')
Detailed comparison between SGB and SAMBA (Appendix G.4)
We start with the two experiments that compare SGB and SAMBA tuned with learning rates providing the same asymptotic guarantees.
# SGB vs. SAMBA vs. TS on a 4-armed bandit with 'B' arms: one arm at 0.6 and
# the remaining three at 0.5, i.e. a gap of 0.1.
T, N = 10000, 2000  # Horizon T, number of trajectories N
K = 4
gap = 0.1
distribs = K * ['B']
dist_params = [0.6] + 3 * [0.5]
alpha = gap  # SAMBA learning rate set to the gap itself in this comparison

policy_names = ['SGB', 'SAMBA', 'TS']
# Entries for 'UCB1' and 'MED' are kept although those policies are not in
# policy_names for this run.
policy_kwargs = {
    'SGB': {'eta': 2*gap/K},
    'UCB1': {},
    'MED': {},
    'TS': {},
    'SAMBA': {'alpha': alpha},
}
# Trailing 100 is presumably the recording step (the plots use step=100) -- TODO confirm.
params = (distribs, dist_params, T, N, policy_names, policy_kwargs, 100)
res1, traj1 = xph.multiprocess_MC2(
    params,
    plot=False,
    pickle_path=pth,
    caption=' test SGB',
)
Running on 20 cpu
Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 12.75it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 9.92it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 9.64it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 10.42it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 9.40it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 9.27it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 9.14it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 9.05it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 8.81it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 8.28it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 7.67it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 7.95it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 6.26it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 5.73it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:05<00:00, 5.11it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:05<00:00, 5.18it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:05<00:00, 4.75it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 8.84it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:05<00:00, 4.64it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:05<00:00, 4.55it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:05<00:00, 4.41it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 6.28it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 7.33it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 6.71it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 17.04it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 6.02it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 5.76it/s] Computing 26 simulations: 
100%|██████████| 26/26 [00:04<00:00, 5.30it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 6.38it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 5.69it/s] Computing 26 simulations: 8%|▊ | 2/26 [00:00<00:02, 9.51it/s]] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 5.68it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 6.52it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 13.68it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 6.74it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 15.69it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:06<00:00, 4.31it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 7.80it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 9.32it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 6.62it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 14.82it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:03<00:00, 6.74it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 14.51it/s] Computing 26 simulations: 81%|████████ | 21/26 [00:02<00:00, 8.74it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 14.55it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 15.15it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 6.44it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 17.73it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 22.10it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 9.02it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 21.00it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 22.19it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:05<00:00, 5.08it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:01<00:00, 25.41it/s] Computing 26 simulations: 100%|██████████| 26/26 
[00:02<00:00, 10.79it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:02<00:00, 8.96it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:04<00:00, 5.72it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:00<00:00, 26.50it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:00<00:00, 33.24it/s] Computing 26 simulations: 100%|██████████| 26/26 [00:00<00:00, 33.49it/s]
Execution time: 16 seconds
# Regret figure for the SGB / SAMBA / TS comparison held in traj1: one curve
# per policy (mean plus the 10%/90% quantile band drawn by paper_plots2),
# saved as xp_comp1.pdf.
plt.figure(figsize=(12,7))
# Legend values follow the experiment cell right before this one
# (K = 4, gap = 0.1): eta = 2*gap/K = 0.05 and alpha = gap = 0.1.
# The previous labels (0.02, and alpha ~ Delta/2) were left over from the
# preceding figure and did not describe this run.
style1['lgd'] = 'SGB, $\\eta=\\frac{2\\Delta}{K}=0.05$'
style2['lgd'] = 'TS'
style5['lgd'] = 'SAMBA, $\\alpha = \\Delta = 0.1$'
paper_plots2(traj1['SGB'], style1, 100)
paper_plots2(traj1['TS'], style2, 100)
paper_plots2(traj1['SAMBA'], style5, 100)
plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Drop the top/right frame lines for a cleaner figure.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_comp1.pdf', bbox_inches='tight')
# SGB vs. SAMBA vs. TS on a 2-armed bandit with 'B' arms (0.6 vs. 0.5, gap 0.1),
# with a longer horizon and more trajectories than the K=4 comparison.
T, N = 20000, 10000 # Horizon T, number of trajectories N
K = 2
distribs = ['B'] * K
gap = 0.1
dist_params = [0.6] + [0.5] * (K-1)
alpha = gap
# NOTE(review): eta is gap/2 here, whereas the other experiment cells use
# 2*gap/K (which would be 0.1 for K = 2) -- confirm this is intentional.
# The 'UCB1' and 'MED' entries are kept although those policies are not run.
params = (distribs, dist_params, T, N, ['SGB', 'SAMBA', 'TS'],
{'SGB': {'eta': gap/2}, 'UCB1':{}, 'MED': {}, 'TS': {}, 'SAMBA': {'alpha': alpha}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# Persist the trajectories; the context manager guarantees the file is
# flushed and closed even if pickling fails part-way.
with open('compsamba1.pkl', 'wb') as traj_file:
    pkl.dump(traj1, traj_file)
Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [07:09<00:00, 1.17it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:24<00:00, 1.13it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:31<00:00, 1.11it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:34<00:00, 1.10it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:40<00:00, 1.09it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:41<00:00, 1.09it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:44<00:00, 1.08it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:46<00:00, 1.07it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:00<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:02<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:02<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:03<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:04<00:00, 1.03it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:09<00:00, 1.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:10<00:00, 1.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:17<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:28<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:31<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:56<00:00, 1.07s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:42<00:00, 1.16s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:03<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:14<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:26<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:01<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:19<00:00, 1.00it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:39<00:00, 1.04s/it] Computing 501 
simulations: 100%|██████████| 501/501 [08:00<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:17<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:04<00:00, 1.09s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:33<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:55<00:00, 1.07s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:18<00:00, 1.11s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:36<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:48<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:50<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:21<00:00, 1.12s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:04<00:00, 1.09s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:51<00:00, 1.18s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:30<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:34<00:00, 1.15s/it] Computing 501 simulations: 100%|██████████| 501/501 [06:04<00:00, 1.37it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:53<00:00, 1.42it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:59<00:00, 1.39it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:53<00:00, 1.42it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:34<00:00, 1.27it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:53<00:00, 1.21it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:07<00:00, 1.36it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:28<00:00, 1.29it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:29<00:00, 1.52it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:22<00:00, 1.31it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:40<00:00, 1.47it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:36<00:00, 1.49it/s] Computing 501 simulations: 
100%|██████████| 501/501 [06:24<00:00, 1.30it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:47<00:00, 1.23it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:40<00:00, 1.25it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:01<00:00, 1.19it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:52<00:00, 1.21it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:43<00:00, 1.24it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:18<00:00, 1.57it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:55<00:00, 1.70it/s]
Execution time: 1454 seconds
# Figure xp_comp1: regret curves of SGB, TS and SAMBA on a log time axis,
# drawn from the trajectories stored in 'compsamba1.pkl'.
# The context manager closes the file handle as soon as the data is loaded.
with open('compsamba1.pkl', 'rb') as fh:
    traj1 = pkl.load(fh)
plt.figure(figsize=(12,7))
# Raw strings keep the LaTeX backslashes literal; the previous mix of '\\' and
# a bare '\D' relied on an invalid escape sequence (SyntaxWarning on 3.12+).
style1['lgd'] = r'SGB, $\eta=\frac{\Delta}{2}=0.05$'
style2['lgd'] = 'TS'
style5['lgd'] = r'SAMBA, $\alpha = \Delta=0.1$'
# One curve (mean plus 10%/90% quantile band) per policy; 100 is the step
# between two recorded time points passed to paper_plots2.
paper_plots2(traj1['SGB'], style1, 100)
paper_plots2(traj1['TS'], style2, 100)
paper_plots2(traj1['SAMBA'], style5, 100)
plt.xlabel("Time (logarithmic scale)", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Drop the top/right frame lines for a cleaner paper figure.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.xscale('log')
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_comp1.pdf', bbox_inches='tight')
# Second SGB / SAMBA / TS comparison: two Bernoulli arms with a gap of 0.3.
T, N = 20000, 10000 # Horizon T, number of trajectories N
K = 2
distribs = ['B'] * K
gap = 0.3
dist_params = [0.1] + [0.4] * (K-1)
# SAMBA is run with alpha equal to the gap, SGB with eta equal to half the gap.
alpha = gap
# The dict holds per-policy parameters; only the policies listed in
# ['SGB', 'SAMBA', 'TS'] are requested here.
# NOTE(review): the trailing 100 presumably is the recording step, matching the
# step passed to paper_plots2 -- confirm against xph.multiprocess_MC2.
params = (distribs, dist_params, T, N, ['SGB', 'SAMBA', 'TS'],
          {'SGB': {'eta': gap/2}, 'UCB1':{}, 'MED': {}, 'TS': {}, 'SAMBA': {'alpha': alpha}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# Write through a context manager so the file is flushed and closed
# deterministically instead of relying on garbage collection.
with open('compsamba2.pkl', 'wb') as fh:
    pkl.dump(traj1, fh)
Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [06:30<00:00, 1.28it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:31<00:00, 1.28it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:41<00:00, 1.25it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:42<00:00, 1.24it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:45<00:00, 1.24it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:48<00:00, 1.23it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:56<00:00, 1.20it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:57<00:00, 1.20it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:12<00:00, 1.16it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:30<00:00, 1.11it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:46<00:00, 1.07it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:58<00:00, 1.05it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:49<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:50<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:19<00:00, 1.12s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:20<00:00, 1.12s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:34<00:00, 1.15s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:50<00:00, 1.18s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:00<00:00, 1.20s/it] Computing 501 simulations: 100%|██████████| 501/501 [11:00<00:00, 1.32s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:04<00:00, 1.18it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:08<00:00, 1.17it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:04<00:00, 1.18it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:06<00:00, 1.17it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:00<00:00, 1.19it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:13<00:00, 1.16it/s] Computing 501 
simulations: 100%|██████████| 501/501 [07:01<00:00, 1.19it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:02<00:00, 1.19it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:52<00:00, 1.06it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:53<00:00, 1.07s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:55<00:00, 1.05it/s] Computing 501 simulations: 100%|██████████| 501/501 [10:16<00:00, 1.23s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:40<00:00, 1.09it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:17<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:58<00:00, 1.07s/it] Computing 501 simulations: 100%|██████████| 501/501 [11:45<00:00, 1.41s/it] Computing 501 simulations: 100%|██████████| 501/501 [05:21<00:00, 1.56it/s] Computing 501 simulations: 100%|██████████| 501/501 [10:02<00:00, 1.20s/it] Computing 501 simulations: 100%|██████████| 501/501 [05:13<00:00, 1.60it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:11<00:00, 1.61it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:37<00:00, 1.15s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:48<00:00, 1.29s/it] Computing 501 simulations: 100%|██████████| 501/501 [06:09<00:00, 1.36it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:03<00:00, 1.38it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:46<00:00, 1.17s/it] Computing 501 simulations: 100%|██████████| 501/501 [04:24<00:00, 1.90it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:08<00:00, 2.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:55<00:00, 1.05it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:00<00:00, 1.39it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:32<00:00, 2.36it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:55<00:00, 1.69it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:46<00:00, 2.22it/s] Computing 501 simulations: 
100%|██████████| 501/501 [07:13<00:00, 1.16it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:30<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [03:25<00:00, 2.43it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:33<00:00, 1.83it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:30<00:00, 2.38it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:34<00:00, 1.27it/s] Computing 501 simulations: 100%|██████████| 501/501 [02:45<00:00, 3.03it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:55<00:00, 2.13it/s]
Execution time: 1416 seconds
# Figure xp_comp2: regret curves of SGB, TS and SAMBA on a log time axis for
# the gap = 0.3 experiment.
# Reload the trajectories written to 'compsamba2.pkl' by the experiment cell,
# like the other plotting cells do, so the figure cannot be drawn from a stale
# in-memory traj1 (e.g. the 'compsamba1.pkl' data loaded for xp_comp1).
with open('compsamba2.pkl', 'rb') as fh:
    traj1 = pkl.load(fh)
plt.figure(figsize=(12,7))
# The legend must reflect this experiment's parameters: gap = 0.3, hence
# eta = gap/2 = 0.15 and alpha = gap = 0.3 (the previous labels were copied
# from the gap = 0.1 figure). Raw strings keep the LaTeX backslashes literal.
style1['lgd'] = r'SGB, $\eta=\frac{\Delta}{2}=0.15$'
style2['lgd'] = 'TS'
style5['lgd'] = r'SAMBA, $\alpha = \Delta=0.3$'
# One curve (mean plus 10%/90% quantile band) per policy.
paper_plots2(traj1['SGB'], style1, 100)
paper_plots2(traj1['TS'], style2, 100)
paper_plots2(traj1['SAMBA'], style5, 100)
plt.xlabel("Time (logarithmic scale)", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Drop the top/right frame lines for a cleaner paper figure.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.xscale('log')
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_comp2.pdf', bbox_inches='tight')
We now run the experiments that further exploit second-moment knowledge in order to use larger learning rates in the PG policies.
# Second-moment experiment 1: two Bernoulli arms with small means (0.05, 0.04).
T, N = 20000, 10000 # Horizon T, number of trajectories N
K = 2
distribs = ['B'] * K
gap = 0.01
dist_params = [0.05] + [0.04] * (K-1)
# 0.2 is the gap divided by the largest mean (0.01 / 0.05); the same value is
# used both as SGB's eta and as SAMBA's alpha.
alpha = 0.2
# The dict holds per-policy parameters; only the policies listed in
# ['SGB', 'SAMBA', 'TS'] are requested here.
params = (distribs, dist_params, T, N, ['SGB', 'SAMBA', 'TS'],
          {'SGB': {'eta': alpha}, 'UCB1':{}, 'MED': {}, 'TS': {}, 'SAMBA': {'alpha': alpha}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# Write through a context manager so the file is flushed and closed
# deterministically instead of relying on garbage collection.
with open('moment1.pkl', 'wb') as fh:
    pkl.dump(traj1, fh)
Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [06:35<00:00, 1.27it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:45<00:00, 1.24it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:47<00:00, 1.23it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:49<00:00, 1.22it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:59<00:00, 1.19it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:03<00:00, 1.18it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:09<00:00, 1.17it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:23<00:00, 1.13it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:24<00:00, 1.13it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:39<00:00, 1.09it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:39<00:00, 1.09it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:01<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:11<00:00, 1.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:35<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:40<00:00, 1.04s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:45<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:52<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:33<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:44<00:00, 1.17s/it] Computing 501 simulations: 100%|██████████| 501/501 [11:11<00:00, 1.34s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:04<00:00, 1.18it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:03<00:00, 1.18it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:14<00:00, 1.15it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:59<00:00, 1.19it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:04<00:00, 1.03it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:20<00:00, 1.00it/s] Computing 501 
simulations: 100%|██████████| 501/501 [07:18<00:00, 1.14it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:54<00:00, 1.06it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:29<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:37<00:00, 1.10it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:58<00:00, 1.20it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:07<00:00, 1.03it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:34<00:00, 1.15s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:23<00:00, 1.13s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:20<00:00, 1.24s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:17<00:00, 1.23s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:35<00:00, 1.10it/s] Computing 501 simulations: 100%|██████████| 501/501 [10:14<00:00, 1.23s/it] Computing 501 simulations: 100%|██████████| 501/501 [05:18<00:00, 1.57it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:45<00:00, 1.17s/it] Computing 501 simulations: 100%|██████████| 501/501 [05:21<00:00, 1.56it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:08<00:00, 1.62it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:47<00:00, 1.23it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:00<00:00, 1.39it/s] Computing 501 simulations: 100%|██████████| 501/501 [12:57<00:00, 1.55s/it] Computing 501 simulations: 100%|██████████| 501/501 [03:50<00:00, 2.17it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:51<00:00, 1.72it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:06<00:00, 1.37it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:44<00:00, 1.76it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:55<00:00, 1.21it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:06<00:00, 1.64it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:59<00:00, 1.67it/s] Computing 501 simulations: 
100%|██████████| 501/501 [05:42<00:00, 1.46it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:09<00:00, 1.36it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:39<00:00, 2.28it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:14<00:00, 1.15it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:19<00:00, 1.57it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:23<00:00, 1.90it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:15<00:00, 1.96it/s] Computing 501 simulations: 100%|██████████| 501/501 [02:45<00:00, 3.03it/s]
Execution time: 1458 seconds
# Figure xp_moment1: regret curves of SGB, TS and SAMBA (linear time axis),
# drawn from the trajectories stored in 'moment1.pkl'.
# The context manager closes the file handle as soon as the data is loaded.
with open('moment1.pkl', 'rb') as fh:
    traj1 = pkl.load(fh)
plt.figure(figsize=(12,7))
# Raw strings keep the LaTeX backslashes literal; the previous literals relied
# on the invalid escapes '\D' and '\m' (SyntaxWarning on Python 3.12+).
style1['lgd'] = r'SGB, $\eta=\frac{\Delta}{\mu_1}=0.2$'
style2['lgd'] = 'TS'
style5['lgd'] = r'SAMBA, $\alpha = \frac{\Delta}{\mu_1}=0.2$'
# One curve (mean plus 10%/90% quantile band) per policy.
paper_plots2(traj1['SGB'], style1, 100)
paper_plots2(traj1['TS'], style2, 100)
paper_plots2(traj1['SAMBA'], style5, 100)
plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Drop the top/right frame lines for a cleaner paper figure.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_moment1.pdf', bbox_inches='tight')
# Second-moment experiment 2: ten Bernoulli arms, best mean 0.1, gap 0.01.
T, N = 20000, 10000 # Horizon T, number of trajectories N
K = 10
distribs = ['B'] * K
gap = 0.01
dist_params = [0.1] + [0.09] * 2 + [0.08] * 3 + [0.05] * 4
# Learning rates rescaled by the largest mean (0.1):
# alpha = gap / mu_1 for SAMBA and eta = 2 * gap / (K * mu_1) for SGB.
alpha = gap/0.1
eta = 2 * gap/K/0.1
print(alpha, eta)
#Define the parameters and run the experiment
params = (distribs, dist_params, T, N, ['SGB', 'SAMBA', 'TS'],
          {'SGB': {'eta': eta}, 'UCB1':{}, 'MED': {}, 'TS': {}, 'SAMBA': {'alpha': alpha}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# Write through a context manager so the file is flushed and closed
# deterministically instead of relying on garbage collection.
with open('moment2.pkl', 'wb') as fh:
    pkl.dump(traj1, fh)
0.09999999999999999 0.02 Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [07:47<00:00, 1.07it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:47<00:00, 1.07it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:51<00:00, 1.06it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:56<00:00, 1.05it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:56<00:00, 1.05it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:57<00:00, 1.05it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:17<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:30<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:36<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:08<00:00, 1.10s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:24<00:00, 1.13s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:24<00:00, 1.13s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:43<00:00, 1.16s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:50<00:00, 1.18s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:01<00:00, 1.20s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:57<00:00, 1.31s/it] Computing 501 simulations: 100%|██████████| 501/501 [11:24<00:00, 1.37s/it] Computing 501 simulations: 100%|██████████| 501/501 [11:28<00:00, 1.37s/it] Computing 501 simulations: 100%|██████████| 501/501 [11:36<00:00, 1.39s/it] Computing 501 simulations: 100%|██████████| 501/501 [12:00<00:00, 1.44s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:15<00:00, 1.15it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:39<00:00, 1.09it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:15<00:00, 1.15it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:36<00:00, 1.10it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:32<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:53<00:00, 1.06s/it] Computing 501 
simulations: 100%|██████████| 501/501 [07:32<00:00, 1.11it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:01<00:00, 1.08s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:01<00:00, 1.08s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:50<00:00, 1.06it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:35<00:00, 1.15s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:01<00:00, 1.19it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:23<00:00, 1.12s/it] Computing 501 simulations: 100%|██████████| 501/501 [06:41<00:00, 1.25it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:29<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [07:24<00:00, 1.13it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:53<00:00, 1.18s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:33<00:00, 1.02s/it] Computing 501 simulations: 100%|██████████| 501/501 [05:31<00:00, 1.51it/s] Computing 501 simulations: 100%|██████████| 501/501 [10:52<00:00, 1.30s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:23<00:00, 1.24s/it] Computing 501 simulations: 100%|██████████| 501/501 [05:52<00:00, 1.42it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:57<00:00, 1.40it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:19<00:00, 1.57it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:34<00:00, 1.27it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:39<00:00, 1.25it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:44<00:00, 1.46it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:35<00:00, 1.27it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:41<00:00, 1.25it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:52<00:00, 1.71it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:18<00:00, 1.58it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:06<00:00, 1.64it/s] Computing 501 simulations: 
100%|██████████| 501/501 [04:29<00:00, 1.86it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:14<00:00, 1.15it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:53<00:00, 1.42it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:49<00:00, 1.43it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:17<00:00, 1.33it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:24<00:00, 1.90it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:15<00:00, 1.96it/s] Computing 501 simulations: 100%|██████████| 501/501 [05:00<00:00, 1.67it/s]
Execution time: 1555 seconds
# Figure xp_moment2: regret curves of SGB, TS and SAMBA (linear time axis),
# drawn from the trajectories stored in 'moment2.pkl'.
# The context manager closes the file handle as soon as the data is loaded.
with open('moment2.pkl', 'rb') as fh:
    traj1 = pkl.load(fh)
plt.figure(figsize=(12,7))
# Raw strings keep the LaTeX backslashes literal; the previous SGB label relied
# on the invalid escapes '\D' and '\m' (SyntaxWarning on Python 3.12+).
style1['lgd'] = r'SGB, $\eta=\frac{2\Delta}{K\mu_1}=0.02$'
style2['lgd'] = 'TS'
style5['lgd'] = r'SAMBA, $\alpha = \frac{\Delta}{\mu_1}=0.1$'
# One curve (mean plus 10%/90% quantile band) per policy.
paper_plots2(traj1['SGB'], style1, 100)
paper_plots2(traj1['TS'], style2, 100)
paper_plots2(traj1['SAMBA'], style5, 100)
plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Drop the top/right frame lines for a cleaner paper figure.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_moment2.pdf', bbox_inches='tight')
# Second-moment experiment 3: ten Bernoulli arms, best mean 0.05, gap 0.02.
T, N = 20000, 10000 # Horizon T, number of trajectories N
K = 10
distribs = ['B'] * K
gap = 0.02
dist_params = [0.05] + [0.03] * 9
# Learning rates rescaled by the largest mean (0.05):
# alpha = gap / mu_1 for SAMBA and eta = 2 * gap / (K * mu_1) for SGB.
alpha = gap/0.05
eta = 2 * gap/K/0.05
print(alpha, eta)
# The dict holds per-policy parameters; only the policies listed in
# ['SGB', 'SAMBA', 'TS'] are requested here.
params = (distribs, dist_params, T, N, ['SGB', 'SAMBA', 'TS'],
          {'SGB': {'eta': eta}, 'UCB1':{}, 'MED': {}, 'TS': {}, 'SAMBA': {'alpha': alpha}}, 100)
res1, traj1 = xph.multiprocess_MC2(params, plot=False, pickle_path=pth, caption=' test SGB')
# Write through a context manager so the file is flushed and closed
# deterministically instead of relying on garbage collection.
with open('moment3.pkl', 'wb') as fh:
    pkl.dump(traj1, fh)
0.39999999999999997 0.08 Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [08:51<00:00, 1.06s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:06<00:00, 1.09s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:07<00:00, 1.09s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:20<00:00, 1.12s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:23<00:00, 1.12s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:30<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:32<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:32<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:33<00:00, 1.15s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:36<00:00, 1.15s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:36<00:00, 1.15s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:43<00:00, 1.17s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:45<00:00, 1.17s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:51<00:00, 1.18s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:50<00:00, 1.18s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:53<00:00, 1.18s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:03<00:00, 1.20s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:16<00:00, 1.23s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:27<00:00, 1.25s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:41<00:00, 1.28s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:18<00:00, 1.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [08:01<00:00, 1.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:39<00:00, 1.09it/s] Computing 501 simulations: 100%|██████████| 501/501 [09:10<00:00, 1.10s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:38<00:00, 1.03s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:54<00:00, 1.07s/it] Computing 501 
simulations: 100%|██████████| 501/501 [08:47<00:00, 1.05s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:28<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:02<00:00, 1.08s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:28<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:00<00:00, 1.08s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:05<00:00, 1.09s/it] Computing 501 simulations: 100%|██████████| 501/501 [08:25<00:00, 1.01s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:29<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:25<00:00, 1.13s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:18<00:00, 1.12s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:51<00:00, 1.18s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:46<00:00, 1.17s/it] Computing 501 simulations: 100%|██████████| 501/501 [10:21<00:00, 1.24s/it] Computing 501 simulations: 100%|██████████| 501/501 [09:30<00:00, 1.14s/it] Computing 501 simulations: 100%|██████████| 501/501 [06:24<00:00, 1.30it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:50<00:00, 1.22it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:20<00:00, 1.14it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:46<00:00, 1.23it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:28<00:00, 1.12it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:53<00:00, 1.21it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:49<00:00, 1.22it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:17<00:00, 1.33it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:24<00:00, 1.13it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:20<00:00, 1.14it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:12<00:00, 1.35it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:52<00:00, 1.22it/s] Computing 501 simulations: 
100%|██████████| 501/501 [07:27<00:00, 1.12it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:43<00:00, 1.24it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:39<00:00, 1.09it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:38<00:00, 1.26it/s] Computing 501 simulations: 100%|██████████| 501/501 [07:53<00:00, 1.06it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:36<00:00, 1.26it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:12<00:00, 1.34it/s] Computing 501 simulations: 100%|██████████| 501/501 [06:20<00:00, 1.32it/s]
Execution time: 1594 seconds
# Figure xp_moment3: regret curves of SGB, TS and SAMBA (linear time axis),
# drawn from the trajectories stored in 'moment3.pkl'.
# The context manager closes the file handle as soon as the data is loaded.
with open('moment3.pkl', 'rb') as fh:
    traj1 = pkl.load(fh)
plt.figure(figsize=(12,7))
# Raw strings keep the LaTeX backslashes literal; the previous SGB label relied
# on the invalid escapes '\D' and '\m' (SyntaxWarning on Python 3.12+).
style1['lgd'] = r'SGB, $\eta=\frac{2\Delta}{K\mu_1}=0.08$'
style2['lgd'] = 'TS'
style5['lgd'] = r'SAMBA, $\alpha = \frac{\Delta}{\mu_1}=0.4$'
# One curve (mean plus 10%/90% quantile band) per policy.
paper_plots2(traj1['SGB'], style1, 100)
paper_plots2(traj1['TS'], style2, 100)
paper_plots2(traj1['SAMBA'], style5, 100)
plt.xlabel("Time", fontsize=18)
plt.ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Drop the top/right frame lines for a cleaner paper figure.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.legend(loc="upper left", fontsize=20)
plt.savefig('xp_moment3.pdf', bbox_inches='tight')
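We finally run the experiment illustrating that the performance of SGB is sensitive to the rescaling of rewards. We compare "equivalent" Bernoulli and Rademacher problems, in which a Rademacher arm with mean $\mu$ corresponds to a Bernoulli arm with mean $(1+\mu)/2$ (here $0.2 \mapsto 0.6$ and $0 \mapsto 0.5$). In this setting, SGB seems to perform better with Rademacher inputs.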
# Reward-rescaling experiment: SGB alone, run on a Rademacher problem and then
# on a Bernoulli problem, both with K arms and eta = 2 * gap / K.
T, N = 10000, 10000 # Horizon T, number of trajectories N
K = 5

# Rademacher problem: first arm at 0.2, the K-1 others at 0 (gap 0.2).
distribs = ['Rade'] * K
dist_params = [0.2] + (K-1) * [0]
gap = 0.2
params = (distribs, dist_params, T, N, ['SGB'], {'SGB': {'eta': 2*gap/K}}, 100)
res1, traj1 = xph.multiprocess_MC2(
    params,
    plot=False,
    pickle_path=pth,
    caption=' test SGB',
)

# Bernoulli problem: first arm at 0.6, the K-1 others at 0.5 (gap 0.1).
distribs = ['B'] * K
dist_params = [0.6] + (K-1) * [0.5]
gap = 0.1
params = (distribs, dist_params, T, N, ['SGB'], {'SGB': {'eta': 2*gap/K}}, 100)
res2, traj2 = xph.multiprocess_MC2(
    params,
    plot=False,
    pickle_path=pth,
    caption=' test SGB',
)
Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [03:58<00:00, 2.10it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:00<00:00, 2.08it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:09<00:00, 2.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:10<00:00, 2.00it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:10<00:00, 2.00it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:16<00:00, 1.95it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:17<00:00, 1.95it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:17<00:00, 1.95it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:18<00:00, 1.93it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:20<00:00, 1.93it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:23<00:00, 1.90it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:23<00:00, 1.90it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:24<00:00, 1.89it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:26<00:00, 1.88it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:28<00:00, 1.86it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:30<00:00, 1.85it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:32<00:00, 1.84it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:33<00:00, 1.83it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:37<00:00, 1.81it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:37<00:00, 1.81it/s] Computing 501 simulations: 0%| | 0/501 [00:00<?, ?it/s]
Execution time: 277 seconds Running on 20 cpu
Computing 501 simulations: 100%|██████████| 501/501 [03:38<00:00, 2.29it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:47<00:00, 2.20it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:54<00:00, 2.14it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:55<00:00, 2.13it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:55<00:00, 2.13it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:55<00:00, 2.13it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:58<00:00, 2.10it/s] Computing 501 simulations: 100%|██████████| 501/501 [03:58<00:00, 2.10it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:05<00:00, 2.04it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:08<00:00, 2.02it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:09<00:00, 2.01it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:10<00:00, 2.00it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:11<00:00, 2.00it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:11<00:00, 1.99it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:12<00:00, 1.99it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:15<00:00, 1.96it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:15<00:00, 1.96it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:19<00:00, 1.93it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:22<00:00, 1.91it/s] Computing 501 simulations: 100%|██████████| 501/501 [04:23<00:00, 1.90it/s]
Execution time: 263 seconds
# Figure xp_centered: SGB regret with Rademacher rewards (traj1) versus
# Bernoulli rewards (traj2), on a linear time axis.
plt.figure(figsize=(12, 7))
style1.update(lgd='SGB, $\\eta=0.08$, Rademacher rewards')
style2.update(lgd='SGB, $\\eta=0.04$, Bernoulli rewards')
# One curve (mean plus 10%/90% quantile band) per reward model.
for _runs, _look in ((traj1, style1), (traj2, style2)):
    paper_plots2(_runs['SGB'], _look, 100)
ax = plt.gca()
ax.set_xlabel("Time", fontsize=18)
ax.set_ylabel("Empirical Regret", fontsize=18)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
# Hide the top/right frame lines.
for _side in ('top', 'right'):
    ax.spines[_side].set_visible(False)
ax.legend(loc="upper left", fontsize=20)
plt.savefig('xp_centered.pdf', bbox_inches='tight')