% !TeX root = ..\freeExp.tex
\section{Numerical Simulations}
\label{sec:simulations}

% 1. Vary the number of free arms
% 2. 50-20-25 normal 
% 3. Vary number of Agents
% 4. Vary number of Local Arms 
% 5. Reward shift case! (new simulation required)

\begin{figure*}[!t]
    \begin{minipage}{0.4\textwidth}
        \centering
        \subfloat[Case (1)]{\includegraphics[width=0.5\linewidth]{figures/algorithmComparison-50TotalArms-20LocalArms-25Agents-50.png}\label{subfig:action-constrained}}
        \subfloat[Case (2)]{\includegraphics[width=0.5\linewidth]{figures/algorithmComparisonShift-50TotalArms-50LocalArms-25Agents-50.png}\label{subfig:heterogeneous-regret}}
        % \vspace{-3mm}
        \caption{\texttt{FreeExp} \textit{vs.} baselines}
        \label{fig:cumulative-regret}
    \end{minipage}
    % \hspace{-2mm}\hfill\vline\hfill
    \begin{minipage}{.6\textwidth}
        \centering
        \subfloat[Vary \# local arms]{\includegraphics[width=0.33\linewidth]{figures/VaryLocalArms-50.png}\label{subfig:vary-local-arms}}
        \subfloat[Vary \# agents]{\includegraphics[width=0.33\linewidth]{figures/VaryAgents-50.png}\label{subfig:vary-agents}}
        \subfloat[Vary \% of free arms]{\includegraphics[width=0.33\linewidth]{figures/VaryFreeArms-50.png}\label{subfig:vary-free-arms}}
        % \vspace{-3mm}
        \caption{Vary parameters of \MATOBHR}
        \label{fig:vary-parameters}
    \end{minipage}
\end{figure*}


\noindent{\bf Baselines: } We report results of numerical experiments that compare \FreeExp to three known cooperative algorithms that do not leverage free exploration:
(1) \texttt{CO-UCB} and (2) \texttt{CO-KLUCB}, extensions of \texttt{UCB} and \texttt{KLUCB} algorithms to cooperative multi-agent scenarios proposed by \citet{yang2022distributed} and \citet{baek2021fair} respectively;
and (3) \NoFreeExp, a variant of \FreeExp that does not make use of free exploration (see Remark~\ref{rmk:nofree-exp-algo}).

\noindent{\bf Experimental setup: }
Unless otherwise specified, we consider a \MATOBHR model with \(M=25\) agents and \(K=50\) arms.
Each arm is associated with a Gaussian distribution
whose arm-specific mean \(\mu(k)\in (0,1)\) is chosen uniformly at random from the click-through rates of
Kaggle's \emph{Ad-Click} dataset~\citep{adclicks}
and with variance \(1/2\).
We consider two special cases of agent-specific reward means:
Case (1) \(\nu^\brai(k)\) is either \(0\) or \(-1\) $\forall k\in \mK, i\in \mM$ (i.e.,  \texttt{AC-MA2B}~\citep{yang2022distributed,baek2021fair} where agents have different local arm sets)
and Case (2) \(\nu^\brai(k)\in(-1/2,1/2)\) $\forall k\in \mK, i\in \mM$ (i.e., all agents have the same local arm sets) as the more general heterogeneous reward scenario.
The variances of all agent-specific rewards are set to \(1/2\).
In the \texttt{AC-MA2B} setting (Case (1)),
for each agent, we randomly select \(20\) of these \(50\) arms and set their agent-specific rewards \(\nu^\brai(k) = 0\), i.e., as local arms. The remaining arms' agent-specific rewards are set to \(\nu^\brai(k) = -1\).
In the heterogeneous reward setting (Case (2)),
all agents have the same \(50\) arms but different agent-specific rewards whose means are drawn uniformly at random from \((-1/2, 1/2)\) for each arm and agent.
All simulations are averaged over \(50\) runs and their standard deviations are plotted as shaded regions.


% \begin{figure}[htp]
%     \centering
%     \subfloat[Constrained action (Case (1))]{\includegraphics[width=0.5\linewidth]{figures/algorithmComparison-50TotalArms-20LocalArms-25Agents-50.png}\label{subfig:action-constrained}}
%     \subfloat[Heteorgeneous reward (2)]{\includegraphics[width=0.5\linewidth]{figures/algorithmComparisonShift-50TotalArms-50LocalArms-25Agents-20.png}\label{subfig:heterogeneous-regret}}
%     \vspace{-3mm}
%     \caption{\texttt{FreeExp} \textit{vs.} baselines}
%     \label{fig:cumulative-regret}
% \end{figure}

% \begin{figure*}[htp]
%     \centering
%     \subfloat[Vary number of local arms]{\includegraphics[width=0.33\linewidth]{figures/VaryLocalArms-50.png}\label{subfig:vary-local-arms}}
%     \subfloat[Vary number of agents]{\includegraphics[width=0.33\linewidth]{figures/VaryAgents-18.png}\label{subfig:vary-agents}}
%     \subfloat[Vary percentage of free arms]{\includegraphics[width=0.33\linewidth]{figures/VaryFreeArms-50.png}\label{subfig:vary-free-arms}}
%     \vspace{-3mm}
%     \caption{Vary parameters of \MATOBHR}
%     \label{fig:vary-parameters}
% \end{figure*}





\noindent{\bf Experimental results: }
In Figures~\ref{subfig:action-constrained} and~\ref{subfig:heterogeneous-regret}, we compare the cumulative regret of all algorithms in Cases (1) and (2).
The notable observations are:
(1) Comparison of \FreeExp to \NoFreeExp shows that utilizing the free exploration mechanism can further improve an algorithm's performance.
(2) The \texttt{KLUCB} algorithm outperforms our \FreeExp algorithm.
This is because \FreeExp needs to explicitly exclude arms likely to be locally optimal (Line~\ref{line:remove-free-exploration-arms}) and thus suffers a high time-independent cost at the beginning, while \texttt{KLUCB} does not;
and the additional cost of \FreeExp cannot be compensated by the advantage of \FreeExp in saving cost on free arms in these two scenarios.
In particular, we note that
when the number of free arms is large (e.g., see the \(100\%\) free-arm case in Figure~\ref{subfig:vary-free-arms} below), the advantage of \FreeExp in saving cost on free arms becomes significant and, therefore, \texttt{FreeExp} has similar performance to \texttt{KLUCB}.


We report the results of varying the parameters of \MATOBHR (Case (1)) in Figure~\ref{fig:vary-parameters}.
In Figure~\ref{subfig:vary-local-arms}, we vary the number of local arms between %\(\{10, 15, 20, 25, 30\}\) 
\(10\) and \(45\) and
report their cumulative regret at round 30K.
The regrets of all algorithms increase linearly with respect to the number of local arms.
Figure~\ref{subfig:vary-agents} shows the impact of the number of agents \(M\) (from \(10\) agents to \(50\)) on the regrets.
The regrets also increase linearly in \(M\), which is due to the fixed per-agent costs (independent of \(T\)).
Lastly, we consider an \MATOBHR consisting of \(M=20\) agents and \(K=20\) arms, and devise four cases containing \(\{5, 10, 15, 20\}\) free arms respectively (i.e., \(25\%, 50\%, 75\%, 100\%\) of all arms are free arms).
We report their regret performance in Figure~\ref{subfig:vary-free-arms}.
The notable observations are:
(1) The regret of \FreeExp decreases as the percentage of free arms increases, which corroborates that \FreeExp saves the costs due to pulling free arms.
(2) When all (\(100\%\)) arms are free, \FreeExp has similar performance to \texttt{KLUCB} and outperforms other algorithms.



