\begin{algorithm}[H]
\caption{MCAR algorithm}\label{alg:mcar_algorithm}
\begin{algorithmic}[1]
\State \textbf{Input:} Number of arms $n$, time horizon $T$, $\alpha \geq 1$
\State Initialize: $\hat{\mu}_a = 0$ for all arms $a = 1, 2, \dots, n$ \Comment{initial mean reward estimate for each arm}
\State Set: $T_{a, o} = 0$ for all arms $a = 1, 2, \dots, n$ \Comment{number of times each arm is pulled and reward is observed}



\For{each round $t = 1, 2, \dots, T$}
    \For{each arm $a = 1, 2, \dots, n$}
        \State $\text{UCB}_a(t) = \hat{\mu}_a + \sqrt{\frac{\alpha \log(T)}{2 T_{a, o}}}$ \Comment{with the convention $\text{UCB}_a(t) = +\infty$ if $T_{a, o} = 0$}

    \EndFor
    \State Select arm $a_t = \arg \max_a \text{UCB}_a(t)$
    \State Pull arm $a_t$; the reward $r_t$ may or may not be observed
    \If{reward is observed}
        \State Increment $T_{a_t, o}$ and update $\hat{\mu}_{a_t}$ with $r_t$
    \EndIf
\EndFor
\end{algorithmic}
\end{algorithm}