\newcommand{\norm}[1]{\| #1 \|}
\begin{proof}
We follow the same approach used in the previous upper-bound proofs to derive an upper bound on the estimation error of \( \mu_a \).

At each time step \( 1 \leq t \leq T_2 \), define:

\begin{align*}
&b_a = [\mathbb{P}(m, O^Y = 0 \mid a)]_{K \times 1} \\
&\Theta_a = [\mathbb{P}(m, y, O^Y = 1 \mid a)]_{K \times L} \\
&x_a = \left[\frac{\mathbb{P}(O^Y = 0 \mid y, a)}{\mathbb{P}(O^Y = 1 \mid y, a)}\right]_{L \times 1}
\end{align*}

Since we know that \( \Theta_a x_a = b_a \), we now invoke Theorem~2.2 from~\cite{higham1994survey}, which states:

\textbf{Theorem 2.2.} Let \(Ax = b\) and \((A + \Delta A) y = b + \Delta b\), where \(\|\Delta A\| \leq \epsilon \|E\|\) and \(\|\Delta b\| \leq \epsilon \|f\|\), and assume that \(\epsilon \|A^{-1}\| \|E\| < 1\). Then:

\begin{equation}
    \label{eq:mnar_survey_theo}
    \frac{\|x - y\|}{\|x\|} \leq \frac{\epsilon}{1 - \epsilon \|A^{-1}\| \|E\|}
    \left(
    \frac{\|A^{-1}\| \|f\|}{\|x\|} + \|A^{-1}\| \|E\|
    \right),
\end{equation}

and this bound is attainable to first order in \(\epsilon\).

For each entry of \( b_a \) or \( \Theta_a \), we have \( T_a \) samples. By applying Hoeffding's inequality and following the same approach as in the proofs of previous theorems, we set \( \epsilon = \sqrt{\frac{\alpha \log(T)}{2 T_a}} \). Consequently, we obtain the following bounds (all norms are \( \|\cdot\|_\infty \)):

\begin{align*}
    \| \hat{b}_a - b_a \| &\leq \epsilon, \\
    \| \hat{\Theta}_a - \Theta_a \| &\leq L \epsilon,
\end{align*}

with probability at least \( 1 - 2 K \times (L + 1) t^{-\alpha} \).

Now, under the event described above and using \eqref{eq:mnar_survey_theo}, we have:

\[
    \frac{\|x_a - \hat{x}_a\|}{\|x_a\|} \leq \frac{\epsilon}{1 - \epsilon L \| \Theta_a^{-1}\|}
    \left(
    \frac{\|\Theta_a^{-1}\|}{\|x_a\|} + L \|\Theta_a^{-1}\|
    \right).
\]

We have \( x_a = \left[\frac{1 - \gamma_{y,a}}{\gamma_{y,a}}\right]_{L \times 1} \), and therefore \( \|x_a\| = \frac{1 - \gamma_a}{\gamma_a} \). For sufficiently large \( T \) and for \( T_a \geq \log(T)^2 \), it follows that \( \epsilon L \|\Theta_a^{-1}\| \leq \frac{1}{2} \), leading to:

\[
    \|x_a - \hat{x}_a\| 
    \leq 2 \epsilon 
    \left(
    \|\Theta_a^{-1}\| + L \|\Theta_a^{-1} \| \frac{1 - \gamma_a}{\gamma_a}
    \right)
    = 2 \epsilon \|\Theta_a^{-1}\|
    \left(
    \frac{L}{\gamma_a} - (L - 1)
    \right)
    \leq 2 \epsilon \frac{L}{\gamma_a} \|\Theta_a^{-1}\|.
\]

Now, since \( \|\hat{\Theta}_a - \Theta_a\| \leq L \epsilon \), for sufficiently large \( T \) and \( T_a \geq \log(T)^2 \), we have \( L \epsilon \leq \frac{\|\Theta_a\|}{2} \). Hence:

\[
\frac{\|\Theta_a\|}{2} \leq \|\hat{\Theta}_a\| \leq 2 \|\Theta_a\|,
\]

which implies

\[
\|\Theta_a^{-1}\| = \frac{\kappa(\Theta_a)}{\|\Theta_a\|} \leq \frac{2\kappa(\Theta_a)}{\|\hat{\Theta}_a\|} \leq \frac{2C_a}{\|\hat{\Theta}_a\|}.
\]

Thus:

\begin{equation}
\label{eq:mnar_eq2}
\norm{x_a - \hat{x}_a} \leq  4 \epsilon \frac{L C_a}{\gamma_a \norm{\hat{\Theta}_a}}.
\end{equation}

For sufficiently large \( T \) and \( T_a \geq \log(T)^2 \), we will have:
\[
\norm{x_a - \hat{x}_a} \leq \frac{1}{2\gamma_a}
\]
so:

\[
\frac{1}{\hat{\gamma}_a} = \norm{\hat{x}_a + [1]_{L \times 1}} \geq \norm{x_a + [1]_{L \times 1}} - \frac{1}{2\gamma_a} = \norm{[\frac{1}{\gamma_{y, a}}]_{L \times 1}} - \frac{1}{2\gamma_a} = \frac{1}{2\gamma_a}.
\]

Using \eqref{eq:mnar_eq2}, we have:
\[
\norm{x_a - \hat{x}_a} \leq  8 \epsilon \frac{L C_a}{\hat{\gamma}_a \norm{\hat{\Theta}_a}}.
\]

Since \( x_a + [1]_{L \times 1} = [\frac{1}{\gamma_{y, a}}]_{L \times 1} \), for every \( y \), we have:
\[
\left|\frac{1}{\gamma_{y, a}} - \frac{1}{\hat{\gamma}_{y, a}}\right| \leq  8 \epsilon \frac{L C_a}{\hat{\gamma}_a \norm{\hat{\Theta}_a}}.
\]

Now let \( p_{m, y \mid 1, a}  = \mathbb{P}(m, y \mid O^Y = 1, a) \), and let \( q_{m, y \mid 1, a} \) denote its empirical estimate. By applying Hoeffding's inequality, we have the following inequality for all \( m, y \):

\[
    |q_{m, y \mid 1, a} - p_{m, y \mid 1, a}| \leq \sqrt{\frac{\alpha \log(T)}{2 T_{a, o}}}
\]
with probability at least \(1 - 2K L t^{-\alpha} \).
Using the fact that \( \frac{p_{m, y \mid 1, a}}{\gamma_{y, a}} = \mathbb{P}(m, y \mid a) = p_{m, y \mid a} \), we have:
\begin{align*}
\left|p_{m, y \mid a} - \hat{p}_{m, y \mid a}\right|
&= \left| \frac{p_{m, y \mid 1, a}}{\gamma_{y, a}} - \frac{q_{m, y \mid 1, a}}{\hat{\gamma}_{y, a}} \right| \\
&\leq  p_{m, y \mid a} \left|\frac{1}{\gamma_{y, a}} - \frac{1}{\hat{\gamma}_{y, a}}\right| + \frac{1}{\hat{\gamma}_{a}} \left|q_{m, y \mid 1, a} - p_{m, y \mid 1, a}\right| \\
&\leq  8 p_{m, y \mid a} \epsilon \frac{L C_a}{\hat{\gamma}_a \norm{\hat{\Theta}_a}} + \frac{1}{\hat{\gamma}_{a}} \sqrt{\frac{\alpha \log(T)}{T_{a, o}}}.
\end{align*}

Summing up over \( m \), we have:
\begin{align*}
\left|p_{y \mid a} - \hat{p}_{y \mid a}\right|
 &\leq 8 p_{y \mid a} \epsilon \frac{L C_a}{\hat{\gamma}_a \norm{\hat{\Theta}_a}} + \frac{K}{\hat{\gamma}_{a}} \sqrt{\frac{\alpha \log(T)}{T_{a, o}}} \\
 &\leq 8 \epsilon \frac{L C_a}{\hat{\gamma}_a \norm{\hat{\Theta}_a}} + \frac{K}{\hat{\gamma}_{a}} \sqrt{\frac{\alpha \log(T)}{T_{a, o}}}.
\end{align*}

Thus, using \( \sum\limits_{y} |y| = 1\):

\begin{align*}
| \mu_a - \hat{\mu}_a|
&\leq \sum\limits_{y} |y| \left|p_{y \mid a} - \hat{p}_{y \mid a}\right| \\
&\leq \sum\limits_{y} |y| \left( 8 \epsilon \frac{L C_a}{\hat{\gamma}_a \norm{\hat{\Theta}_a}} + \frac{K}{\hat{\gamma}_{a}} \sqrt{\frac{\alpha \log(T)}{T_{a, o}}} \right) \\
&= 8 \epsilon \frac{L C_a}{\hat{\gamma}_a \norm{\hat{\Theta}_a}} + \frac{K}{\hat{\gamma}_{a}} \sqrt{\frac{\alpha \log(T)}{T_{a, o}}} = \epsilon_a.
\end{align*}

Hence, \( \text{UCB}(a) = \hat{\mu}_a + \epsilon_a \), and using previous proofs, we conclude that \( \epsilon_a \geq \frac{\Delta_a}{2} \). To finalize the proof:

\[
\norm{\hat{\Theta}_a} \geq \frac{\norm{\Theta_a}}{2},
\qquad
\hat{\gamma}_a \geq  \frac{\gamma_a}{2},
\]

and we have \( \mathbb{P}(O^Y = 1 \mid a) = \sum\limits_{y} p_{y,a}\gamma_{y,a} \). Applying Hoeffding's inequality gives:

\[
\left|\frac{T_{a, o}}{T_a} - \sum\limits_{y} p_{y,a}\gamma_{y,a}\right| \leq \epsilon,
\]

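which, for sufficiently large \( T \) and \( T_a \geq \log(T)^2 \) (so that \( \epsilon \leq \frac{1}{2} \sum\limits_{y} p_{y,a}\gamma_{y,a} \)), implies:

\[
    T_{a, o} \geq T_a \left(\sum\limits_{y} p_{y,a}\gamma_{y,a} - \epsilon\right) \geq \frac{T_a}{2} \sum\limits_{y} p_{y,a}\gamma_{y,a}.
\]

Finally, using \( \epsilon = \frac{1}{\sqrt{2}} \sqrt{\frac{\alpha \log(T)}{T_a}} \) together with the lower bounds on \( \norm{\hat{\Theta}_a} \), \( \hat{\gamma}_a \), and \( T_{a, o} \) above, we have:

\[
\epsilon_a \leq \sqrt{\frac{\alpha \log(T)}{T_a}}
\left( 16\sqrt{2}\, \frac{L C_a}{\gamma_a \norm{\Theta_a}} + 2\sqrt{2}\, \frac{K}{\gamma_{a}} \sqrt{\frac{1}{\sum\limits_{y} p_{y,a}\gamma_{y,a}}} \right)
\leq 26 \sqrt{\frac{\alpha \log(T)}{T_a}}
\max\left( \frac{L C_a}{\gamma_a \norm{\Theta_a}}, \frac{K}{\gamma_{a}} \sqrt{\frac{1}{\sum\limits_{y} p_{y,a}\gamma_{y,a}} }\right),
\]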

which, following the exact steps of previous proofs, leads to:

\[
\mathbb{E}[R_T] = O\left( \sqrt{\alpha T \log(T) \sum\limits_{a} S_a^2} \right).
% = O\left( \sqrt{\alpha T \log(T) \sum\limits_{a} n S} \right).
\]

\end{proof}