\begin{proof}
Consider the following $n + 1$ bandit instances, with $n$ arms labeled $a_1, a_2, \dots, a_n$. The reward distribution for each arm follows a Normal distribution with a variance of 1.

\textbf{Bandit instance $0$:}
\begin{itemize}
    \item $\mathbb{E}[Y(a)] = 0$ for all $a = a_1, \dots, a_n$.
\end{itemize}

\textbf{Bandit instance $k$ for $k = 1, \dots, n$:}
\begin{itemize}
    \item $\mathbb{E}[Y(a)] = \Delta$ for $a = a_k$.
    \item $\mathbb{E}[Y(a)] = 0$ for $a \neq a_k$.
\end{itemize}

Next, we present key lemmas adapted from~\cite{lattimore2020bandit} to complete our analysis.

\textbf{Divergence Decomposition:}  
Let $\nu = (P(1), \dots, P(n))$ and $\nu' = (P'(1), \dots, P'(n))$ represent the reward distributions for two $n$-armed bandits. For a fixed policy $\pi$, let $P_\nu = P_{\nu,\pi}$ and $P_{\nu'} = P_{\nu',\pi}$ be the probability measures induced by the $T$-round interaction of $\pi$ with $\nu$ and $\nu'$, and let $T_i(T)$ denote the number of times arm $i$ is pulled during these $T$ rounds. Then:
\[
\text{KL}(P_\nu, P_{\nu'}) = \sum_{i=1}^{n} \mathbb{E}_\nu[T_i(T)] \text{KL}(P(i), P'(i)).
\]

\textbf{Pinsker's Inequality:}  
For probability measures $P$ and $Q$ on the same measurable space $(\Omega, \mathcal{F})$, the total variation distance is bounded by:
\[
d_\text{TV}(P, Q) = \sup_{A \in \mathcal{F}} |P(A) - Q(A)| \leq \sqrt{\frac{1}{2} \text{KL}(P, Q)}.
\]

\textbf{Total Variation Bound:}  
Let $(\Omega, \mathcal{F})$ be a measurable space, and let $P$ and $Q$ be probability measures on $\mathcal{F}$. For any $\mathcal{F}$-measurable random variable $X : \Omega \to [a, b]$, we have:
\[
\left| \int_\Omega X(\omega) dP(\omega) - \int_\Omega X(\omega) dQ(\omega) \right| \leq (b - a) d_\text{TV}(P, Q).
\]

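Now, in our setup with missing observations, the observation tuple is $(O^Y, Y)$. Let $P_0$ and $P_i$ denote the probability measures induced by the interaction of $\pi$ with bandit instances $0$ and $i$, respectively. Since these two instances differ only in the reward distribution of arm $a_i$, the divergence decomposition gives:
\[
\text{KL}(P_0, P_i) = \mathbb{E}_0 [T_i(T)] \text{KL}(P_0(i), P_i(i)) = \mathbb{E}_0 [T_i(T)] \frac{\gamma \Delta^2}{2}.
\]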

From this, we can bound $\mathbb{E}_i [T_i(T)]$ by applying the total variation bound to $X = T_i(T) \in [0, T]$, followed by Pinsker's inequality:
\begin{align*}
\mathbb{E}_i [T_i(T)] &\leq \mathbb{E}_0 [T_i(T)] + T d_\text{TV}(P_0, P_i) \\
&\leq \mathbb{E}_0 [T_i(T)] + T \sqrt{\frac{1}{2} \text{KL}(P_0, P_i)} \\
&= \mathbb{E}_0 [T_i(T)] + T \sqrt{\frac{1}{2} \cdot \frac{\gamma \Delta^2}{2} \mathbb{E}_0 [T_i(T)]} \\
&= \mathbb{E}_0 [T_i(T)] + \frac{T}{2} \sqrt{\gamma \Delta^2 \mathbb{E}_0 [T_i(T)]}.
\end{align*}

Let $R_i = R_T(\pi; i)$ denote the regret of applying policy $\pi$ on the $i$-th bandit instance up to time $T$.

Summing over all bandit instances, and using $\sum_{i=1}^n \mathbb{E}_0 [T_i(T)] = T$, the Cauchy--Schwarz inequality $\sum_{i=1}^n \sqrt{\mathbb{E}_0 [T_i(T)]} \leq \sqrt{Tn}$, and $n \geq 2$ (so that $\Delta Tn - \Delta T \geq \frac{\Delta Tn}{2}$), we have:
\begin{align*}
\sum_{i=1}^n \mathbb{E}[R_i] 
&= \sum_{i=1}^n \Delta(T - \mathbb{E}_i[T_i(T)]) \\
&\geq \Delta Tn - \Delta \sum_{i=1}^n \left(\mathbb{E}_0 [T_i(T)] + \frac{T}{2} \sqrt{\gamma \Delta^2 \mathbb{E}_0 [T_i(T)]}\right) \\
&\geq \Delta Tn - \Delta T - \frac{\Delta^2 T}{2} \sqrt{\gamma Tn} \\
&\geq \frac{\Delta Tn}{2} - \frac{\Delta^2 T}{2} \sqrt{\gamma Tn} \\
&= \frac{Tn^2}{8\sqrt{\gamma Tn}} = \frac{n}{8} \sqrt{\frac{Tn}{\gamma}} \quad \text{using } \Delta = \frac{n}{2\sqrt{\gamma Tn}}.
\end{align*}

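Thus, since the maximum over the $n$ instances is at least their average, there exists an instance $i$ with $\mathbb{E}[R_i] \geq \frac{1}{8}\sqrt{\frac{Tn}{\gamma}} = \Omega\left(\sqrt{\frac{Tn}{\gamma}}\right)$.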
\end{proof}
