\begin{proof}
Consider the following $n + 1$ bandit instances, with $n$ arms labeled $a_1, a_2, \dots, a_n$. The reward distribution for each arm follows a Normal distribution with variance one, and mean specified as follows.

\textbf{Bandit instance 0:}
\begin{itemize}[noitemsep, topsep=0pt]
    \item $\mu_{m, a} = 0$ for all arms $a = a_1, \dots, a_n$ and for all $m =  1, \dots, K$.
\end{itemize}

\textbf{Bandit instance $j$ for $j = 1, \dots, n$:}
\begin{itemize}[noitemsep, topsep=0pt]
    \item $\mu_{m, a} = \frac{\Delta}{P_a \gamma_{m, a}}$ for arm $a = a_j$, and for all $m =  1, \dots, K$.
    \item $\mu_{m, a} = 0$ for all arms $a \neq a_j$, and for all $m =  1, \dots, K$.
\end{itemize}

\vspace{1em} % Add some vertical space before this section

\textbf{For each instance \( j \in \{1, \dots, n\}\):}
\begin{itemize}[noitemsep, topsep=0pt]
    \item If $a = a_j$: \( \mu_a = \sum_{m \in [K]} p_{m,a} \mu_{m, a} = \Delta \).
    \item If $a \neq a_j$: \( \mu_a = 0 \).
\end{itemize}

\vspace{0.75em} % Add space between this and the next section

\textbf{For instance 0:}
\begin{itemize}[noitemsep, topsep=0pt]
    \item For all $a \in \{a_1, \dots, a_n\}$: \( \mu_a = 0 \).
\end{itemize}

As in the previous proof, we use the lemmas from~\cite{lattimore2020bandit} to complete the argument.

As before, for every \( a = 1, \dots, n \) we have:
\begin{align*}    
\text{KL}(P_0, P_a) &= \mathbb{E}_0 [T_a] \text{KL}(P_0(a), P_a(a)) \\
&= \mathbb{E}_0 [T_a] \sum\limits_{m \in [K]} p_{m,a} \gamma_{m, a} \frac{\Delta^2}{2P_a^2 \gamma_{m, a}^2} \\
&= \mathbb{E}_0 [T_a] \frac{\Delta^2}{2P_a^2} \sum\limits_{m \in [K]} \frac{p_{m,a}}{\gamma_{m, a}} \\
&= \mathbb{E}_0 [T_a] \frac{\Delta^2}{2P_a}
\end{align*}

From this, we can bound $\mathbb{E}_a [T_a(T)]$ as follows:
\begin{align*}
\mathbb{E}_a [T_a(T)] &\leq \mathbb{E}_0 [T_a(T)] + T d_{\text{TV}}(P_0, P_a) \\
&\leq \mathbb{E}_0 [T_a(T)] + T \sqrt{\frac{1}{2} \text{KL}(P_0, P_a)} \\
&= \mathbb{E}_0 [T_a(T)] + \frac{T}{2} \sqrt{\mathbb{E}_0 [T_a] \frac{\Delta^2}{P_a}}
\end{align*}

Let $R_i = R_T(\pi; i)$ denote the regret of applying policy $\pi$ on the $i$-th bandit instance up to time $T$.

Summing over all bandit instances \(1, \dots, n\), we have:
\begin{align*}
    \sum_{i=1}^n \mathbb{E}[R_i] 
    &= \sum\limits_{a \in [n]} \Delta \left(T - \mathbb{E}_a[T_a(T)]\right) \\
    &\geq \Delta Tn - \Delta \sum\limits_{a \in [n]} \left( \mathbb{E}_0 [T_a(T)] + \frac{T}{2} \sqrt{\mathbb{E}_0 [T_a] \frac{\Delta^2}{P_a}} \right) \\
    &\geq \Delta Tn - \Delta T - \frac{\Delta^2 T}{2} \sum\limits_{a \in [n]} \sqrt{\frac{\mathbb{E}_0 [T_a(T)]}{P_a}} \\
    &\geq \Delta Tn - \Delta T - \frac{\Delta^2 T}{2} \sqrt{T \sum\limits_{a \in [n]} \frac{1}{P_a}} \\
    &\geq \frac{\Delta Tn}{2} - \frac{\Delta^2 T}{2} \sqrt{T \sum\limits_{a \in [n]} \frac{1}{P_a}} \quad \text{since } n \geq 2 \\
    &= \frac{n}{8} \sqrt{\frac{Tn^2}{\sum\limits_{a \in [n]} \frac{1}{P_a}}} \quad \text{using } \Delta = \frac{n}{2\sqrt{T\sum\limits_{a \in [n]} \frac{1}{P_a}}}
\end{align*}

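Since the sum over the $n$ instances is at least this quantity, at least one instance $i$ satisfies \( \mathbb{E}[R_i] \geq \frac{1}{8} \sqrt{\frac{Tn^2}{\sum\limits_{a \in [n]} \frac{1}{P_a}}} \), that is, \( \mathbb{E}[R_i] = \Omega\left( \sqrt{\frac{Tn^2}{\sum\limits_{a \in [n]} \frac{1}{P_a}}} \right) = \Omega\left( \sqrt{TnH} \right) \).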
\end{proof}