\section{Useful Results}
\label{app:use_res}
\subsection{Concentration Inequalities}
\begin{lemma}[Azuma-Hoeffding inequality]\label{lem:ah_ineq}
    Let $X_1, X_2, \ldots$ be a martingale difference sequence with $|X_i| \leq c$ for all $i$. Then for all $\epsilon > 0$ and $n \in \bN$,
    \begin{align}
        \bP\left\{\sum_{i = 1}^{n}{X_i} \geq \epsilon\right\} \leq e^{-\frac{\epsilon^2}{2nc^2}}.
    \end{align}
\end{lemma}
The following inequality is Proposition~A.6.6 of~\cite{van1996weak}.
\begin{lemma}[Bretagnolle-Huber-Carol inequality]\label{lem:bhl_ineq}
    If the random vector $\paren{X_1, X_2, \ldots, X_n}$ is multinomially distributed with parameters $N$ and $\paren{p_1, p_2, \ldots, p_n}$, then for $\eps > 0$
    \begin{align}
        \bP\paren{\sum_{i=1}^{n}{\abs{X_i - N p_i}} \geq 2\sqrt{N} \eps} \leq 2^n e^{-2\eps^2}.
    \end{align}
    Alternatively, for $\delta > 0$
    \begin{align}
        \bP\paren{\sum_{i=1}^{n}{\abs{\frac{X_i}{N} - p_i}} < \sqrt{\frac{2n}{N} \log{\br{\frac{2}{\delta^\frac{1}{n}}}}}} \geq 1 - \delta.
    \end{align}
\end{lemma}

The following is essentially Theorem~1 of~\cite{abbasi2011improved}.
\begin{thm}[Self-Normalized Tail Inequality for Vector-Valued Martingales] \label{thm:self_norm}
    Let $\{\cF_t\}_{t=0}^{\infty}$ be a filtration. Let $\{\eta_t\}_{t=1}^{\infty}$ be a real-valued stochastic process such that $\eta_t$ is $\cF_t$ measurable and $\eta_t$ is conditionally $R$ sub-Gaussian for some $R>0$, i.e., 
    \begin{align*}
        \bE\left[ \exp(\lambda \eta_t) | \cF_{t-1}  \right] \le \exp\left( \lambda^2 R^2 \slash 2  \right), \forall \lambda\in \bR.
    \end{align*}
    Let $\{X_t\}_{t=1}^{\infty}$ be an $\bR^{d}$ valued stochastic process such that $X_t$ is $\cF_{t-1}$ measurable. Assume that $V$ is a $d\times d$ positive definite matrix. For any $t\ge 0$, define
    \begin{align*}
        \bar{V}_t := V + \sum_{s=1}^{t} X_s X^\top_s,
    \end{align*}
    and
    \begin{align*}
        S_t := \sum_{s=1}^{t} \eta_s X_s.
    \end{align*}
    Then, for any $\delta>0$, with probability at least $1-\delta$, for all $t\ge 0$,
    \begin{align*}
        \|S_t\|^{2}_{\bar{V}^{-1}_t} \le 2 R^{2} \log{\br{\frac{\det(\bar{V}_t)^{1\slash 2} \det(V)^{-1\slash 2}}{\delta}}}.
    \end{align*}
\end{thm}

\begin{cor}[Self-Normalized Tail Inequality for Martingales] \label{cor:self_norm_vec}
	Let $\{\cF_i\}_{i=0}^{\infty}$ be a filtration. Let $\{\eta_i\}_{i=1}^{\infty}$ be a real-valued stochastic process such that $\eta_i$ is $\cF_i$ measurable and conditionally $R$ sub-Gaussian for some $R > 0$. Let $\{ X_i \}_{i=1}^{\infty}$ be a $\{0,1\}$-valued stochastic process such that $X_i$ is $\cF_{i-1}$ measurable.
	
	Then, for any $\delta>0$, with probability at least $1-\delta$, for all $k \geq 0$,
	\begin{align*}
		\left|\sum_{i=1}^{k}{\eta_i X_i}\right| \leq R \sqrt{2 \left(1 + \sum_{i=1}^{k}{X_i}\right) \log{\br{\frac{1 + \sum_{i=1}^{k}{X_i}}{\delta}}}} .
	\end{align*}
\end{cor}
\begin{proof}
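	Apply Theorem~\ref{thm:self_norm} with $d = 1$ and $V = 1$. Since $X_s \in \{0,1\}$, we have $X_s^2 = X_s$, and hence $\bar{V}_t = 1 + \sum_{s = 1}^{t}{X_s}$. The theorem then gives $S_t^2 \slash \bar{V}_t \leq 2 R^2 \log{\br{\bar{V}_t^{1\slash 2} \slash \delta}}$ for all $t \geq 0$ with probability at least $1 - \delta$. The claim follows since $\bar{V}_t \geq 1$ implies $\bar{V}_t^{1\slash 2} \leq \bar{V}_t$.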
\end{proof}

\subsection{Other Useful Results}

\begin{lemma}\label{lem:bdd_epi_tool}
    Let $a_0, a_1$ be constants such that $0 < a_0 \leq \frac{a_1}{4}$, and consider the function
    \begin{align*}
        f(x) = a_0 x - \sqrt{a_1 x} - 1.
    \end{align*}
    Then for all $x \geq 1.5\frac{a_1}{a_0^2}$, $f(x) \geq 0$.
\end{lemma}
\begin{proof}
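    Substituting $y = \sqrt{x}$, the condition $f(x) \geq 0$ is equivalent to $a_0 y^2 - \sqrt{a_1} y - 1 \geq 0$, which holds for all $x \geq \paren{\frac{\sqrt{a_1} + \sqrt{a_1 + 4 a_0}}{2 a_0}}^2$. Since $4 a_0 \leq a_1$, we have $\sqrt{a_1 + 4 a_0} \leq \sqrt{2 a_1}$, so this threshold is at most $\frac{(1 + \sqrt{2})^2}{4} \frac{a_1}{a_0^2} \leq 1.5 \frac{a_1}{a_0^2}$. Hence, $f(x) \geq 0$ for all $x \geq 1.5 \frac{a_1}{a_0^2}$.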
\end{proof}

\begin{lemma}\label{lem:bdd_dotdifLv}
    Let $\mu_1$ and $\mu_2$ be two probability measures on $Z$ and let $v$ be an $\bR$-valued bounded function on $Z$. Then, the following holds.
    \begin{align*}
        \abs{\int_{Z}{(\mu_1 - \mu_2)(z) v(z) dz}} \leq \frac{1}{2}\norm{\mu_1 - \mu_2}_{TV} \spn{v}.
    \end{align*}
\end{lemma}
\begin{proof}
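    Denote $\lm(\cdot) := \mu_1(\cdot) - \mu_2(\cdot)$. Now let $Z_+,Z_- \subseteq Z$ be a Hahn decomposition of $Z$ with respect to $\lm$, i.e., $Z_+ \cup Z_- = Z$, $Z_+ \cap Z_- = \emptyset$, $\lm(B) \geq 0$ for every measurable $B \subseteq Z_+$, and $\lm(B) \leq 0$ for every measurable $B \subseteq Z_-$. We have that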
    \begin{align}
        \lm(Z) = \lm(Z_+) + \lm(Z_-) = 0.
    \end{align}
    Also,
    \begin{align}
        \lm(Z_+) - \lm(Z_-) = \norm{\mu_1 - \mu_2}_{TV}.
    \end{align}
    Combining the above two, we get that 
    \begin{align}
        \lm(Z_+) = \frac{1}{2}\norm{\mu_1 - \mu_2}_{TV}.
    \end{align}
    Now,
    \begin{align*}
        \abs{\int_Z{\lm(z) v(z) dz}} &= \abs{\int_{Z_+}{\lm(z) v(z) dz} + \int_{Z_-}{\lm(z) v(z) dz}}\\
        &\leq \abs{\lm(Z_+) \sup_{z \in Z}{v(z)} + \lm(Z_-) \inf_{z \in Z}{v(z)}} \\
        &= \abs{\lm(Z_+) \sup_{z \in Z}{v(z)} - \lm(Z_+) \inf_{z \in Z}{v(z)} + \lm(Z_+) \inf_{z \in Z}{v(z)} + \lm(Z_-) \inf_{z \in Z}{v(z)}} \\
        &= \lm(Z_+) \paren{\sup_{z \in Z}{v(z)} - \inf_{z \in Z}{v(z)}} \\
        &= \frac{1}{2}\norm{\mu_1 - \mu_2}_{TV} \spn{v}.
    \end{align*}
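    Here, the inequality bounds the integral from above using $\lm \geq 0$ on $Z_+$ and $\lm \leq 0$ on $Z_-$; the matching bound on its negative follows by interchanging the roles of $\mu_1$ and $\mu_2$. Hence, we have proven the lemma.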
\end{proof}

\begin{lemma}\label{lem:diff_kern_comp}
    Let $\te_1$ and $\te_2$ be two transition probability kernels of two Markov chains with common state space $\cS$. Let $\max_{s \in \cS}{\norm{\te_1(s,\cdot) - \te_2(s,\cdot)}_{TV}} \leq c$. Then,
    \begin{align*}
        \norm{\te\uc{m}_1(s,\cdot) - \te\uc{m}_2(s,\cdot)}_{TV} \leq m\cdot c,~\forall s \in \cS,~m \in \bN,
    \end{align*}
    where $\te\uc{m}_i$ is the $m$-step transition kernel of the Markov chain with one-step transition kernel $\te_i$ for $i = 1, 2$.
\end{lemma}
\begin{proof}
    We shall prove this using mathematical induction. The base case $m = 1$ holds by assumption. Let us assume that, for every $s \in \cS$,
    \begin{align*}
        \norm{\te\uc{i}_1(s,\cdot) - \te\uc{i}_2(s,\cdot)}_{TV} \leq i\cdot c,~\forall i = 1, 2, \ldots, m-1.
    \end{align*}
    See that
    \begin{align*}
        \norm{\te\uc{m}_1(s,\cdot) - \te\uc{m}_2(s,\cdot)}_{TV} &= \left\lVert \int_{\cS}{\te\uc{m-1}_1(s,s\up) \te_1(s\up,\cdot) ds\up} - \int_{\cS}{\te\uc{m-1}_2(s,s\up) \te_1(s\up,\cdot) ds\up}\right.\\
        &\qquad \left. + \int_{\cS}{\te\uc{m-1}_2(s,s\up) \te_1(s\up,\cdot) ds\up} - \int_{\cS}{\te\uc{m-1}_2(s,s\up) \te_2(s\up,\cdot) ds\up}\right\rVert_{TV} \\
        &\leq 2\sup_{A\in \cB_\cS}{\int_{\cS}{\br{\te\uc{m-1}_1(s,s\up) - \te\uc{m-1}_2(s,s\up)}\te_1(s\up,A) ds\up}} \\
        &\quad + 2\sup_{A\in \cB_\cS}{\int_{\cS}{\te\uc{m-1}_2(s,s\up) \br{\te_1(s\up,A) - \te_2(s\up,A)} ds\up}} \\
        &\leq \norm{\te\uc{m-1}_1(s,\cdot) - \te\uc{m-1}_2(s,\cdot)}_{TV} \sup_{A\in \cB_\cS}{\spn{\te_1(\cdot,A)}} \\
        &\quad + \int_{\cS}{\te\uc{m-1}_2(s,s\up) \norm{\te_1(s\up,\cdot) - \te_2(s\up,\cdot)}_{TV} ds\up} \\
        &\leq \norm{\te\uc{m-1}_1(s,\cdot) - \te\uc{m-1}_2(s,\cdot)}_{TV} + \max_{s\up \in \cS}{\norm{\te_1(s\up,\cdot) - \te_2(s\up,\cdot)}_{TV}},
    \end{align*}
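    where the first inequality follows from the triangle inequality and the definition of total variation distance, the second inequality follows from Lemma~\ref{lem:bdd_dotdifLv} and by taking the supremum inside the integral, and the last inequality holds because $\spn{\te_1(\cdot,A)} \leq 1$ and $\te\uc{m-1}_2(s,\cdot)$ is a probability measure. By the induction hypothesis, the first term on the right-hand side is at most $(m-1)\cdot c$, and the second term is at most $c$ by assumption, so the right-hand side is at most $m\cdot c$. This concludes the proof of the lemma.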
\end{proof}