% \newpage
\appendix
\input{./appendix/related work}
\input{./appendix/notation}

\newpage
\section{Omitted Details for the Algorithm}
\label{appen B: omitted proof}
\input{./appendix/algorithms}

\newpage
\section{Omitted Details for the Analysis}
\paragraph{Important Notations.}
For convenience, we define 
\begin{equation}\label{def: visitxatotalhat}
    \visitxatotalhat=\max\cbr{\visitxatotalpri,4\confcountxa+7\ln\iota},
\end{equation}
and it can be verified that the confidence width defined in Eq.~\ref{eq: confidence set of transition} can be equivalently written as
\begin{align}
\confpwtrans_{\episode}\rbr{\state'\vert\state,\action}  &=  \min\cbr{ 1, \sqrt{\frac{2\transprieasy_\episode\rbr{\state'\vert\state,\action}\rbr{1-\transprieasy_\episode\rbr{\state'\vert\state,\action}} \ln\iota}{\visitxatotalpri}} +  \frac{4\confcountxax+7\ln\iota}{\visitxatotalpri}} \notag\\
&=  \min\cbr{ 1, \sqrt{\frac{2\transprieasy_\episode\rbr{\state'\vert\state,\action}\rbr{1-\transprieasy_\episode\rbr{\state'\vert\state,\action}} \ln\iota}{\visitxatotalhat}} +  \frac{4\confcountxax+7\ln\iota}{\visitxatotalhat}} \notag
\end{align}
% can be equivalently written as 
since whenever $\visitxatotalhat\neq\visitxatotalpri$, the two definitions both lead to a value of $1$.

\input{./appendix/proof of full-info setting}
\input{./appendix/proof of bandit setting}

\section{Supplementary Lemmas}
\input{./appendix/useful mathmatics}
\input{./appendix/high probability events}
\input{./appendix/difference lemma}

\section{Missing Details for Section~\ref{sec: privacy and regret guarantees}}
\label{appen: privacy guarantee proof}
\input{./appendix/privacy guarantee}

% \section{Experiments}
% \label{app sec: Experiments}
% \input{./Sections/6-Experiments}