\newpage
\section{Table of Notation}
%%%%%%%%%%%%%%%%%%% For Publication %%%%%%%%%%%%%%%%%%%%%%%%%%%  
\begin{table}[!h]
\small
\centering
% \begin{tabular*}{\linewidth}{|c|l|}
\begin{tabular}{cl}
   \toprule
   Symbol & Description  \\
   \midrule
   $\statespace$ & state space with cardinality $\statesize$  \\
   $\actionspace$ & action space with cardinality $\actionsize$  \\
   $\transeasy$ & transition function\\
   % $\episode$ & episode \\
   $\episodetotal$ & number of episodes   \\
   % $\horizon$ & episode length  \\
   $\horizontotal$ & episode length \\
   % $\timetotal$ & time total number \\
   $\occmeasureset\rbr{\transeasy}$ & the set of occupancy measures induced by transition $\transeasy$ \\
   $\transspace$ & confidence set of transition function \\
   $\policy_\episode$ & policy at episode $\episode$  \\
   $\occmeasure^{\transeasy,\policy_\episode}$ & occupancy measure of policy $\policy_\episode$ under transition $\transeasy$  \\
   $\transeasy_\episode$ & transition function induced by occupancy measure $\occmeasure_\episode$ at episode $\episode$  \\
   $\occmeasure^{\transeasy_\episode,\policy_\episode}$ & occupancy measure of policy $\policy_\episode$ under transition $\transeasy_\episode$, i.e., $\occmeasure_\episode$  \\
   $\uppocc_\episode$ & upper occupancy measure within $\occmeasureset\rbr{\transspace_\episode}$\\
   $\policy^*$ & optimal policy \\
   $\occmeasure^*$ & optimal occupancy measure \\
   % $\valuef$ & value function of one episode  \\
   % $\regret$ & regret  \\
   $\pripara$ & privacy budget  \\
   $\delta$ & failure probability \\
   $\FTRLpara$ & learning rate for online learning algorithms\\
   $\regularizer\rbr{\occmeasure}$ & regularizer function of $\occmeasure$ for online learning algorithms  \\
   $\visitxatotal$ & count of visiting state-action pair $\rbr{\state,\action}$ \emph{before} episode $\episode$  \\
   $\visitxaxtotal$ & count of going to state $\state^\prime$ from $\state$ upon playing action $\action$ \emph{before} episode $\episode$  \\
   $\loss_\episode\rbr{\state,\action}$ & loss of the state-action pair $\rbr{\state,\action}$ at episode $\episode$  \\
   $\losscum$ & cumulative loss of the state-action pair $\rbr{\state,\action}$ \emph{before} episode $\episode$ \\
   $\visitxatotalpri$ & the privatized version of  $\visitxatotal$ \\
   $\visitxaxtotalpri$ & the privatized version of  $\visitxaxtotal$  \\
   $\visitxatotalhat$ & an optimistic value defined over $\visitxatotalpri$ in Eq.~\ref{def: visitxatotalhat}  \\
   $\losscumpri$ & the privatized version of $\losscum$ \\
   $\losspri$ & the privatized version of observed loss $\loss_\episode\rbr{\state,\action}\II_\episode\rbr{\state,\action}$ \\
   $\ddot{\loss}_\episode\rbr{\state,\action}$ & the scaled version of private loss $\losspri$ \\
   $\lossest$ & the final loss estimator of $\rbr{\state,\action}$ \\
   $\transesteasy_\episode$ & transition function estimated by using true counts at episode $\episode$  \\
   $\transprieasy_\episode$ & transition function estimated by using private counts at episode $\episode$ \\
   $\confnormtrans$ & confidence width for each element of the private transition estimate at episode $\episode$  \\
   % $\noise$ & noise added to real sum $\losscum$ \\
   % $\badeventloss$ & bad event with loss out of confidence   \\
   % $\badeventtrans$ & bad event with transition out of confidence  \\
   % $\goodevent$ & good event totally  \\
   % $\badevent$ & bad event totally   \\
   $\confcountxa$ & precision level for visitation counters  \\
   % $\confcountxax$ & confidence radius for state-action-state pair counts \\
   $\conflossf$ & precision level for the loss counter in the full-information setting \\
   $\ninterval$ & precision level for the loss estimator in the bandit-feedback setting \\
   % $\confpwtrans$ & confidence radius bound for transition error pointwisely \\
   % $\confconstant$ & constant in confidence radius for transition $L_1$ norm  \\
   % $\normtranserror$ & $L_1$ estimate error of transition  \\
   % $\confconstpw$ & constant in confidence radius for transition pointwisely \\
   % $\occmeasmid$ & middle variable occupancy measure \\
   % $\occmeasvir$ & virtual occupancy measure   \\
   % $\transbyocc$ & transition introduced by occupancy measure  \\
   % $\policybyocc$ & policy introduced by occupancy measure \\
   % $\userspace$ & user space  \\
   % $\user$ & user  \\
   % $\userseq$ & user sequence \\
   % $\traj$ & one trajectory in one episode  \\
   % $\trajset$ & the set of all possible trajectory in one episode  \\
   % $\Agent$ & RL agent algorithm  \\
   % $\history$ & random event history up to time $t$  \\   
   % $\dualvsum$ & dual variable of occupancy sum \\   
   % $\dualvmid$ & dual variable of  occupancy measure middle relation  \\   
   % $\dualvtrp$ & dual variable of transition positive error  \\ 
   % $\dualvtrn$ & dual variable of transition negative error  \\ 
   % $\dualvtr$ & dual variable of transition error sum bound \\
   % $\vsdual$ & middle variable of solving dual variables  \\ 
   % $\vbellman$ & dual middle variable of bellman error for dual computation \\ 
   \bottomrule  
\end{tabular}
\caption{List of Notation}
\end{table}


%%%%%%%%%%%%%%%%%%% For Authors %%%%%%%%%%%%%%%%%%%%%%%%%%%  
% \begin{table}[!h]
% \centering
% \footnotesize
% \resizebox{\textwidth}{!}{
% \begin{tabular*}{\linewidth}{ccc}
%    \toprule
%    Symbol & Explanation & LatexCode  \\
%    \midrule
%    $\statespace$ & state space & statespace \\
%    $\statesize$ & state space size & statesize \\
%    $\state$ & state & state \\
%    $\actionspace$ & action space & actionspace \\
%    $\actionsize$ & action space size & actionsize \\
%    $\action$ & action & action \\
%    $\transspace$ & acceptable transition space & transspace \\
%    $\trans$ & transition & trans \\
%    $\transest$ & transition estimated by true counts & transest \\
%    $\transpri$ & transition estimated by private counts & transpri \\
%    $\episode$ & episode & episode \\
%    $\episodetotal$ & episode total number & episodetotal \\
%    $\horizon$ & horizon & horizon \\
%    $\horizontotal$ & horizon total number & horizontotal \\
%    $\timetotal$ & time total number & timetotal \\
%    $\loss$ & loss function & loss \\
%    $\policy$ & policy & policy \\
%    $\valuef$ & value function of one episode & valuef \\
%    $\regret$ & regret & regret \\
%    $\pripara$ & DP parameter  & pripara \\
%    $\ninterval$ & noise interval parameter to ignore  & ninterval \\   
%    $\basealgo$ & base non-private algorithm for bandit setting  & basealgo \\
%    $\delta$ & bad event probability  & delta \\
%    $\FTRLpara$ & parameter of FTRL  & FTRLpara \\
%    $\regularizer{\occmeasure}$ & regularizer function of $\occmeasure$  & regularizer \\
%    $\visitxatotal$ & state-action pair visit total counts & visitxatotal \\
%    $\visitxatotalpri$ & state-action pair visit private total counts & visitxatotalpri \\
%    $\visitxaxtotal$ & state-action-state pair visit total counts & visitxaxtotal \\
%    $\visitxaxtotalpri$ & state-action-state pair visit private total counts & visitxaxtotalpri \\
%    $\losscum$ & state-action cumulative loss & losscum \\
%    $\losscumpri$ & state-action private cumulative loss  & losscumpri \\
%    $\losspri$ & state-action private loss & losspri \\
%    $\lossest$ & state-action estimated loss & lossest \\
%    $\noise$ & noise added to real sum $\losscum$ & noise \\
%    $\badeventloss$ & bad event with loss out of confidence  & badeventloss \\
%    $\badeventtrans$ & bad event with transition out of confidence  & badeventtrans \\
%    $\goodevent$ & good event totally &  goodevent \\
%    $\badevent$ & bad event totally  & badevent \\
%    $\datas$ & data stream  & datas \\
%    $\confcountxa$ & confidence radius for state-action pair counts  & confcountxa \\
%    $\confcountxax$ & confidence radius for state-action-state pair counts  & confcountxax \\
%    $\confnormtrans$ & confidence radius bound for transition $L_1$ norm  & confnormtrans \\
%    $\confpwtrans$ & confidence radius bound for transition error pointwisely & confpwtrans \\
%    $\confconstant$ & constant in confidence radius for transition $L_1$ norm & confconstant \\
%    $\normtranserror$ & $L_1$ estimate error of transition & normtranserror \\
%    $\confconstpw$ & constant in confidence radius for transition pointwisely & confconstpw \\
%    $\occmeasure$ & occupancy measure  & occmeasure \\
%    $\occmeasmid$ & middle variable occupancy measure  & occmeasmid \\
%    $\occmeasvir$ & virtual occupancy measure  & occmeasvir \\
%    $\transbyocc$ & transition introduced by occupancy measure  & transbyocc \\
%    $\policybyocc$ & policy introduced by occupancy measure  & policybyocc \\
%    $\occmeasureset\rbr{\transeasy}$ & acceptable occupancy measure set introduced by given transition  & occmeasureset \\
%    $\userspace$ & user space  & userspace \\
%    $\user$ & user  &  user\\
%    $\userseq$ & user sequence  & userseq \\
%    $\traj$ & one trajectory in one episode  & traj \\
%    $\trajset$ & the set of all possible trajectory in one episode  & trajset \\
%    $\Agent$ & RL agent algorithm  & Agent \\
%    $\history$ & random event history up to time $t$ & history \\   
%    $\dualvsum$ & dual variable of occupancy sum & dualvsum \\   
%    $\dualvmid$ & dual variable of  occupancy measure middle relation & dualvmid \\   
%    $\dualvtrp$ & dual variable of transition positive error & dualvtrp \\   
%    $\dualvtrn$ & dual variable of transition negative error & dualvtrn \\   
%    $\dualvtr$ & dual variable of transition error sum bound & dualvtr \\ 
%    $\vsdual$ & middle variable of solving dual variables & vsdual \\ 
%    $\vbellman$ & dual middle variable of bellman error for dual computation & vbellman \\ 
%    \bottomrule  
% \end{tabular*}}
% \end{table}