% Hyperparameter table: one column of values per environment
% (DoorKey, Sokoban, CMAB). The first row group lists the MuZero
% settings, the second the additional settings for "Ours".
% Uses \toprule/\midrule/\bottomrule (booktabs), \multirow (multirow)
% and the adjustbox environment (adjustbox).
\begin{table}[ht]
  \caption{Hyperparameters for MuZero and Ours}
  \label{table:param_muzero}
  \centering
  % Scale the whole tabular to 60% of the text width.
  % \setlength{\tabcolsep}{4pt}
  \begin{adjustbox}{width=0.6\textwidth}%
    \begin{tabular}{@{}lcccc@{}}
      \toprule
      Models & Parameters & DoorKey & Sokoban & CMAB \\
      \midrule
      % The \multirow count must equal the number of active rows in the
      % group (24 here) so the label is centred on the whole group;
      % update it if rows are added, removed, or un-commented.
      \multirow{24}{*}{MuZero}
        & Observation down-sampling        & 96$\times$96 & 96$\times$96 & 96$\times$96 \\
        & Frames stacked                   & No    & No    & No    \\
        & Frame skip                       & No    & No    & No    \\
        & Reward clipping                  & No    & No    & No    \\
        & Discount factor                  & 0.997 & 0.997 & 0.997 \\
        & Minibatch size                   & 256   & 256   & 256   \\
        & Optimizer                        & Adam  & Adam  & Adam  \\
        & Learning rate                    & 0.001 & 0.001 & 0.001 \\
        & Momentum                         & 0.9   & 0.9   & 0.9   \\
        % & Learning rate schedule & 0.001 $\rightarrow$ 0.0001 \\
        & Weight decay                     & 1e-4  & 1e-4  & 1e-4  \\
        & Max gradient norm                & 100   & 5     & 100   \\
        & Training steps                   & 100K  & 100K  & 100K  \\
        & Evaluation episodes              & 32    & 32    & 32    \\
        & Min replay size for sampling     & 32K   & 32K   & 32K   \\
        & Max replay size                  & 1M    & 1M    & 1.6M  \\
        & Target network updating interval & 200   & 200   & 200   \\
        & Unroll steps                     & 5     & 5     & 5     \\
        & TD steps                         & 5     & 5     & 5     \\
        & Policy loss coefficient          & 1     & 1     & 1     \\
        & Value loss coefficient           & 0.25  & 0.25  & 0.25  \\
        & Reconstruction loss coefficient  & 1     & 0.1   & 1     \\
        & Dirichlet noise ratio            & 0.3   & 0.3   & 0.3   \\
        & Number of simulations in MCTS    & 50    & 50    & 15    \\
        & Reanalyzed policy ratio          & 1.0   & 1.0   & 1.0   \\
      \midrule
      % Three rows in this group, hence \multirow{3}.
      \multirow{3}{*}{Ours}
        & Sparsity coefficient             & 0.0   & 0.0   & 0.01  \\
        & Gumbel sigmoid temperature       & 1.0   & 1.0   & 1.0   \\
        & MCTS mask threshold              & 0.01  & 0.01  & 0.01  \\
      \bottomrule
    \end{tabular}%
  \end{adjustbox}
  % \vspace{-0.5cm}
\end{table}