\begin{table}[t]
\renewcommand{\arraystretch}{1.1}
\setlength{\tabcolsep}{0.5\tabcolsep}
\centering
\caption{\textbf{Pre-training Results.} `S': speech input and output. `S$\rightarrow$T': speech input and text output. `T$\rightarrow$S': text input and speech output. Results for Spirit-LM are taken from \citet{spiritlm} and other results are from \citet{moshi}. We use $\emptyset$ to indicate tasks and modalities not supported by the model, and `-' to indicate scores that are not publicly available.}
\resizebox{\textwidth}{!}{
\begin{tabular}{@{}lcccccc|cccccc@{}}
\toprule
 \multirow{3}{*}{Model} & \multicolumn{6}{c|}{\textbf{Speech Language Modeling}} & \multicolumn{6}{c}{\textbf{Spoken Question Answering}} \\
 & \multicolumn{3}{c}{sTopic-StoryCloze} & \multicolumn{3}{c|}{sStoryCloze} & \multicolumn{2}{c}{Web Questions} & \multicolumn{2}{c}{Llama Questions} & \multicolumn{2}{c}{TriviaQA} \\
 & S & T$\rightarrow$S & S$\rightarrow$T & S & T$\rightarrow$S & S$\rightarrow$T & S & S$\rightarrow$T & S & S$\rightarrow$T & S & S$\rightarrow$T \\
\midrule
GSLM & 66.6 & $\emptyset$ & $\emptyset$ & 53.3 & $\emptyset$ & $\emptyset$ & 1.5 & $\emptyset$ & 4.0 
& $\emptyset$ & - & - \\
AudioLM & - & $\emptyset$ & $\emptyset$ & - & $\emptyset$ & $\emptyset$ & 2.3 & $\emptyset$ & 7.0 & $\emptyset$ & - & - \\
TWIST & 76.4 & $\emptyset$ & $\emptyset$ & 55.4 & $\emptyset$ & $\emptyset$ & 1.1 & $\emptyset$ & 0.5 & $\emptyset$ & - & - \\
% VoxtLM &  &  &  &  & \\
Spirit-LM & 82.9 & 72.7 & 88.6 & 61.0 & 59.6 & 64.6 & - & - & - & - & - & - \\
SpeechGPT & $\emptyset$ & $\emptyset$ & $\emptyset$ & $\emptyset$ & $\emptyset$ & $\emptyset$ & $\emptyset$ & 6.5 & $\emptyset$ & 21.6 & $\emptyset$ & 14.8  \\
Spectron & - & - & - & - & - & - & $\emptyset$ & 6.1 & $\emptyset$ & 21.9 & $\emptyset$ & - \\
Moshi & \textbf{83.0} & $\emptyset$ & $\emptyset$ & 60.8 & $\emptyset$ & $\emptyset$ & 9.2 & 26.6 & 21.0 & 62.3 & 7.3 & 22.8 \\ \midrule
Ours (9B) & 82.9  & \textbf{85.0} & \textbf{93.6} & \textbf{62.4} & \textbf{63.2} & \textbf{76.3} & \textbf{15.9} & \textbf{32.2} & \textbf{50.7} & \textbf{64.7} & \textbf{26.5} & \textbf{39.1} \\ \bottomrule
\end{tabular}
}
\label{tab:pre-training}
\end{table}
