\begin{table}[H]
\centering
\caption{Statistics on interleaved pre-training data. Token counts are reported in billions.}
\label{tab:interleaved-pretrain-data-breakdown}
\begin{tabular}{@{}ccccccc@{}}
\toprule
% Header row. Two-line headings are typeset with nested one-column tabulars.
Dataset & Tokenizer
  & \begin{tabular}[c]{@{}c@{}}Corruption\\ Ratio\end{tabular}
  & \begin{tabular}[c]{@{}c@{}}Text\\ Tokens\end{tabular}
  & \begin{tabular}[c]{@{}c@{}}Speech\\ Tokens\end{tabular}
  & \begin{tabular}[c]{@{}c@{}}Speech\\ Ratio\end{tabular}
  & \begin{tabular}[c]{@{}c@{}}Total\\ Tokens\end{tabular} \\
\midrule
% Fineweb-Edu: 11 rows in total; tokenizer groups are separated by partial
% rules over columns 2-7. In every row Text Tokens + Speech Tokens equals
% Total Tokens, and Speech Ratio is Speech Tokens / Total Tokens (2 decimals).
\multirow{11}{*}{Fineweb-Edu}
  & \begin{tabular}[c]{@{}c@{}}Text-60k\\ Speech-50Hz\end{tabular}
  & 0.30 & 56.21 & 343.79 & 0.86 & 400 \\
\cmidrule(l){2-7}
  & \begin{tabular}[c]{@{}c@{}}Text-60k\\ Speech-25Hz\end{tabular}
  & 0.30 & 98.78 & 301.22 & 0.75 & 400 \\
\cmidrule(l){2-7}
% Corruption-ratio sweep (7 rows) sharing a single tokenizer cell.
  & \multirow{7}{*}{\begin{tabular}[c]{@{}c@{}}Text-60k\\ Speech-12.5Hz\end{tabular}}
  & 0.10 & 282.82 & 117.18 & 0.29 & 400 \\
  & & 0.20 & 209.05 & 190.95 & 0.48 & 400 \\
  & & 0.30 & 158.43 & 241.57 & 0.60 & 400 \\
  & & 0.40 & 121.54 & 278.46 & 0.70 & 400 \\
  & & 0.50 & 93.48 & 306.52 & 0.77 & 400 \\
  & & 0.75 & 46.15 & 353.85 & 0.88 & 400 \\
  & & 1.00 & 0.10 & 399.90 & 1.00 & 400 \\
\cmidrule(l){2-7}
  & \begin{tabular}[c]{@{}c@{}}Text-60k\\ Speech-6.25Hz\end{tabular}
  & 0.30 & 226.50 & 173.50 & 0.43 & 400 \\
\cmidrule(l){2-7}
  & \begin{tabular}[c]{@{}c@{}}Text-150k\\ Speech-12.5Hz\end{tabular}
  & 0.30 & 150.51 & 249.49 & 0.62 & 400 \\
\midrule
% Chinese-Fineweb-Edu: 2 rows, one per tokenizer.
\multirow{2}{*}{Chinese-Fineweb-Edu}
  & \begin{tabular}[c]{@{}c@{}}Text-60k\\ Speech-12.5Hz\end{tabular}
  & 0.30 & 78.80 & 121.20 & 0.61 & 200 \\
\cmidrule(l){2-7}
  & \begin{tabular}[c]{@{}c@{}}Text-150k\\ Speech-12.5Hz\end{tabular}
  & 0.30 & 77.59 & 122.41 & 0.61 & 200 \\
\bottomrule
\end{tabular}
\end{table}