\begin{table*}[htbp]
\captionsetup{font=small,labelfont=small}
\begin{minipage}{0.48\textwidth}
\centering
\caption{Comparison of the magnitude of the regularization term $\mathcal{R}(h, \mathcal{S}, \mathcal{T})$ with the magnitude of the $\mathsf{BagCSI}$ loss for the Wine dataset.}\label{tab:reg_term_mag_wine}
\scriptsize
\begin{tabular}{l|rr|rr}
    Dataset &\multicolumn{2}{c}{PLWFA} &\multicolumn{2}{c}{BLWFA} \\\cmidrule{1-5}
    \diagbox{\textbfne{Bag Size}}{\textbfne{Method}} &$\mathcal{R}(h, \mathcal{S}, \mathcal{T})$ &$\mathsf{BagCSI}$ &$\mathcal{R}(h, \mathcal{S}, \mathcal{T})$ &$\mathsf{BagCSI}$ \\\midrule
    8 &0.66 &704.05 &0.62 &698.44 \\
    32 &1.04 &707.39 &1.14 &700.56 \\
    128 &1.32 &708.33 &1.33 &701.18 \\
    256 &1.73 &713.34 &1.75 &706.06 \\
\end{tabular}
\end{minipage}
\hfill
\begin{minipage}{0.48\textwidth}
\centering
\caption{Effect of adding the regularization term $\mathcal{R}(h, \mathcal{S}, \mathcal{T})$ to the loss on performance for the Wine dataset. To ensure that the extra regularization term has a significant impact, the hyperparameter $w_R$ is searched over the set $W = \{10^{-5}, 5\times 10^{-5}, 10^{-4}, 5\times 10^{-4}, 10^{-3}, 5\times 10^{-3}, 10^{-2}\}$.}\label{tab:reg_term_train_wine}
\scriptsize
\begin{tabular}{l|rr|rr}
    Dataset &\multicolumn{2}{c}{PLWFA} &\multicolumn{2}{c}{BLWFA} \\\cmidrule{1-5}
    \diagbox{\textbfne{Bag Size}}{\textbfne{Method}} &$w_R = 0$ &best $w_R \in W$ &$w_R = 0$ &best $w_R \in W$ \\\midrule
    8 &183.0 $\pm$ 0.6 &255.57 $\pm$ 3.81 &180.9 $\pm$ 0.5 &255.47 $\pm$ 3.91 \\
    32 &186.6 $\pm$ 1.0 &260.85 $\pm$ 3.76 &184.6 $\pm$ 0.7 &259.80 $\pm$ 3.71 \\
    128 &189.0 $\pm$ 0.8 &270.23 $\pm$ 3.69 &186.0 $\pm$ 0.8 &270.23 $\pm$ 3.66 \\
    256 &188.9 $\pm$ 1.2 &276.27 $\pm$ 3.72 &188.9 $\pm$ 1.2 &276.25 $\pm$ 3.73 \\
\end{tabular}
\end{minipage}
\end{table*}
