\begin{table*}[ht!]
\centering
\begin{tabular}{lcccc}
\toprule
Dataset & Citation    & \# Classes    & Train Samples & Test Samples \\ \midrule
Winogrande-Small (WG-S)       & \cite{winogrande} & 2& 0.64K & 1.27K\\
Winogrande-Medium (WG-M)      & \cite{winogrande} & 2&  2.56K & 1.27K\\
ARC-Easy (ARC-E)              & \cite{arc}        & 4&  2.25K & 0.57K\\
ARC-Challenge (ARC-C)         & \cite{arc}        & 4& 1.12K & 0.30K\\
OpenBookQA (OBQA)             & \cite{obqa}       & 4 & 4.96K & 0.50K \\
BoolQ                         & \cite{boolq}      & 2 &  2.49K & 3.27K\\
MMLU-Chemistry                    & \cite{mmlu}      & 4&  - & 0.10K\\
MMLU-Physics                  & \cite{mmlu}      & 4&  - & 0.10K\\
\bottomrule
\end{tabular}
\caption{Commonsense reasoning datasets used in our experiments. The MMLU datasets are used only in the out-of-distribution experiments and therefore have no training samples.}
\label{tab:datasets}
\end{table*}