% Results table: one row per (dataset, model, method) reporting Miscov, APSS, NS and NAR.
% Needs booktabs (\toprule, \midrule, \bottomrule) and multirow; \methodname is defined
% outside this fragment.
% All four metric columns are right-aligned so the two-decimal values line up.
% NOTE(review): bold presumably marks the best value within each (dataset, model)
% group -- confirm against the caption before changing any emphasis.
\begin{tabular}{lllrrrr}
\toprule
Dataset & Model & Method & Miscov & APSS & NS & NAR \\
\midrule
% --- DS1000 ---
\multirow[t]{4}{*}{DS1000} & \multirow[t]{2}{*}{GPT 4o Mini} & \methodname\ HS & 0.35 & 3.07 & \textbf{18.00} & \textbf{0.75} \\
 &  & \methodname\ L & 0.00 & \textbf{1.00} & 24.00 & 0.00 \\
 & \multirow[t]{2}{*}{Gemma 2 27b} & \methodname\ HS & 0.34 & \textbf{5.99} & \textbf{11.58} & \textbf{0.58} \\
 &  & \methodname\ L & 0.01 & 6.41 & 23.91 & 0.02 \\
\midrule
% --- GSM ---
\multirow[t]{9}{*}{GSM} & \multirow[t]{3}{*}{GPT 4o Mini} & \methodname\ HS & 0.22 & 1.22 & 1.22 & \textbf{1.00} \\
 &  & \methodname\ L & 0.22 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
 &  & CLM First-K & 0.22 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
 & \multirow[t]{3}{*}{Llama 3 8B} & CLM First-K & 0.28 & 7.68 & 9.94 & \textbf{1.00} \\
 &  & \methodname\ HS & 0.34 & \textbf{5.38} & \textbf{5.56} & \textbf{1.00} \\
 &  & \methodname\ L & 0.31 & 5.78 & 5.99 & \textbf{1.00} \\
 & \multirow[t]{3}{*}{Phi 2} & CLM Max & 0.24 & 11.37 & 20.80 & 0.78 \\
% NOTE(review): NAR prints as 1.00 here but is not bold, while the next row's 1.00 is;
% likely a tie hidden by rounding -- confirm against the unrounded values.
 &  & \methodname\ HS & 0.35 & \textbf{6.62} & \textbf{6.92} & 1.00 \\
 &  & \methodname\ L & 0.32 & 7.51 & 8.00 & \textbf{1.00} \\
\midrule
% --- MBPP ---
\multirow[t]{9}{*}{MBPP} & \multirow[t]{3}{*}{GPT 4o Mini} & CLM Max & 0.24 & 1.01 & 1.06 & 0.95 \\
 &  & \methodname\ HS & 0.19 & \textbf{1.00} & 1.00 & \textbf{1.00} \\
 &  & \methodname\ L & 0.18 & 1.03 & \textbf{0.99} & \textbf{1.00} \\
 & \multirow[t]{3}{*}{Llama 3 8B} & CLM Max & 0.09 & 4.81 & 17.08 & 0.29 \\
 &  & \methodname\ HS & 0.31 & 2.63 & 2.99 & \textbf{1.00} \\
 &  & \methodname\ L & 0.32 & \textbf{2.43} & \textbf{2.90} & \textbf{1.00} \\
 & \multirow[t]{3}{*}{Phi 2} & CLM Max & 0.10 & 3.01 & 14.34 & 0.32 \\
 &  & \methodname\ HS & 0.32 & 2.53 & 3.13 & \textbf{1.00} \\
 &  & \methodname\ L & 0.32 & \textbf{2.37} & \textbf{2.88} & \textbf{1.00} \\
\midrule
% --- Math ---
\multirow[t]{3}{*}{Math} & \multirow[t]{3}{*}{GPT 4o Mini} & CLM First-K & 0.31 & 5.38 & 5.69 & \textbf{1.00} \\
% NOTE(review): NAR prints as 1.00 here but is not bold, unlike the neighbouring rows;
% likely a tie hidden by rounding -- confirm against the unrounded values.
 &  & \methodname\ HS & 0.34 & 4.98 & 4.99 & 1.00 \\
 &  & \methodname\ L & 0.32 & \textbf{4.69} & \textbf{4.70} & \textbf{1.00} \\
\midrule
% --- TriviaQA ---
\multirow[t]{5}{*}{TriviaQA} & \multirow[t]{3}{*}{Llama 3 8B} & \methodname\ HS & 0.31 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
 &  & \methodname\ L & 0.31 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
 &  & CLM First-K & 0.28 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
 & \multirow[t]{2}{*}{Phi 2} & \methodname\ HS & 0.34 & 2.07 & \textbf{19.25} & \textbf{0.73} \\
 &  & \methodname\ L & 0.05 & \textbf{1.88} & 24.99 & 0.13 \\
% No rule here: \bottomrule closes the table, an extra rule would double it.
\bottomrule
\end{tabular}

% Results table broken down by alpha (0.2, 0.3, 0.4): for every (dataset, model, method)
% row, each alpha group reports Miscov, APSS, NS and NAR.
% Needs booktabs (\toprule, \midrule, \cmidrule, \bottomrule) and multirow; \methodname
% is defined outside this fragment.
% All twelve metric columns are right-aligned so the two-decimal values line up.
% NOTE(review): bold presumably marks the best value within each (dataset, model, alpha)
% group -- confirm against the caption before changing any emphasis.
% NOTE(review): "NaN" cells are kept verbatim; presumably no result exists for that
% method at that alpha -- consider a dash or a caption note, confirm with the authors.
\begin{tabular}{lll*{12}{r}}
\toprule
% Group headers are centred over their four columns and underlined with trimmed rules
% so it is visible which metrics belong to which alpha.
 &  &  & \multicolumn{4}{c}{$\alpha=0.2$} & \multicolumn{4}{c}{$\alpha=0.3$} & \multicolumn{4}{c}{$\alpha=0.4$} \\
\cmidrule(lr){4-7} \cmidrule(lr){8-11} \cmidrule(lr){12-15}
Dataset & Model & Method & Miscov & APSS & NS & NAR & Miscov & APSS & NS & NAR & Miscov & APSS & NS & NAR \\
\midrule
% --- DS1000 ---
\multirow[t]{4}{*}{DS1000} & \multirow[t]{2}{*}{GPT 4o Mini} & \methodname\ HS & 0.19 & 2.72 & \textbf{21.37} & \textbf{0.51} & 0.30 & \textbf{3.05} & \textbf{19.06} & \textbf{0.68} & 0.40 & 3.16 & \textbf{16.74} & \textbf{0.81} \\
 &  & \methodname\ L & 0.00 & \textbf{1.00} & 23.00 & 0.00 & 0.00 & 4.05 & 24.68 & 0.01 & 0.00 & \textbf{1.00} & 24.00 & 0.00 \\
\cmidrule(l){2-15}
 & \multirow[t]{2}{*}{Gemma 2 27b} & \methodname\ HS & 0.20 & 8.20 & \textbf{16.61} & \textbf{0.40} & 0.29 & \textbf{6.26} & \textbf{12.02} & \textbf{0.53} & 0.39 & \textbf{6.38} & \textbf{11.61} & \textbf{0.63} \\
 &  & \methodname\ L & 0.01 & \textbf{6.98} & 23.53 & 0.02 & 0.01 & 7.19 & 24.08 & 0.02 & 0.00 & 7.91 & 24.23 & 0.01 \\
\midrule
% --- GSM ---
\multirow[t]{13}{*}{GSM} & \multirow[t]{4}{*}{GPT 4o Mini} & CLM First-K & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & 0.22 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
 &  & CLM Sum & 0.02 & 3.89 & 6.80 & 0.09 & 0.24 & 1.11 & 1.17 & \textbf{1.00} & NaN & NaN & NaN & NaN \\
 &  & \methodname\ HS & 0.20 & 3.33 & 3.43 & \textbf{1.00} & 0.22 & 1.29 & 1.30 & \textbf{1.00} & 0.22 & 1.17 & 1.17 & \textbf{1.00} \\
 &  & \methodname\ L & 0.20 & \textbf{2.47} & \textbf{2.50} & \textbf{1.00} & 0.22 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} & 0.22 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
\cmidrule(l){2-15}
 & \multirow[t]{5}{*}{Llama 3 8B} & CLM First-K & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & 0.33 & 5.47 & 6.69 & \textbf{1.00} \\
 &  & CLM Max & NaN & NaN & NaN & NaN & 0.22 & 10.97 & 23.45 & 0.94 & NaN & NaN & NaN & NaN \\
 &  & CLM Sum & 0.00 & \textbf{14.16} & 24.56 & 0.02 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
 &  & \methodname\ HS & 0.18 & 19.70 & \textbf{21.58} & \textbf{0.96} & 0.29 & \textbf{6.98} & \textbf{7.27} & \textbf{1.00} & 0.38 & \textbf{4.15} & \textbf{4.26} & \textbf{1.00} \\
 &  & \methodname\ L & 0.11 & 22.06 & 24.36 & 0.67 & 0.26 & 7.88 & 8.27 & \textbf{1.00} & 0.36 & 4.16 & 4.29 & \textbf{1.00} \\
\cmidrule(l){2-15}
 & \multirow[t]{4}{*}{Phi 2} & CLM First-K & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & 0.34 & 7.41 & 9.04 & \textbf{1.00} \\
 &  & CLM Sum & NaN & NaN & NaN & NaN & 0.06 & 16.51 & 21.26 & 0.21 & NaN & NaN & NaN & NaN \\
% NOTE(review): the alpha=0.4 NAR prints as 1.00 here but is not bold, unlike the rows
% around it; likely a tie hidden by rounding -- confirm against the unrounded values.
 &  & \methodname\ HS & 0.20 & \textbf{18.76} & \textbf{21.41} & \textbf{0.85} & 0.30 & \textbf{9.86} & \textbf{10.56} & 0.99 & 0.39 & 4.72 & 4.86 & 1.00 \\
 &  & \methodname\ L & NaN & NaN & NaN & NaN & 0.27 & 14.27 & 15.90 & \textbf{1.00} & 0.37 & \textbf{4.43} & \textbf{4.60} & \textbf{1.00} \\
\midrule
% --- MBPP ---
\multirow[t]{11}{*}{MBPP} & \multirow[t]{4}{*}{GPT 4o Mini} & CLM First-K & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & 0.19 & \textbf{1.00} & 1.00 & \textbf{1.00} \\
 &  & CLM Max & 0.01 & 1.17 & 4.11 & 0.06 & 0.15 & 1.02 & 1.38 & 0.65 & NaN & NaN & NaN & NaN \\
 &  & \methodname\ HS & 0.18 & \textbf{1.10} & \textbf{1.80} & \textbf{1.00} & 0.19 & \textbf{1.00} & 1.00 & \textbf{1.00} & 0.19 & \textbf{1.00} & 1.00 & \textbf{1.00} \\
 &  & \methodname\ L & 0.18 & 1.35 & 3.55 & \textbf{1.00} & 0.18 & 1.03 & \textbf{0.99} & \textbf{1.00} & 0.18 & 1.03 & \textbf{0.99} & \textbf{1.00} \\
\cmidrule(l){2-15}
 & \multirow[t]{4}{*}{Llama 3 8B} & CLM First-K & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & 0.24 & 4.16 & 6.22 & 0.78 \\
 &  & CLM Max & NaN & NaN & NaN & NaN & 0.01 & 4.73 & 20.54 & 0.04 & NaN & NaN & NaN & NaN \\
 &  & \methodname\ HS & 0.20 & \textbf{9.71} & \textbf{18.00} & \textbf{0.94} & 0.29 & \textbf{3.82} & \textbf{4.78} & \textbf{1.00} & 0.33 & 2.20 & 2.43 & \textbf{1.00} \\
 &  & \methodname\ L & 0.03 & 10.91 & 21.61 & 0.12 & 0.27 & 4.13 & 5.70 & \textbf{1.00} & 0.33 & \textbf{1.88} & \textbf{2.10} & \textbf{1.00} \\
\cmidrule(l){2-15}
 & \multirow[t]{3}{*}{Phi 2} & CLM Max & NaN & NaN & NaN & NaN & 0.00 & \textbf{1.98} & 6.71 & 0.01 & 0.21 & 2.31 & 9.91 & 0.64 \\
 &  & \methodname\ HS & 0.17 & \textbf{10.23} & \textbf{22.84} & \textbf{0.83} & 0.28 & 4.39 & 7.28 & \textbf{0.98} & 0.35 & 1.83 & 2.09 & \textbf{1.00} \\
 &  & \methodname\ L & 0.00 & 11.00 & 24.00 & 0.00 & 0.26 & 4.25 & \textbf{6.46} & 0.90 & 0.33 & \textbf{1.81} & \textbf{1.97} & \textbf{1.00} \\
\midrule
% --- Math ---
\multirow[t]{4}{*}{Math} & \multirow[t]{4}{*}{GPT 4o Mini} & CLM First-K & NaN & NaN & NaN & NaN & 0.27 & 8.25 & 9.22 & \textbf{1.00} & 0.36 & 3.78 & 3.86 & \textbf{1.00} \\
 &  & CLM Max & 0.02 & 19.78 & 24.91 & 0.08 & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN \\
% NOTE(review): the alpha=0.3 NAR prints as 1.00 here but is not bold, unlike the rows
% around it; likely a tie hidden by rounding -- confirm against the unrounded values.
 &  & \methodname\ HS & 0.19 & \textbf{12.85} & \textbf{12.90} & \textbf{0.95} & 0.29 & \textbf{6.74} & \textbf{6.75} & 1.00 & 0.37 & 4.05 & 4.06 & \textbf{1.00} \\
 &  & \methodname\ L & 0.15 & 22.98 & 23.12 & 0.80 & 0.28 & 7.08 & 7.10 & \textbf{1.00} & 0.37 & \textbf{3.19} & \textbf{3.20} & \textbf{1.00} \\
\midrule
% --- TriviaQA ---
\multirow[t]{6}{*}{TriviaQA} & \multirow[t]{4}{*}{Llama 3 8B} & CLM First-K & NaN & NaN & NaN & NaN & NaN & NaN & NaN & NaN & 0.37 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
 &  & CLM Max & NaN & NaN & NaN & NaN & 0.28 & \textbf{1.00} & \textbf{1.08} & \textbf{1.00} & NaN & NaN & NaN & NaN \\
 &  & \methodname\ HS & NaN & NaN & NaN & NaN & 0.29 & 1.15 & 1.90 & \textbf{1.00} & 0.31 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
 &  & \methodname\ L & NaN & NaN & NaN & NaN & 0.29 & 1.17 & 2.00 & \textbf{1.00} & 0.31 & \textbf{1.00} & \textbf{1.00} & \textbf{1.00} \\
\cmidrule(l){2-15}
 & \multirow[t]{2}{*}{Phi 2} & \methodname\ HS & 0.19 & \textbf{1.85} & \textbf{21.77} & \textbf{0.50} & 0.29 & 2.02 & \textbf{19.96} & \textbf{0.65} & 0.39 & \textbf{2.13} & \textbf{18.29} & \textbf{0.79} \\
 &  & \methodname\ L & 0.04 & 1.87 & 24.96 & 0.10 & 0.01 & \textbf{1.71} & 24.97 & 0.03 & 0.00 & 2.93 & 24.91 & 0.00 \\
% No rule here: \bottomrule closes the table, an extra rule would double it.
\bottomrule
\end{tabular}

