A & \sf Llama-3.1-8b & Base  & 2.92 & 21.52 & 1.67 & 3.00 & 1.17 & 84.33 & 0.00 & 18.03 & 16.67 & 3.29 & 6.44 & 1.66 \\
  &   & CtE & \ditto & \ditto & 16.33 & \textbf{100.00} & 33.83 & 68.17 & 5.00 & 44.67 & 56.03 & 9.74 & 38.99 & 25.08 \\
  &   & Ora. & \ditto & \ditto & 12.67 & 15.50 & 12.33 & 61.83 & 1.83 & 20.83 & 45.49 & 11.26 & 24.14 & 0.21 \\
\midrule
B & \sf GPT-oss-20b & Base  & 22.71 & 44.44 & 21.67 & 38.00 & 32.50 & 83.00 & 10.00 & 37.03 & 44.13 & 7.75 & 29.27 & 12.54 \\
  &   & CtE & \ditto & \ditto & 67.17 & \textbf{100.00} & 73.17 & 84.67 & 38.00 & 72.60 & 79.39 & 10.73 & 60.99 & 37.04 \\
  &   & Ora. & \ditto & \ditto & 58.33 & 80.67 & 92.33 & 95.33 & 62.00 & 77.73 & 76.50 & 3.97 & 78.64 & 13.33 \\
\midrule
C & \sf gpt-4.1-nano-2025-04-14 & Base  & 58.52 & 68.23 & 27.83 & 45.67 & 37.17 & 62.00 & 4.00 & 35.33 & 51.29 & 19.15 & 20.37 & 12.58 \\
  &   & CtE & \ditto & \ditto & \textbf{97.33} & 71.67 & 56.67 & 38.17 & 0.00 & 52.77 & \textbf{97.57} & 61.49 & 0.17 & \textbf{80.87} \\
  &   & Ora. & \ditto & \ditto & 54.67 & 59.17 & 60.17 & 61.83 & 21.50 & 51.47 & 62.37 & 22.57 & 35.09 & 19.37 \\
\midrule
D & \sf Mistral-small:24b & Base  & 43.01 & 64.10 & 44.07 & 50.34 & 45.25 & 50.85 & 1.53 & 38.41 & 63.99 & 34.47 & 20.57 & 29.19 \\
  &   & CtE & \ditto & \ditto & 84.50 & \textbf{100.00} & 80.67 & 79.83 & 42.33 & 77.47 & 92.57 & 17.74 & 63.71 & 33.58 \\
  &   & Ora. & \ditto & \ditto & 72.00 & 74.67 & 90.00 & 95.83 & 50.17 & 76.53 & 82.96 & 1.48 & 67.82 & 15.54 \\
\midrule
E & \sf Llama-3.1-70b & Base  & 20.33 & 48.82 & 20.67 & 28.83 & 23.83 & 71.00 & 2.83 & 29.43 & 47.12 & 10.06 & 20.67 & 3.33 \\
  &   & CtE & \ditto & \ditto & 80.33 & \textbf{100.00} & 86.33 & 93.50 & 56.17 & 83.27 & 90.71 & 2.95 & 81.20 & 26.91 \\
  &   & Ora. & \ditto & \ditto & 61.00 & 72.67 & 82.17 & 93.50 & 50.67 & 72.00 & 75.29 & 3.37 & 76.35 & 5.04 \\
\midrule
F & \sf Gemini-2.0-flash & Base  & 48.03 & 70.08 & 32.67 & 41.50 & 35.33 & 62.67 & 5.00 & 35.43 & 60.08 & 13.00 & 29.14 & 0.42 \\
  &   & CtE & \ditto & \ditto & 88.17 & \textbf{100.00} & 94.50 & 64.33 & 53.83 & 80.17 & 93.84 & 27.82 & 62.68 & 40.58 \\
  &   & Ora. & \ditto & \ditto & 79.00 & 84.17 & 92.00 & 96.67 & 73.83 & 85.13 & 82.33 & 1.37 & 84.15 & 2.96 \\
\midrule
G & \sf gpt-4.1-mini-2025-04-14 & Base  & 28.02 & 53.84 & 33.67 & 48.17 & 38.33 & 56.33 & 5.50 & 36.40 & 62.96 & 15.74 & 28.12 & 4.83 \\
  &   & CtE & \ditto & \ditto & 89.83 & 64.00 & 94.83 & 77.17 & 64.00 & 77.97 & 95.73 & 21.83 & 72.41 & 23.25 \\
  &   & Ora. & \ditto & \ditto & 77.50 & 87.67 & 96.33 & 89.67 & 68.83 & 84.00 & 88.81 & 6.10 & 79.53 & 8.75 \\
\midrule
H & \sf GPT-4o & Base  & 48.98 & 52.09 & 45.17 & 53.33 & 47.00 & 62.83 & 6.17 & 42.90 & 65.34 & 26.69 & 26.12 & 29.79 \\
  &   & CtE & \ditto & \ditto & \textbf{97.33} & 99.33 & 98.33 & 36.33 & 30.67 & 72.40 & 58.67 & 62.79 & 35.08 & 66.66 \\
  &   & Ora. & \ditto & \ditto & 85.33 & 91.83 & 94.83 & 86.50 & 67.83 & 85.26 & 73.43 & 12.09 & 58.79 & 33.88 \\
\midrule
I & \sf gpt-4.1-2025-04-14 & Base  & 28.73 & 59.49 & 39.67 & 49.83 & 38.67 & 63.33 & 10.17 & 40.33 & 67.71 & 11.18 & 35.81 & 3.79 \\
  &   & CtE & \ditto & \ditto & 91.83 & 70.33 & 97.50 & 87.00 & 74.83 & 84.30 & 97.54 & 11.74 & \textbf{84.84} & 13.75 \\
  &   & Ora. & \ditto & \ditto & 82.50 & 91.83 & 94.50 & 95.33 & 74.83 & 87.80 & 92.07 & 3.09 & 83.64 & 6.62 \\
\midrule
J & \sf Grok-3-mini & Base  & 37.67 & 63.09 & 34.33 & 52.33 & 43.83 & 87.67 & 23.00 & 48.23 & 63.46 & 5.61 & 44.78 & 8.12 \\
  &   & CtE & \ditto & \ditto & 90.83 & \textbf{100.00} & 90.00 & 86.67 & 66.83 & 86.87 & 96.39 & 12.77 & 78.69 & 35.08 \\
  &   & Ora. & \ditto & \ditto & 88.17 & 92.67 & \textbf{98.50} & 95.50 & 81.00 & 91.17 & 93.63 & 4.00 & 79.16 & 13.46 \\
\midrule
K & \sf DeepSeek-V3.1 & Base  & 38.81 & 60.82 & 32.50 & 45.50 & 40.17 & 56.00 & 7.33 & 36.30 & 57.03 & 16.61 & 25.73 & 11.38 \\
  &   & CtE & \ditto & \ditto & 94.50 & 84.67 & 91.17 & 67.33 & 54.17 & 78.37 & 97.22 & 31.01 & 62.74 & 30.67 \\
  &   & Ora. & \ditto & \ditto & 81.67 & 88.83 & 95.00 & 94.67 & 71.00 & 86.23 & 91.17 & 3.96 & 79.80 & 10.00 \\
\midrule
L & \sf Gemini-2.5-flash & Base  & 37.69 & 64.85 & 33.50 & 49.50 & 41.50 & 84.50 & 21.83 & 46.17 & 61.01 & 3.60 & 43.82 & 5.33 \\
  &   & CtE & \ditto & \ditto & 89.83 & \textbf{100.00} & 96.83 & 90.67 & 84.83 & \textbf{92.43} & 93.83 & 8.82 & 83.92 & 31.96 \\
  &   & Ora. & \ditto & \ditto & 89.17 & 90.17 & 95.33 & 95.67 & \textbf{85.50} & 91.17 & 92.56 & 4.08 & 83.49 & 8.00 \\
\midrule
M & \sf GPT-5-nano & Base  & 33.48 & 42.09 & 59.00 & 64.00 & 63.17 & 67.67 & 17.33 & 54.23 & 72.28 & 29.88 & 38.52 & 41.79 \\
  &   & CtE & \ditto & \ditto & 77.83 & \textbf{100.00} & 76.83 & 19.50 & 0.50 & 54.93 & 78.16 & \textbf{80.39} & 7.82 & 31.00 \\
  &   & Ora. & \ditto & \ditto & 84.17 & 83.17 & 97.17 & 77.83 & 53.33 & 79.13 & 89.65 & 21.40 & 58.45 & 41.25 \\
\midrule
N & \sf DeepSeek-reasoner & Base  & 29.95 & 51.86 & 18.67 & 40.17 & 30.17 & 82.00 & 10.83 & 36.37 & 46.10 & 3.96 & 31.15 & 4.21 \\
  &   & CtE & \ditto & \ditto & 78.50 & 75.33 & 85.33 & 69.67 & 51.50 & 72.07 & 82.40 & 29.66 & 56.37 & 28.54 \\
  &   & Ora. & \ditto & \ditto & 68.33 & 75.67 & 86.33 & 93.00 & 58.33 & 76.33 & 82.21 & 5.37 & 79.55 & 6.25 \\
\midrule
O & \sf Gemini-2.5-pro & Base  & 36.88 & 66.44 & 31.00 & 43.67 & 38.83 & 81.50 & 19.50 & 42.90 & 63.14 & 3.25 & 46.98 & 3.33 \\
  &   & CtE & \ditto & \ditto & 85.33 & \textbf{100.00} & 88.00 & 93.00 & 71.33 & 87.53 & 89.78 & 6.22 & 79.02 & 33.00 \\
  &   & Ora. & \ditto & \ditto & 77.33 & 80.17 & 92.17 & \textbf{97.83} & 74.83 & 84.47 & 81.97 & 2.06 & 80.89 & 10.50 \\
\midrule
P & \sf GPT-5-mini & Base  & 63.15 & 76.22 & 65.33 & 63.83 & 68.17 & 61.50 & 14.33 & 54.63 & 77.18 & 35.93 & 37.25 & 47.17 \\
  &   & CtE & \ditto & \ditto & 88.00 & \textbf{100.00} & 75.00 & 67.33 & 34.00 & 72.87 & 92.58 & 32.67 & 44.64 & 55.08 \\
  &   & Ora. & \ditto & \ditto & 86.83 & 91.67 & 95.00 & 73.67 & 54.50 & 80.33 & 91.04 & 26.17 & 55.03 & 50.04 \\
\midrule
Q & \sf GPT-o3 & Base  & 31.42 & 58.92 & 34.41 & 50.68 & 38.64 & 86.61 & 19.15 & 45.90 & 62.46 & 5.53 & 43.83 & 8.98 \\
  &   & CtE & \ditto & \ditto & 82.00 & 88.33 & 91.33 & 92.50 & 74.67 & 85.77 & 93.24 & 6.74 & 83.97 & 26.58 \\
  &   & Ora. & \ditto & \ditto & 73.67 & 92.00 & 94.50 & 97.33 & 77.17 & 86.93 & 82.18 & 2.44 & 80.82 & 13.88 \\
\midrule
R & \sf GPT-5 & Base  & 58.59 & 76.89 & 61.00 & 64.17 & 65.00 & 73.50 & 21.67 & 57.07 & 78.16 & 22.11 & 48.16 & 32.00 \\
  &   & CtE & \ditto & \ditto & 92.33 & \textbf{100.00} & 96.50 & 70.67 & 59.83 & 83.87 & 96.63 & 29.20 & 66.83 & 47.50 \\
  &   & Ora. & \ditto & \ditto & 90.67 & 94.00 & 98.17 & 79.00 & 65.33 & 85.43 & 96.11 & 21.00 & 69.17 & 34.59 \\
\midrule