,judge,solver,solver_id,problem,competition,true_grade,split,cost,confidence,incorrect
0,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_1,allrussian,incorrect,generic,0.0,,True
1,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_10,allrussian,incorrect,generic,0.0,,True
2,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_11,allrussian,correct,generic,0.0,,False
3,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_12,allrussian,incorrect,generic,0.0,,True
4,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_13,allrussian,correct,generic,0.0,,False
5,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_14,allrussian,correct,generic,0.0,,False
6,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_15,allrussian,incorrect,generic,0.0,,True
7,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_16,allrussian,incorrect,generic,0.0,,True
8,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_2,allrussian,incorrect,generic,0.0,,True
9,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_3,allrussian,correct,generic,0.0,,False
10,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_4,allrussian,incorrect,generic,0.0,,True
11,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_5,allrussian,correct,generic,0.0,,False
12,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_6,allrussian,detected,generic,0.0,,False
13,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_7,allrussian,detected,generic,0.0,,False
14,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_8,allrussian,incorrect,generic,0.0,,True
15,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,allrussian_2025_9,allrussian,correct,generic,0.0,,False
16,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmo_2025_1,bmo,incorrect,generic,0.0,,True
17,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmo_2025_2,bmo,incorrect,generic,0.0,,True
18,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmo_2025_3,bmo,corrected,generic,0.0,,False
19,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmo_2025_4,bmo,incorrect,generic,0.0,,True
20,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_A_2025_1,bmosl,correct,generic,0.0,,False
21,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_A_2025_2,bmosl,incorrect,generic,0.0,,True
22,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_A_2025_3,bmosl,incorrect,generic,0.0,,True
23,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_A_2025_4,bmosl,correct,generic,0.0,,False
24,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_A_2025_5,bmosl,correct,generic,0.0,,False
25,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_A_2025_6,bmosl,incorrect,generic,0.0,,True
26,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_C_2025_1,bmosl,incorrect,generic,0.0,,True
27,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_C_2025_2,bmosl,detected,generic,0.0,,False
28,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_C_2025_3,bmosl,correct,generic,0.0,,False
29,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_C_2025_4,bmosl,incorrect,generic,0.0,,True
30,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_C_2025_5,bmosl,incorrect,generic,0.0,,True
31,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_C_2025_6,bmosl,detected,generic,0.0,,False
32,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_G_2025_1,bmosl,incorrect,generic,0.0,,True
33,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_G_2025_2,bmosl,incorrect,generic,0.0,,True
34,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_G_2025_3,bmosl,incorrect,generic,0.0,,True
35,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_G_2025_4,bmosl,incorrect,generic,0.0,,True
36,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_G_2025_5,bmosl,incorrect,generic,0.0,,True
37,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_G_2025_6,bmosl,correct,generic,0.0,,False
38,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_G_2025_7,bmosl,incorrect,generic,0.0,,True
39,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_NT_2025_1,bmosl,detected,generic,0.0,,False
40,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_NT_2025_2,bmosl,detected,generic,0.0,,False
41,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_NT_2025_3,bmosl,correct,generic,0.0,,False
42,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_NT_2025_4,bmosl,correct,generic,0.0,,False
43,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_NT_2025_5,bmosl,detected,generic,0.0,,False
44,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_NT_2025_6,bmosl,correct,generic,0.0,,False
45,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bmosl_NT_2025_7,bmosl,detected,generic,0.0,,False
46,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bulgaria_2025_1,bulgaria,corrected,generic,0.0,,False
47,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bulgaria_2025_2,bulgaria,detected,generic,0.0,,False
48,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bulgaria_2025_3,bulgaria,detected,generic,0.0,,False
49,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bulgaria_2025_4,bulgaria,incorrect,generic,0.0,,True
50,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bulgaria_2025_5,bulgaria,correct,generic,0.0,,False
51,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,bulgaria_2025_6,bulgaria,detected,generic,0.0,,False
52,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,canada_2025_1,canada,correct,generic,0.0,,False
53,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,canada_2025_2,canada,correct,generic,0.0,,False
54,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,canada_2025_3,canada,correct,generic,0.0,,False
55,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,canada_2025_4,canada,incorrect,generic,0.0,,True
56,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,canada_2025_5,canada,correct,generic,0.0,,False
57,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,china_2025_1,china,incorrect,generic,0.0,,True
58,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,china_2025_2,china,detected,generic,0.0,,False
59,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,china_2025_3,china,incorrect,generic,0.0,,True
60,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,china_2025_5,china,detected,generic,0.0,,False
61,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,china_2025_6,china,incorrect,generic,0.0,,True
62,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_1,chinatst,incorrect,generic,0.0,,True
63,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_10,chinatst,incorrect,generic,0.0,,True
64,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_11,chinatst,detected,generic,0.0,,False
65,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_12,chinatst,incorrect,generic,0.0,,True
66,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_13,chinatst,incorrect,generic,0.0,,True
67,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_14,chinatst,detected,generic,0.0,,False
68,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_15,chinatst,correct,generic,0.0,,False
69,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_16,chinatst,incorrect,generic,0.0,,True
70,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_17,chinatst,correct,generic,0.0,,False
71,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_18,chinatst,detected,generic,0.0,,False
72,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_19,chinatst,incorrect,generic,0.0,,True
73,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_2,chinatst,incorrect,generic,0.0,,True
74,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_20,chinatst,incorrect,generic,0.0,,True
75,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_21,chinatst,detected,generic,0.0,,False
76,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_22,chinatst,incorrect,generic,0.0,,True
77,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_3,chinatst,correct,generic,0.0,,False
78,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_4,chinatst,detected,generic,0.0,,False
79,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_5,chinatst,detected,generic,0.0,,False
80,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_6,chinatst,correct,generic,0.0,,False
81,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_7,chinatst,detected,generic,0.0,,False
82,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_8,chinatst,incorrect,generic,0.0,,True
83,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,chinatst_2025_9,chinatst,detected,generic,0.0,,False
84,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,egmo_2025_1,egmo,correct,generic,0.0,,False
85,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,egmo_2025_2,egmo,incorrect,generic,0.0,,True
86,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,egmo_2025_3,egmo,detected,generic,0.0,,False
87,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,egmo_2025_4,egmo,incorrect,generic,0.0,,True
88,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,egmo_2025_5,egmo,incorrect,generic,0.0,,True
89,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,egmo_2025_6,egmo,incorrect,generic,0.0,,True
90,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_A_2025_1,elmosl,correct,generic,0.0,,False
91,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_A_2025_2,elmosl,correct,generic,0.0,,False
92,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_A_2025_3,elmosl,incorrect,generic,0.0,,True
93,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_A_2025_5,elmosl,incorrect,generic,0.0,,True
94,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_A_2025_6,elmosl,correct,generic,0.0,,False
95,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_A_2025_7,elmosl,detected,generic,0.0,,False
96,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_A_2025_8,elmosl,correct,generic,0.0,,False
97,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_1,elmosl,incorrect,generic,0.0,,True
98,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_2,elmosl,incorrect,generic,0.0,,True
99,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_3,elmosl,correct,generic,0.0,,False
100,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_4,elmosl,incorrect,generic,0.0,,True
101,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_5,elmosl,incorrect,generic,0.0,,True
102,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_6,elmosl,incorrect,generic,0.0,,True
103,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_7,elmosl,incorrect,generic,0.0,,True
104,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_8,elmosl,detected,generic,0.0,,False
105,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_C_2025_9,elmosl,correct,generic,0.0,,False
106,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_1,elmosl,incorrect,generic,0.0,,True
107,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_2,elmosl,incorrect,generic,0.0,,True
108,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_3,elmosl,detected,generic,0.0,,False
109,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_4,elmosl,correct,generic,0.0,,False
110,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_5,elmosl,incorrect,generic,0.0,,True
111,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_6,elmosl,detected,generic,0.0,,False
112,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_7,elmosl,incorrect,generic,0.0,,True
113,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_8,elmosl,incorrect,generic,0.0,,True
114,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_G_2025_9,elmosl,incorrect,generic,0.0,,True
115,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_NT_2025_1,elmosl,detected,generic,0.0,,False
116,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_NT_2025_2,elmosl,incorrect,generic,0.0,,True
117,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_NT_2025_3,elmosl,correct,generic,0.0,,False
118,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_NT_2025_4,elmosl,correct,generic,0.0,,False
119,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_NT_2025_5,elmosl,incorrect,generic,0.0,,True
120,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_NT_2025_6,elmosl,correct,generic,0.0,,False
121,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,elmosl_NT_2025_7,elmosl,incorrect,generic,0.0,,True
122,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,german_2025_1,german,correct,generic,0.0,,False
123,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,german_2025_2,german,incorrect,generic,0.0,,True
124,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,german_2025_3,german,correct,generic,0.0,,False
125,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,german_2025_4,german,correct,generic,0.0,,False
126,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,greece_2025_1,greece,correct,generic,0.0,,False
127,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,greece_2025_2,greece,detected,generic,0.0,,False
128,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,greece_2025_3,greece,correct,generic,0.0,,False
129,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,greece_2025_4,greece,detected,generic,0.0,,False
130,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_1,imosl,correct,generic,0.0,,False
131,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_10,imosl,incorrect,generic,0.0,,True
132,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_11,imosl,incorrect,generic,0.0,,True
133,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_12,imosl,detected,generic,0.0,,False
134,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_13,imosl,detected,generic,0.0,,False
135,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_14,imosl,detected,generic,0.0,,False
136,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_15,imosl,detected,generic,0.0,,False
137,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_16,imosl,incorrect,generic,0.0,,True
138,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_17,imosl,detected,generic,0.0,,False
139,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_18,imosl,incorrect,generic,0.0,,True
140,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_19,imosl,incorrect,generic,0.0,,True
141,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_2,imosl,correct,generic,0.0,,False
142,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_20,imosl,incorrect,generic,0.0,,True
143,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_21,imosl,incorrect,generic,0.0,,True
144,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_22,imosl,incorrect,generic,0.0,,True
145,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_23,imosl,incorrect,generic,0.0,,True
146,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_24,imosl,incorrect,generic,0.0,,True
147,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_25,imosl,correct,generic,0.0,,False
148,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_26,imosl,correct,generic,0.0,,False
149,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_27,imosl,incorrect,generic,0.0,,True
150,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_28,imosl,correct,generic,0.0,,False
151,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_29,imosl,detected,generic,0.0,,False
152,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_3,imosl,detected,generic,0.0,,False
153,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_30,imosl,detected,generic,0.0,,False
154,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_31,imosl,incorrect,generic,0.0,,True
155,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_32,imosl,incorrect,generic,0.0,,True
156,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_33,imosl,correct,generic,0.0,,False
157,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_34,imosl,incorrect,generic,0.0,,True
158,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_35,imosl,incorrect,generic,0.0,,True
159,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_36,imosl,detected,generic,0.0,,False
160,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_37,imosl,incorrect,generic,0.0,,True
161,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_4,imosl,incorrect,generic,0.0,,True
162,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_5,imosl,correct,generic,0.0,,False
163,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_6,imosl,detected,generic,0.0,,False
164,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_7,imosl,correct,generic,0.0,,False
165,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_8,imosl,incorrect,generic,0.0,,True
166,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,imosl_2025_9,imosl,correct,generic,0.0,,False
167,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_2025_1,india,correct,generic,0.0,,False
168,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_2025_2,india,incorrect,generic,0.0,,True
169,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_2025_3,india,incorrect,generic,0.0,,True
170,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_2025_4,india,detected,generic,0.0,,False
171,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_2025_5,india,correct,generic,0.0,,False
172,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_2025_6,india,detected,generic,0.0,,False
173,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_1,india,detected,generic,0.0,,False
174,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_10,india,incorrect,generic,0.0,,True
175,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_11,india,incorrect,generic,0.0,,True
176,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_12,india,correct,generic,0.0,,False
177,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_13,india,incorrect,generic,0.0,,True
178,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_14,india,detected,generic,0.0,,False
179,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_15,india,detected,generic,0.0,,False
180,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_16,india,correct,generic,0.0,,False
181,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_17,india,correct,generic,0.0,,False
182,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_18,india,incorrect,generic,0.0,,True
183,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_19,india,detected,generic,0.0,,False
184,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_2,india,incorrect,generic,0.0,,True
185,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_20,india,incorrect,generic,0.0,,True
186,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_21,india,correct,generic,0.0,,False
187,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_3,india,detected,generic,0.0,,False
188,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_4,india,incorrect,generic,0.0,,True
189,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_5,india,incorrect,generic,0.0,,True
190,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_6,india,detected,generic,0.0,,False
191,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_7,india,incorrect,generic,0.0,,True
192,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_8,india,correct,generic,0.0,,False
193,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,india_prep_2025_9,india,correct,generic,0.0,,False
194,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_1,iran,detected,generic,0.0,,False
195,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_2,iran,detected,generic,0.0,,False
196,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_3,iran,incorrect,generic,0.0,,True
197,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_4,iran,incorrect,generic,0.0,,True
198,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_5,iran,incorrect,generic,0.0,,True
199,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_6,iran,detected,generic,0.0,,False
200,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_7,iran,correct,generic,0.0,,False
201,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_8,iran,incorrect,generic,0.0,,True
202,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,iran_tst_2025_9,iran,incorrect,generic,0.0,,True
203,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_1,israel,incorrect,generic,0.0,,True
204,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_10,israel,incorrect,generic,0.0,,True
205,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_2,israel,detected,generic,0.0,,False
206,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_3,israel,detected,generic,0.0,,False
207,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_4,israel,incorrect,generic,0.0,,True
208,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_5,israel,correct,generic,0.0,,False
209,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_6,israel,incorrect,generic,0.0,,True
210,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_7,israel,correct,generic,0.0,,False
211,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_8,israel,incorrect,generic,0.0,,True
212,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,israel_tst_2025_9,israel,detected,generic,0.0,,False
213,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,izho_2025_1,izho,incorrect,generic,0.0,,True
214,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,izho_2025_2,izho,incorrect,generic,0.0,,True
215,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,izho_2025_3,izho,incorrect,generic,0.0,,True
216,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,izho_2025_4,izho,detected,generic,0.0,,False
217,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,izho_2025_5,izho,incorrect,generic,0.0,,True
218,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,izho_2025_6,izho,incorrect,generic,0.0,,True
219,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,jbmo_2025_1,jbmo,detected,generic,0.0,,False
220,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,jbmo_2025_2,jbmo,correct,generic,0.0,,False
221,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,jbmo_2025_3,jbmo,detected,generic,0.0,,False
222,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,jbmo_2025_4,jbmo,correct,generic,0.0,,False
223,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,korea_2025_1,korea,correct,generic,0.0,,False
224,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,korea_2025_2,korea,correct,generic,0.0,,False
225,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,korea_2025_3,korea,incorrect,generic,0.0,,True
226,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,korea_2025_4,korea,incorrect,generic,0.0,,True
227,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,korea_2025_5,korea,incorrect,generic,0.0,,True
228,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,korea_2025_6,korea,incorrect,generic,0.0,,True
229,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_1,matharena,correct,matharena,0.0,,False
230,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_10,matharena,correct,matharena,0.0,,False
231,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_11,matharena,incorrect,matharena,0.0,,True
232,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_12,matharena,incorrect,matharena,0.0,,True
233,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_13,matharena,incorrect,matharena,0.0,,True
234,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_14,matharena,incorrect,matharena,0.0,,True
235,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_15,matharena,incorrect,matharena,0.0,,True
236,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_16,matharena,correct,matharena,0.0,,False
237,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_17,matharena,correct,matharena,0.0,,False
238,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_18,matharena,correct,matharena,0.0,,False
239,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_19,matharena,correct,matharena,0.0,,False
240,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_2,matharena,detected,matharena,0.0,,False
241,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_20,matharena,detected,matharena,0.0,,False
242,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_21,matharena,correct,matharena,0.0,,False
243,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_22,matharena,correct,matharena,0.0,,False
244,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_23,matharena,correct,matharena,0.0,,False
245,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_24,matharena,incorrect,matharena,0.0,,True
246,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_25,matharena,correct,matharena,0.0,,False
247,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_26,matharena,incorrect,matharena,0.0,,True
248,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_27,matharena,correct,matharena,0.0,,False
249,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_28,matharena,incorrect,matharena,0.0,,True
250,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_29,matharena,correct,matharena,0.0,,False
251,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_3,matharena,correct,matharena,0.0,,False
252,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_30,matharena,incorrect,matharena,0.0,,True
253,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_4,matharena,corrected,matharena,0.0,,False
254,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_5,matharena,correct,matharena,0.0,,False
255,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_6,matharena,correct,matharena,0.0,,False
256,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_7,matharena,correct,matharena,0.0,,False
257,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_8,matharena,corrected,matharena,0.0,,False
258,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_aime_aime_2025_9,matharena,correct,matharena,0.0,,False
259,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_1,matharena,correct,matharena,0.0,,False
260,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_10,matharena,correct,matharena,0.0,,False
261,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_11,matharena,correct,matharena,0.0,,False
262,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_12,matharena,correct,matharena,0.0,,False
263,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_13,matharena,incorrect,matharena,0.0,,True
264,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_14,matharena,correct,matharena,0.0,,False
265,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_15,matharena,incorrect,matharena,0.0,,True
266,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_16,matharena,correct,matharena,0.0,,False
267,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_17,matharena,correct,matharena,0.0,,False
268,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_18,matharena,correct,matharena,0.0,,False
269,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_19,matharena,correct,matharena,0.0,,False
270,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_2,matharena,correct,matharena,0.0,,False
271,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_20,matharena,correct,matharena,0.0,,False
272,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_21,matharena,correct,matharena,0.0,,False
273,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_22,matharena,correct,matharena,0.0,,False
274,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_23,matharena,correct,matharena,0.0,,False
275,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_24,matharena,correct,matharena,0.0,,False
276,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_25,matharena,detected,matharena,0.0,,False
277,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_26,matharena,correct,matharena,0.0,,False
278,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_27,matharena,incorrect,matharena,0.0,,True
279,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_28,matharena,detected,matharena,0.0,,False
280,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_29,matharena,correct,matharena,0.0,,False
281,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_3,matharena,incorrect,matharena,0.0,,True
282,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_30,matharena,incorrect,matharena,0.0,,True
283,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_4,matharena,correct,matharena,0.0,,False
284,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_5,matharena,correct,matharena,0.0,,False
285,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_6,matharena,correct,matharena,0.0,,False
286,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_7,matharena,correct,matharena,0.0,,False
287,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_8,matharena,correct,matharena,0.0,,False
288,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_brumo_brumo_2025_9,matharena,correct,matharena,0.0,,False
289,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_1,matharena,correct,matharena,0.0,,False
290,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_10,matharena,correct,matharena,0.0,,False
291,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_11,matharena,correct,matharena,0.0,,False
292,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_12,matharena,correct,matharena,0.0,,False
293,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_13,matharena,correct,matharena,0.0,,False
294,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_14,matharena,correct,matharena,0.0,,False
295,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_15,matharena,incorrect,matharena,0.0,,True
296,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_16,matharena,correct,matharena,0.0,,False
297,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_17,matharena,incorrect,matharena,0.0,,True
298,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_18,matharena,incorrect,matharena,0.0,,True
299,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_19,matharena,correct,matharena,0.0,,False
300,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_2,matharena,correct,matharena,0.0,,False
301,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_20,matharena,incorrect,matharena,0.0,,True
302,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_21,matharena,incorrect,matharena,0.0,,True
303,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_22,matharena,correct,matharena,0.0,,False
304,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_23,matharena,incorrect,matharena,0.0,,True
305,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_24,matharena,correct,matharena,0.0,,False
306,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_25,matharena,incorrect,matharena,0.0,,True
307,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_26,matharena,correct,matharena,0.0,,False
308,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_27,matharena,incorrect,matharena,0.0,,True
309,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_28,matharena,incorrect,matharena,0.0,,True
310,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_29,matharena,correct,matharena,0.0,,False
311,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_3,matharena,correct,matharena,0.0,,False
312,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_30,matharena,detected,matharena,0.0,,False
313,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_31,matharena,correct,matharena,0.0,,False
314,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_32,matharena,incorrect,matharena,0.0,,True
315,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_33,matharena,correct,matharena,0.0,,False
316,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_34,matharena,incorrect,matharena,0.0,,True
317,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_35,matharena,correct,matharena,0.0,,False
318,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_36,matharena,correct,matharena,0.0,,False
319,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_37,matharena,correct,matharena,0.0,,False
320,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_38,matharena,incorrect,matharena,0.0,,True
321,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_39,matharena,detected,matharena,0.0,,False
322,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_4,matharena,correct,matharena,0.0,,False
323,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_40,matharena,incorrect,matharena,0.0,,True
324,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_5,matharena,incorrect,matharena,0.0,,True
325,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_6,matharena,correct,matharena,0.0,,False
326,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_7,matharena,incorrect,matharena,0.0,,True
327,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_8,matharena,incorrect,matharena,0.0,,True
328,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_cmimc_cmimc_2025_9,matharena,correct,matharena,0.0,,False
329,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_1,matharena,correct,matharena,0.0,,False
330,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_10,matharena,detected,matharena,0.0,,False
331,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_11,matharena,correct,matharena,0.0,,False
332,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_12,matharena,correct,matharena,0.0,,False
333,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_13,matharena,detected,matharena,0.0,,False
334,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_14,matharena,incorrect,matharena,0.0,,True
335,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_15,matharena,correct,matharena,0.0,,False
336,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_16,matharena,correct,matharena,0.0,,False
337,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_17,matharena,incorrect,matharena,0.0,,True
338,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_18,matharena,incorrect,matharena,0.0,,True
339,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_19,matharena,incorrect,matharena,0.0,,True
340,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_2,matharena,correct,matharena,0.0,,False
341,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_20,matharena,incorrect,matharena,0.0,,True
342,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_21,matharena,correct,matharena,0.0,,False
343,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_22,matharena,correct,matharena,0.0,,False
344,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_23,matharena,correct,matharena,0.0,,False
345,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_24,matharena,correct,matharena,0.0,,False
346,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_25,matharena,detected,matharena,0.0,,False
347,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_26,matharena,correct,matharena,0.0,,False
348,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_27,matharena,detected,matharena,0.0,,False
349,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_28,matharena,correct,matharena,0.0,,False
350,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_29,matharena,incorrect,matharena,0.0,,True
351,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_3,matharena,correct,matharena,0.0,,False
352,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_30,matharena,incorrect,matharena,0.0,,True
353,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_4,matharena,correct,matharena,0.0,,False
354,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_5,matharena,incorrect,matharena,0.0,,True
355,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_6,matharena,correct,matharena,0.0,,False
356,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_7,matharena,correct,matharena,0.0,,False
357,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_8,matharena,correct,matharena,0.0,,False
358,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_hmmt_hmmt_feb_2025_9,matharena,correct,matharena,0.0,,False
359,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_1,matharena,correct,matharena,0.0,,False
360,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_10,matharena,correct,matharena,0.0,,False
361,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_11,matharena,detected,matharena,0.0,,False
362,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_12,matharena,correct,matharena,0.0,,False
363,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_13,matharena,incorrect,matharena,0.0,,True
364,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_14,matharena,correct,matharena,0.0,,False
365,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_15,matharena,correct,matharena,0.0,,False
366,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_16,matharena,correct,matharena,0.0,,False
367,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_17,matharena,correct,matharena,0.0,,False
368,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_18,matharena,correct,matharena,0.0,,False
369,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_19,matharena,correct,matharena,0.0,,False
370,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_2,matharena,correct,matharena,0.0,,False
371,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_20,matharena,incorrect,matharena,0.0,,True
372,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_21,matharena,correct,matharena,0.0,,False
373,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_22,matharena,incorrect,matharena,0.0,,True
374,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_23,matharena,correct,matharena,0.0,,False
375,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_24,matharena,detected,matharena,0.0,,False
376,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_25,matharena,correct,matharena,0.0,,False
377,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_26,matharena,correct,matharena,0.0,,False
378,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_27,matharena,incorrect,matharena,0.0,,True
379,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_28,matharena,correct,matharena,0.0,,False
380,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_29,matharena,correct,matharena,0.0,,False
381,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_3,matharena,correct,matharena,0.0,,False
382,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_30,matharena,incorrect,matharena,0.0,,True
383,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_31,matharena,correct,matharena,0.0,,False
384,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_32,matharena,correct,matharena,0.0,,False
385,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_33,matharena,correct,matharena,0.0,,False
386,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_34,matharena,detected,matharena,0.0,,False
387,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_35,matharena,correct,matharena,0.0,,False
388,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_36,matharena,correct,matharena,0.0,,False
389,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_37,matharena,correct,matharena,0.0,,False
390,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_38,matharena,correct,matharena,0.0,,False
391,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_39,matharena,correct,matharena,0.0,,False
392,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_4,matharena,correct,matharena,0.0,,False
393,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_40,matharena,detected,matharena,0.0,,False
394,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_41,matharena,correct,matharena,0.0,,False
395,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_42,matharena,incorrect,matharena,0.0,,True
396,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_43,matharena,incorrect,matharena,0.0,,True
397,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_44,matharena,correct,matharena,0.0,,False
398,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_45,matharena,correct,matharena,0.0,,False
399,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_46,matharena,detected,matharena,0.0,,False
400,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_47,matharena,correct,matharena,0.0,,False
401,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_48,matharena,correct,matharena,0.0,,False
402,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_49,matharena,correct,matharena,0.0,,False
403,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_5,matharena,correct,matharena,0.0,,False
404,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_50,matharena,incorrect,matharena,0.0,,True
405,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_51,matharena,correct,matharena,0.0,,False
406,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_52,matharena,correct,matharena,0.0,,False
407,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_53,matharena,incorrect,matharena,0.0,,True
408,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_6,matharena,detected,matharena,0.0,,False
409,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_7,matharena,correct,matharena,0.0,,False
410,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_8,matharena,detected,matharena,0.0,,False
411,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,matharena_smt_smt_2025_9,matharena,correct,matharena,0.0,,False
412,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,nordic_2025_1,nordic,detected,generic,0.0,,False
413,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,nordic_2025_2,nordic,detected,generic,0.0,,False
414,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,nordic_2025_3,nordic,correct,generic,0.0,,False
415,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,pan_african_2025_1,pan,incorrect,generic,0.0,,True
416,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,pan_african_2025_2,pan,correct,generic,0.0,,False
417,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,pan_african_2025_3,pan,incorrect,generic,0.0,,True
418,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,pan_african_2025_4,pan,correct,generic,0.0,,False
419,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,pan_african_2025_5,pan,detected,generic,0.0,,False
420,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,pan_african_2025_6,pan,incorrect,generic,0.0,,True
421,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,philippines_2025_1,philippines,correct,generic,0.0,,False
422,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,philippines_2025_2,philippines,incorrect,generic,0.0,,True
423,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,philippines_2025_3,philippines,detected,generic,0.0,,False
424,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,philippines_2025_4,philippines,incorrect,generic,0.0,,True
425,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,philippines_2025_5,philippines,corrected,generic,0.0,,False
426,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,philippines_2025_6,philippines,incorrect,generic,0.0,,True
427,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,philippines_2025_7,philippines,incorrect,generic,0.0,,True
428,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,philippines_2025_8,philippines,correct,generic,0.0,,False
429,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,polish_2025_1,polish,correct,generic,0.0,,False
430,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,polish_2025_2,polish,detected,generic,0.0,,False
431,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,polish_2025_3,polish,detected,generic,0.0,,False
432,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,polish_2025_4,polish,detected,generic,0.0,,False
433,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,polish_2025_5,polish,incorrect,generic,0.0,,True
434,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,polish_2025_6,polish,detected,generic,0.0,,False
435,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,rmm_2025_1,rmm,detected,generic,0.0,,False
436,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,rmm_2025_2,rmm,incorrect,generic,0.0,,True
437,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,rmm_2025_3,rmm,incorrect,generic,0.0,,True
438,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,rmm_2025_4,rmm,incorrect,generic,0.0,,True
439,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,rmm_2025_5,rmm,incorrect,generic,0.0,,True
440,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,rmm_2025_6,rmm,correct,generic,0.0,,False
441,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_10_2025_1,romania,incorrect,generic,0.0,,True
442,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_10_2025_2,romania,correct,generic,0.0,,False
443,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_10_2025_3,romania,correct,generic,0.0,,False
444,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_11_2025_1,romania,correct,generic,0.0,,False
445,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_11_2025_2,romania,correct,generic,0.0,,False
446,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_11_2025_3,romania,detected,generic,0.0,,False
447,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_12_2025_1,romania,detected,generic,0.0,,False
448,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_12_2025_2,romania,correct,generic,0.0,,False
449,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_12_2025_3,romania,incorrect,generic,0.0,,True
450,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_9_2025_1,romania,incorrect,generic,0.0,,True
451,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_9_2025_2,romania,incorrect,generic,0.0,,True
452,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_tst_2025_1,romania,incorrect,generic,0.0,,True
453,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_tst_2025_2,romania,detected,generic,0.0,,False
454,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,romania_tst_2025_3,romania,incorrect,generic,0.0,,True
455,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,serbia_tst_bmo_2025_1,serbia,detected,generic,0.0,,False
456,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,serbia_tst_bmo_2025_2,serbia,correct,generic,0.0,,False
457,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,serbia_tst_bmo_2025_3,serbia,detected,generic,0.0,,False
458,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,serbia_tst_bmo_2025_4,serbia,incorrect,generic,0.0,,True
459,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,spain_2025_1,spain,correct,generic,0.0,,False
460,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,spain_2025_2,spain,detected,generic,0.0,,False
461,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,spain_2025_3,spain,detected,generic,0.0,,False
462,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,spain_2025_4,spain,incorrect,generic,0.0,,True
463,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,spain_2025_5,spain,incorrect,generic,0.0,,True
464,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_1,thai,correct,generic,0.0,,False
465,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_10,thai,incorrect,generic,0.0,,True
466,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_2,thai,detected,generic,0.0,,False
467,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_3,thai,detected,generic,0.0,,False
468,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_4,thai,detected,generic,0.0,,False
469,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_5,thai,correct,generic,0.0,,False
470,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_6,thai,correct,generic,0.0,,False
471,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_7,thai,correct,generic,0.0,,False
472,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_8,thai,incorrect,generic,0.0,,True
473,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,thai_2025_9,thai,detected,generic,0.0,,False
474,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_1,turkey,correct,generic,0.0,,False
475,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_2,turkey,incorrect,generic,0.0,,True
476,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_3,turkey,incorrect,generic,0.0,,True
477,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_4,turkey,incorrect,generic,0.0,,True
478,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_5,turkey,incorrect,generic,0.0,,True
479,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_6,turkey,incorrect,generic,0.0,,True
480,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_7,turkey,incorrect,generic,0.0,,True
481,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_8,turkey,correct,generic,0.0,,False
482,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,turkey_tst_2025_9,turkey,correct,generic,0.0,,False
483,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usamo_2025_1,usamo,correct,generic,0.0,,False
484,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usamo_2025_2,usamo,incorrect,generic,0.0,,True
485,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usamo_2025_3,usamo,incorrect,generic,0.0,,True
486,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usamo_2025_4,usamo,incorrect,generic,0.0,,True
487,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usamo_2025_5,usamo,incorrect,generic,0.0,,True
488,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usamo_2025_6,usamo,incorrect,generic,0.0,,True
489,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_1,usatst,correct,generic,0.0,,False
490,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_2,usatst,incorrect,generic,0.0,,True
491,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_3,usatst,incorrect,generic,0.0,,True
492,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_4,usatst,detected,generic,0.0,,False
493,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_5,usatst,incorrect,generic,0.0,,True
494,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_6,usatst,detected,generic,0.0,,False
495,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_7,usatst,detected,generic,0.0,,False
496,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_8,usatst,incorrect,generic,0.0,,True
497,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,usatst_2025_9,usatst,incorrect,generic,0.0,,True
498,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,vietnam_2025_1,vietnam,correct,generic,0.0,,False
499,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,vietnam_2025_2,vietnam,correct,generic,0.0,,False
500,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,vietnam_2025_3,vietnam,detected,generic,0.0,,False
501,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,vietnam_2025_4,vietnam,incorrect,generic,0.0,,True
502,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,vietnam_2025_5,vietnam,detected,generic,0.0,,False
503,GPT-5-mini (medium),o4-mini (high),openai/o4-mini--high,vietnam_2025_6,vietnam,detected,generic,0.0,,False
504,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_2,allrussian,correct,generic,0.05087125,,False
505,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_15,allrussian,incorrect,generic,0.064595,,True
506,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_13,allrussian,correct,generic,0.0252445,,False
507,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_11,allrussian,detected,generic,0.0489515,,False
508,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_1,allrussian,correct,generic,0.07498375,,False
509,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_12,allrussian,incorrect,generic,0.0485305,,True
510,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_14,allrussian,correct,generic,0.0271035,,False
511,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_3,allrussian,correct,generic,0.0757055,,False
512,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_10,allrussian,correct,generic,0.056515,,False
513,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_6,allrussian,incorrect,generic,0.05342675,,True
514,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_5,allrussian,correct,generic,0.0276385,,False
515,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_9,allrussian,corrected,generic,0.02510475,,False
516,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_8,allrussian,incorrect,generic,0.06028325,,True
517,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_7,allrussian,detected,generic,0.06797375,,False
518,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,allrussian_2025_16,allrussian,incorrect,generic,0.06386525,,True
519,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,china_2025_2,china,detected,generic,0.05345225,,False
520,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,china_2025_1,china,correct,generic,0.07018475,,False
521,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,china_2025_3,china,incorrect,generic,0.0464085,,True
522,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,china_2025_6,china,detected,generic,0.07761225,,False
523,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,egmo_2025_1,egmo,incorrect,generic,0.0547485,,True
524,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,egmo_2025_4,egmo,incorrect,generic,0.06360775,,True
525,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,egmo_2025_2,egmo,detected,generic,0.0554405,,False
526,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,egmo_2025_6,egmo,incorrect,generic,0.0718695,,True
527,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,egmo_2025_5,egmo,incorrect,generic,0.05319125,,True
528,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,german_2025_2,german,incorrect,generic,0.070707,,True
529,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,german_2025_1,german,correct,generic,0.05443975,,False
530,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,german_2025_4,german,correct,generic,0.07796125,,False
531,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,german_2025_3,german,correct,generic,0.033845,,False
532,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_2025_4,india,correct,generic,0.03391375,,False
533,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_2025_6,india,incorrect,generic,0.0640015,,True
534,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_2025_1,india,detected,generic,0.0143415,,False
535,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_2025_2,india,incorrect,generic,0.0307075,,True
536,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_2025_3,india,incorrect,generic,0.05066525,,True
537,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_15,india,incorrect,generic,0.0490015,,True
538,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_1,india,correct,generic,0.04376625,,False
539,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_16,india,correct,generic,0.02730775,,False
540,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_17,india,correct,generic,0.0592815,,False
541,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_13,india,incorrect,generic,0.0685655,,True
542,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_14,india,detected,generic,0.04847725,,False
543,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_12,india,incorrect,generic,0.06508625,,True
544,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_3,india,correct,generic,0.07634825,,False
545,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_20,india,incorrect,generic,0.07185825,,True
546,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_2025_5,india,detected,generic,0.05341,,False
547,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_11,india,incorrect,generic,0.0278575,,True
548,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_6,india,detected,generic,0.07949475,,False
549,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_5,india,detected,generic,0.0495145,,False
550,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_9,india,correct,generic,0.02396325,,False
551,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_8,india,correct,generic,0.0603665,,False
552,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_4,india,correct,generic,0.057562,,False
553,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_19,india,detected,generic,0.08971075,,False
554,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_18,india,incorrect,generic,0.0622335,,True
555,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,india_prep_2025_21,india,correct,generic,0.06851975,,False
556,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,korea_2025_2,korea,incorrect,generic,0.0503675,,True
557,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,korea_2025_5,korea,correct,generic,0.0740405,,False
558,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_10,matharena,correct,matharena,0.04772625,,False
559,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_13,matharena,correct,matharena,0.06145775,,False
560,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,korea_2025_4,korea,detected,generic,0.05968475,,False
561,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,korea_2025_6,korea,incorrect,generic,0.0666405,,True
562,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_25,matharena,correct,matharena,0.0252305,,False
563,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_23,matharena,correct,matharena,0.04239275,,False
564,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_29,matharena,correct,matharena,0.06412375,,False
565,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_5,matharena,correct,matharena,0.0256035,,False
566,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_6,matharena,correct,matharena,0.02193525,,False
567,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_8,matharena,correct,matharena,0.0206615,,False
568,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_aime_aime_2025_3,matharena,detected,matharena,0.012724,,False
569,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_brumo_brumo_2025_19,matharena,correct,matharena,0.01469575,,False
570,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_brumo_brumo_2025_18,matharena,correct,matharena,0.013903,,False
571,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_brumo_brumo_2025_14,matharena,correct,matharena,0.0212235,,False
572,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_brumo_brumo_2025_3,matharena,correct,matharena,0.02810125,,False
573,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_brumo_brumo_2025_27,matharena,correct,matharena,0.04321725,,False
574,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_brumo_brumo_2025_9,matharena,correct,matharena,0.0155755,,False
575,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_15,matharena,detected,matharena,0.03822075,,False
576,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_1,matharena,correct,matharena,0.03003975,,False
577,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_16,matharena,detected,matharena,0.07010175,,False
578,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_26,matharena,correct,matharena,0.02815775,,False
579,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_31,matharena,correct,matharena,0.025772,,False
580,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_40,matharena,correct,matharena,0.04368225,,False
581,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_23,matharena,detected,matharena,0.05562475,,False
582,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_hmmt_hmmt_feb_2025_12,matharena,correct,matharena,0.02914325,,False
583,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_33,matharena,correct,matharena,0.0326565,,False
584,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_hmmt_hmmt_feb_2025_28,matharena,correct,matharena,0.049945,,False
585,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_cmimc_cmimc_2025_12,matharena,incorrect,matharena,0.0156625,,True
586,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_hmmt_hmmt_feb_2025_5,matharena,correct,matharena,0.02821375,,False
587,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_11,matharena,incorrect,matharena,0.00362375,,True
588,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_14,matharena,correct,matharena,0.01703875,,False
589,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_24,matharena,detected,matharena,0.064739,,False
590,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_2,matharena,correct,matharena,0.01123025,,False
591,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_26,matharena,correct,matharena,0.0238165,,False
592,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_18,matharena,correct,matharena,0.04962625,,False
593,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_31,matharena,correct,matharena,0.015473,,False
594,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_42,matharena,incorrect,matharena,0.07568675,,True
595,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_44,matharena,correct,matharena,0.01796325,,False
596,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_38,matharena,correct,matharena,0.05504225,,False
597,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,matharena_smt_smt_2025_49,matharena,correct,matharena,0.03749025,,False
598,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,nordic_2025_3,nordic,correct,generic,0.0580555,,False
599,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,nordic_2025_1,nordic,correct,generic,0.0560365,,False
600,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,philippines_2025_1,philippines,correct,generic,0.04948275,,False
601,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,nordic_2025_2,nordic,correct,generic,0.04774275,,False
602,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,philippines_2025_3,philippines,correct,generic,0.03225475,,False
603,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,philippines_2025_4,philippines,incorrect,generic,0.06309675,,True
604,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,philippines_2025_8,philippines,detected,generic,0.0725305,,False
605,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,philippines_2025_2,philippines,incorrect,generic,0.0610125,,True
606,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,philippines_2025_7,philippines,correct,generic,0.07717725,,False
607,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,polish_2025_3,polish,detected,generic,0.03358875,,False
608,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,polish_2025_4,polish,correct,generic,0.0765185,,False
609,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,philippines_2025_5,philippines,correct,generic,0.0330565,,False
610,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,polish_2025_1,polish,correct,generic,0.0362685,,False
611,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,polish_2025_6,polish,detected,generic,0.02435725,,False
612,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,polish_2025_5,polish,detected,generic,0.03610275,,False
613,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,rmm_2025_3,rmm,detected,generic,0.046687,,False
614,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,rmm_2025_4,rmm,incorrect,generic,0.077488,,True
615,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,rmm_2025_6,rmm,correct,generic,0.06607275,,False
616,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,rmm_2025_5,rmm,incorrect,generic,0.075012,,True
617,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,polish_2025_2,polish,correct,generic,0.0509575,,False
618,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,rmm_2025_1,rmm,incorrect,generic,0.044468,,True
619,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,turkey_tst_2025_1,turkey,correct,generic,0.02859,,False
620,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,turkey_tst_2025_2,turkey,incorrect,generic,0.03577875,,True
621,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,turkey_tst_2025_4,turkey,detected,generic,0.0424605,,False
622,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,turkey_tst_2025_6,turkey,incorrect,generic,0.044099,,True
623,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,turkey_tst_2025_9,turkey,detected,generic,0.03881975,,False
624,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,usamo_2025_4,usamo,correct,generic,0.08006475,,False
625,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,usamo_2025_2,usamo,detected,generic,0.0282285,,False
626,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,usamo_2025_6,usamo,detected,generic,0.04092975,,False
627,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,turkey_tst_2025_8,turkey,correct,generic,0.019702,,False
628,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,usamo_2025_3,usamo,incorrect,generic,0.0419005,,True
629,GPT-5-mini (medium),GPT-5-mini (high),openai/gpt-5-mini,usamo_2025_5,usamo,incorrect,generic,0.07382625,,True
630,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_4,allrussian,incorrect,generic,0.052666649999999995,,True
631,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_3,allrussian,correct,generic,0.02027505,,False
632,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_14,allrussian,correct,generic,0.00505905,,False
633,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_11,allrussian,correct,generic,0.013121249999999997,,False
634,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_16,allrussian,corrected,generic,0.017133899999999997,,False
635,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_13,allrussian,detected,generic,0.0028402499999999995,,False
636,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_5,allrussian,correct,generic,0.013677449999999999,,False
637,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_1,allrussian,incorrect,generic,0.0217278,,True
638,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_10,allrussian,incorrect,generic,0.018901949999999997,,True
639,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_12,allrussian,incorrect,generic,0.02902125,,True
640,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_15,allrussian,corrected,generic,0.03514635,,False
641,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_9,allrussian,correct,generic,0.006451800000000001,,False
642,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_6,allrussian,incorrect,generic,0.0137688,,True
643,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_8,allrussian,correct,generic,0.04542585,,False
644,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmo_2025_2,bmo,correct,generic,0.0477546,,False
645,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_7,allrussian,correct,generic,0.0340476,,False
646,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmo_2025_1,bmo,incorrect,generic,0.05336549999999999,,True
647,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_A_2025_2,bmosl,correct,generic,0.0172164,,False
648,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_A_2025_6,bmosl,correct,generic,0.019567199999999996,,False
649,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_A_2025_5,bmosl,detected,generic,0.0332676,,False
650,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_C_2025_4,bmosl,incorrect,generic,0.0032942999999999996,,True
651,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_A_2025_4,bmosl,correct,generic,0.0271608,,False
652,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_C_2025_1,bmosl,incorrect,generic,0.0337434,,True
653,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_A_2025_3,bmosl,correct,generic,0.028711949999999997,,False
654,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_C_2025_6,bmosl,incorrect,generic,0.03164205,,True
655,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_C_2025_3,bmosl,detected,generic,0.04686615,,False
656,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_G_2025_6,bmosl,correct,generic,0.0369,,False
657,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmo_2025_4,bmo,incorrect,generic,0.0403746,,True
658,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_G_2025_1,bmosl,detected,generic,0.0497622,,False
659,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_C_2025_2,bmosl,correct,generic,0.0233952,,False
660,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_G_2025_3,bmosl,correct,generic,0.022584450000000002,,False
661,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmo_2025_3,bmo,corrected,generic,0.033189899999999994,,False
662,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_NT_2025_2,bmosl,correct,generic,0.0257307,,False
663,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_NT_2025_1,bmosl,correct,generic,0.02094255,,False
664,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_A_2025_1,bmosl,correct,generic,0.00955605,,False
665,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_G_2025_4,bmosl,incorrect,generic,0.04015305,,True
666,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_NT_2025_7,bmosl,correct,generic,0.013277699999999998,,False
667,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_C_2025_5,bmosl,incorrect,generic,0.0444426,,True
668,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_G_2025_2,bmosl,detected,generic,0.02604495,,False
669,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bulgaria_2025_2,bulgaria,correct,generic,0.01046775,,False
670,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bulgaria_2025_1,bulgaria,corrected,generic,0.010677899999999999,,False
671,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_NT_2025_3,bmosl,incorrect,generic,0.02140155,,True
672,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bulgaria_2025_3,bulgaria,correct,generic,0.00834585,,False
673,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_G_2025_7,bmosl,incorrect,generic,0.0199794,,True
674,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_NT_2025_4,bmosl,correct,generic,0.021937650000000003,,False
675,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bulgaria_2025_5,bulgaria,correct,generic,0.0035231999999999998,,False
676,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,canada_2025_2,canada,correct,generic,0.008712600000000001,,False
677,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bulgaria_2025_6,bulgaria,correct,generic,0.0146799,,False
678,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,canada_2025_1,canada,correct,generic,0.01163625,,False
679,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_NT_2025_5,bmosl,detected,generic,0.03382755,,False
680,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,canada_2025_3,canada,incorrect,generic,0.0158373,,True
681,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,china_2025_2,china,incorrect,generic,0.0182547,,True
682,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,canada_2025_5,canada,detected,generic,0.013315200000000001,,False
683,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_G_2025_5,bmosl,corrected,generic,0.01695315,,False
684,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,china_2025_3,china,incorrect,generic,0.047176649999999994,,True
685,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bulgaria_2025_4,bulgaria,correct,generic,0.05716799999999999,,False
686,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,china_2025_1,china,incorrect,generic,0.036144,,True
687,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,canada_2025_4,canada,incorrect,generic,0.0217932,,True
688,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,china_2025_6,china,correct,generic,0.0275661,,False
689,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,allrussian_2025_2,allrussian,incorrect,generic,0.0248115,,True
690,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_14,chinatst,detected,generic,0.0106158,,False
691,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_10,chinatst,correct,generic,0.036408,,False
692,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_15,chinatst,correct,generic,0.0254451,,False
693,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_11,chinatst,correct,generic,0.01065975,,False
694,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_17,chinatst,incorrect,generic,0.033456900000000005,,True
695,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_13,chinatst,incorrect,generic,0.040454399999999995,,True
696,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_12,chinatst,incorrect,generic,0.04572854999999999,,True
697,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_2,chinatst,incorrect,generic,0.050112,,True
698,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,bmosl_NT_2025_6,bmosl,correct,generic,0.05273745,,False
699,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,china_2025_5,china,incorrect,generic,0.03370454999999999,,True
700,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_18,chinatst,correct,generic,0.025957949999999997,,False
701,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_4,chinatst,correct,generic,0.048113699999999995,,False
702,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_20,chinatst,correct,generic,0.0199995,,False
703,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_5,chinatst,incorrect,generic,0.02806005,,True
704,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_9,chinatst,correct,generic,0.029123999999999997,,False
705,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_7,chinatst,detected,generic,0.01288635,,False
706,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_6,chinatst,correct,generic,0.0075654,,False
707,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_1,chinatst,incorrect,generic,0.06000945,,True
708,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_22,chinatst,corrected,generic,0.0166245,,False
709,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_16,chinatst,incorrect,generic,0.050583,,True
710,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_3,chinatst,detected,generic,0.01572915,,False
711,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_19,chinatst,corrected,generic,0.02205765,,False
712,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_8,chinatst,incorrect,generic,0.02750835,,True
713,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,chinatst_2025_21,chinatst,correct,generic,0.025742100000000004,,False
714,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,egmo_2025_2,egmo,detected,generic,0.01160925,,False
715,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,egmo_2025_3,egmo,corrected,generic,0.04684725,,False
716,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,egmo_2025_4,egmo,incorrect,generic,0.0243912,,True
717,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,egmo_2025_1,egmo,correct,generic,0.02236005,,False
718,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_A_2025_2,elmosl,correct,generic,0.00427935,,False
719,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,egmo_2025_6,egmo,incorrect,generic,0.05165324999999999,,True
720,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_A_2025_1,elmosl,incorrect,generic,0.03414149999999999,,True
721,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,egmo_2025_5,egmo,incorrect,generic,0.033264300000000004,,True
722,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_A_2025_6,elmosl,correct,generic,0.0103665,,False
723,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_1,elmosl,incorrect,generic,0.0047751,,True
724,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_2,elmosl,incorrect,generic,0.021315599999999997,,True
725,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_A_2025_5,elmosl,corrected,generic,0.033286050000000005,,False
726,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_4,elmosl,incorrect,generic,0.027527700000000002,,True
727,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_A_2025_7,elmosl,incorrect,generic,0.018157799999999998,,True
728,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_8,elmosl,correct,generic,0.02671935,,False
729,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_A_2025_3,elmosl,incorrect,generic,0.022309799999999998,,True
730,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_A_2025_8,elmosl,detected,generic,0.020354699999999996,,False
731,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_3,elmosl,detected,generic,0.0051721499999999995,,False
732,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_7,elmosl,corrected,generic,0.0065667,,False
733,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_7,elmosl,detected,generic,0.049665299999999996,,False
734,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_5,elmosl,incorrect,generic,0.024001949999999998,,True
735,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_9,elmosl,corrected,generic,0.026566950000000002,,False
736,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_5,elmosl,correct,generic,0.03745035,,False
737,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_NT_2025_6,elmosl,correct,generic,0.01235625,,False
738,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_NT_2025_7,elmosl,incorrect,generic,0.0480369,,True
739,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_NT_2025_4,elmosl,detected,generic,0.037213800000000005,,False
740,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_C_2025_6,elmosl,incorrect,generic,0.046360799999999994,,True
741,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_4,elmosl,incorrect,generic,0.0268308,,True
742,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_2,elmosl,incorrect,generic,0.04120935,,True
743,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_NT_2025_3,elmosl,correct,generic,0.004669049999999999,,False
744,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_3,elmosl,incorrect,generic,0.0054054,,True
745,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_8,elmosl,incorrect,generic,0.03650355,,True
746,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,german_2025_2,german,incorrect,generic,0.03565395,,True
747,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_NT_2025_1,elmosl,correct,generic,0.02235735,,False
748,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_9,elmosl,incorrect,generic,0.04718025,,True
749,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_NT_2025_2,elmosl,incorrect,generic,0.022668149999999998,,True
750,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,german_2025_3,german,detected,generic,0.01153875,,False
751,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,greece_2025_3,greece,correct,generic,0.0134154,,False
752,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,greece_2025_1,greece,correct,generic,0.00420045,,False
753,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_1,imosl,correct,generic,0.011637149999999999,,False
754,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,greece_2025_4,greece,correct,generic,0.01582635,,False
755,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,german_2025_4,german,correct,generic,0.0225363,,False
756,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,greece_2025_2,greece,correct,generic,0.031107899999999997,,False
757,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_12,imosl,detected,generic,0.026155650000000003,,False
758,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_1,elmosl,incorrect,generic,0.026572349999999998,,True
759,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_13,imosl,correct,generic,0.02123385,,False
760,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_G_2025_6,elmosl,incorrect,generic,0.02961765,,True
761,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_14,imosl,correct,generic,0.0265176,,False
762,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_16,imosl,incorrect,generic,0.02526795,,True
763,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_15,imosl,correct,generic,0.0289275,,False
764,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,german_2025_1,german,correct,generic,0.009956399999999999,,False
765,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_19,imosl,incorrect,generic,0.04222335,,True
766,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_2,imosl,correct,generic,0.013618199999999999,,False
767,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_18,imosl,incorrect,generic,0.05492445,,True
768,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_17,imosl,detected,generic,0.034581,,False
769,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_20,imosl,correct,generic,0.0388494,,False
770,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_26,imosl,incorrect,generic,0.03928229999999999,,True
771,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_22,imosl,correct,generic,0.03771674999999999,,False
772,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_25,imosl,correct,generic,0.015519899999999998,,False
773,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_27,imosl,incorrect,generic,0.037817699999999996,,True
774,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_31,imosl,correct,generic,0.0250305,,False
775,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_28,imosl,incorrect,generic,0.029034,,True
776,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,elmosl_NT_2025_5,elmosl,incorrect,generic,0.0225591,,True
777,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_29,imosl,incorrect,generic,0.03472035,,True
778,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_3,imosl,correct,generic,0.00758145,,False
779,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_11,imosl,incorrect,generic,0.02439585,,True
780,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_35,imosl,incorrect,generic,0.034838549999999996,,True
781,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_30,imosl,incorrect,generic,0.02639805,,True
782,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_5,imosl,correct,generic,0.0126822,,False
783,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_23,imosl,incorrect,generic,0.0411885,,True
784,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_36,imosl,detected,generic,0.0181362,,False
785,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_2025_1,india,correct,generic,0.0029842499999999995,,False
786,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_9,imosl,correct,generic,0.00987165,,False
787,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_7,imosl,correct,generic,0.0153183,,False
788,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_34,imosl,incorrect,generic,0.02859195,,True
789,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_21,imosl,incorrect,generic,0.03368475,,True
790,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_24,imosl,incorrect,generic,0.0359991,,True
791,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_33,imosl,detected,generic,0.015116699999999999,,False
792,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_2025_2,india,correct,generic,0.02098725,,False
793,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_2025_3,india,detected,generic,0.0143001,,False
794,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_6,imosl,incorrect,generic,0.0178608,,True
795,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_4,imosl,incorrect,generic,0.032739149999999995,,True
796,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_2025_5,india,correct,generic,0.03529335,,False
797,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_12,india,incorrect,generic,0.031979099999999996,,True
798,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_2025_6,india,incorrect,generic,0.01102305,,True
799,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_1,india,correct,generic,0.027647099999999997,,False
800,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_10,imosl,incorrect,generic,0.024583349999999997,,True
801,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_2025_4,india,incorrect,generic,0.005212799999999999,,True
802,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_17,india,correct,generic,0.026293650000000002,,False
803,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_16,india,correct,generic,0.0220092,,False
804,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_13,india,incorrect,generic,0.053864249999999995,,True
805,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_15,india,correct,generic,0.025487249999999996,,False
806,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_14,india,correct,generic,0.0099735,,False
807,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_18,india,incorrect,generic,0.03937965,,True
808,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_10,india,corrected,generic,0.0173835,,False
809,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_21,india,correct,generic,0.0279678,,False
810,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_20,india,detected,generic,0.0374613,,False
811,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_7,india,incorrect,generic,0.031890449999999994,,True
812,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_3,india,correct,generic,0.0218901,,False
813,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_19,india,detected,generic,0.0049283999999999994,,False
814,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_4,india,correct,generic,0.02021895,,False
815,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_8,imosl,incorrect,generic,0.0272085,,True
816,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_37,imosl,incorrect,generic,0.03091335,,True
817,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_5,india,corrected,generic,0.05599545,,False
818,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_2,iran,incorrect,generic,0.01639875,,True
819,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_11,india,incorrect,generic,0.012535649999999999,,True
820,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_2,india,incorrect,generic,0.03594375,,True
821,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,imosl_2025_32,imosl,incorrect,generic,0.0085353,,True
822,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_4,iran,incorrect,generic,0.017647799999999998,,True
823,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_7,iran,detected,generic,0.00309135,,False
824,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_1,iran,correct,generic,0.02552175,,False
825,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_6,iran,incorrect,generic,0.032463900000000004,,True
826,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_6,india,correct,generic,0.018650399999999998,,False
827,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_9,india,correct,generic,0.015309899999999998,,False
828,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_5,iran,incorrect,generic,0.032691899999999996,,True
829,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_1,israel,incorrect,generic,0.00372375,,True
830,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_5,israel,correct,generic,0.013731,,False
831,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_4,israel,incorrect,generic,0.0229008,,True
832,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_9,iran,incorrect,generic,0.04471635,,True
833,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_6,israel,incorrect,generic,0.04100849999999999,,True
834,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_7,israel,incorrect,generic,0.0076893000000000005,,True
835,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,india_prep_2025_8,india,correct,generic,0.01685745,,False
836,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_10,israel,incorrect,generic,0.030989399999999997,,True
837,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_3,iran,incorrect,generic,0.0528531,,True
838,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_2,israel,incorrect,generic,0.05285445,,True
839,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_8,israel,incorrect,generic,0.029314949999999996,,True
840,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_3,israel,correct,generic,0.0175716,,False
841,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,izho_2025_4,izho,correct,generic,0.00875445,,False
842,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,israel_tst_2025_9,israel,incorrect,generic,0.0273786,,True
843,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,izho_2025_2,izho,correct,generic,0.035671049999999996,,False
844,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,jbmo_2025_1,jbmo,detected,generic,0.0044157,,False
845,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,izho_2025_1,izho,incorrect,generic,0.0063684,,True
846,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,jbmo_2025_3,jbmo,incorrect,generic,0.02769345,,True
847,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,izho_2025_6,izho,detected,generic,0.03095565,,False
848,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,izho_2025_5,izho,incorrect,generic,0.015762,,True
849,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,jbmo_2025_2,jbmo,corrected,generic,0.0337737,,False
850,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,jbmo_2025_4,jbmo,correct,generic,0.0379548,,False
851,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,korea_2025_1,korea,corrected,generic,0.02184405,,False
852,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,izho_2025_3,izho,corrected,generic,0.035918849999999995,,False
853,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,korea_2025_2,korea,correct,generic,0.01561125,,False
854,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,korea_2025_5,korea,correct,generic,0.02686665,,False
855,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_10,matharena,correct,matharena,0.016988700000000002,,False
856,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,korea_2025_4,korea,correct,generic,0.045570599999999996,,False
857,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_11,matharena,correct,matharena,0.0156525,,False
858,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_17,matharena,correct,matharena,0.0042648,,False
859,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_14,matharena,incorrect,matharena,0.05875995,,True
860,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,korea_2025_3,korea,incorrect,generic,0.02528175,,True
861,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,iran_tst_2025_8,iran,correct,generic,0.010095,,False
862,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_15,matharena,incorrect,matharena,0.04677855,,True
863,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,korea_2025_6,korea,correct,generic,0.029101649999999996,,False
864,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_1,matharena,correct,matharena,0.0048506999999999995,,False
865,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_19,matharena,correct,matharena,0.0053659499999999995,,False
866,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_12,matharena,correct,matharena,0.01365315,,False
867,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_18,matharena,correct,matharena,0.02872155,,False
868,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_13,matharena,correct,matharena,0.0354192,,False
869,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_20,matharena,correct,matharena,0.0231444,,False
870,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_24,matharena,correct,matharena,0.0071241,,False
871,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_22,matharena,correct,matharena,0.010113299999999999,,False
872,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_21,matharena,correct,matharena,0.02293725,,False
873,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_16,matharena,correct,matharena,0.005772599999999999,,False
874,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_25,matharena,correct,matharena,0.01804005,,False
875,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_2,matharena,correct,matharena,0.03288825,,False
876,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_23,matharena,correct,matharena,0.012150600000000001,,False
877,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_29,matharena,corrected,matharena,0.0112134,,False
878,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_26,matharena,correct,matharena,0.01321725,,False
879,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_6,matharena,correct,matharena,0.0051807,,False
880,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_4,matharena,correct,matharena,0.007546799999999999,,False
881,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_27,matharena,correct,matharena,0.01159395,,False
882,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_28,matharena,corrected,matharena,0.02388435,,False
883,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_30,matharena,correct,matharena,0.0238905,,False
884,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_3,matharena,corrected,matharena,0.009223950000000002,,False
885,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_9,matharena,correct,matharena,0.01227375,,False
886,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_1,matharena,correct,matharena,0.00269595,,False
887,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_8,matharena,correct,matharena,0.00663225,,False
888,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_5,matharena,correct,matharena,0.00752505,,False
889,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_13,matharena,correct,matharena,0.057424199999999995,,False
890,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_aime_aime_2025_7,matharena,incorrect,matharena,0.01853235,,True
891,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_14,matharena,correct,matharena,0.00457305,,False
892,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_12,matharena,correct,matharena,0.0061002,,False
893,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_11,matharena,correct,matharena,0.01096845,,False
894,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_21,matharena,incorrect,matharena,0.02016555,,True
895,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_20,matharena,correct,matharena,0.005982599999999999,,False
896,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_2,matharena,correct,matharena,0.0054675,,False
897,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_18,matharena,correct,matharena,0.0034903499999999997,,False
898,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_10,matharena,correct,matharena,0.012894599999999999,,False
899,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_16,matharena,correct,matharena,0.0040298999999999995,,False
900,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_19,matharena,correct,matharena,0.0055758,,False
901,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_17,matharena,correct,matharena,0.01696665,,False
902,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_25,matharena,correct,matharena,0.007236,,False
903,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_3,matharena,correct,matharena,0.015753299999999998,,False
904,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_15,matharena,incorrect,matharena,0.0304269,,True
905,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_26,matharena,correct,matharena,0.011510699999999999,,False
906,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_23,matharena,correct,matharena,0.0059473500000000006,,False
907,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_4,matharena,correct,matharena,0.007015349999999999,,False
908,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_22,matharena,incorrect,matharena,0.0161835,,True
909,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_29,matharena,corrected,matharena,0.008174550000000001,,False
910,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_24,matharena,correct,matharena,0.022905599999999998,,False
911,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_7,matharena,correct,matharena,0.0064398,,False
912,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_5,matharena,correct,matharena,0.01007145,,False
913,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_8,matharena,correct,matharena,0.009175949999999999,,False
914,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_28,matharena,incorrect,matharena,0.0386754,,True
915,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_27,matharena,corrected,matharena,0.0133539,,False
916,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_12,matharena,correct,matharena,0.00931725,,False
917,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_6,matharena,correct,matharena,0.008657399999999999,,False
918,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_10,matharena,correct,matharena,0.01047255,,False
919,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_9,matharena,correct,matharena,0.00566985,,False
920,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_16,matharena,incorrect,matharena,0.024428999999999996,,True
921,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_brumo_brumo_2025_30,matharena,incorrect,matharena,0.07351485,,True
922,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_14,matharena,correct,matharena,0.0147861,,False
923,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_13,matharena,correct,matharena,0.0110997,,False
924,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_17,matharena,correct,matharena,0.0076698,,False
925,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_11,matharena,incorrect,matharena,0.0130929,,True
926,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_1,matharena,correct,matharena,0.008736599999999999,,False
927,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_15,matharena,correct,matharena,0.0187278,,False
928,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_18,matharena,incorrect,matharena,0.03338835,,True
929,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_21,matharena,correct,matharena,0.0258804,,False
930,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_2,matharena,correct,matharena,0.00768555,,False
931,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_25,matharena,correct,matharena,0.023038649999999997,,False
932,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_20,matharena,detected,matharena,0.06201735,,False
933,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_26,matharena,correct,matharena,0.0191454,,False
934,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_23,matharena,correct,matharena,0.016467600000000002,,False
935,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_22,matharena,correct,matharena,0.00623595,,False
936,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_29,matharena,correct,matharena,0.021412949999999997,,False
937,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_27,matharena,correct,matharena,0.021779549999999998,,False
938,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_28,matharena,correct,matharena,0.0402519,,False
939,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_19,matharena,correct,matharena,0.00978675,,False
940,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_3,matharena,correct,matharena,0.00706785,,False
941,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_32,matharena,correct,matharena,0.0282273,,False
942,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_33,matharena,correct,matharena,0.012304049999999999,,False
943,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_30,matharena,corrected,matharena,0.029377949999999996,,False
944,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_37,matharena,correct,matharena,0.015164399999999998,,False
945,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_36,matharena,correct,matharena,0.01448595,,False
946,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_31,matharena,correct,matharena,0.009250350000000001,,False
947,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_34,matharena,correct,matharena,0.027909899999999998,,False
948,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_40,matharena,correct,matharena,0.0214737,,False
949,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_7,matharena,correct,matharena,0.022996350000000002,,False
950,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_35,matharena,correct,matharena,0.01452315,,False
951,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_39,matharena,correct,matharena,0.009004950000000001,,False
952,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_38,matharena,corrected,matharena,0.04412535,,False
953,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_24,matharena,correct,matharena,0.01032555,,False
954,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_4,matharena,correct,matharena,0.007086149999999999,,False
955,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_8,matharena,correct,matharena,0.0173004,,False
956,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_11,matharena,correct,matharena,0.0223428,,False
957,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_5,matharena,correct,matharena,0.034410449999999995,,False
958,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_12,matharena,correct,matharena,0.0213027,,False
959,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_6,matharena,incorrect,matharena,0.0262782,,True
960,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_15,matharena,correct,matharena,0.0232107,,False
961,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_2,matharena,incorrect,matharena,0.007138799999999999,,True
962,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_13,matharena,incorrect,matharena,0.03092085,,True
963,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_10,matharena,incorrect,matharena,0.0399405,,True
964,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_20,matharena,correct,matharena,0.04840845,,False
965,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_19,matharena,incorrect,matharena,0.0274695,,True
966,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_16,matharena,correct,matharena,0.016485899999999998,,False
967,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_21,matharena,correct,matharena,0.005607749999999999,,False
968,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_17,matharena,correct,matharena,0.028552349999999997,,False
969,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_22,matharena,correct,matharena,0.010256099999999999,,False
970,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_cmimc_cmimc_2025_9,matharena,correct,matharena,0.0231342,,False
971,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_23,matharena,correct,matharena,0.015670049999999998,,False
972,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_26,matharena,incorrect,matharena,0.026469299999999998,,True
973,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_28,matharena,correct,matharena,0.01178235,,False
974,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_24,matharena,correct,matharena,0.0205194,,False
975,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_5,matharena,incorrect,matharena,0.011562,,True
976,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_14,matharena,incorrect,matharena,0.012870449999999999,,True
977,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_25,matharena,incorrect,matharena,0.04433955,,True
978,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_3,matharena,correct,matharena,0.014824799999999999,,False
979,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_4,matharena,detected,matharena,0.0121023,,False
980,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_29,matharena,correct,matharena,0.023463599999999998,,False
981,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_27,matharena,correct,matharena,0.023054999999999996,,False
982,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_6,matharena,correct,matharena,0.01346445,,False
983,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_10,matharena,correct,matharena,0.00933885,,False
984,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_30,matharena,incorrect,matharena,0.0642873,,True
985,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_1,matharena,correct,matharena,0.004698,,False
986,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_11,matharena,detected,matharena,0.026875200000000002,,False
987,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_13,matharena,correct,matharena,0.015226799999999999,,False
988,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_8,matharena,detected,matharena,0.00567,,False
989,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_14,matharena,correct,matharena,0.006489,,False
990,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_12,matharena,correct,matharena,0.0072351,,False
991,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_7,matharena,incorrect,matharena,0.01790295,,True
992,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_18,matharena,corrected,matharena,0.01376985,,False
993,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_19,matharena,correct,matharena,0.017691899999999997,,False
994,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_16,matharena,correct,matharena,0.0067092,,False
995,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_15,matharena,incorrect,matharena,0.00739875,,True
996,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_1,matharena,correct,matharena,0.0061797,,False
997,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_17,matharena,corrected,matharena,0.0098829,,False
998,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_hmmt_hmmt_feb_2025_9,matharena,correct,matharena,0.02114475,,False
999,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_24,matharena,correct,matharena,0.01497315,,False
1000,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_20,matharena,incorrect,matharena,0.0252534,,True
1001,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_18,matharena,correct,matharena,0.038967299999999996,,False
1002,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_2,matharena,correct,matharena,0.0022113,,False
1003,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_25,matharena,correct,matharena,0.00668925,,False
1004,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_21,matharena,correct,matharena,0.011393549999999999,,False
1005,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_26,matharena,correct,matharena,0.00617925,,False
1006,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_23,matharena,correct,matharena,0.019270649999999997,,False
1007,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_28,matharena,correct,matharena,0.005756400000000001,,False
1008,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_31,matharena,incorrect,matharena,0.008078549999999999,,True
1009,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_22,matharena,incorrect,matharena,0.019701299999999998,,True
1010,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_3,matharena,correct,matharena,0.006694049999999999,,False
1011,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_27,matharena,correct,matharena,0.024653999999999995,,False
1012,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_29,matharena,correct,matharena,0.017268600000000002,,False
1013,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_32,matharena,correct,matharena,0.0071578499999999995,,False
1014,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_30,matharena,incorrect,matharena,0.047821949999999995,,True
1015,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_37,matharena,incorrect,matharena,0.022563,,True
1016,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_35,matharena,incorrect,matharena,0.010571849999999999,,True
1017,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_33,matharena,correct,matharena,0.0348,,False
1018,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_40,matharena,incorrect,matharena,0.01784355,,True
1019,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_4,matharena,correct,matharena,0.00544755,,False
1020,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_38,matharena,correct,matharena,0.0158169,,False
1021,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_43,matharena,incorrect,matharena,0.04899075,,True
1022,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_42,matharena,correct,matharena,0.04967939999999999,,False
1023,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_36,matharena,correct,matharena,0.01630455,,False
1024,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_34,matharena,incorrect,matharena,0.028840349999999997,,True
1025,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_39,matharena,correct,matharena,0.02118555,,False
1026,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_44,matharena,correct,matharena,0.0054662999999999995,,False
1027,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_41,matharena,correct,matharena,0.017373600000000003,,False
1028,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_52,matharena,incorrect,matharena,0.018807,,True
1029,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_47,matharena,correct,matharena,0.0066307499999999995,,False
1030,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_5,matharena,correct,matharena,0.0053216999999999995,,False
1031,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_45,matharena,correct,matharena,0.0029121,,False
1032,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_49,matharena,incorrect,matharena,0.006969899999999999,,True
1033,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_53,matharena,incorrect,matharena,0.03958725,,True
1034,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_50,matharena,correct,matharena,0.005562,,False
1035,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_9,matharena,correct,matharena,0.004152899999999999,,False
1036,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_46,matharena,incorrect,matharena,0.010055550000000002,,True
1037,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_48,matharena,correct,matharena,0.01046685,,False
1038,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,nordic_2025_2,nordic,correct,generic,0.013739999999999999,,False
1039,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_51,matharena,correct,matharena,0.0186618,,False
1040,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_7,matharena,correct,matharena,0.00422025,,False
1041,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_8,matharena,correct,matharena,0.0123642,,False
1042,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,matharena_smt_smt_2025_6,matharena,correct,matharena,0.007647749999999999,,False
1043,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,pan_african_2025_2,pan,correct,generic,0.0057723,,False
1044,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,pan_african_2025_4,pan,correct,generic,0.00396735,,False
1045,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,pan_african_2025_1,pan,incorrect,generic,0.012274499999999999,,True
1046,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,nordic_2025_1,nordic,correct,generic,0.01396725,,False
1047,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,philippines_2025_3,philippines,correct,generic,0.017945399999999997,,False
1048,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,pan_african_2025_3,pan,incorrect,generic,0.00367755,,True
1049,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,pan_african_2025_5,pan,correct,generic,0.02303685,,False
1050,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,nordic_2025_3,nordic,incorrect,generic,0.02140185,,True
1051,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,pan_african_2025_6,pan,incorrect,generic,0.026289,,True
1052,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,philippines_2025_2,philippines,corrected,generic,0.02393385,,False
1053,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,philippines_2025_1,philippines,incorrect,generic,0.0068226,,True
1054,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,philippines_2025_8,philippines,correct,generic,0.03922485,,False
1055,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,polish_2025_2,polish,correct,generic,0.01448325,,False
1056,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,philippines_2025_4,philippines,correct,generic,0.045108,,False
1057,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,polish_2025_1,polish,correct,generic,0.01271325,,False
1058,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,philippines_2025_5,philippines,correct,generic,0.029758649999999998,,False
1059,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,philippines_2025_7,philippines,incorrect,generic,0.04687589999999999,,True
1060,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,polish_2025_6,polish,detected,generic,0.008514899999999999,,False
1061,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,polish_2025_5,polish,correct,generic,0.0260766,,False
1062,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,philippines_2025_6,philippines,incorrect,generic,0.023548799999999998,,True
1063,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,rmm_2025_2,rmm,correct,generic,0.01986255,,False
1064,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,rmm_2025_6,rmm,correct,generic,0.0141954,,False
1065,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,rmm_2025_5,rmm,incorrect,generic,0.05889795,,True
1066,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,rmm_2025_1,rmm,incorrect,generic,0.01951515,,True
1067,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,polish_2025_3,polish,correct,generic,0.022073399999999996,,False
1068,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,polish_2025_4,polish,correct,generic,0.024100049999999998,,False
1069,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_10_2025_2,romania,correct,generic,0.0229494,,False
1070,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_10_2025_3,romania,correct,generic,0.024553499999999995,,False
1071,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_11_2025_2,romania,correct,generic,0.00792,,False
1072,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_12_2025_2,romania,correct,generic,0.004959299999999999,,False
1073,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_10_2025_1,romania,detected,generic,0.032883449999999995,,False
1074,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_11_2025_3,romania,correct,generic,0.02864835,,False
1075,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_11_2025_1,romania,correct,generic,0.0197526,,False
1076,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_12_2025_1,romania,incorrect,generic,0.01295265,,True
1077,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,rmm_2025_4,rmm,incorrect,generic,0.01899075,,True
1078,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_9_2025_2,romania,detected,generic,0.03448904999999999,,False
1079,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_tst_2025_3,romania,correct,generic,0.0248181,,False
1080,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_9_2025_1,romania,incorrect,generic,0.0314148,,True
1081,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_12_2025_3,romania,correct,generic,0.020007,,False
1082,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,serbia_tst_bmo_2025_3,serbia,incorrect,generic,0.0203076,,True
1083,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,serbia_tst_bmo_2025_2,serbia,correct,generic,0.0180252,,False
1084,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_tst_2025_2,romania,corrected,generic,0.0188295,,False
1085,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,romania_tst_2025_1,romania,incorrect,generic,0.01324485,,True
1086,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,spain_2025_1,spain,incorrect,generic,0.01072395,,True
1087,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,serbia_tst_bmo_2025_4,serbia,incorrect,generic,0.056035949999999994,,True
1088,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,spain_2025_3,spain,detected,generic,0.013338449999999998,,False
1089,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,spain_2025_2,spain,correct,generic,0.0222843,,False
1090,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,spain_2025_4,spain,incorrect,generic,0.030783599999999998,,True
1091,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,spain_2025_5,spain,correct,generic,0.025811399999999998,,False
1092,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_10,thai,correct,generic,0.020934,,False
1093,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,serbia_tst_bmo_2025_1,serbia,corrected,generic,0.0264513,,False
1094,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_1,thai,correct,generic,0.0098853,,False
1095,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,rmm_2025_3,rmm,incorrect,generic,0.007119749999999999,,True
1096,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_8,thai,incorrect,generic,0.01836375,,True
1097,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_2,thai,correct,generic,0.0070998,,False
1098,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_1,turkey,correct,generic,0.00815235,,False
1099,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_3,thai,incorrect,generic,0.01160565,,True
1100,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_5,thai,incorrect,generic,0.0111942,,True
1101,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_4,thai,correct,generic,0.02320515,,False
1102,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_2,turkey,incorrect,generic,0.029879399999999997,,True
1103,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_7,thai,correct,generic,0.027944850000000004,,False
1104,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_9,thai,incorrect,generic,0.00996495,,True
1105,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_9,turkey,correct,generic,0.011496,,False
1106,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_8,turkey,correct,generic,0.01132995,,False
1107,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_6,turkey,incorrect,generic,0.04817115,,True
1108,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_7,turkey,incorrect,generic,0.04871565,,True
1109,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usamo_2025_2,usamo,correct,generic,0.01176465,,False
1110,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_3,turkey,incorrect,generic,0.0481527,,True
1111,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_4,turkey,detected,generic,0.03387104999999999,,False
1112,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usamo_2025_3,usamo,incorrect,generic,0.01704105,,True
1113,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,thai_2025_6,thai,correct,generic,0.018599699999999997,,False
1114,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usamo_2025_4,usamo,correct,generic,0.0161556,,False
1115,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_4,usatst,correct,generic,0.0206058,,False
1116,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_7,usatst,correct,generic,0.018246,,False
1117,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_6,usatst,incorrect,generic,0.01114485,,True
1118,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_1,usatst,correct,generic,0.015732,,False
1119,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usamo_2025_6,usamo,correct,generic,0.0160338,,False
1120,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usamo_2025_5,usamo,correct,generic,0.0459495,,False
1121,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,turkey_tst_2025_5,turkey,incorrect,generic,0.02228475,,True
1122,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usamo_2025_1,usamo,incorrect,generic,0.0170217,,True
1123,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_9,usatst,incorrect,generic,0.030110549999999996,,True
1124,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_5,usatst,correct,generic,0.0399984,,False
1125,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,vietnam_2025_1,vietnam,correct,generic,0.0066754499999999994,,False
1126,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_2,usatst,incorrect,generic,0.01348455,,True
1127,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_3,usatst,correct,generic,0.022182149999999998,,False
1128,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,vietnam_2025_5,vietnam,correct,generic,0.028033649999999997,,False
1129,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,usatst_2025_8,usatst,correct,generic,0.010701599999999999,,False
1130,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,vietnam_2025_3,vietnam,incorrect,generic,0.01818375,,True
1131,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,vietnam_2025_4,vietnam,incorrect,generic,0.05481615,,True
1132,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,vietnam_2025_2,vietnam,correct,generic,0.0193098,,False
1133,GPT-5-mini (medium),GPT OSS 120B (high),openai/oss-120b,vietnam_2025_6,vietnam,correct,generic,0.0251682,,False
1134,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_9,allrussian,correct,generic,0.0,,False
1135,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_12,allrussian,detected,generic,0.0,,False
1136,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_15,allrussian,incorrect,generic,0.0,,True
1137,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_14,allrussian,detected,generic,0.0,,False
1138,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_3,allrussian,correct,generic,0.0,,False
1139,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_6,allrussian,incorrect,generic,0.0,,True
1140,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_5,allrussian,correct,generic,0.0,,False
1141,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_16,allrussian,incorrect,generic,0.0,,True
1142,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_13,allrussian,detected,generic,0.0,,False
1143,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_8,allrussian,incorrect,generic,0.0,,True
1144,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_2,allrussian,incorrect,generic,0.0,,True
1145,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_1,allrussian,incorrect,generic,0.0,,True
1146,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_A_2025_1,bmosl,detected,generic,0.0,,False
1147,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_A_2025_3,bmosl,incorrect,generic,0.0,,True
1148,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmo_2025_1,bmo,incorrect,generic,0.0,,True
1149,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_10,allrussian,incorrect,generic,0.0,,True
1150,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_A_2025_5,bmosl,incorrect,generic,0.0,,True
1151,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmo_2025_4,bmo,detected,generic,0.0,,False
1152,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_7,allrussian,detected,generic,0.0,,False
1153,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_4,allrussian,incorrect,generic,0.0,,True
1154,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmo_2025_3,bmo,corrected,generic,0.0,,False
1155,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_A_2025_6,bmosl,correct,generic,0.0,,False
1156,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,allrussian_2025_11,allrussian,incorrect,generic,0.0,,True
1157,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_C_2025_1,bmosl,incorrect,generic,0.0,,True
1158,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_C_2025_2,bmosl,incorrect,generic,0.0,,True
1159,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmo_2025_2,bmo,incorrect,generic,0.0,,True
1160,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_A_2025_4,bmosl,correct,generic,0.0,,False
1161,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_C_2025_3,bmosl,detected,generic,0.0,,False
1162,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_C_2025_6,bmosl,incorrect,generic,0.0,,True
1163,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_C_2025_4,bmosl,incorrect,generic,0.0,,True
1164,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_G_2025_5,bmosl,incorrect,generic,0.0,,True
1165,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_G_2025_6,bmosl,detected,generic,0.0,,False
1166,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_G_2025_7,bmosl,correct,generic,0.0,,False
1167,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_NT_2025_1,bmosl,detected,generic,0.0,,False
1168,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_C_2025_5,bmosl,incorrect,generic,0.0,,True
1169,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_G_2025_1,bmosl,incorrect,generic,0.0,,True
1170,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_G_2025_4,bmosl,incorrect,generic,0.0,,True
1171,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_NT_2025_4,bmosl,detected,generic,0.0,,False
1172,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_NT_2025_2,bmosl,detected,generic,0.0,,False
1173,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bulgaria_2025_4,bulgaria,correct,generic,0.0,,False
1174,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bulgaria_2025_3,bulgaria,detected,generic,0.0,,False
1175,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_NT_2025_6,bmosl,incorrect,generic,0.0,,True
1176,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bulgaria_2025_6,bulgaria,incorrect,generic,0.0,,True
1177,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_NT_2025_7,bmosl,correct,generic,0.0,,False
1178,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bulgaria_2025_2,bulgaria,detected,generic,0.0,,False
1179,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_A_2025_2,bmosl,incorrect,generic,0.0,,True
1180,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_NT_2025_5,bmosl,correct,generic,0.0,,False
1181,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,canada_2025_1,canada,correct,generic,0.0,,False
1182,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_G_2025_2,bmosl,incorrect,generic,0.0,,True
1183,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,canada_2025_2,canada,correct,generic,0.0,,False
1184,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,canada_2025_3,canada,incorrect,generic,0.0,,True
1185,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bulgaria_2025_5,bulgaria,incorrect,generic,0.0,,True
1186,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,canada_2025_4,canada,incorrect,generic,0.0,,True
1187,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_NT_2025_3,bmosl,correct,generic,0.0,,False
1188,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,china_2025_1,china,correct,generic,0.0,,False
1189,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bmosl_G_2025_3,bmosl,detected,generic,0.0,,False
1190,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,china_2025_3,china,incorrect,generic,0.0,,True
1191,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_12,chinatst,detected,generic,0.0,,False
1192,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,bulgaria_2025_1,bulgaria,corrected,generic,0.0,,False
1193,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_11,chinatst,correct,generic,0.0,,False
1194,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,canada_2025_5,canada,incorrect,generic,0.0,,True
1195,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_15,chinatst,detected,generic,0.0,,False
1196,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,china_2025_6,china,incorrect,generic,0.0,,True
1197,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_14,chinatst,incorrect,generic,0.0,,True
1198,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_16,chinatst,incorrect,generic,0.0,,True
1199,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_21,chinatst,detected,generic,0.0,,False
1200,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_2,chinatst,detected,generic,0.0,,False
1201,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_4,chinatst,incorrect,generic,0.0,,True
1202,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_20,chinatst,incorrect,generic,0.0,,True
1203,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_1,chinatst,incorrect,generic,0.0,,True
1204,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_13,chinatst,incorrect,generic,0.0,,True
1205,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_19,chinatst,incorrect,generic,0.0,,True
1206,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_17,chinatst,detected,generic,0.0,,False
1207,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_8,chinatst,incorrect,generic,0.0,,True
1208,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,china_2025_5,china,incorrect,generic,0.0,,True
1209,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_10,chinatst,incorrect,generic,0.0,,True
1210,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_5,chinatst,incorrect,generic,0.0,,True
1211,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,china_2025_2,china,detected,generic,0.0,,False
1212,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,egmo_2025_1,egmo,detected,generic,0.0,,False
1213,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_3,chinatst,detected,generic,0.0,,False
1214,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_7,chinatst,incorrect,generic,0.0,,True
1215,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_18,chinatst,detected,generic,0.0,,False
1216,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,egmo_2025_3,egmo,corrected,generic,0.0,,False
1217,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_22,chinatst,incorrect,generic,0.0,,True
1218,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_9,chinatst,detected,generic,0.0,,False
1219,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,egmo_2025_6,egmo,incorrect,generic,0.0,,True
1220,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_A_2025_1,elmosl,detected,generic,0.0,,False
1221,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,chinatst_2025_6,chinatst,incorrect,generic,0.0,,True
1222,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_A_2025_7,elmosl,detected,generic,0.0,,False
1223,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,egmo_2025_5,egmo,incorrect,generic,0.0,,True
1224,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_A_2025_8,elmosl,detected,generic,0.0,,False
1225,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_3,elmosl,correct,generic,0.0,,False
1226,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_A_2025_6,elmosl,correct,generic,0.0,,False
1227,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,egmo_2025_4,egmo,corrected,generic,0.0,,False
1228,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_A_2025_2,elmosl,detected,generic,0.0,,False
1229,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_5,elmosl,detected,generic,0.0,,False
1230,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_A_2025_5,elmosl,incorrect,generic,0.0,,True
1231,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_4,elmosl,incorrect,generic,0.0,,True
1232,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_7,elmosl,incorrect,generic,0.0,,True
1233,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,egmo_2025_2,egmo,incorrect,generic,0.0,,True
1234,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_5,elmosl,detected,generic,0.0,,False
1235,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_4,elmosl,incorrect,generic,0.0,,True
1236,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_8,elmosl,detected,generic,0.0,,False
1237,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_2,elmosl,incorrect,generic,0.0,,True
1238,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_3,elmosl,detected,generic,0.0,,False
1239,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_2,elmosl,incorrect,generic,0.0,,True
1240,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_1,elmosl,incorrect,generic,0.0,,True
1241,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_6,elmosl,incorrect,generic,0.0,,True
1242,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_NT_2025_2,elmosl,incorrect,generic,0.0,,True
1243,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_9,elmosl,detected,generic,0.0,,False
1244,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_8,elmosl,incorrect,generic,0.0,,True
1245,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_C_2025_1,elmosl,incorrect,generic,0.0,,True
1246,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_A_2025_3,elmosl,incorrect,generic,0.0,,True
1247,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_NT_2025_7,elmosl,incorrect,generic,0.0,,True
1248,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_NT_2025_6,elmosl,incorrect,generic,0.0,,True
1249,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_7,elmosl,incorrect,generic,0.0,,True
1250,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_6,elmosl,incorrect,generic,0.0,,True
1251,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_NT_2025_1,elmosl,detected,generic,0.0,,False
1252,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_NT_2025_4,elmosl,detected,generic,0.0,,False
1253,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_G_2025_9,elmosl,incorrect,generic,0.0,,True
1254,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_NT_2025_5,elmosl,incorrect,generic,0.0,,True
1255,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,german_2025_4,german,correct,generic,0.0,,False
1256,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,greece_2025_2,greece,detected,generic,0.0,,False
1257,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,german_2025_3,german,detected,generic,0.0,,False
1258,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,german_2025_1,german,correct,generic,0.0,,False
1259,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,elmosl_NT_2025_3,elmosl,incorrect,generic,0.0,,True
1260,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_1,imosl,correct,generic,0.0,,False
1261,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_10,imosl,incorrect,generic,0.0,,True
1262,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,german_2025_2,german,incorrect,generic,0.0,,True
1263,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_11,imosl,incorrect,generic,0.0,,True
1264,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,greece_2025_4,greece,correct,generic,0.0,,False
1265,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_17,imosl,detected,generic,0.0,,False
1266,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_18,imosl,correct,generic,0.0,,False
1267,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,greece_2025_3,greece,correct,generic,0.0,,False
1268,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_2,imosl,correct,generic,0.0,,False
1269,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_20,imosl,detected,generic,0.0,,False
1270,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_16,imosl,incorrect,generic,0.0,,True
1271,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_15,imosl,detected,generic,0.0,,False
1272,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_19,imosl,incorrect,generic,0.0,,True
1273,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_14,imosl,incorrect,generic,0.0,,True
1274,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_13,imosl,incorrect,generic,0.0,,True
1275,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_12,imosl,incorrect,generic,0.0,,True
1276,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,greece_2025_1,greece,correct,generic,0.0,,False
1277,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_26,imosl,correct,generic,0.0,,False
1278,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_24,imosl,incorrect,generic,0.0,,True
1279,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_23,imosl,incorrect,generic,0.0,,True
1280,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_22,imosl,incorrect,generic,0.0,,True
1281,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_25,imosl,incorrect,generic,0.0,,True
1282,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_27,imosl,correct,generic,0.0,,False
1283,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_35,imosl,detected,generic,0.0,,False
1284,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_3,imosl,detected,generic,0.0,,False
1285,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_33,imosl,correct,generic,0.0,,False
1286,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_34,imosl,incorrect,generic,0.0,,True
1287,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_21,imosl,incorrect,generic,0.0,,True
1288,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_32,imosl,incorrect,generic,0.0,,True
1289,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_30,imosl,incorrect,generic,0.0,,True
1290,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_36,imosl,detected,generic,0.0,,False
1291,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_28,imosl,detected,generic,0.0,,False
1292,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_31,imosl,incorrect,generic,0.0,,True
1293,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_4,imosl,incorrect,generic,0.0,,True
1294,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_7,imosl,detected,generic,0.0,,False
1295,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_2025_2,india,incorrect,generic,0.0,,True
1296,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_8,imosl,incorrect,generic,0.0,,True
1297,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_6,imosl,detected,generic,0.0,,False
1298,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_37,imosl,incorrect,generic,0.0,,True
1299,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_2025_3,india,incorrect,generic,0.0,,True
1300,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_5,imosl,detected,generic,0.0,,False
1301,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_9,imosl,correct,generic,0.0,,False
1302,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_2025_4,india,detected,generic,0.0,,False
1303,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,imosl_2025_29,imosl,incorrect,generic,0.0,,True
1304,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_2025_6,india,incorrect,generic,0.0,,True
1305,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_1,india,detected,generic,0.0,,False
1306,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_2025_1,india,correct,generic,0.0,,False
1307,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_12,india,detected,generic,0.0,,False
1308,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_2025_5,india,detected,generic,0.0,,False
1309,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_15,india,incorrect,generic,0.0,,True
1310,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_16,india,detected,generic,0.0,,False
1311,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_17,india,detected,generic,0.0,,False
1312,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_18,india,incorrect,generic,0.0,,True
1313,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_14,india,detected,generic,0.0,,False
1314,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_13,india,incorrect,generic,0.0,,True
1315,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_10,india,incorrect,generic,0.0,,True
1316,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_11,india,detected,generic,0.0,,False
1317,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_3,india,detected,generic,0.0,,False
1318,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_2,india,incorrect,generic,0.0,,True
1319,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_21,india,incorrect,generic,0.0,,True
1320,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_4,india,incorrect,generic,0.0,,True
1321,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_1,iran,incorrect,generic,0.0,,True
1322,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_5,india,detected,generic,0.0,,False
1323,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_6,india,incorrect,generic,0.0,,True
1324,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_7,india,incorrect,generic,0.0,,True
1325,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_4,iran,incorrect,generic,0.0,,True
1326,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_9,india,correct,generic,0.0,,False
1327,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_3,iran,incorrect,generic,0.0,,True
1328,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_2,iran,incorrect,generic,0.0,,True
1329,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_8,india,incorrect,generic,0.0,,True
1330,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_20,india,incorrect,generic,0.0,,True
1331,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_5,iran,correct,generic,0.0,,False
1332,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,india_prep_2025_19,india,incorrect,generic,0.0,,True
1333,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_1,israel,detected,generic,0.0,,False
1334,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_9,iran,incorrect,generic,0.0,,True
1335,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_8,israel,detected,generic,0.0,,False
1336,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_7,israel,detected,generic,0.0,,False
1337,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_6,israel,detected,generic,0.0,,False
1338,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_7,iran,detected,generic,0.0,,False
1339,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_2,israel,incorrect,generic,0.0,,True
1340,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_4,israel,incorrect,generic,0.0,,True
1341,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_3,israel,detected,generic,0.0,,False
1342,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_5,israel,detected,generic,0.0,,False
1343,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,izho_2025_2,izho,incorrect,generic,0.0,,True
1344,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_6,iran,detected,generic,0.0,,False
1345,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,iran_tst_2025_8,iran,detected,generic,0.0,,False
1346,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,izho_2025_4,izho,incorrect,generic,0.0,,True
1347,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,jbmo_2025_4,jbmo,correct,generic,0.0,,False
1348,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,izho_2025_3,izho,incorrect,generic,0.0,,True
1349,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,izho_2025_5,izho,incorrect,generic,0.0,,True
1350,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,jbmo_2025_1,jbmo,detected,generic,0.0,,False
1351,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,izho_2025_1,izho,incorrect,generic,0.0,,True
1352,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,jbmo_2025_2,jbmo,detected,generic,0.0,,False
1353,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,jbmo_2025_3,jbmo,detected,generic,0.0,,False
1354,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,korea_2025_3,korea,incorrect,generic,0.0,,True
1355,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,korea_2025_4,korea,detected,generic,0.0,,False
1356,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_1,matharena,correct,matharena,0.0,,False
1357,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,korea_2025_2,korea,detected,generic,0.0,,False
1358,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,izho_2025_6,izho,detected,generic,0.0,,False
1359,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_9,israel,incorrect,generic,0.0,,True
1360,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_11,matharena,incorrect,matharena,0.0,,True
1361,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,korea_2025_5,korea,incorrect,generic,0.0,,True
1362,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,israel_tst_2025_10,israel,incorrect,generic,0.0,,True
1363,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_12,matharena,correct,matharena,0.0,,False
1364,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_15,matharena,incorrect,matharena,0.0,,True
1365,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,korea_2025_6,korea,incorrect,generic,0.0,,True
1366,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_18,matharena,correct,matharena,0.0,,False
1367,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_10,matharena,incorrect,matharena,0.0,,True
1368,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_16,matharena,correct,matharena,0.0,,False
1369,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_14,matharena,incorrect,matharena,0.0,,True
1370,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,korea_2025_1,korea,correct,generic,0.0,,False
1371,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_17,matharena,correct,matharena,0.0,,False
1372,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_19,matharena,correct,matharena,0.0,,False
1373,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_25,matharena,detected,matharena,0.0,,False
1374,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_29,matharena,correct,matharena,0.0,,False
1375,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_22,matharena,correct,matharena,0.0,,False
1376,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_21,matharena,correct,matharena,0.0,,False
1377,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_20,matharena,incorrect,matharena,0.0,,True
1378,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_13,matharena,detected,matharena,0.0,,False
1379,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_28,matharena,incorrect,matharena,0.0,,True
1380,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_24,matharena,detected,matharena,0.0,,False
1381,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_30,matharena,incorrect,matharena,0.0,,True
1382,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_23,matharena,incorrect,matharena,0.0,,True
1383,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_2,matharena,correct,matharena,0.0,,False
1384,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_27,matharena,correct,matharena,0.0,,False
1385,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_5,matharena,correct,matharena,0.0,,False
1386,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_6,matharena,correct,matharena,0.0,,False
1387,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_26,matharena,detected,matharena,0.0,,False
1388,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_11,matharena,correct,matharena,0.0,,False
1389,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_1,matharena,correct,matharena,0.0,,False
1390,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_17,matharena,correct,matharena,0.0,,False
1391,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_3,matharena,correct,matharena,0.0,,False
1392,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_8,matharena,correct,matharena,0.0,,False
1393,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_10,matharena,detected,matharena,0.0,,False
1394,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_4,matharena,correct,matharena,0.0,,False
1395,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_7,matharena,incorrect,matharena,0.0,,True
1396,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_16,matharena,correct,matharena,0.0,,False
1397,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_12,matharena,incorrect,matharena,0.0,,True
1398,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_15,matharena,incorrect,matharena,0.0,,True
1399,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_20,matharena,correct,matharena,0.0,,False
1400,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_19,matharena,correct,matharena,0.0,,False
1401,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_aime_aime_2025_9,matharena,correct,matharena,0.0,,False
1402,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_13,matharena,incorrect,matharena,0.0,,True
1403,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_14,matharena,correct,matharena,0.0,,False
1404,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_18,matharena,correct,matharena,0.0,,False
1405,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_24,matharena,corrected,matharena,0.0,,False
1406,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_21,matharena,detected,matharena,0.0,,False
1407,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_23,matharena,correct,matharena,0.0,,False
1408,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_26,matharena,correct,matharena,0.0,,False
1409,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_27,matharena,incorrect,matharena,0.0,,True
1410,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_25,matharena,correct,matharena,0.0,,False
1411,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_2,matharena,correct,matharena,0.0,,False
1412,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_28,matharena,incorrect,matharena,0.0,,True
1413,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_3,matharena,correct,matharena,0.0,,False
1414,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_7,matharena,correct,matharena,0.0,,False
1415,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_5,matharena,correct,matharena,0.0,,False
1416,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_30,matharena,incorrect,matharena,0.0,,True
1417,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_22,matharena,incorrect,matharena,0.0,,True
1418,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_4,matharena,correct,matharena,0.0,,False
1419,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_12,matharena,correct,matharena,0.0,,False
1420,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_1,matharena,correct,matharena,0.0,,False
1421,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_6,matharena,incorrect,matharena,0.0,,True
1422,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_13,matharena,correct,matharena,0.0,,False
1423,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_9,matharena,detected,matharena,0.0,,False
1424,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_14,matharena,correct,matharena,0.0,,False
1425,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_29,matharena,incorrect,matharena,0.0,,True
1426,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_17,matharena,correct,matharena,0.0,,False
1427,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_10,matharena,correct,matharena,0.0,,False
1428,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_18,matharena,incorrect,matharena,0.0,,True
1429,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_brumo_brumo_2025_8,matharena,correct,matharena,0.0,,False
1430,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_15,matharena,incorrect,matharena,0.0,,True
1431,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_2,matharena,correct,matharena,0.0,,False
1432,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_11,matharena,incorrect,matharena,0.0,,True
1433,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_19,matharena,detected,matharena,0.0,,False
1434,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_21,matharena,detected,matharena,0.0,,False
1435,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_29,matharena,correct,matharena,0.0,,False
1436,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_22,matharena,detected,matharena,0.0,,False
1437,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_20,matharena,incorrect,matharena,0.0,,True
1438,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_24,matharena,correct,matharena,0.0,,False
1439,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_25,matharena,detected,matharena,0.0,,False
1440,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_23,matharena,correct,matharena,0.0,,False
1441,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_31,matharena,correct,matharena,0.0,,False
1442,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_3,matharena,correct,matharena,0.0,,False
1443,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_27,matharena,detected,matharena,0.0,,False
1444,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_26,matharena,correct,matharena,0.0,,False
1445,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_30,matharena,incorrect,matharena,0.0,,True
1446,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_16,matharena,incorrect,matharena,0.0,,True
1447,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_32,matharena,incorrect,matharena,0.0,,True
1448,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_36,matharena,correct,matharena,0.0,,False
1449,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_34,matharena,incorrect,matharena,0.0,,True
1450,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_40,matharena,incorrect,matharena,0.0,,True
1451,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_8,matharena,incorrect,matharena,0.0,,True
1452,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_6,matharena,incorrect,matharena,0.0,,True
1453,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_9,matharena,correct,matharena,0.0,,False
1454,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_38,matharena,incorrect,matharena,0.0,,True
1455,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_37,matharena,incorrect,matharena,0.0,,True
1456,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_5,matharena,detected,matharena,0.0,,False
1457,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_1,matharena,correct,matharena,0.0,,False
1458,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_11,matharena,incorrect,matharena,0.0,,True
1459,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_4,matharena,correct,matharena,0.0,,False
1460,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_7,matharena,incorrect,matharena,0.0,,True
1461,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_14,matharena,incorrect,matharena,0.0,,True
1462,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_16,matharena,incorrect,matharena,0.0,,True
1463,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_12,matharena,incorrect,matharena,0.0,,True
1464,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_15,matharena,incorrect,matharena,0.0,,True
1465,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_39,matharena,incorrect,matharena,0.0,,True
1466,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_35,matharena,correct,matharena,0.0,,False
1467,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_18,matharena,incorrect,matharena,0.0,,True
1468,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_13,matharena,incorrect,matharena,0.0,,True
1469,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_33,matharena,incorrect,matharena,0.0,,True
1470,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_10,matharena,incorrect,matharena,0.0,,True
1471,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_2,matharena,correct,matharena,0.0,,False
1472,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_21,matharena,correct,matharena,0.0,,False
1473,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_20,matharena,incorrect,matharena,0.0,,True
1474,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_19,matharena,incorrect,matharena,0.0,,True
1475,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_23,matharena,correct,matharena,0.0,,False
1476,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_cmimc_cmimc_2025_28,matharena,incorrect,matharena,0.0,,True
1477,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_17,matharena,incorrect,matharena,0.0,,True
1478,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_27,matharena,correct,matharena,0.0,,False
1479,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_8,matharena,correct,matharena,0.0,,False
1480,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_3,matharena,incorrect,matharena,0.0,,True
1481,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_22,matharena,incorrect,matharena,0.0,,True
1482,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_25,matharena,incorrect,matharena,0.0,,True
1483,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_30,matharena,incorrect,matharena,0.0,,True
1484,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_11,matharena,detected,matharena,0.0,,False
1485,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_1,matharena,correct,matharena,0.0,,False
1486,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_4,matharena,correct,matharena,0.0,,False
1487,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_7,matharena,detected,matharena,0.0,,False
1488,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_13,matharena,incorrect,matharena,0.0,,True
1489,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_5,matharena,correct,matharena,0.0,,False
1490,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_29,matharena,incorrect,matharena,0.0,,True
1491,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_15,matharena,correct,matharena,0.0,,False
1492,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_6,matharena,detected,matharena,0.0,,False
1493,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_10,matharena,correct,matharena,0.0,,False
1494,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_16,matharena,correct,matharena,0.0,,False
1495,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_24,matharena,detected,matharena,0.0,,False
1496,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_26,matharena,detected,matharena,0.0,,False
1497,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_14,matharena,correct,matharena,0.0,,False
1498,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_2,matharena,correct,matharena,0.0,,False
1499,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_18,matharena,incorrect,matharena,0.0,,True
1500,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_28,matharena,correct,matharena,0.0,,False
1501,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_22,matharena,incorrect,matharena,0.0,,True
1502,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_23,matharena,correct,matharena,0.0,,False
1503,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_19,matharena,incorrect,matharena,0.0,,True
1504,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_25,matharena,correct,matharena,0.0,,False
1505,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_29,matharena,correct,matharena,0.0,,False
1506,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_hmmt_hmmt_feb_2025_9,matharena,incorrect,matharena,0.0,,True
1507,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_27,matharena,incorrect,matharena,0.0,,True
1508,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_31,matharena,correct,matharena,0.0,,False
1509,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_24,matharena,detected,matharena,0.0,,False
1510,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_21,matharena,correct,matharena,0.0,,False
1511,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_3,matharena,correct,matharena,0.0,,False
1512,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_12,matharena,detected,matharena,0.0,,False
1513,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_34,matharena,correct,matharena,0.0,,False
1514,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_32,matharena,correct,matharena,0.0,,False
1515,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_20,matharena,incorrect,matharena,0.0,,True
1516,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_28,matharena,correct,matharena,0.0,,False
1517,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_37,matharena,incorrect,matharena,0.0,,True
1518,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_30,matharena,incorrect,matharena,0.0,,True
1519,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_26,matharena,correct,matharena,0.0,,False
1520,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_33,matharena,detected,matharena,0.0,,False
1521,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_35,matharena,correct,matharena,0.0,,False
1522,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_40,matharena,incorrect,matharena,0.0,,True
1523,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_39,matharena,correct,matharena,0.0,,False
1524,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_41,matharena,incorrect,matharena,0.0,,True
1525,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_36,matharena,correct,matharena,0.0,,False
1526,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_48,matharena,correct,matharena,0.0,,False
1527,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_45,matharena,correct,matharena,0.0,,False
1528,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_47,matharena,correct,matharena,0.0,,False
1529,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_42,matharena,detected,matharena,0.0,,False
1530,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_46,matharena,correct,matharena,0.0,,False
1531,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_5,matharena,correct,matharena,0.0,,False
1532,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_38,matharena,incorrect,matharena,0.0,,True
1533,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_44,matharena,correct,matharena,0.0,,False
1534,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_4,matharena,correct,matharena,0.0,,False
1535,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_43,matharena,incorrect,matharena,0.0,,True
1536,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_53,matharena,incorrect,matharena,0.0,,True
1537,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_51,matharena,incorrect,matharena,0.0,,True
1538,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_49,matharena,correct,matharena,0.0,,False
1539,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_50,matharena,correct,matharena,0.0,,False
1540,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_7,matharena,correct,matharena,0.0,,False
1541,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_17,matharena,incorrect,matharena,0.0,,True
1542,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_52,matharena,correct,matharena,0.0,,False
1543,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,nordic_2025_3,nordic,detected,generic,0.0,,False
1544,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,nordic_2025_1,nordic,correct,generic,0.0,,False
1545,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_9,matharena,correct,matharena,0.0,,False
1546,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,pan_african_2025_2,pan,correct,generic,0.0,,False
1547,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,philippines_2025_1,philippines,correct,generic,0.0,,False
1548,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_6,matharena,detected,matharena,0.0,,False
1549,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,pan_african_2025_5,pan,correct,generic,0.0,,False
1550,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,nordic_2025_2,nordic,detected,generic,0.0,,False
1551,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,matharena_smt_smt_2025_8,matharena,detected,matharena,0.0,,False
1552,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,pan_african_2025_3,pan,corrected,generic,0.0,,False
1553,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,philippines_2025_7,philippines,detected,generic,0.0,,False
1554,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,pan_african_2025_4,pan,correct,generic,0.0,,False
1555,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,philippines_2025_2,philippines,incorrect,generic,0.0,,True
1556,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,philippines_2025_3,philippines,detected,generic,0.0,,False
1557,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,pan_african_2025_1,pan,correct,generic,0.0,,False
1558,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,philippines_2025_8,philippines,correct,generic,0.0,,False
1559,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,pan_african_2025_6,pan,incorrect,generic,0.0,,True
1560,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,philippines_2025_5,philippines,incorrect,generic,0.0,,True
1561,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,philippines_2025_4,philippines,detected,generic,0.0,,False
1562,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,polish_2025_2,polish,detected,generic,0.0,,False
1563,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,philippines_2025_6,philippines,incorrect,generic,0.0,,True
1564,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,polish_2025_4,polish,detected,generic,0.0,,False
1565,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,polish_2025_6,polish,detected,generic,0.0,,False
1566,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,polish_2025_1,polish,correct,generic,0.0,,False
1567,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,polish_2025_5,polish,detected,generic,0.0,,False
1568,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,rmm_2025_6,rmm,detected,generic,0.0,,False
1569,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,rmm_2025_3,rmm,incorrect,generic,0.0,,True
1570,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,polish_2025_3,polish,detected,generic,0.0,,False
1571,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_11_2025_2,romania,correct,generic,0.0,,False
1572,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,rmm_2025_2,rmm,detected,generic,0.0,,False
1573,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_10_2025_3,romania,detected,generic,0.0,,False
1574,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_10_2025_1,romania,detected,generic,0.0,,False
1575,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_10_2025_2,romania,incorrect,generic,0.0,,True
1576,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_12_2025_2,romania,correct,generic,0.0,,False
1577,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_12_2025_3,romania,detected,generic,0.0,,False
1578,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,rmm_2025_5,rmm,incorrect,generic,0.0,,True
1579,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,rmm_2025_1,rmm,incorrect,generic,0.0,,True
1580,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,rmm_2025_4,rmm,incorrect,generic,0.0,,True
1581,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_11_2025_3,romania,incorrect,generic,0.0,,True
1582,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,serbia_tst_bmo_2025_2,serbia,correct,generic,0.0,,False
1583,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_tst_2025_1,romania,detected,generic,0.0,,False
1584,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_9_2025_1,romania,detected,generic,0.0,,False
1585,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_tst_2025_3,romania,incorrect,generic,0.0,,True
1586,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_12_2025_1,romania,incorrect,generic,0.0,,True
1587,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,spain_2025_1,spain,correct,generic,0.0,,False
1588,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,serbia_tst_bmo_2025_3,serbia,detected,generic,0.0,,False
1589,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_tst_2025_2,romania,incorrect,generic,0.0,,True
1590,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_9_2025_2,romania,detected,generic,0.0,,False
1591,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,serbia_tst_bmo_2025_4,serbia,incorrect,generic,0.0,,True
1592,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,serbia_tst_bmo_2025_1,serbia,incorrect,generic,0.0,,True
1593,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,spain_2025_4,spain,incorrect,generic,0.0,,True
1594,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_4,thai,detected,generic,0.0,,False
1595,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_3,thai,detected,generic,0.0,,False
1596,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_1,thai,correct,generic,0.0,,False
1597,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,spain_2025_2,spain,detected,generic,0.0,,False
1598,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,romania_11_2025_1,romania,correct,generic,0.0,,False
1599,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_1,turkey,correct,generic,0.0,,False
1600,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,spain_2025_3,spain,detected,generic,0.0,,False
1601,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_10,thai,detected,generic,0.0,,False
1602,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_2,turkey,incorrect,generic,0.0,,True
1603,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_5,thai,detected,generic,0.0,,False
1604,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,spain_2025_5,spain,incorrect,generic,0.0,,True
1605,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_8,thai,incorrect,generic,0.0,,True
1606,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_6,thai,incorrect,generic,0.0,,True
1607,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_7,thai,correct,generic,0.0,,False
1608,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_2,thai,detected,generic,0.0,,False
1609,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_3,turkey,correct,generic,0.0,,False
1610,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_4,turkey,incorrect,generic,0.0,,True
1611,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_7,turkey,incorrect,generic,0.0,,True
1612,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usamo_2025_1,usamo,detected,generic,0.0,,False
1613,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_6,turkey,incorrect,generic,0.0,,True
1614,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_1,usatst,detected,generic,0.0,,False
1615,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_5,turkey,incorrect,generic,0.0,,True
1616,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,thai_2025_9,thai,detected,generic,0.0,,False
1617,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_4,usatst,correct,generic,0.0,,False
1618,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_9,turkey,detected,generic,0.0,,False
1619,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usamo_2025_4,usamo,incorrect,generic,0.0,,True
1620,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,turkey_tst_2025_8,turkey,incorrect,generic,0.0,,True
1621,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usamo_2025_6,usamo,detected,generic,0.0,,False
1622,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usamo_2025_3,usamo,incorrect,generic,0.0,,True
1623,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_6,usatst,detected,generic,0.0,,False
1624,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_5,usatst,incorrect,generic,0.0,,True
1625,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_2,usatst,incorrect,generic,0.0,,True
1626,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_7,usatst,detected,generic,0.0,,False
1627,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_9,usatst,incorrect,generic,0.0,,True
1628,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,vietnam_2025_2,vietnam,correct,generic,0.0,,False
1629,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,vietnam_2025_4,vietnam,incorrect,generic,0.0,,True
1630,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,vietnam_2025_3,vietnam,detected,generic,0.0,,False
1631,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_3,usatst,detected,generic,0.0,,False
1632,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usatst_2025_8,usatst,incorrect,generic,0.0,,True
1633,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,vietnam_2025_6,vietnam,detected,generic,0.0,,False
1634,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usamo_2025_2,usamo,detected,generic,0.0,,False
1635,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,vietnam_2025_1,vietnam,correct,generic,0.0,,False
1636,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,usamo_2025_5,usamo,incorrect,generic,0.0,,True
1637,GPT-5-mini (medium),Qwen3-4B (25/07),qwen/qwen3_4b,vietnam_2025_5,vietnam,incorrect,generic,0.0,,True
1638,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_15,allrussian,incorrect,generic,0.0,,True
1639,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_8,allrussian,incorrect,generic,0.0,,True
1640,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_12,allrussian,incorrect,generic,0.0,,True
1641,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_13,allrussian,incorrect,generic,0.0,,True
1642,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_3,allrussian,detected,generic,0.0,,False
1643,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_7,allrussian,incorrect,generic,0.0,,True
1644,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_4,allrussian,incorrect,generic,0.0,,True
1645,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_9,allrussian,correct,generic,0.0,,False
1646,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_1,allrussian,incorrect,generic,0.0,,True
1647,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_2,allrussian,detected,generic,0.0,,False
1648,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_16,allrussian,incorrect,generic,0.0,,True
1649,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_6,allrussian,incorrect,generic,0.0,,True
1650,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_14,allrussian,incorrect,generic,0.0,,True
1651,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmo_2025_2,bmo,incorrect,generic,0.0,,True
1652,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_10,allrussian,incorrect,generic,0.0,,True
1653,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmo_2025_1,bmo,incorrect,generic,0.0,,True
1654,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_5,allrussian,correct,generic,0.0,,False
1655,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_A_2025_5,bmosl,incorrect,generic,0.0,,True
1656,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_A_2025_4,bmosl,correct,generic,0.0,,False
1657,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_A_2025_6,bmosl,incorrect,generic,0.0,,True
1658,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,allrussian_2025_11,allrussian,incorrect,generic,0.0,,True
1659,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_C_2025_6,bmosl,incorrect,generic,0.0,,True
1660,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmo_2025_3,bmo,corrected,generic,0.0,,False
1661,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_A_2025_3,bmosl,correct,generic,0.0,,False
1662,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_A_2025_1,bmosl,correct,generic,0.0,,False
1663,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_G_2025_1,bmosl,incorrect,generic,0.0,,True
1664,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_G_2025_4,bmosl,incorrect,generic,0.0,,True
1665,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_C_2025_4,bmosl,incorrect,generic,0.0,,True
1666,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_C_2025_5,bmosl,incorrect,generic,0.0,,True
1667,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmo_2025_4,bmo,incorrect,generic,0.0,,True
1668,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_C_2025_1,bmosl,incorrect,generic,0.0,,True
1669,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_G_2025_7,bmosl,incorrect,generic,0.0,,True
1670,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_G_2025_6,bmosl,incorrect,generic,0.0,,True
1671,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_G_2025_3,bmosl,detected,generic,0.0,,False
1672,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_C_2025_3,bmosl,incorrect,generic,0.0,,True
1673,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_NT_2025_1,bmosl,detected,generic,0.0,,False
1674,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_NT_2025_2,bmosl,incorrect,generic,0.0,,True
1675,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_G_2025_2,bmosl,incorrect,generic,0.0,,True
1676,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bulgaria_2025_2,bulgaria,correct,generic,0.0,,False
1677,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_NT_2025_5,bmosl,detected,generic,0.0,,False
1678,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_C_2025_2,bmosl,incorrect,generic,0.0,,True
1679,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_NT_2025_7,bmosl,incorrect,generic,0.0,,True
1680,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_NT_2025_3,bmosl,incorrect,generic,0.0,,True
1681,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_NT_2025_4,bmosl,incorrect,generic,0.0,,True
1682,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_NT_2025_6,bmosl,incorrect,generic,0.0,,True
1683,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bulgaria_2025_3,bulgaria,incorrect,generic,0.0,,True
1684,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bulgaria_2025_4,bulgaria,detected,generic,0.0,,False
1685,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_A_2025_2,bmosl,incorrect,generic,0.0,,True
1686,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bmosl_G_2025_5,bmosl,corrected,generic,0.0,,False
1687,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bulgaria_2025_5,bulgaria,correct,generic,0.0,,False
1688,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,canada_2025_2,canada,correct,generic,0.0,,False
1689,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,canada_2025_1,canada,incorrect,generic,0.0,,True
1690,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bulgaria_2025_1,bulgaria,corrected,generic,0.0,,False
1691,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,china_2025_3,china,incorrect,generic,0.0,,True
1692,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,canada_2025_3,canada,incorrect,generic,0.0,,True
1693,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,china_2025_2,china,incorrect,generic,0.0,,True
1694,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,canada_2025_5,canada,incorrect,generic,0.0,,True
1695,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,china_2025_6,china,incorrect,generic,0.0,,True
1696,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,china_2025_1,china,incorrect,generic,0.0,,True
1697,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_1,chinatst,incorrect,generic,0.0,,True
1698,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,china_2025_5,china,incorrect,generic,0.0,,True
1699,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_15,chinatst,detected,generic,0.0,,False
1700,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_11,chinatst,incorrect,generic,0.0,,True
1701,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,canada_2025_4,canada,incorrect,generic,0.0,,True
1702,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_13,chinatst,incorrect,generic,0.0,,True
1703,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_18,chinatst,incorrect,generic,0.0,,True
1704,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_2,chinatst,correct,generic,0.0,,False
1705,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_12,chinatst,incorrect,generic,0.0,,True
1706,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_14,chinatst,incorrect,generic,0.0,,True
1707,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_10,chinatst,incorrect,generic,0.0,,True
1708,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_16,chinatst,incorrect,generic,0.0,,True
1709,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_19,chinatst,incorrect,generic,0.0,,True
1710,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,bulgaria_2025_6,bulgaria,incorrect,generic,0.0,,True
1711,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_17,chinatst,incorrect,generic,0.0,,True
1712,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_8,chinatst,incorrect,generic,0.0,,True
1713,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_21,chinatst,incorrect,generic,0.0,,True
1714,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_7,chinatst,incorrect,generic,0.0,,True
1715,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_6,chinatst,incorrect,generic,0.0,,True
1716,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_3,chinatst,incorrect,generic,0.0,,True
1717,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_4,chinatst,incorrect,generic,0.0,,True
1718,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,egmo_2025_3,egmo,correct,generic,0.0,,False
1719,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,egmo_2025_4,egmo,incorrect,generic,0.0,,True
1720,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_22,chinatst,corrected,generic,0.0,,False
1721,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_20,chinatst,incorrect,generic,0.0,,True
1722,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_5,chinatst,incorrect,generic,0.0,,True
1723,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_A_2025_3,elmosl,correct,generic,0.0,,False
1724,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,egmo_2025_1,egmo,corrected,generic,0.0,,False
1725,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_A_2025_2,elmosl,correct,generic,0.0,,False
1726,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_A_2025_1,elmosl,detected,generic,0.0,,False
1727,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_A_2025_8,elmosl,incorrect,generic,0.0,,True
1728,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_A_2025_6,elmosl,incorrect,generic,0.0,,True
1729,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,egmo_2025_5,egmo,incorrect,generic,0.0,,True
1730,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_3,elmosl,correct,generic,0.0,,False
1731,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,egmo_2025_2,egmo,incorrect,generic,0.0,,True
1732,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,chinatst_2025_9,chinatst,detected,generic,0.0,,False
1733,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_2,elmosl,incorrect,generic,0.0,,True
1734,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,egmo_2025_6,egmo,incorrect,generic,0.0,,True
1735,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_4,elmosl,incorrect,generic,0.0,,True
1736,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_A_2025_7,elmosl,incorrect,generic,0.0,,True
1737,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_5,elmosl,incorrect,generic,0.0,,True
1738,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_8,elmosl,incorrect,generic,0.0,,True
1739,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_4,elmosl,incorrect,generic,0.0,,True
1740,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_9,elmosl,incorrect,generic,0.0,,True
1741,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_3,elmosl,detected,generic,0.0,,False
1742,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_7,elmosl,incorrect,generic,0.0,,True
1743,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_6,elmosl,incorrect,generic,0.0,,True
1744,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_7,elmosl,incorrect,generic,0.0,,True
1745,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_8,elmosl,incorrect,generic,0.0,,True
1746,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_1,elmosl,incorrect,generic,0.0,,True
1747,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_2,elmosl,incorrect,generic,0.0,,True
1748,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_1,elmosl,incorrect,generic,0.0,,True
1749,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_NT_2025_1,elmosl,incorrect,generic,0.0,,True
1750,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_A_2025_5,elmosl,incorrect,generic,0.0,,True
1751,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_5,elmosl,detected,generic,0.0,,False
1752,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_NT_2025_6,elmosl,incorrect,generic,0.0,,True
1753,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,german_2025_2,german,incorrect,generic,0.0,,True
1754,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,greece_2025_2,greece,incorrect,generic,0.0,,True
1755,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_NT_2025_5,elmosl,incorrect,generic,0.0,,True
1756,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_NT_2025_7,elmosl,incorrect,generic,0.0,,True
1757,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_G_2025_9,elmosl,incorrect,generic,0.0,,True
1758,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_NT_2025_2,elmosl,incorrect,generic,0.0,,True
1759,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_C_2025_6,elmosl,incorrect,generic,0.0,,True
1760,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,german_2025_3,german,detected,generic,0.0,,False
1761,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,greece_2025_4,greece,detected,generic,0.0,,False
1762,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,german_2025_1,german,correct,generic,0.0,,False
1763,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_NT_2025_3,elmosl,incorrect,generic,0.0,,True
1764,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,elmosl_NT_2025_4,elmosl,incorrect,generic,0.0,,True
1765,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,german_2025_4,german,correct,generic,0.0,,False
1766,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_11,imosl,incorrect,generic,0.0,,True
1767,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_16,imosl,incorrect,generic,0.0,,True
1768,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_10,imosl,incorrect,generic,0.0,,True
1769,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_15,imosl,incorrect,generic,0.0,,True
1770,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_17,imosl,incorrect,generic,0.0,,True
1771,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_1,imosl,correct,generic,0.0,,False
1772,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_20,imosl,incorrect,generic,0.0,,True
1773,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,greece_2025_3,greece,detected,generic,0.0,,False
1774,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_19,imosl,incorrect,generic,0.0,,True
1775,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_14,imosl,incorrect,generic,0.0,,True
1776,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_24,imosl,incorrect,generic,0.0,,True
1777,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_2,imosl,corrected,generic,0.0,,False
1778,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_23,imosl,incorrect,generic,0.0,,True
1779,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_25,imosl,incorrect,generic,0.0,,True
1780,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_18,imosl,detected,generic,0.0,,False
1781,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_21,imosl,incorrect,generic,0.0,,True
1782,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_27,imosl,correct,generic,0.0,,False
1783,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_28,imosl,correct,generic,0.0,,False
1784,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_22,imosl,incorrect,generic,0.0,,True
1785,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_3,imosl,detected,generic,0.0,,False
1786,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_13,imosl,incorrect,generic,0.0,,True
1787,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_34,imosl,incorrect,generic,0.0,,True
1788,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_12,imosl,incorrect,generic,0.0,,True
1789,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_35,imosl,incorrect,generic,0.0,,True
1790,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_26,imosl,corrected,generic,0.0,,False
1791,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_29,imosl,incorrect,generic,0.0,,True
1792,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_8,imosl,incorrect,generic,0.0,,True
1793,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_30,imosl,incorrect,generic,0.0,,True
1794,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_32,imosl,incorrect,generic,0.0,,True
1795,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_5,imosl,correct,generic,0.0,,False
1796,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_36,imosl,detected,generic,0.0,,False
1797,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,greece_2025_1,greece,detected,generic,0.0,,False
1798,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_4,imosl,incorrect,generic,0.0,,True
1799,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_2025_1,india,detected,generic,0.0,,False
1800,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_37,imosl,incorrect,generic,0.0,,True
1801,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_6,imosl,incorrect,generic,0.0,,True
1802,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_31,imosl,incorrect,generic,0.0,,True
1803,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_2025_4,india,correct,generic,0.0,,False
1804,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_7,imosl,incorrect,generic,0.0,,True
1805,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_2025_3,india,incorrect,generic,0.0,,True
1806,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_33,imosl,incorrect,generic,0.0,,True
1807,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_2025_2,india,incorrect,generic,0.0,,True
1808,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_2025_5,india,detected,generic,0.0,,False
1809,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_2025_6,india,incorrect,generic,0.0,,True
1810,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_15,india,incorrect,generic,0.0,,True
1811,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_1,india,detected,generic,0.0,,False
1812,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_11,india,incorrect,generic,0.0,,True
1813,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,imosl_2025_9,imosl,incorrect,generic,0.0,,True
1814,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_17,india,incorrect,generic,0.0,,True
1815,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_18,india,incorrect,generic,0.0,,True
1816,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_16,india,correct,generic,0.0,,False
1817,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_13,india,incorrect,generic,0.0,,True
1818,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_2,india,incorrect,generic,0.0,,True
1819,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_7,india,incorrect,generic,0.0,,True
1820,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_12,india,incorrect,generic,0.0,,True
1821,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_10,india,incorrect,generic,0.0,,True
1822,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_3,india,incorrect,generic,0.0,,True
1823,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_1,iran,incorrect,generic,0.0,,True
1824,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_20,india,incorrect,generic,0.0,,True
1825,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_6,india,incorrect,generic,0.0,,True
1826,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_5,india,incorrect,generic,0.0,,True
1827,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_9,india,correct,generic,0.0,,False
1828,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_4,india,detected,generic,0.0,,False
1829,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_19,india,incorrect,generic,0.0,,True
1830,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_5,iran,incorrect,generic,0.0,,True
1831,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_7,iran,incorrect,generic,0.0,,True
1832,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_2,iran,incorrect,generic,0.0,,True
1833,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_21,india,corrected,generic,0.0,,False
1834,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_14,india,incorrect,generic,0.0,,True
1835,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,india_prep_2025_8,india,incorrect,generic,0.0,,True
1836,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_1,israel,detected,generic,0.0,,False
1837,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_3,iran,incorrect,generic,0.0,,True
1838,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_6,iran,incorrect,generic,0.0,,True
1839,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_5,israel,incorrect,generic,0.0,,True
1840,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_6,israel,detected,generic,0.0,,False
1841,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_8,israel,detected,generic,0.0,,False
1842,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_9,iran,incorrect,generic,0.0,,True
1843,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_4,iran,incorrect,generic,0.0,,True
1844,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_4,israel,incorrect,generic,0.0,,True
1845,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,iran_tst_2025_8,iran,incorrect,generic,0.0,,True
1846,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,jbmo_2025_1,jbmo,detected,generic,0.0,,False
1847,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,izho_2025_3,izho,incorrect,generic,0.0,,True
1848,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_7,israel,incorrect,generic,0.0,,True
1849,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,izho_2025_2,izho,incorrect,generic,0.0,,True
1850,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_10,israel,incorrect,generic,0.0,,True
1851,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,izho_2025_5,izho,incorrect,generic,0.0,,True
1852,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,korea_2025_1,korea,incorrect,generic,0.0,,True
1853,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,izho_2025_1,izho,incorrect,generic,0.0,,True
1854,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,jbmo_2025_4,jbmo,correct,generic,0.0,,False
1855,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,korea_2025_4,korea,incorrect,generic,0.0,,True
1856,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,izho_2025_4,izho,incorrect,generic,0.0,,True
1857,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,korea_2025_2,korea,detected,generic,0.0,,False
1858,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,jbmo_2025_2,jbmo,correct,generic,0.0,,False
1859,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_3,israel,detected,generic,0.0,,False
1860,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_2,israel,detected,generic,0.0,,False
1861,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,jbmo_2025_3,jbmo,incorrect,generic,0.0,,True
1862,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,izho_2025_6,izho,detected,generic,0.0,,False
1863,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_10,matharena,incorrect,matharena,0.0,,True
1864,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,korea_2025_5,korea,incorrect,generic,0.0,,True
1865,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,israel_tst_2025_9,israel,incorrect,generic,0.0,,True
1866,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_17,matharena,correct,matharena,0.0,,False
1867,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_16,matharena,correct,matharena,0.0,,False
1868,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_1,matharena,detected,matharena,0.0,,False
1869,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_15,matharena,incorrect,matharena,0.0,,True
1870,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_20,matharena,detected,matharena,0.0,,False
1871,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,korea_2025_6,korea,incorrect,generic,0.0,,True
1872,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_14,matharena,incorrect,matharena,0.0,,True
1873,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_11,matharena,incorrect,matharena,0.0,,True
1874,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_21,matharena,detected,matharena,0.0,,False
1875,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_18,matharena,correct,matharena,0.0,,False
1876,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_13,matharena,incorrect,matharena,0.0,,True
1877,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_24,matharena,correct,matharena,0.0,,False
1878,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_25,matharena,detected,matharena,0.0,,False
1879,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_22,matharena,correct,matharena,0.0,,False
1880,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_12,matharena,detected,matharena,0.0,,False
1881,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_19,matharena,detected,matharena,0.0,,False
1882,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_2,matharena,detected,matharena,0.0,,False
1883,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,korea_2025_3,korea,incorrect,generic,0.0,,True
1884,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_1,matharena,correct,matharena,0.0,,False
1885,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_26,matharena,correct,matharena,0.0,,False
1886,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_4,matharena,detected,matharena,0.0,,False
1887,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_30,matharena,detected,matharena,0.0,,False
1888,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_6,matharena,correct,matharena,0.0,,False
1889,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_5,matharena,correct,matharena,0.0,,False
1890,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_7,matharena,corrected,matharena,0.0,,False
1891,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_27,matharena,incorrect,matharena,0.0,,True
1892,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_29,matharena,correct,matharena,0.0,,False
1893,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_11,matharena,correct,matharena,0.0,,False
1894,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_9,matharena,incorrect,matharena,0.0,,True
1895,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_8,matharena,corrected,matharena,0.0,,False
1896,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_10,matharena,detected,matharena,0.0,,False
1897,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_3,matharena,correct,matharena,0.0,,False
1898,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_28,matharena,corrected,matharena,0.0,,False
1899,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_18,matharena,correct,matharena,0.0,,False
1900,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_16,matharena,correct,matharena,0.0,,False
1901,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_12,matharena,incorrect,matharena,0.0,,True
1902,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_2,matharena,correct,matharena,0.0,,False
1903,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_aime_aime_2025_23,matharena,incorrect,matharena,0.0,,True
1904,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_19,matharena,correct,matharena,0.0,,False
1905,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_24,matharena,correct,matharena,0.0,,False
1906,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_14,matharena,correct,matharena,0.0,,False
1907,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_17,matharena,incorrect,matharena,0.0,,True
1908,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_20,matharena,incorrect,matharena,0.0,,True
1909,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_26,matharena,correct,matharena,0.0,,False
1910,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_21,matharena,corrected,matharena,0.0,,False
1911,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_15,matharena,incorrect,matharena,0.0,,True
1912,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_22,matharena,correct,matharena,0.0,,False
1913,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_5,matharena,correct,matharena,0.0,,False
1914,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_29,matharena,correct,matharena,0.0,,False
1915,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_4,matharena,correct,matharena,0.0,,False
1916,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_25,matharena,correct,matharena,0.0,,False
1917,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_9,matharena,detected,matharena,0.0,,False
1918,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_7,matharena,correct,matharena,0.0,,False
1919,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_23,matharena,detected,matharena,0.0,,False
1920,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_3,matharena,correct,matharena,0.0,,False
1921,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_30,matharena,incorrect,matharena,0.0,,True
1922,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_13,matharena,corrected,matharena,0.0,,False
1923,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_27,matharena,correct,matharena,0.0,,False
1924,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_1,matharena,detected,matharena,0.0,,False
1925,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_11,matharena,correct,matharena,0.0,,False
1926,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_6,matharena,correct,matharena,0.0,,False
1927,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_28,matharena,incorrect,matharena,0.0,,True
1928,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_12,matharena,correct,matharena,0.0,,False
1929,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_17,matharena,correct,matharena,0.0,,False
1930,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_8,matharena,correct,matharena,0.0,,False
1931,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_brumo_brumo_2025_13,matharena,incorrect,matharena,0.0,,True
1932,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_15,matharena,incorrect,matharena,0.0,,True
1933,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_19,matharena,incorrect,matharena,0.0,,True
1934,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_2,matharena,correct,matharena,0.0,,False
1935,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_14,matharena,corrected,matharena,0.0,,False
1936,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_22,matharena,correct,matharena,0.0,,False
1937,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_16,matharena,incorrect,matharena,0.0,,True
1938,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_10,matharena,correct,matharena,0.0,,False
1939,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_28,matharena,incorrect,matharena,0.0,,True
1940,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_20,matharena,incorrect,matharena,0.0,,True
1941,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_26,matharena,correct,matharena,0.0,,False
1942,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_3,matharena,correct,matharena,0.0,,False
1943,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_27,matharena,incorrect,matharena,0.0,,True
1944,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_18,matharena,incorrect,matharena,0.0,,True
1945,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_25,matharena,incorrect,matharena,0.0,,True
1946,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_29,matharena,incorrect,matharena,0.0,,True
1947,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_21,matharena,incorrect,matharena,0.0,,True
1948,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_36,matharena,detected,matharena,0.0,,False
1949,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_33,matharena,correct,matharena,0.0,,False
1950,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_37,matharena,correct,matharena,0.0,,False
1951,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_23,matharena,detected,matharena,0.0,,False
1952,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_31,matharena,corrected,matharena,0.0,,False
1953,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_32,matharena,incorrect,matharena,0.0,,True
1954,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_35,matharena,incorrect,matharena,0.0,,True
1955,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_5,matharena,incorrect,matharena,0.0,,True
1956,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_24,matharena,correct,matharena,0.0,,False
1957,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_7,matharena,incorrect,matharena,0.0,,True
1958,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_8,matharena,incorrect,matharena,0.0,,True
1959,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_34,matharena,incorrect,matharena,0.0,,True
1960,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_40,matharena,incorrect,matharena,0.0,,True
1961,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_39,matharena,detected,matharena,0.0,,False
1962,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_30,matharena,corrected,matharena,0.0,,False
1963,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_38,matharena,incorrect,matharena,0.0,,True
1964,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_4,matharena,correct,matharena,0.0,,False
1965,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_6,matharena,incorrect,matharena,0.0,,True
1966,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_14,matharena,incorrect,matharena,0.0,,True
1967,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_12,matharena,incorrect,matharena,0.0,,True
1968,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_10,matharena,detected,matharena,0.0,,False
1969,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_17,matharena,incorrect,matharena,0.0,,True
1970,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_1,matharena,detected,matharena,0.0,,False
1971,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_cmimc_cmimc_2025_9,matharena,incorrect,matharena,0.0,,True
1972,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_2,matharena,correct,matharena,0.0,,False
1973,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_11,matharena,incorrect,matharena,0.0,,True
1974,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_20,matharena,incorrect,matharena,0.0,,True
1975,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_21,matharena,correct,matharena,0.0,,False
1976,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_19,matharena,incorrect,matharena,0.0,,True
1977,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_18,matharena,incorrect,matharena,0.0,,True
1978,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_15,matharena,incorrect,matharena,0.0,,True
1979,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_13,matharena,incorrect,matharena,0.0,,True
1980,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_23,matharena,correct,matharena,0.0,,False
1981,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_24,matharena,correct,matharena,0.0,,False
1982,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_28,matharena,detected,matharena,0.0,,False
1983,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_26,matharena,correct,matharena,0.0,,False
1984,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_30,matharena,incorrect,matharena,0.0,,True
1985,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_22,matharena,incorrect,matharena,0.0,,True
1986,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_27,matharena,correct,matharena,0.0,,False
1987,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_8,matharena,correct,matharena,0.0,,False
1988,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_3,matharena,correct,matharena,0.0,,False
1989,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_29,matharena,incorrect,matharena,0.0,,True
1990,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_25,matharena,incorrect,matharena,0.0,,True
1991,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_6,matharena,correct,matharena,0.0,,False
1992,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_7,matharena,detected,matharena,0.0,,False
1993,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_1,matharena,correct,matharena,0.0,,False
1994,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_4,matharena,detected,matharena,0.0,,False
1995,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_16,matharena,incorrect,matharena,0.0,,True
1996,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_15,matharena,correct,matharena,0.0,,False
1997,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_11,matharena,incorrect,matharena,0.0,,True
1998,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_12,matharena,correct,matharena,0.0,,False
1999,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_14,matharena,correct,matharena,0.0,,False
2000,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_5,matharena,detected,matharena,0.0,,False
2001,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_16,matharena,correct,matharena,0.0,,False
2002,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_hmmt_hmmt_feb_2025_9,matharena,incorrect,matharena,0.0,,True
2003,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_17,matharena,corrected,matharena,0.0,,False
2004,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_2,matharena,correct,matharena,0.0,,False
2005,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_10,matharena,correct,matharena,0.0,,False
2006,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_22,matharena,incorrect,matharena,0.0,,True
2007,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_18,matharena,detected,matharena,0.0,,False
2008,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_21,matharena,correct,matharena,0.0,,False
2009,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_25,matharena,correct,matharena,0.0,,False
2010,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_27,matharena,incorrect,matharena,0.0,,True
2011,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_20,matharena,incorrect,matharena,0.0,,True
2012,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_19,matharena,incorrect,matharena,0.0,,True
2013,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_29,matharena,correct,matharena,0.0,,False
2014,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_3,matharena,correct,matharena,0.0,,False
2015,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_32,matharena,correct,matharena,0.0,,False
2016,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_24,matharena,detected,matharena,0.0,,False
2017,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_26,matharena,detected,matharena,0.0,,False
2018,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_30,matharena,incorrect,matharena,0.0,,True
2019,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_23,matharena,detected,matharena,0.0,,False
2020,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_35,matharena,corrected,matharena,0.0,,False
2021,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_31,matharena,correct,matharena,0.0,,False
2022,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_33,matharena,incorrect,matharena,0.0,,True
2023,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_4,matharena,correct,matharena,0.0,,False
2024,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_40,matharena,incorrect,matharena,0.0,,True
2025,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_37,matharena,correct,matharena,0.0,,False
2026,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_38,matharena,incorrect,matharena,0.0,,True
2027,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_44,matharena,correct,matharena,0.0,,False
2028,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_36,matharena,corrected,matharena,0.0,,False
2029,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_13,matharena,incorrect,matharena,0.0,,True
2030,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_48,matharena,correct,matharena,0.0,,False
2031,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_43,matharena,incorrect,matharena,0.0,,True
2032,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_46,matharena,correct,matharena,0.0,,False
2033,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_52,matharena,correct,matharena,0.0,,False
2034,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_49,matharena,correct,matharena,0.0,,False
2035,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_39,matharena,correct,matharena,0.0,,False
2036,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_42,matharena,incorrect,matharena,0.0,,True
2037,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_34,matharena,incorrect,matharena,0.0,,True
2038,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_28,matharena,correct,matharena,0.0,,False
2039,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_7,matharena,correct,matharena,0.0,,False
2040,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_45,matharena,detected,matharena,0.0,,False
2041,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_5,matharena,correct,matharena,0.0,,False
2042,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_53,matharena,incorrect,matharena,0.0,,True
2043,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_41,matharena,incorrect,matharena,0.0,,True
2044,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_47,matharena,correct,matharena,0.0,,False
2045,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_51,matharena,correct,matharena,0.0,,False
2046,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,nordic_2025_1,nordic,detected,generic,0.0,,False
2047,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,nordic_2025_2,nordic,incorrect,generic,0.0,,True
2048,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_9,matharena,correct,matharena,0.0,,False
2049,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,pan_african_2025_4,pan,detected,generic,0.0,,False
2050,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_6,matharena,incorrect,matharena,0.0,,True
2051,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,pan_african_2025_5,pan,incorrect,generic,0.0,,True
2052,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,pan_african_2025_2,pan,detected,generic,0.0,,False
2053,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,nordic_2025_3,nordic,detected,generic,0.0,,False
2054,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,philippines_2025_1,philippines,corrected,generic,0.0,,False
2055,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,philippines_2025_7,philippines,incorrect,generic,0.0,,True
2056,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_8,matharena,detected,matharena,0.0,,False
2057,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,philippines_2025_3,philippines,correct,generic,0.0,,False
2058,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,pan_african_2025_6,pan,incorrect,generic,0.0,,True
2059,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,philippines_2025_4,philippines,detected,generic,0.0,,False
2060,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,philippines_2025_5,philippines,correct,generic,0.0,,False
2061,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,philippines_2025_8,philippines,correct,generic,0.0,,False
2062,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,matharena_smt_smt_2025_50,matharena,correct,matharena,0.0,,False
2063,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,polish_2025_4,polish,detected,generic,0.0,,False
2064,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,pan_african_2025_3,pan,corrected,generic,0.0,,False
2065,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,pan_african_2025_1,pan,correct,generic,0.0,,False
2066,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,philippines_2025_6,philippines,incorrect,generic,0.0,,True
2067,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,polish_2025_6,polish,detected,generic,0.0,,False
2068,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,rmm_2025_2,rmm,incorrect,generic,0.0,,True
2069,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_10_2025_1,romania,incorrect,generic,0.0,,True
2070,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,rmm_2025_4,rmm,incorrect,generic,0.0,,True
2071,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,philippines_2025_2,philippines,incorrect,generic,0.0,,True
2072,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,polish_2025_5,polish,incorrect,generic,0.0,,True
2073,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,polish_2025_3,polish,incorrect,generic,0.0,,True
2074,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,rmm_2025_3,rmm,incorrect,generic,0.0,,True
2075,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,rmm_2025_6,rmm,incorrect,generic,0.0,,True
2076,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,polish_2025_2,polish,detected,generic,0.0,,False
2077,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_10_2025_2,romania,incorrect,generic,0.0,,True
2078,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,polish_2025_1,polish,detected,generic,0.0,,False
2079,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_11_2025_2,romania,correct,generic,0.0,,False
2080,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,rmm_2025_1,rmm,incorrect,generic,0.0,,True
2081,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_12_2025_2,romania,correct,generic,0.0,,False
2082,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_11_2025_3,romania,detected,generic,0.0,,False
2083,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_9_2025_2,romania,incorrect,generic,0.0,,True
2084,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_11_2025_1,romania,detected,generic,0.0,,False
2085,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_12_2025_1,romania,incorrect,generic,0.0,,True
2086,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,rmm_2025_5,rmm,incorrect,generic,0.0,,True
2087,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_10_2025_3,romania,incorrect,generic,0.0,,True
2088,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_12_2025_3,romania,incorrect,generic,0.0,,True
2089,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_tst_2025_3,romania,incorrect,generic,0.0,,True
2090,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,serbia_tst_bmo_2025_3,serbia,correct,generic,0.0,,False
2091,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,spain_2025_1,spain,correct,generic,0.0,,False
2092,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_tst_2025_2,romania,incorrect,generic,0.0,,True
2093,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_9_2025_1,romania,incorrect,generic,0.0,,True
2094,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,serbia_tst_bmo_2025_2,serbia,detected,generic,0.0,,False
2095,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,spain_2025_4,spain,incorrect,generic,0.0,,True
2096,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,spain_2025_3,spain,incorrect,generic,0.0,,True
2097,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_10,thai,incorrect,generic,0.0,,True
2098,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,spain_2025_2,spain,detected,generic,0.0,,False
2099,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,romania_tst_2025_1,romania,incorrect,generic,0.0,,True
2100,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_2,thai,incorrect,generic,0.0,,True
2101,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_3,thai,detected,generic,0.0,,False
2102,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_4,thai,detected,generic,0.0,,False
2103,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,serbia_tst_bmo_2025_1,serbia,incorrect,generic,0.0,,True
2104,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_1,thai,correct,generic,0.0,,False
2105,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,serbia_tst_bmo_2025_4,serbia,incorrect,generic,0.0,,True
2106,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_7,thai,correct,generic,0.0,,False
2107,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,spain_2025_5,spain,detected,generic,0.0,,False
2108,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_8,thai,incorrect,generic,0.0,,True
2109,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_2,turkey,incorrect,generic,0.0,,True
2110,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_1,turkey,detected,generic,0.0,,False
2111,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_5,turkey,incorrect,generic,0.0,,True
2112,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_5,thai,incorrect,generic,0.0,,True
2113,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_6,thai,correct,generic,0.0,,False
2114,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_3,turkey,correct,generic,0.0,,False
2115,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_9,turkey,detected,generic,0.0,,False
2116,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,thai_2025_9,thai,incorrect,generic,0.0,,True
2117,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usamo_2025_1,usamo,incorrect,generic,0.0,,True
2118,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_6,turkey,incorrect,generic,0.0,,True
2119,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_8,turkey,incorrect,generic,0.0,,True
2120,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usamo_2025_2,usamo,incorrect,generic,0.0,,True
2121,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usamo_2025_3,usamo,incorrect,generic,0.0,,True
2122,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_3,usatst,incorrect,generic,0.0,,True
2123,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_7,turkey,incorrect,generic,0.0,,True
2124,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usamo_2025_4,usamo,incorrect,generic,0.0,,True
2125,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_2,usatst,incorrect,generic,0.0,,True
2126,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_4,usatst,incorrect,generic,0.0,,True
2127,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_1,usatst,incorrect,generic,0.0,,True
2128,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_7,usatst,detected,generic,0.0,,False
2129,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_9,usatst,incorrect,generic,0.0,,True
2130,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_5,usatst,incorrect,generic,0.0,,True
2131,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_8,usatst,incorrect,generic,0.0,,True
2132,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usatst_2025_6,usatst,incorrect,generic,0.0,,True
2133,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,vietnam_2025_6,vietnam,detected,generic,0.0,,False
2134,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,vietnam_2025_2,vietnam,detected,generic,0.0,,False
2135,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,vietnam_2025_1,vietnam,incorrect,generic,0.0,,True
2136,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,vietnam_2025_5,vietnam,incorrect,generic,0.0,,True
2137,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,vietnam_2025_3,vietnam,detected,generic,0.0,,False
2138,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usamo_2025_5,usamo,incorrect,generic,0.0,,True
2139,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,vietnam_2025_4,vietnam,incorrect,generic,0.0,,True
2140,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,turkey_tst_2025_4,turkey,incorrect,generic,0.0,,True
2141,GPT-5-mini (medium),Qwen3-235B-A22B,qwen/qwen3_235b_a22b,usamo_2025_6,usamo,incorrect,generic,0.0,,True
2142,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_10,allrussian,correct,generic,0.09950845999999999,,False
2143,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_15,allrussian,incorrect,generic,0.058601190000000004,,True
2144,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_11,allrussian,correct,generic,0.11453691,,False
2145,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_12,allrussian,detected,generic,0.06285517,,False
2146,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_14,allrussian,correct,generic,0.04877558,,False
2147,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_13,allrussian,detected,generic,0.030760529999999998,,False
2148,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_1,allrussian,correct,generic,0.0875489,,False
2149,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_4,allrussian,incorrect,generic,0.09791036,,True
2150,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_8,allrussian,incorrect,generic,0.10453965,,True
2151,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_5,allrussian,corrected,generic,0.06237288,,False
2152,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmo_2025_2,bmo,correct,generic,0.07845508999999999,,False
2153,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_6,allrussian,incorrect,generic,0.09645288,,True
2154,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_9,allrussian,corrected,generic,0.04976811,,False
2155,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_3,allrussian,detected,generic,0.07691202,,False
2156,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_16,allrussian,incorrect,generic,0.0531785,,True
2157,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmo_2025_1,bmo,incorrect,generic,0.07502334,,True
2158,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_2,allrussian,detected,generic,0.054044659999999994,,False
2159,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,allrussian_2025_7,allrussian,incorrect,generic,0.08298769,,True
2160,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_A_2025_6,bmosl,correct,generic,0.09499934,,False
2161,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_A_2025_5,bmosl,incorrect,generic,0.08724839999999999,,True
2162,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmo_2025_3,bmo,corrected,generic,0.10157415,,False
2163,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_C_2025_2,bmosl,corrected,generic,0.07798696,,False
2164,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_A_2025_1,bmosl,correct,generic,0.03662817,,False
2165,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_C_2025_6,bmosl,incorrect,generic,0.07797917,,True
2166,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_C_2025_4,bmosl,incorrect,generic,0.01905843,,True
2167,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_G_2025_1,bmosl,detected,generic,0.07021356999999999,,False
2168,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_C_2025_1,bmosl,incorrect,generic,0.09619833,,True
2169,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_A_2025_4,bmosl,corrected,generic,0.07178253,,False
2170,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_G_2025_3,bmosl,correct,generic,0.0562719,,False
2171,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_C_2025_5,bmosl,incorrect,generic,0.10826491,,True
2172,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_G_2025_6,bmosl,correct,generic,0.0825504,,False
2173,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_G_2025_4,bmosl,incorrect,generic,0.05613947,,True
2174,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_NT_2025_5,bmosl,detected,generic,0.053890129999999994,,False
2175,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_NT_2025_1,bmosl,detected,generic,0.054699529999999996,,False
2176,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_G_2025_2,bmosl,incorrect,generic,0.07498221000000001,,True
2177,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_NT_2025_4,bmosl,incorrect,generic,0.11325568999999999,,True
2178,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_A_2025_2,bmosl,correct,generic,0.06743914999999999,,False
2179,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_NT_2025_3,bmosl,incorrect,generic,0.044582119999999996,,True
2180,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bulgaria_2025_3,bulgaria,detected,generic,0.03189931,,False
2181,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bulgaria_2025_2,bulgaria,incorrect,generic,0.08445143999999999,,True
2182,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_NT_2025_6,bmosl,correct,generic,0.10728900999999999,,False
2183,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_NT_2025_2,bmosl,detected,generic,0.10235754,,False
2184,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bulgaria_2025_4,bulgaria,corrected,generic,0.08774826,,False
2185,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_NT_2025_7,bmosl,correct,generic,0.04514711,,False
2186,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bulgaria_2025_6,bulgaria,detected,generic,0.05137789,,False
2187,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,canada_2025_4,canada,incorrect,generic,0.08732833999999999,,True
2188,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_G_2025_7,bmosl,incorrect,generic,0.07117044,,True
2189,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmosl_G_2025_5,bmosl,corrected,generic,0.035839270000000006,,False
2190,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bulgaria_2025_5,bulgaria,incorrect,generic,0.03593053,,True
2191,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,canada_2025_2,canada,correct,generic,0.0398304,,False
2192,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bulgaria_2025_1,bulgaria,corrected,generic,0.11050168999999999,,False
2193,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,canada_2025_3,canada,detected,generic,0.04349285,,False
2194,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,bmo_2025_4,bmo,incorrect,generic,0.06223963,,True
2195,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,china_2025_2,china,correct,generic,0.07762891000000001,,False
2196,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,canada_2025_1,canada,incorrect,generic,0.038862190000000005,,True
2197,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,canada_2025_5,canada,detected,generic,0.07042875,,False
2198,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,china_2025_3,china,detected,generic,0.058436949999999994,,False
2199,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,china_2025_1,china,incorrect,generic,0.077324,,True
2200,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,china_2025_5,china,incorrect,generic,0.10934819,,True
2201,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_10,chinatst,incorrect,generic,0.06241515,,True
2202,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_11,chinatst,detected,generic,0.05063305,,False
2203,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_16,chinatst,detected,generic,0.09440203,,False
2204,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,china_2025_6,china,corrected,generic,0.07922615,,False
2205,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_17,chinatst,incorrect,generic,0.10244491,,True
2206,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_19,chinatst,incorrect,generic,0.06168573,,True
2207,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_22,chinatst,corrected,generic,0.09902345,,False
2208,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_21,chinatst,incorrect,generic,0.04691576,,True
2209,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_14,chinatst,incorrect,generic,0.06883313,,True
2210,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_3,chinatst,detected,generic,0.06722637999999999,,False
2211,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_13,chinatst,incorrect,generic,0.10667473999999999,,True
2212,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_1,chinatst,incorrect,generic,0.07492623000000001,,True
2213,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_2,chinatst,correct,generic,0.05748132,,False
2214,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_12,chinatst,incorrect,generic,0.09499440999999999,,True
2215,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_18,chinatst,detected,generic,0.07819786,,False
2216,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_15,chinatst,detected,generic,0.06326558,,False
2217,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_6,chinatst,correct,generic,0.06682334,,False
2218,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_20,chinatst,incorrect,generic,0.08998281,,True
2219,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_8,chinatst,correct,generic,0.05252386999999999,,False
2220,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,egmo_2025_1,egmo,incorrect,generic,0.07481639,,True
2221,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,egmo_2025_4,egmo,incorrect,generic,0.08280987000000001,,True
2222,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_7,chinatst,incorrect,generic,0.07720322,,True
2223,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,egmo_2025_2,egmo,incorrect,generic,0.10526937,,True
2224,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,egmo_2025_3,egmo,corrected,generic,0.06173389,,False
2225,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,egmo_2025_5,egmo,incorrect,generic,0.09274270999999999,,True
2226,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_A_2025_2,elmosl,correct,generic,0.042021989999999995,,False
2227,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_2,elmosl,incorrect,generic,0.1054823,,True
2228,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_9,chinatst,detected,generic,0.10681345999999999,,False
2229,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,egmo_2025_6,egmo,incorrect,generic,0.13935074,,True
2230,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_5,chinatst,incorrect,generic,0.10242994,,True
2231,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_1,elmosl,incorrect,generic,0.021480859999999997,,True
2232,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,chinatst_2025_4,chinatst,incorrect,generic,0.10846779999999999,,True
2233,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_3,elmosl,correct,generic,0.021709370000000002,,False
2234,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_A_2025_1,elmosl,incorrect,generic,0.04587593,,True
2235,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_G_2025_1,elmosl,incorrect,generic,0.06326562,,True
2236,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_A_2025_7,elmosl,detected,generic,0.05745246,,False
2237,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_7,elmosl,incorrect,generic,0.04036375,,True
2238,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_A_2025_5,elmosl,corrected,generic,0.04738264,,False
2239,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_A_2025_8,elmosl,incorrect,generic,0.06940906,,True
2240,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_G_2025_6,elmosl,correct,generic,0.11048312,,False
2241,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_G_2025_5,elmosl,correct,generic,0.08684647,,False
2242,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_5,elmosl,incorrect,generic,0.04900317,,True
2243,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_G_2025_3,elmosl,incorrect,generic,0.05400634000000001,,True
2244,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_9,elmosl,incorrect,generic,0.06707350999999999,,True
2245,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_A_2025_3,elmosl,detected,generic,0.049514079999999995,,False
2246,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_G_2025_4,elmosl,incorrect,generic,0.08611996999999999,,True
2247,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_6,elmosl,incorrect,generic,0.1338631,,True
2248,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_NT_2025_6,elmosl,incorrect,generic,0.05755855,,True
2249,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_NT_2025_7,elmosl,incorrect,generic,0.04048289,,True
2250,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_NT_2025_5,elmosl,incorrect,generic,0.08374216000000001,,True
2251,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_G_2025_9,elmosl,incorrect,generic,0.08085802,,True
2252,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_NT_2025_1,elmosl,detected,generic,0.08151423,,False
2253,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_8,elmosl,incorrect,generic,0.06634484000000002,,True
2254,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_G_2025_2,elmosl,detected,generic,0.06545894,,False
2255,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,german_2025_3,german,detected,generic,0.03068271,,False
2256,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_G_2025_8,elmosl,detected,generic,0.061147379999999994,,False
2257,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,german_2025_1,german,incorrect,generic,0.051598939999999996,,True
2258,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,greece_2025_2,greece,correct,generic,0.08621863,,False
2259,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,german_2025_4,german,detected,generic,0.07832996,,False
2260,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,greece_2025_4,greece,detected,generic,0.05246111,,False
2261,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_NT_2025_3,elmosl,incorrect,generic,0.057741299999999995,,True
2262,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_NT_2025_2,elmosl,incorrect,generic,0.11247755000000001,,True
2263,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,elmosl_C_2025_4,elmosl,incorrect,generic,0.07893286000000001,,True
2264,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,greece_2025_3,greece,detected,generic,0.05035057,,False
2265,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_1,imosl,correct,generic,0.05063476,,False
2266,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_16,imosl,incorrect,generic,0.08333761,,True
2267,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,german_2025_2,german,detected,generic,0.06896953,,False
2268,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_11,imosl,incorrect,generic,0.11029754,,True
2269,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_18,imosl,correct,generic,0.08499825999999999,,False
2270,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_17,imosl,incorrect,generic,0.04545228,,True
2271,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_13,imosl,detected,generic,0.07788732000000001,,False
2272,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_15,imosl,detected,generic,0.02510498,,False
2273,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_19,imosl,detected,generic,0.08564044,,False
2274,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_20,imosl,corrected,generic,0.09857834,,False
2275,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_14,imosl,correct,generic,0.07232634000000002,,False
2276,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_10,imosl,incorrect,generic,0.0767126,,True
2277,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_2,imosl,corrected,generic,0.0718768,,False
2278,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_22,imosl,correct,generic,0.055781489999999996,,False
2279,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_12,imosl,incorrect,generic,0.06698997999999999,,True
2280,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_23,imosl,detected,generic,0.07506338,,False
2281,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,greece_2025_1,greece,incorrect,generic,0.01137793,,True
2282,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_21,imosl,detected,generic,0.06769673000000001,,False
2283,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_3,imosl,detected,generic,0.02649448,,False
2284,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_29,imosl,incorrect,generic,0.09356062,,True
2285,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_26,imosl,corrected,generic,0.10972095,,False
2286,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_34,imosl,incorrect,generic,0.02855152,,True
2287,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_27,imosl,corrected,generic,0.06867281,,False
2288,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_28,imosl,incorrect,generic,0.13052758,,True
2289,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_33,imosl,correct,generic,0.06843855,,False
2290,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_24,imosl,incorrect,generic,0.05363095,,True
2291,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_31,imosl,detected,generic,0.11863876999999999,,False
2292,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_4,imosl,incorrect,generic,0.05903629,,True
2293,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_36,imosl,incorrect,generic,0.08304357,,True
2294,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_5,imosl,corrected,generic,0.08138856,,False
2295,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_35,imosl,detected,generic,0.08164581,,False
2296,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_37,imosl,incorrect,generic,0.12041039,,True
2297,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_9,imosl,corrected,generic,0.0441295,,False
2298,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_7,imosl,detected,generic,0.10215020999999999,,False
2299,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_8,imosl,detected,generic,0.0771362,,False
2300,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_32,imosl,incorrect,generic,0.12090498999999999,,True
2301,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,imosl_2025_6,imosl,detected,generic,0.04644892,,False
2302,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_2025_1,india,corrected,generic,0.03034064,,False
2303,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_2025_2,india,incorrect,generic,0.02595467,,True
2304,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_12,india,correct,generic,0.047202520000000005,,False
2305,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_2025_3,india,incorrect,generic,0.047465940000000005,,True
2306,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_11,india,incorrect,generic,0.09923214999999999,,True
2307,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_10,india,incorrect,generic,0.11979726999999998,,True
2308,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_2025_4,india,correct,generic,0.06603007,,False
2309,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_15,india,incorrect,generic,0.06826227,,True
2310,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_2025_5,india,detected,generic,0.07876542,,False
2311,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_18,india,detected,generic,0.09620498000000001,,False
2312,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_17,india,corrected,generic,0.07944942999999999,,False
2313,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_2025_6,india,incorrect,generic,0.07865698,,True
2314,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_1,india,correct,generic,0.0498831,,False
2315,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_14,india,correct,generic,0.0628256,,False
2316,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_20,india,incorrect,generic,0.07107028,,True
2317,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_3,india,incorrect,generic,0.09105268000000001,,True
2318,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_21,india,incorrect,generic,0.09385083999999999,,True
2319,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_5,india,incorrect,generic,0.04276734,,True
2320,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_19,india,incorrect,generic,0.05705411,,True
2321,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_6,india,incorrect,generic,0.08354167,,True
2322,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_7,india,correct,generic,0.09903512,,False
2323,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_4,india,incorrect,generic,0.11740268,,True
2324,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_1,iran,incorrect,generic,0.05798011,,True
2325,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_8,india,detected,generic,0.03157586,,False
2326,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_2,india,detected,generic,0.07650375,,False
2327,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_5,iran,correct,generic,0.08893965999999999,,False
2328,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_13,india,incorrect,generic,0.06530290999999999,,True
2329,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_7,iran,correct,generic,0.05002676,,False
2330,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_6,iran,detected,generic,0.06515074,,False
2331,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_3,iran,incorrect,generic,0.0625364,,True
2332,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_5,israel,correct,generic,0.07432364,,False
2333,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_10,israel,corrected,generic,0.12815537,,False
2334,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_1,israel,detected,generic,0.02678457,,False
2335,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_4,iran,detected,generic,0.1152421,,False
2336,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_9,iran,correct,generic,0.08955337999999999,,False
2337,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,india_prep_2025_9,india,detected,generic,0.041392469999999994,,False
2338,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_3,israel,correct,generic,0.0385087,,False
2339,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_2,israel,detected,generic,0.06820147,,False
2340,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_8,israel,correct,generic,0.05967184,,False
2341,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,izho_2025_4,izho,correct,generic,0.06036012,,False
2342,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_7,israel,detected,generic,0.03170875,,False
2343,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_6,israel,incorrect,generic,0.08469006,,True
2344,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_9,israel,incorrect,generic,0.08638991,,True
2345,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_8,iran,incorrect,generic,0.051494340000000006,,True
2346,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,jbmo_2025_4,jbmo,correct,generic,0.08990407999999998,,False
2347,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,israel_tst_2025_4,israel,incorrect,generic,0.10086302999999999,,True
2348,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,izho_2025_5,izho,incorrect,generic,0.08618912999999999,,True
2349,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,izho_2025_2,izho,incorrect,generic,0.06760906,,True
2350,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,korea_2025_3,korea,incorrect,generic,0.08838779,,True
2351,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,jbmo_2025_1,jbmo,correct,generic,0.07549294000000001,,False
2352,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,izho_2025_3,izho,incorrect,generic,0.06986957,,True
2353,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,korea_2025_1,korea,correct,generic,0.05458396,,False
2354,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,iran_tst_2025_2,iran,detected,generic,0.09040933,,False
2355,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,izho_2025_6,izho,corrected,generic,0.09887253,,False
2356,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,korea_2025_4,korea,correct,generic,0.06918806,,False
2357,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_11,matharena,detected,matharena,0.08597681,,False
2358,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,izho_2025_1,izho,incorrect,generic,0.02175243,,True
2359,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,jbmo_2025_3,jbmo,correct,generic,0.05530225,,False
2360,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_14,matharena,incorrect,matharena,0.1071445,,True
2361,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,korea_2025_6,korea,incorrect,generic,0.07534252,,True
2362,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_13,matharena,corrected,matharena,0.13630703,,False
2363,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_10,matharena,correct,matharena,0.06724042,,False
2364,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_17,matharena,incorrect,matharena,0.03138458,,True
2365,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_16,matharena,correct,matharena,0.01921106,,False
2366,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_19,matharena,corrected,matharena,0.056601130000000006,,False
2367,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_20,matharena,correct,matharena,0.08580646,,False
2368,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_12,matharena,correct,matharena,0.05818105,,False
2369,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_15,matharena,incorrect,matharena,0.09193489,,True
2370,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_1,matharena,detected,matharena,0.04650651,,False
2371,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_21,matharena,detected,matharena,0.11307576,,False
2372,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_25,matharena,incorrect,matharena,0.0723267,,True
2373,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_26,matharena,incorrect,matharena,0.05695725,,True
2374,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_23,matharena,incorrect,matharena,0.13800148,,True
2375,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,jbmo_2025_2,jbmo,detected,generic,0.0664372,,False
2376,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_29,matharena,correct,matharena,0.05904166,,False
2377,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_22,matharena,corrected,matharena,0.11443985000000001,,False
2378,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,korea_2025_5,korea,corrected,generic,0.09356825,,False
2379,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_4,matharena,correct,matharena,0.02981659,,False
2380,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_2,matharena,incorrect,matharena,0.1010723,,True
2381,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_24,matharena,incorrect,matharena,0.08033229999999998,,True
2382,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_27,matharena,detected,matharena,0.08461794,,False
2383,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_28,matharena,corrected,matharena,0.10206761,,False
2384,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_6,matharena,correct,matharena,0.02718917,,False
2385,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_30,matharena,incorrect,matharena,0.09377942,,True
2386,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_3,matharena,corrected,matharena,0.06550223,,False
2387,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_5,matharena,correct,matharena,0.03247694,,False
2388,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_9,matharena,correct,matharena,0.08638156,,False
2389,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_8,matharena,correct,matharena,0.05143636,,False
2390,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_1,matharena,correct,matharena,0.01450909,,False
2391,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_18,matharena,corrected,matharena,0.09507481,,False
2392,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_aime_aime_2025_7,matharena,detected,matharena,0.11322621,,False
2393,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_13,matharena,correct,matharena,0.12830182,,False
2394,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_17,matharena,corrected,matharena,0.0786098,,False
2395,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_11,matharena,corrected,matharena,0.0548177,,False
2396,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_12,matharena,corrected,matharena,0.10406047,,False
2397,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_18,matharena,correct,matharena,0.034204319999999996,,False
2398,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_14,matharena,correct,matharena,0.028109629999999997,,False
2399,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_20,matharena,correct,matharena,0.058567,,False
2400,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_16,matharena,correct,matharena,0.02642641,,False
2401,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_10,matharena,incorrect,matharena,0.04343953,,True
2402,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_2,matharena,incorrect,matharena,0.01565768,,True
2403,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_26,matharena,correct,matharena,0.034200440000000006,,False
2404,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_15,matharena,incorrect,matharena,0.09277973999999999,,True
2405,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_28,matharena,detected,matharena,0.12637725,,False
2406,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_21,matharena,corrected,matharena,0.07813427,,False
2407,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_22,matharena,incorrect,matharena,0.05688060999999999,,True
2408,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_24,matharena,corrected,matharena,0.09077533,,False
2409,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_5,matharena,corrected,matharena,0.05082085,,False
2410,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_19,matharena,correct,matharena,0.049392969999999994,,False
2411,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_23,matharena,incorrect,matharena,0.02684093,,True
2412,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_29,matharena,correct,matharena,0.04053888,,False
2413,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_7,matharena,correct,matharena,0.02104131,,False
2414,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_4,matharena,correct,matharena,0.03479605,,False
2415,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_3,matharena,incorrect,matharena,0.1276736,,True
2416,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_1,matharena,correct,matharena,0.05350814,,False
2417,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_30,matharena,correct,matharena,0.11015098,,False
2418,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_9,matharena,correct,matharena,0.04361022999999999,,False
2419,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_6,matharena,incorrect,matharena,0.05369597,,True
2420,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_15,matharena,correct,matharena,0.11568495,,False
2421,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_10,matharena,correct,matharena,0.054322260000000004,,False
2422,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_25,matharena,incorrect,matharena,0.02822338,,True
2423,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_12,matharena,incorrect,matharena,0.060766299999999995,,True
2424,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_18,matharena,incorrect,matharena,0.09577059,,True
2425,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_13,matharena,incorrect,matharena,0.09067022,,True
2426,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_27,matharena,correct,matharena,0.06913420999999999,,False
2427,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_brumo_brumo_2025_8,matharena,incorrect,matharena,0.04068783,,True
2428,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_17,matharena,correct,matharena,0.06946172,,False
2429,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_14,matharena,corrected,matharena,0.07152038000000001,,False
2430,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_20,matharena,incorrect,matharena,0.06367958,,True
2431,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_22,matharena,corrected,matharena,0.0838822,,False
2432,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_11,matharena,incorrect,matharena,0.061707519999999995,,True
2433,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_19,matharena,correct,matharena,0.055868339999999996,,False
2434,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_24,matharena,correct,matharena,0.05612623,,False
2435,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_2,matharena,correct,matharena,0.041067179999999995,,False
2436,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_27,matharena,correct,matharena,0.08181579,,False
2437,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_30,matharena,incorrect,matharena,0.08710114,,True
2438,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_32,matharena,incorrect,matharena,0.06636373000000001,,True
2439,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_3,matharena,corrected,matharena,0.05089803,,False
2440,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_35,matharena,corrected,matharena,0.05798491,,False
2441,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_25,matharena,detected,matharena,0.12602564,,False
2442,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_34,matharena,incorrect,matharena,0.12747544,,True
2443,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_31,matharena,detected,matharena,0.04068071,,False
2444,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_21,matharena,incorrect,matharena,0.09371205,,True
2445,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_29,matharena,correct,matharena,0.07618391000000001,,False
2446,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_23,matharena,incorrect,matharena,0.09136236,,True
2447,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_5,matharena,detected,matharena,0.10918651,,False
2448,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_26,matharena,corrected,matharena,0.04657242999999999,,False
2449,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_33,matharena,corrected,matharena,0.07911520999999999,,False
2450,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_38,matharena,incorrect,matharena,0.12812472,,True
2451,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_39,matharena,correct,matharena,0.06436778,,False
2452,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_36,matharena,correct,matharena,0.07897204,,False
2453,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_8,matharena,correct,matharena,0.09658707000000001,,False
2454,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_4,matharena,correct,matharena,0.03477369,,False
2455,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_6,matharena,incorrect,matharena,0.13840394,,True
2456,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_1,matharena,correct,matharena,0.055759260000000005,,False
2457,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_14,matharena,correct,matharena,0.1048615,,False
2458,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_37,matharena,incorrect,matharena,0.07563045,,True
2459,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_9,matharena,correct,matharena,0.10993380999999999,,False
2460,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_13,matharena,corrected,matharena,0.08098715,,False
2461,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_16,matharena,corrected,matharena,0.09788564999999999,,False
2462,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_19,matharena,incorrect,matharena,0.07895795,,True
2463,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_22,matharena,corrected,matharena,0.044203849999999996,,False
2464,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_17,matharena,corrected,matharena,0.13205083,,False
2465,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_12,matharena,corrected,matharena,0.058323889999999996,,False
2466,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_23,matharena,correct,matharena,0.0419279,,False
2467,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_40,matharena,incorrect,matharena,0.08628268999999998,,True
2468,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_20,matharena,incorrect,matharena,0.09219176,,True
2469,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_cmimc_cmimc_2025_7,matharena,detected,matharena,0.12179564,,False
2470,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_21,matharena,correct,matharena,0.02125703,,False
2471,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_15,matharena,correct,matharena,0.05797788,,False
2472,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_28,matharena,corrected,matharena,0.03345429,,False
2473,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_18,matharena,incorrect,matharena,0.10101428,,True
2474,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_11,matharena,correct,matharena,0.11251374999999998,,False
2475,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_6,matharena,correct,matharena,0.08063717,,False
2476,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_25,matharena,incorrect,matharena,0.10886362,,True
2477,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_26,matharena,corrected,matharena,0.06678059,,False
2478,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_4,matharena,correct,matharena,0.030753899999999997,,False
2479,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_30,matharena,incorrect,matharena,0.12793078,,True
2480,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_5,matharena,corrected,matharena,0.06638416999999999,,False
2481,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_8,matharena,correct,matharena,0.028370199999999998,,False
2482,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_12,matharena,correct,matharena,0.040147949999999995,,False
2483,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_2,matharena,correct,matharena,0.12585967,,False
2484,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_1,matharena,corrected,matharena,0.04033320000000001,,False
2485,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_3,matharena,correct,matharena,0.03466921,,False
2486,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_7,matharena,detected,matharena,0.06138657,,False
2487,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_27,matharena,corrected,matharena,0.06904502,,False
2488,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_24,matharena,incorrect,matharena,0.07617697,,True
2489,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_13,matharena,corrected,matharena,0.07295193000000001,,False
2490,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_15,matharena,correct,matharena,0.02234284,,False
2491,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_14,matharena,corrected,matharena,0.046915519999999995,,False
2492,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_29,matharena,corrected,matharena,0.06996754999999999,,False
2493,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_19,matharena,incorrect,matharena,0.11605631,,True
2494,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_22,matharena,incorrect,matharena,0.08614796,,True
2495,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_11,matharena,incorrect,matharena,0.11693752,,True
2496,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_21,matharena,correct,matharena,0.07988114,,False
2497,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_hmmt_hmmt_feb_2025_9,matharena,incorrect,matharena,0.09779776,,True
2498,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_18,matharena,incorrect,matharena,0.1048601,,True
2499,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_2,matharena,correct,matharena,0.012192030000000001,,False
2500,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_17,matharena,corrected,matharena,0.09116782,,False
2501,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_16,matharena,detected,matharena,0.02986911,,False
2502,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_29,matharena,correct,matharena,0.06357990999999999,,False
2503,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_26,matharena,correct,matharena,0.0532343,,False
2504,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_10,matharena,incorrect,matharena,0.04459518,,True
2505,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_23,matharena,correct,matharena,0.04539036,,False
2506,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_32,matharena,corrected,matharena,0.06140896,,False
2507,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_27,matharena,incorrect,matharena,0.0648019,,True
2508,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_3,matharena,correct,matharena,0.02615291,,False
2509,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_25,matharena,correct,matharena,0.04670272,,False
2510,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_31,matharena,correct,matharena,0.04439668,,False
2511,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_28,matharena,corrected,matharena,0.037247270000000006,,False
2512,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_34,matharena,detected,matharena,0.09704875,,False
2513,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_37,matharena,corrected,matharena,0.08846939999999999,,False
2514,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_33,matharena,correct,matharena,0.08123068000000001,,False
2515,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_40,matharena,corrected,matharena,0.12618834,,False
2516,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_42,matharena,incorrect,matharena,0.06860273000000001,,True
2517,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_41,matharena,corrected,matharena,0.10282823999999999,,False
2518,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_35,matharena,correct,matharena,0.049013240000000007,,False
2519,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_44,matharena,correct,matharena,0.02395402,,False
2520,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_47,matharena,correct,matharena,0.04054704,,False
2521,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_39,matharena,detected,matharena,0.05162361,,False
2522,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_36,matharena,corrected,matharena,0.05356496,,False
2523,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_48,matharena,correct,matharena,0.02698716,,False
2524,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_5,matharena,incorrect,matharena,0.0544903,,True
2525,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_51,matharena,correct,matharena,0.07104134999999999,,False
2526,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_43,matharena,incorrect,matharena,0.06967155,,True
2527,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_46,matharena,correct,matharena,0.02080582,,False
2528,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_45,matharena,correct,matharena,0.014751700000000001,,False
2529,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_49,matharena,incorrect,matharena,0.06195646,,True
2530,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_52,matharena,detected,matharena,0.07261968999999999,,False
2531,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_7,matharena,correct,matharena,0.049820590000000005,,False
2532,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_50,matharena,correct,matharena,0.04494272999999999,,False
2533,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_30,matharena,corrected,matharena,0.08969321,,False
2534,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_4,matharena,correct,matharena,0.018853460000000002,,False
2535,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_9,matharena,correct,matharena,0.035635589999999995,,False
2536,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,nordic_2025_3,nordic,corrected,generic,0.10339242,,False
2537,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_53,matharena,incorrect,matharena,0.08488814,,True
2538,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,philippines_2025_1,philippines,correct,generic,0.07440445,,False
2539,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,pan_african_2025_5,pan,detected,generic,0.10782171,,False
2540,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_6,matharena,detected,matharena,0.08843319000000001,,False
2541,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,nordic_2025_1,nordic,correct,generic,0.07970648,,False
2542,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,pan_african_2025_6,pan,correct,generic,0.09279409999999999,,False
2543,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,pan_african_2025_2,pan,corrected,generic,0.04278211999999999,,False
2544,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,pan_african_2025_4,pan,corrected,generic,0.035196870000000005,,False
2545,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,pan_african_2025_1,pan,incorrect,generic,0.038074689999999994,,True
2546,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,philippines_2025_5,philippines,corrected,generic,0.06368506,,False
2547,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,philippines_2025_4,philippines,incorrect,generic,0.10083251,,True
2548,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,nordic_2025_2,nordic,detected,generic,0.055905489999999995,,False
2549,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,pan_african_2025_3,pan,corrected,generic,0.037786049999999995,,False
2550,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,polish_2025_1,polish,corrected,generic,0.03691277,,False
2551,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_8,matharena,detected,matharena,0.0800224,,False
2552,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,philippines_2025_7,philippines,detected,generic,0.04314525,,False
2553,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,philippines_2025_3,philippines,correct,generic,0.04429044,,False
2554,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,matharena_smt_smt_2025_20,matharena,incorrect,matharena,0.08536403,,True
2555,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,polish_2025_3,polish,incorrect,generic,0.05479044,,True
2556,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,philippines_2025_2,philippines,incorrect,generic,0.11020133,,True
2557,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,rmm_2025_5,rmm,correct,generic,0.07469966,,False
2558,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,polish_2025_2,polish,correct,generic,0.04121611,,False
2559,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,polish_2025_4,polish,detected,generic,0.02656014,,False
2560,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,rmm_2025_1,rmm,incorrect,generic,0.06508965,,True
2561,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,polish_2025_5,polish,incorrect,generic,0.057604230000000006,,True
2562,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_10_2025_1,romania,incorrect,generic,0.07232411999999999,,True
2563,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,rmm_2025_4,rmm,detected,generic,0.06873354,,False
2564,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,rmm_2025_3,rmm,detected,generic,0.07232742,,False
2565,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,polish_2025_6,polish,detected,generic,0.06184552,,False
2566,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_11_2025_2,romania,correct,generic,0.031818439999999996,,False
2567,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,rmm_2025_6,rmm,incorrect,generic,0.09054613,,True
2568,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_11_2025_1,romania,correct,generic,0.07243848,,False
2569,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_10_2025_3,romania,correct,generic,0.06700503999999999,,False
2570,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_10_2025_2,romania,incorrect,generic,0.08743077,,True
2571,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_12_2025_2,romania,correct,generic,0.021399400000000002,,False
2572,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_tst_2025_1,romania,correct,generic,0.07070129,,False
2573,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,serbia_tst_bmo_2025_2,serbia,corrected,generic,0.06843724999999999,,False
2574,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_11_2025_3,romania,detected,generic,0.09159657,,False
2575,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_tst_2025_3,romania,incorrect,generic,0.06616359999999999,,True
2576,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_9_2025_2,romania,incorrect,generic,0.13323893000000003,,True
2577,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,serbia_tst_bmo_2025_4,serbia,corrected,generic,0.05685995,,False
2578,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,serbia_tst_bmo_2025_1,serbia,detected,generic,0.08883604,,False
2579,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_1,thai,correct,generic,0.05252153999999999,,False
2580,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,spain_2025_1,spain,correct,generic,0.06668261,,False
2581,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_12_2025_3,romania,detected,generic,0.055247069999999995,,False
2582,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_9_2025_1,romania,corrected,generic,0.08601267,,False
2583,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,spain_2025_4,spain,detected,generic,0.07268861,,False
2584,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_12_2025_1,romania,corrected,generic,0.06713279000000001,,False
2585,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_10,thai,detected,generic,0.04579107,,False
2586,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,spain_2025_5,spain,correct,generic,0.08473154,,False
2587,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,spain_2025_2,spain,detected,generic,0.04847107,,False
2588,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_3,thai,detected,generic,0.0358857,,False
2589,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,serbia_tst_bmo_2025_3,serbia,corrected,generic,0.06789706,,False
2590,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_8,thai,incorrect,generic,0.11036772,,True
2591,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,spain_2025_3,spain,incorrect,generic,0.07948045,,True
2592,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_1,turkey,corrected,generic,0.04827293,,False
2593,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_2,turkey,incorrect,generic,0.07512252999999999,,True
2594,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_6,thai,incorrect,generic,0.11041801,,True
2595,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_5,thai,incorrect,generic,0.10064643999999999,,True
2596,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_9,thai,correct,generic,0.03409377,,False
2597,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_2,thai,corrected,generic,0.0530295,,False
2598,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,romania_tst_2025_2,romania,detected,generic,0.09374323,,False
2599,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_6,turkey,incorrect,generic,0.06492981,,True
2600,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_7,thai,corrected,generic,0.0466069,,False
2601,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_7,turkey,incorrect,generic,0.059547449999999995,,True
2602,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,thai_2025_4,thai,correct,generic,0.07971581,,False
2603,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_4,turkey,correct,generic,0.09041622,,False
2604,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_3,turkey,incorrect,generic,0.07455401999999998,,True
2605,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_5,turkey,incorrect,generic,0.09096312999999999,,True
2606,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_9,turkey,detected,generic,0.04375822,,False
2607,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,turkey_tst_2025_8,turkey,detected,generic,0.03923877,,False
2608,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usamo_2025_3,usamo,incorrect,generic,0.06494733999999999,,True
2609,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usamo_2025_2,usamo,detected,generic,0.07061756,,False
2610,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_6,usatst,detected,generic,0.0916635,,False
2611,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usamo_2025_5,usamo,incorrect,generic,0.058096629999999996,,True
2612,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_2,usatst,detected,generic,0.08901300999999999,,False
2613,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_1,usatst,detected,generic,0.03454779,,False
2614,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usamo_2025_4,usamo,correct,generic,0.044805029999999996,,False
2615,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_3,usatst,correct,generic,0.11690873,,False
2616,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usamo_2025_6,usamo,detected,generic,0.06320284,,False
2617,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_8,usatst,incorrect,generic,0.06602225,,True
2618,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_4,usatst,corrected,generic,0.11850631,,False
2619,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_7,usatst,detected,generic,0.04971178,,False
2620,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,vietnam_2025_1,vietnam,correct,generic,0.08032584,,False
2621,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,vietnam_2025_6,vietnam,detected,generic,0.02395731,,False
2622,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_5,usatst,incorrect,generic,0.0562632,,True
2623,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,vietnam_2025_4,vietnam,detected,generic,0.06784045,,False
2624,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,vietnam_2025_3,vietnam,correct,generic,0.05555345,,False
2625,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usamo_2025_1,usamo,detected,generic,0.08486882,,False
2626,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,usatst_2025_9,usatst,detected,generic,0.08391907,,False
2627,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,vietnam_2025_5,vietnam,corrected,generic,0.12645465,,False
2628,GPT-5-mini (medium),DeepSeek-v3.1 (Think),deepseek/deepseek_v31,vietnam_2025_2,vietnam,correct,generic,0.10618101999999999,,False
