total_samples,cot_correct_answers,es_correct_answers,cot_average_tokens,es_average_tokens,es_step_average_tokens,model,dataset,min_slope,threshold
30,12,9,14729.333333333334,6163.233333333334,4282.2,DeepSeek-R1-Distill-Llama-8B,aime,3,0.01
30,12,9,14729.333333333334,6163.233333333334,4282.2,DeepSeek-R1-Distill-Llama-8B,aime,3,0.05
30,12,9,14729.333333333334,6163.233333333334,4282.2,DeepSeek-R1-Distill-Llama-8B,aime,3,0.1
30,12,9,14729.333333333334,6163.233333333334,4282.2,DeepSeek-R1-Distill-Llama-8B,aime,3,0.15
30,12,9,14729.333333333334,6163.233333333334,4282.2,DeepSeek-R1-Distill-Llama-8B,aime,3,0.2
30,12,9,14729.333333333334,8723.5,6627.533333333334,DeepSeek-R1-Distill-Llama-8B,aime,5,0.01
30,12,9,14729.333333333334,8723.5,6627.533333333334,DeepSeek-R1-Distill-Llama-8B,aime,5,0.05
30,12,9,14729.333333333334,8723.5,6627.533333333334,DeepSeek-R1-Distill-Llama-8B,aime,5,0.1
30,12,9,14729.333333333334,8723.5,6627.533333333334,DeepSeek-R1-Distill-Llama-8B,aime,5,0.15
30,12,9,14729.333333333334,8723.5,6627.533333333334,DeepSeek-R1-Distill-Llama-8B,aime,5,0.2
30,12,10,14729.333333333334,9331.766666666666,7079.633333333333,DeepSeek-R1-Distill-Llama-8B,aime,7,0.01
30,12,10,14729.333333333334,9331.766666666666,7079.633333333333,DeepSeek-R1-Distill-Llama-8B,aime,7,0.05
30,12,10,14729.333333333334,9331.766666666666,7079.633333333333,DeepSeek-R1-Distill-Llama-8B,aime,7,0.1
30,12,10,14729.333333333334,9331.766666666666,7079.633333333333,DeepSeek-R1-Distill-Llama-8B,aime,7,0.15
30,12,10,14729.333333333334,9331.766666666666,7079.633333333333,DeepSeek-R1-Distill-Llama-8B,aime,7,0.2
30,12,11,14729.333333333334,9811.8,7456.933333333333,DeepSeek-R1-Distill-Llama-8B,aime,10,0.01
30,12,11,14729.333333333334,9811.8,7456.933333333333,DeepSeek-R1-Distill-Llama-8B,aime,10,0.05
30,12,11,14729.333333333334,9811.8,7456.933333333333,DeepSeek-R1-Distill-Llama-8B,aime,10,0.1
30,12,11,14729.333333333334,9811.8,7456.933333333333,DeepSeek-R1-Distill-Llama-8B,aime,10,0.15
30,12,11,14729.333333333334,9811.8,7456.933333333333,DeepSeek-R1-Distill-Llama-8B,aime,10,0.2
30,12,12,14729.333333333334,11065.533333333333,8426.3,DeepSeek-R1-Distill-Llama-8B,aime,15,0.01
30,12,12,14729.333333333334,11065.533333333333,8426.3,DeepSeek-R1-Distill-Llama-8B,aime,15,0.05
30,12,12,14729.333333333334,11065.533333333333,8426.3,DeepSeek-R1-Distill-Llama-8B,aime,15,0.1
30,12,12,14729.333333333334,11065.533333333333,8426.3,DeepSeek-R1-Distill-Llama-8B,aime,15,0.15
30,12,12,14729.333333333334,11065.533333333333,8426.3,DeepSeek-R1-Distill-Llama-8B,aime,15,0.2
30,12,12,14729.333333333334,11406.666666666666,8680.866666666667,DeepSeek-R1-Distill-Llama-8B,aime,20,0.01
30,12,12,14729.333333333334,11406.666666666666,8680.866666666667,DeepSeek-R1-Distill-Llama-8B,aime,20,0.05
30,12,12,14729.333333333334,11406.666666666666,8680.866666666667,DeepSeek-R1-Distill-Llama-8B,aime,20,0.1
30,12,12,14729.333333333334,11406.666666666666,8680.866666666667,DeepSeek-R1-Distill-Llama-8B,aime,20,0.15
30,12,12,14729.333333333334,11406.666666666666,8680.866666666667,DeepSeek-R1-Distill-Llama-8B,aime,20,0.2
40,0,0,7601.25,2714.325,1616.1,DeepSeek-R1-Distill-Llama-8B,amc,3,0.01
40,0,0,7601.25,2696.975,1607.6,DeepSeek-R1-Distill-Llama-8B,amc,3,0.05
40,0,0,7601.25,2690.875,1606.1,DeepSeek-R1-Distill-Llama-8B,amc,3,0.1
40,0,0,7601.25,2689.575,1605.6,DeepSeek-R1-Distill-Llama-8B,amc,3,0.15
40,0,0,7601.25,2689.575,1605.6,DeepSeek-R1-Distill-Llama-8B,amc,3,0.2
40,0,0,7601.25,3093.9,1883.25,DeepSeek-R1-Distill-Llama-8B,amc,5,0.01
40,0,0,7601.25,3077.95,1875.25,DeepSeek-R1-Distill-Llama-8B,amc,5,0.05
40,0,0,7601.25,3076.825,1874.75,DeepSeek-R1-Distill-Llama-8B,amc,5,0.1
40,0,0,7601.25,3076.825,1874.75,DeepSeek-R1-Distill-Llama-8B,amc,5,0.15
40,0,0,7601.25,3076.825,1874.75,DeepSeek-R1-Distill-Llama-8B,amc,5,0.2
40,0,0,7601.25,3327.0,2092.55,DeepSeek-R1-Distill-Llama-8B,amc,7,0.01
40,0,0,7601.25,3319.25,2089.05,DeepSeek-R1-Distill-Llama-8B,amc,7,0.05
40,0,0,7601.25,3319.25,2089.05,DeepSeek-R1-Distill-Llama-8B,amc,7,0.1
40,0,0,7601.25,3319.25,2089.05,DeepSeek-R1-Distill-Llama-8B,amc,7,0.15
40,0,0,7601.25,3319.25,2089.05,DeepSeek-R1-Distill-Llama-8B,amc,7,0.2
40,0,0,7601.25,4000.4,2681.75,DeepSeek-R1-Distill-Llama-8B,amc,10,0.01
40,0,0,7601.25,3998.25,2679.75,DeepSeek-R1-Distill-Llama-8B,amc,10,0.05
40,0,0,7601.25,3998.25,2679.75,DeepSeek-R1-Distill-Llama-8B,amc,10,0.1
40,0,0,7601.25,3998.25,2679.75,DeepSeek-R1-Distill-Llama-8B,amc,10,0.15
40,0,0,7601.25,3998.25,2679.75,DeepSeek-R1-Distill-Llama-8B,amc,10,0.2
40,0,0,7601.25,4186.75,2796.975,DeepSeek-R1-Distill-Llama-8B,amc,15,0.01
40,0,0,7601.25,4186.75,2796.975,DeepSeek-R1-Distill-Llama-8B,amc,15,0.05
40,0,0,7601.25,4186.75,2796.975,DeepSeek-R1-Distill-Llama-8B,amc,15,0.1
40,0,0,7601.25,4186.75,2796.975,DeepSeek-R1-Distill-Llama-8B,amc,15,0.15
40,0,0,7601.25,4186.75,2796.975,DeepSeek-R1-Distill-Llama-8B,amc,15,0.2
40,0,0,7601.25,5291.775,3749.1,DeepSeek-R1-Distill-Llama-8B,amc,20,0.01
40,0,0,7601.25,5291.775,3749.1,DeepSeek-R1-Distill-Llama-8B,amc,20,0.05
40,0,0,7601.25,5291.775,3749.1,DeepSeek-R1-Distill-Llama-8B,amc,20,0.1
40,0,0,7601.25,5291.775,3749.1,DeepSeek-R1-Distill-Llama-8B,amc,20,0.15
40,0,0,7601.25,5291.775,3749.1,DeepSeek-R1-Distill-Llama-8B,amc,20,0.2
198,86,66,6949.404040404041,1655.9242424242425,531.5858585858585,DeepSeek-R1-Distill-Llama-8B,gpqa,3,0.01
198,86,66,6949.404040404041,1526.6010101010102,489.09090909090907,DeepSeek-R1-Distill-Llama-8B,gpqa,3,0.05
198,86,66,6949.404040404041,1495.9292929292928,478.5353535353535,DeepSeek-R1-Distill-Llama-8B,gpqa,3,0.1
198,86,66,6949.404040404041,1478.3080808080808,471.1161616161616,DeepSeek-R1-Distill-Llama-8B,gpqa,3,0.15
198,86,66,6949.404040404041,1474.8636363636363,469.47474747474746,DeepSeek-R1-Distill-Llama-8B,gpqa,3,0.2
198,86,71,6949.404040404041,2824.2929292929293,1058.7777777777778,DeepSeek-R1-Distill-Llama-8B,gpqa,5,0.01
198,86,71,6949.404040404041,2768.3535353535353,1035.4444444444443,DeepSeek-R1-Distill-Llama-8B,gpqa,5,0.05
198,86,71,6949.404040404041,2722.489898989899,1018.3030303030303,DeepSeek-R1-Distill-Llama-8B,gpqa,5,0.1
198,86,71,6949.404040404041,2713.439393939394,1013.540404040404,DeepSeek-R1-Distill-Llama-8B,gpqa,5,0.15
198,86,71,6949.404040404041,2710.282828282828,1012.0505050505051,DeepSeek-R1-Distill-Llama-8B,gpqa,5,0.2
198,86,81,6949.404040404041,3813.308080808081,1559.328282828283,DeepSeek-R1-Distill-Llama-8B,gpqa,7,0.01
198,86,81,6949.404040404041,3777.1363636363635,1543.7525252525252,DeepSeek-R1-Distill-Llama-8B,gpqa,7,0.05
198,86,81,6949.404040404041,3776.287878787879,1543.540404040404,DeepSeek-R1-Distill-Llama-8B,gpqa,7,0.1
198,86,82,6949.404040404041,3768.626262626263,1539.7676767676767,DeepSeek-R1-Distill-Llama-8B,gpqa,7,0.15
198,86,82,6949.404040404041,3765.489898989899,1538.378787878788,DeepSeek-R1-Distill-Llama-8B,gpqa,7,0.2
198,86,84,6949.404040404041,4866.626262626262,2153.6363636363635,DeepSeek-R1-Distill-Llama-8B,gpqa,10,0.01
198,86,85,6949.404040404041,4847.464646464647,2146.570707070707,DeepSeek-R1-Distill-Llama-8B,gpqa,10,0.05
198,86,85,6949.404040404041,4846.439393939394,2146.1666666666665,DeepSeek-R1-Distill-Llama-8B,gpqa,10,0.1
198,86,86,6949.404040404041,4838.777777777777,2142.3939393939395,DeepSeek-R1-Distill-Llama-8B,gpqa,10,0.15
198,86,86,6949.404040404041,4835.641414141414,2141.0050505050503,DeepSeek-R1-Distill-Llama-8B,gpqa,10,0.2
198,86,85,6949.404040404041,5551.939393939394,2539.873737373737,DeepSeek-R1-Distill-Llama-8B,gpqa,15,0.01
198,86,85,6949.404040404041,5545.434343434344,2537.550505050505,DeepSeek-R1-Distill-Llama-8B,gpqa,15,0.05
198,86,85,6949.404040404041,5545.434343434344,2537.550505050505,DeepSeek-R1-Distill-Llama-8B,gpqa,15,0.1
198,86,85,6949.404040404041,5545.434343434344,2537.550505050505,DeepSeek-R1-Distill-Llama-8B,gpqa,15,0.15
198,86,86,6949.404040404041,5535.267676767677,2532.6363636363635,DeepSeek-R1-Distill-Llama-8B,gpqa,15,0.2
198,86,85,6949.404040404041,6037.570707070707,2814.181818181818,DeepSeek-R1-Distill-Llama-8B,gpqa,20,0.01
198,86,85,6949.404040404041,6032.626262626262,2812.3636363636365,DeepSeek-R1-Distill-Llama-8B,gpqa,20,0.05
198,86,85,6949.404040404041,6032.626262626262,2812.3636363636365,DeepSeek-R1-Distill-Llama-8B,gpqa,20,0.1
198,86,85,6949.404040404041,6032.626262626262,2812.3636363636365,DeepSeek-R1-Distill-Llama-8B,gpqa,20,0.15
198,86,85,6949.404040404041,6032.626262626262,2812.3636363636365,DeepSeek-R1-Distill-Llama-8B,gpqa,20,0.2
500,295,234,3224.782,1361.53,684.848,DeepSeek-R1-Distill-Llama-8B,math,3,0.01
500,295,229,3224.782,1309.312,658.814,DeepSeek-R1-Distill-Llama-8B,math,3,0.05
500,295,228,3224.782,1294.616,653.312,DeepSeek-R1-Distill-Llama-8B,math,3,0.1
500,295,227,3224.782,1288.53,650.75,DeepSeek-R1-Distill-Llama-8B,math,3,0.15
500,295,227,3224.782,1287.024,649.624,DeepSeek-R1-Distill-Llama-8B,math,3,0.2
500,295,261,3224.782,1738.22,914.56,DeepSeek-R1-Distill-Llama-8B,math,5,0.01
500,295,259,3224.782,1710.352,900.034,DeepSeek-R1-Distill-Llama-8B,math,5,0.05
500,295,259,3224.782,1705.938,897.676,DeepSeek-R1-Distill-Llama-8B,math,5,0.1
500,295,259,3224.782,1702.748,896.812,DeepSeek-R1-Distill-Llama-8B,math,5,0.15
500,295,259,3224.782,1701.242,895.686,DeepSeek-R1-Distill-Llama-8B,math,5,0.2
500,295,274,3224.782,1981.864,1063.946,DeepSeek-R1-Distill-Llama-8B,math,7,0.01
500,295,273,3224.782,1963.634,1052.504,DeepSeek-R1-Distill-Llama-8B,math,7,0.05
500,295,273,3224.782,1959.164,1049.994,DeepSeek-R1-Distill-Llama-8B,math,7,0.1
500,295,273,3224.782,1956.158,1049.05,DeepSeek-R1-Distill-Llama-8B,math,7,0.15
500,295,273,3224.782,1954.78,1047.964,DeepSeek-R1-Distill-Llama-8B,math,7,0.2
500,295,287,3224.782,2206.15,1206.656,DeepSeek-R1-Distill-Llama-8B,math,10,0.01
500,295,285,3224.782,2191.1,1196.476,DeepSeek-R1-Distill-Llama-8B,math,10,0.05
500,295,285,3224.782,2188.314,1194.406,DeepSeek-R1-Distill-Llama-8B,math,10,0.1
500,295,285,3224.782,2187.112,1194.016,DeepSeek-R1-Distill-Llama-8B,math,10,0.15
500,295,285,3224.782,2185.91,1193.016,DeepSeek-R1-Distill-Llama-8B,math,10,0.2
500,295,293,3224.782,2429.748,1353.464,DeepSeek-R1-Distill-Llama-8B,math,15,0.01
500,295,292,3224.782,2416.102,1342.564,DeepSeek-R1-Distill-Llama-8B,math,15,0.05
500,295,291,3224.782,2409.562,1339.294,DeepSeek-R1-Distill-Llama-8B,math,15,0.1
500,295,291,3224.782,2408.548,1338.99,DeepSeek-R1-Distill-Llama-8B,math,15,0.15
500,295,291,3224.782,2407.604,1338.11,DeepSeek-R1-Distill-Llama-8B,math,15,0.2
500,295,294,3224.782,2595.642,1454.286,DeepSeek-R1-Distill-Llama-8B,math,20,0.01
500,295,293,3224.782,2592.772,1452.046,DeepSeek-R1-Distill-Llama-8B,math,20,0.05
500,295,293,3224.782,2585.812,1444.422,DeepSeek-R1-Distill-Llama-8B,math,20,0.1
500,295,293,3224.782,2585.614,1444.256,DeepSeek-R1-Distill-Llama-8B,math,20,0.15
500,295,293,3224.782,2584.606,1443.336,DeepSeek-R1-Distill-Llama-8B,math,20,0.2
272,44,36,4136.930147058823,2554.5404411764707,1311.6433823529412,DeepSeek-R1-Distill-Llama-8B,minerva,3,0.01
272,44,35,4136.930147058823,2542.0220588235293,1307.6764705882354,DeepSeek-R1-Distill-Llama-8B,minerva,3,0.05
272,44,35,4136.930147058823,2528.360294117647,1303.6654411764705,DeepSeek-R1-Distill-Llama-8B,minerva,3,0.1
272,44,35,4136.930147058823,2527.3051470588234,1303.4742647058824,DeepSeek-R1-Distill-Llama-8B,minerva,3,0.15
272,44,35,4136.930147058823,2526.735294117647,1303.3161764705883,DeepSeek-R1-Distill-Llama-8B,minerva,3,0.2
272,44,42,4136.930147058823,3089.3970588235293,1633.4117647058824,DeepSeek-R1-Distill-Llama-8B,minerva,5,0.01
272,44,41,4136.930147058823,3081.966911764706,1630.264705882353,DeepSeek-R1-Distill-Llama-8B,minerva,5,0.05
272,44,41,4136.930147058823,3080.731617647059,1630.2426470588234,DeepSeek-R1-Distill-Llama-8B,minerva,5,0.1
272,44,41,4136.930147058823,3079.8161764705883,1630.0625,DeepSeek-R1-Distill-Llama-8B,minerva,5,0.15
272,44,41,4136.930147058823,3079.8161764705883,1630.0625,DeepSeek-R1-Distill-Llama-8B,minerva,5,0.2
272,44,42,4136.930147058823,3447.2904411764707,1851.172794117647,DeepSeek-R1-Distill-Llama-8B,minerva,7,0.01
272,44,41,4136.930147058823,3441.544117647059,1848.375,DeepSeek-R1-Distill-Llama-8B,minerva,7,0.05
272,44,41,4136.930147058823,3441.544117647059,1848.375,DeepSeek-R1-Distill-Llama-8B,minerva,7,0.1
272,44,41,4136.930147058823,3441.4595588235293,1848.327205882353,DeepSeek-R1-Distill-Llama-8B,minerva,7,0.15
272,44,41,4136.930147058823,3441.4595588235293,1848.327205882353,DeepSeek-R1-Distill-Llama-8B,minerva,7,0.2
272,44,46,4136.930147058823,3766.0882352941176,2048.952205882353,DeepSeek-R1-Distill-Llama-8B,minerva,10,0.01
272,44,45,4136.930147058823,3761.0625,2046.5955882352941,DeepSeek-R1-Distill-Llama-8B,minerva,10,0.05
272,44,45,4136.930147058823,3761.0625,2046.5955882352941,DeepSeek-R1-Distill-Llama-8B,minerva,10,0.1
272,44,45,4136.930147058823,3761.0625,2046.5955882352941,DeepSeek-R1-Distill-Llama-8B,minerva,10,0.15
272,44,45,4136.930147058823,3761.0625,2046.5955882352941,DeepSeek-R1-Distill-Llama-8B,minerva,10,0.2
272,44,45,4136.930147058823,3894.948529411765,2140.514705882353,DeepSeek-R1-Distill-Llama-8B,minerva,15,0.01
272,44,45,4136.930147058823,3892.643382352941,2139.106617647059,DeepSeek-R1-Distill-Llama-8B,minerva,15,0.05
272,44,45,4136.930147058823,3892.643382352941,2139.106617647059,DeepSeek-R1-Distill-Llama-8B,minerva,15,0.1
272,44,45,4136.930147058823,3892.643382352941,2139.106617647059,DeepSeek-R1-Distill-Llama-8B,minerva,15,0.15
272,44,45,4136.930147058823,3892.643382352941,2139.106617647059,DeepSeek-R1-Distill-Llama-8B,minerva,15,0.2
272,44,45,4136.930147058823,3939.8529411764707,2171.448529411765,DeepSeek-R1-Distill-Llama-8B,minerva,20,0.01
272,44,45,4136.930147058823,3938.948529411765,2170.7132352941176,DeepSeek-R1-Distill-Llama-8B,minerva,20,0.05
272,44,45,4136.930147058823,3938.948529411765,2170.7132352941176,DeepSeek-R1-Distill-Llama-8B,minerva,20,0.1
272,44,45,4136.930147058823,3938.948529411765,2170.7132352941176,DeepSeek-R1-Distill-Llama-8B,minerva,20,0.15
272,44,45,4136.930147058823,3938.948529411765,2170.7132352941176,DeepSeek-R1-Distill-Llama-8B,minerva,20,0.2
675,214,156,7717.914074074074,2385.6296296296296,1278.7318518518518,DeepSeek-R1-Distill-Llama-8B,olympiadbench,3,0.01
675,214,153,7717.914074074074,2348.133333333333,1265.2785185185185,DeepSeek-R1-Distill-Llama-8B,olympiadbench,3,0.05
675,214,152,7717.914074074074,2333.251851851852,1259.6162962962962,DeepSeek-R1-Distill-Llama-8B,olympiadbench,3,0.1
675,214,152,7717.914074074074,2328.0414814814817,1256.831111111111,DeepSeek-R1-Distill-Llama-8B,olympiadbench,3,0.15
675,214,152,7717.914074074074,2325.6681481481482,1255.0533333333333,DeepSeek-R1-Distill-Llama-8B,olympiadbench,3,0.2
675,214,184,7717.914074074074,3495.268148148148,2004.0222222222221,DeepSeek-R1-Distill-Llama-8B,olympiadbench,5,0.01
675,214,181,7717.914074074074,3451.72,1987.0177777777778,DeepSeek-R1-Distill-Llama-8B,olympiadbench,5,0.05
675,214,180,7717.914074074074,3441.474074074074,1981.3585185185186,DeepSeek-R1-Distill-Llama-8B,olympiadbench,5,0.1
675,214,180,7717.914074074074,3426.3674074074074,1969.7096296296297,DeepSeek-R1-Distill-Llama-8B,olympiadbench,5,0.15
675,214,180,7717.914074074074,3424.091851851852,1967.9911111111112,DeepSeek-R1-Distill-Llama-8B,olympiadbench,5,0.2
675,214,197,7717.914074074074,4058.9155555555553,2400.4414814814813,DeepSeek-R1-Distill-Llama-8B,olympiadbench,7,0.01
675,214,195,7717.914074074074,4024.1674074074076,2387.72,DeepSeek-R1-Distill-Llama-8B,olympiadbench,7,0.05
675,214,194,7717.914074074074,4020.094814814815,2385.645925925926,DeepSeek-R1-Distill-Llama-8B,olympiadbench,7,0.1
675,214,194,7717.914074074074,4007.9377777777777,2376.397037037037,DeepSeek-R1-Distill-Llama-8B,olympiadbench,7,0.15
675,214,194,7717.914074074074,4005.8459259259257,2374.7674074074075,DeepSeek-R1-Distill-Llama-8B,olympiadbench,7,0.2
675,214,206,7717.914074074074,4801.6177777777775,2925.211851851852,DeepSeek-R1-Distill-Llama-8B,olympiadbench,10,0.01
675,214,205,7717.914074074074,4790.817777777777,2919.86962962963,DeepSeek-R1-Distill-Llama-8B,olympiadbench,10,0.05
675,214,204,7717.914074074074,4786.635555555556,2917.5881481481483,DeepSeek-R1-Distill-Llama-8B,olympiadbench,10,0.1
675,214,204,7717.914074074074,4785.6577777777775,2917.0844444444447,DeepSeek-R1-Distill-Llama-8B,olympiadbench,10,0.15
675,214,204,7717.914074074074,4783.622222222222,2915.3955555555553,DeepSeek-R1-Distill-Llama-8B,olympiadbench,10,0.2
675,214,212,7717.914074074074,5449.881481481481,3379.3837037037038,DeepSeek-R1-Distill-Llama-8B,olympiadbench,15,0.01
675,214,212,7717.914074074074,5436.591111111111,3370.5333333333333,DeepSeek-R1-Distill-Llama-8B,olympiadbench,15,0.05
675,214,212,7717.914074074074,5432.973333333333,3368.222222222222,DeepSeek-R1-Distill-Llama-8B,olympiadbench,15,0.1
675,214,212,7717.914074074074,5432.373333333333,3367.866666666667,DeepSeek-R1-Distill-Llama-8B,olympiadbench,15,0.15
675,214,212,7717.914074074074,5429.982222222222,3366.0592592592593,DeepSeek-R1-Distill-Llama-8B,olympiadbench,15,0.2
675,214,215,7717.914074074074,5841.765925925926,3644.4637037037037,DeepSeek-R1-Distill-Llama-8B,olympiadbench,20,0.01
675,214,215,7717.914074074074,5827.1407407407405,3634.2725925925924,DeepSeek-R1-Distill-Llama-8B,olympiadbench,20,0.05
675,214,215,7717.914074074074,5825.047407407407,3632.6725925925925,DeepSeek-R1-Distill-Llama-8B,olympiadbench,20,0.1
675,214,215,7717.914074074074,5824.9051851851855,3632.5837037037036,DeepSeek-R1-Distill-Llama-8B,olympiadbench,20,0.15
675,214,215,7717.914074074074,5822.986666666667,3631.2503703703705,DeepSeek-R1-Distill-Llama-8B,olympiadbench,20,0.2
30,19,9,12510.666666666666,4245.633333333333,2289.733333333333,QwQ-32B,aime,3,0.01
30,19,9,12510.666666666666,4239.9,2286.4,QwQ-32B,aime,3,0.05
30,19,9,12510.666666666666,4215.633333333333,2280.4,QwQ-32B,aime,3,0.1
30,19,9,12510.666666666666,4154.766666666666,2253.733333333333,QwQ-32B,aime,3,0.15
30,19,9,12510.666666666666,4154.766666666666,2253.733333333333,QwQ-32B,aime,3,0.2
30,19,15,12510.666666666666,6649.7,3688.3,QwQ-32B,aime,5,0.01
30,19,15,12510.666666666666,6649.7,3688.3,QwQ-32B,aime,5,0.05
30,19,15,12510.666666666666,6649.7,3688.3,QwQ-32B,aime,5,0.1
30,19,15,12510.666666666666,6649.7,3688.3,QwQ-32B,aime,5,0.15
30,19,15,12510.666666666666,6649.7,3688.3,QwQ-32B,aime,5,0.2
30,19,18,12510.666666666666,7782.2,4441.566666666667,QwQ-32B,aime,7,0.01
30,19,18,12510.666666666666,7782.2,4441.566666666667,QwQ-32B,aime,7,0.05
30,19,18,12510.666666666666,7782.2,4441.566666666667,QwQ-32B,aime,7,0.1
30,19,18,12510.666666666666,7782.2,4441.566666666667,QwQ-32B,aime,7,0.15
30,19,18,12510.666666666666,7782.2,4441.566666666667,QwQ-32B,aime,7,0.2
30,19,18,12510.666666666666,8129.833333333333,4664.933333333333,QwQ-32B,aime,10,0.01
30,19,18,12510.666666666666,8129.833333333333,4664.933333333333,QwQ-32B,aime,10,0.05
30,19,18,12510.666666666666,8129.833333333333,4664.933333333333,QwQ-32B,aime,10,0.1
30,19,18,12510.666666666666,8129.833333333333,4664.933333333333,QwQ-32B,aime,10,0.15
30,19,18,12510.666666666666,8129.833333333333,4664.933333333333,QwQ-32B,aime,10,0.2
30,19,21,12510.666666666666,8611.2,4912.766666666666,QwQ-32B,aime,15,0.01
30,19,21,12510.666666666666,8611.2,4912.766666666666,QwQ-32B,aime,15,0.05
30,19,21,12510.666666666666,8611.2,4912.766666666666,QwQ-32B,aime,15,0.1
30,19,21,12510.666666666666,8611.2,4912.766666666666,QwQ-32B,aime,15,0.15
30,19,21,12510.666666666666,8611.2,4912.766666666666,QwQ-32B,aime,15,0.2
30,19,21,12510.666666666666,8935.733333333334,5096.866666666667,QwQ-32B,aime,20,0.01
30,19,21,12510.666666666666,8935.733333333334,5096.866666666667,QwQ-32B,aime,20,0.05
30,19,21,12510.666666666666,8935.733333333334,5096.866666666667,QwQ-32B,aime,20,0.1
30,19,21,12510.666666666666,8935.733333333334,5096.866666666667,QwQ-32B,aime,20,0.15
30,19,21,12510.666666666666,8935.733333333334,5096.866666666667,QwQ-32B,aime,20,0.2
40,0,0,7338.275,1768.975,1019.825,QwQ-32B,amc,3,0.01
40,0,0,7338.275,1703.825,994.45,QwQ-32B,amc,3,0.05
40,0,0,7338.275,1661.275,985.375,QwQ-32B,amc,3,0.1
40,0,0,7338.275,1661.275,985.375,QwQ-32B,amc,3,0.15
40,0,0,7338.275,1661.275,985.375,QwQ-32B,amc,3,0.2
40,0,0,7338.275,2621.575,1978.825,QwQ-32B,amc,5,0.01
40,0,0,7338.275,2555.225,1950.575,QwQ-32B,amc,5,0.05
40,0,0,7338.275,2537.225,1940.075,QwQ-32B,amc,5,0.1
40,0,0,7338.275,2536.975,1939.575,QwQ-32B,amc,5,0.15
40,0,0,7338.275,2536.975,1939.575,QwQ-32B,amc,5,0.2
40,0,0,7338.275,3551.125,2715.525,QwQ-32B,amc,7,0.01
40,0,0,7338.275,3504.325,2694.85,QwQ-32B,amc,7,0.05
40,0,0,7338.275,3496.55,2688.85,QwQ-32B,amc,7,0.1
40,0,0,7338.275,3496.55,2688.85,QwQ-32B,amc,7,0.15
40,0,0,7338.275,3496.55,2688.85,QwQ-32B,amc,7,0.2
40,0,0,7338.275,3762.975,2902.05,QwQ-32B,amc,10,0.01
40,0,0,7338.275,3746.725,2892.55,QwQ-32B,amc,10,0.05
40,0,0,7338.275,3737.95,2886.55,QwQ-32B,amc,10,0.1
40,0,0,7338.275,3737.95,2886.55,QwQ-32B,amc,10,0.15
40,0,0,7338.275,3737.95,2886.55,QwQ-32B,amc,10,0.2
40,0,0,7338.275,4371.725,3360.225,QwQ-32B,amc,15,0.01
40,0,0,7338.275,4365.25,3356.225,QwQ-32B,amc,15,0.05
40,0,0,7338.275,4365.25,3356.225,QwQ-32B,amc,15,0.1
40,0,0,7338.275,4365.25,3356.225,QwQ-32B,amc,15,0.15
40,0,0,7338.275,4365.25,3356.225,QwQ-32B,amc,15,0.2
40,0,0,7338.275,4832.875,3702.1,QwQ-32B,amc,20,0.01
40,0,0,7338.275,4829.725,3700.6,QwQ-32B,amc,20,0.05
40,0,0,7338.275,4829.725,3700.6,QwQ-32B,amc,20,0.1
40,0,0,7338.275,4829.725,3700.6,QwQ-32B,amc,20,0.15
40,0,0,7338.275,4829.725,3700.6,QwQ-32B,amc,20,0.2
198,126,88,6946.676767676768,1537.7171717171718,362.09090909090907,QwQ-32B,gpqa,3,0.01
198,126,87,6946.676767676768,1441.141414141414,331.30808080808083,QwQ-32B,gpqa,3,0.05
198,126,86,6946.676767676768,1408.8181818181818,324.2828282828283,QwQ-32B,gpqa,3,0.1
198,126,85,6946.676767676768,1404.111111111111,322.2323232323232,QwQ-32B,gpqa,3,0.15
198,126,85,6946.676767676768,1404.0151515151515,322.1313131313131,QwQ-32B,gpqa,3,0.2
198,126,101,6946.676767676768,2300.1414141414143,613.5454545454545,QwQ-32B,gpqa,5,0.01
198,126,99,6946.676767676768,2154.2626262626263,560.0656565656566,QwQ-32B,gpqa,5,0.05
198,126,99,6946.676767676768,2149.843434343434,558.070707070707,QwQ-32B,gpqa,5,0.1
198,126,99,6946.676767676768,2146.949494949495,556.4040404040404,QwQ-32B,gpqa,5,0.15
198,126,99,6946.676767676768,2146.489898989899,556.10101010101,QwQ-32B,gpqa,5,0.2
198,126,110,6946.676767676768,2939.8939393939395,876.7020202020202,QwQ-32B,gpqa,7,0.01
198,126,109,6946.676767676768,2857.7323232323233,847.1363636363636,QwQ-32B,gpqa,7,0.05
198,126,109,6946.676767676768,2852.308080808081,845.0555555555555,QwQ-32B,gpqa,7,0.1
198,126,109,6946.676767676768,2850.510101010101,844.3484848484849,QwQ-32B,gpqa,7,0.15
198,126,109,6946.676767676768,2850.1666666666665,844.1464646464647,QwQ-32B,gpqa,7,0.2
198,126,116,6946.676767676768,3661.6414141414143,1171.3686868686868,QwQ-32B,gpqa,10,0.01
198,126,116,6946.676767676768,3623.939393939394,1157.4141414141413,QwQ-32B,gpqa,10,0.05
198,126,116,6946.676767676768,3619.4848484848485,1154.8939393939395,QwQ-32B,gpqa,10,0.1
198,126,116,6946.676767676768,3617.979797979798,1154.2878787878788,QwQ-32B,gpqa,10,0.15
198,126,116,6946.676767676768,3617.979797979798,1154.2878787878788,QwQ-32B,gpqa,10,0.2
198,126,123,6946.676767676768,4516.909090909091,1607.090909090909,QwQ-32B,gpqa,15,0.01
198,126,123,6946.676767676768,4481.631313131313,1595.121212121212,QwQ-32B,gpqa,15,0.05
198,126,123,6946.676767676768,4475.262626262626,1592.449494949495,QwQ-32B,gpqa,15,0.1
198,126,123,6946.676767676768,4475.222222222223,1592.3484848484848,QwQ-32B,gpqa,15,0.15
198,126,123,6946.676767676768,4475.222222222223,1592.3484848484848,QwQ-32B,gpqa,15,0.2
198,126,128,6946.676767676768,5041.535353535353,1838.0,QwQ-32B,gpqa,20,0.01
198,126,128,6946.676767676768,5006.70202020202,1824.1868686868686,QwQ-32B,gpqa,20,0.05
198,126,127,6946.676767676768,4993.535353535353,1818.9292929292928,QwQ-32B,gpqa,20,0.1
198,126,127,6946.676767676768,4993.535353535353,1818.9292929292928,QwQ-32B,gpqa,20,0.15
198,126,127,6946.676767676768,4993.535353535353,1818.9292929292928,QwQ-32B,gpqa,20,0.2
500,300,268,4075.306,1416.568,595.594,QwQ-32B,math,3,0.01
500,300,260,4075.306,1343.494,564.58,QwQ-32B,math,3,0.05
500,300,257,4075.306,1319.874,552.432,QwQ-32B,math,3,0.1
500,300,257,4075.306,1311.564,548.154,QwQ-32B,math,3,0.15
500,300,257,4075.306,1308.514,546.054,QwQ-32B,math,3,0.2
500,300,293,4075.306,1885.388,882.864,QwQ-32B,math,5,0.01
500,300,289,4075.306,1829.884,858.154,QwQ-32B,math,5,0.05
500,300,288,4075.306,1818.422,852.118,QwQ-32B,math,5,0.1
500,300,288,4075.306,1814.242,849.7,QwQ-32B,math,5,0.15
500,300,288,4075.306,1808.292,845.872,QwQ-32B,math,5,0.2
500,300,309,4075.306,2147.746,1045.632,QwQ-32B,math,7,0.01
500,300,306,4075.306,2098.93,1023.01,QwQ-32B,math,7,0.05
500,300,306,4075.306,2092.712,1019.56,QwQ-32B,math,7,0.1
500,300,306,4075.306,2090.03,1017.532,QwQ-32B,math,7,0.15
500,300,306,4075.306,2085.316,1015.224,QwQ-32B,math,7,0.2
500,300,313,4075.306,2373.55,1161.18,QwQ-32B,math,10,0.01
500,300,311,4075.306,2343.504,1146.914,QwQ-32B,math,10,0.05
500,300,311,4075.306,2338.458,1144.352,QwQ-32B,math,10,0.1
500,300,311,4075.306,2335.528,1141.984,QwQ-32B,math,10,0.15
500,300,311,4075.306,2334.79,1141.506,QwQ-32B,math,10,0.2
500,300,323,4075.306,2721.012,1350.48,QwQ-32B,math,15,0.01
500,300,322,4075.306,2684.742,1332.132,QwQ-32B,math,15,0.05
500,300,322,4075.306,2679.622,1329.59,QwQ-32B,math,15,0.1
500,300,322,4075.306,2676.644,1328.22,QwQ-32B,math,15,0.15
500,300,322,4075.306,2674.758,1326.694,QwQ-32B,math,15,0.2
500,300,321,4075.306,2891.46,1442.4,QwQ-32B,math,20,0.01
500,300,321,4075.306,2866.59,1429.104,QwQ-32B,math,20,0.05
500,300,321,4075.306,2862.768,1427.322,QwQ-32B,math,20,0.1
500,300,321,4075.306,2860.172,1426.228,QwQ-32B,math,20,0.15
500,300,321,4075.306,2860.132,1426.188,QwQ-32B,math,20,0.2
272,51,65,5154.738970588235,2349.580882352941,1058.625,QwQ-32B,minerva,3,0.01
272,51,66,5154.738970588235,2314.393382352941,1042.735294117647,QwQ-32B,minerva,3,0.05
272,51,66,5154.738970588235,2303.768382352941,1039.5808823529412,QwQ-32B,minerva,3,0.1
272,51,66,5154.738970588235,2297.731617647059,1038.5514705882354,QwQ-32B,minerva,3,0.15
272,51,66,5154.738970588235,2295.908088235294,1037.7904411764705,QwQ-32B,minerva,3,0.2
272,51,65,5154.738970588235,3072.4375,1466.1838235294117,QwQ-32B,minerva,5,0.01
272,51,68,5154.738970588235,3048.0036764705883,1458.7941176470588,QwQ-32B,minerva,5,0.05
272,51,68,5154.738970588235,3043.529411764706,1457.2058823529412,QwQ-32B,minerva,5,0.1
272,51,68,5154.738970588235,3040.0845588235293,1456.764705882353,QwQ-32B,minerva,5,0.15
272,51,68,5154.738970588235,3039.8198529411766,1456.6911764705883,QwQ-32B,minerva,5,0.2
272,51,66,5154.738970588235,3427.514705882353,1669.2610294117646,QwQ-32B,minerva,7,0.01
272,51,69,5154.738970588235,3409.1911764705883,1662.1948529411766,QwQ-32B,minerva,7,0.05
272,51,69,5154.738970588235,3405.0551470588234,1660.6360294117646,QwQ-32B,minerva,7,0.1
272,51,69,5154.738970588235,3402.985294117647,1660.4154411764705,QwQ-32B,minerva,7,0.15
272,51,69,5154.738970588235,3402.985294117647,1660.4154411764705,QwQ-32B,minerva,7,0.2
272,51,68,5154.738970588235,3817.113970588235,1869.9632352941176,QwQ-32B,minerva,10,0.01
272,51,70,5154.738970588235,3801.8088235294117,1864.0183823529412,QwQ-32B,minerva,10,0.05
272,51,70,5154.738970588235,3798.8492647058824,1863.0625,QwQ-32B,minerva,10,0.1
272,51,70,5154.738970588235,3798.8492647058824,1863.0625,QwQ-32B,minerva,10,0.15
272,51,70,5154.738970588235,3798.8492647058824,1863.0625,QwQ-32B,minerva,10,0.2
272,51,62,5154.738970588235,4292.308823529412,2144.8676470588234,QwQ-32B,minerva,15,0.01
272,51,64,5154.738970588235,4283.261029411765,2141.610294117647,QwQ-32B,minerva,15,0.05
272,51,64,5154.738970588235,4282.044117647059,2141.198529411765,QwQ-32B,minerva,15,0.1
272,51,64,5154.738970588235,4282.044117647059,2141.198529411765,QwQ-32B,minerva,15,0.15
272,51,64,5154.738970588235,4282.044117647059,2141.198529411765,QwQ-32B,minerva,15,0.2
272,51,62,5154.738970588235,4581.474264705882,2302.7757352941176,QwQ-32B,minerva,20,0.01
272,51,64,5154.738970588235,4576.882352941177,2301.488970588235,QwQ-32B,minerva,20,0.05
272,51,64,5154.738970588235,4576.882352941177,2301.488970588235,QwQ-32B,minerva,20,0.1
272,51,64,5154.738970588235,4576.882352941177,2301.488970588235,QwQ-32B,minerva,20,0.15
272,51,64,5154.738970588235,4576.882352941177,2301.488970588235,QwQ-32B,minerva,20,0.2
675,238,185,9204.543703703705,2493.8133333333335,1137.1051851851853,QwQ-32B,olympiadbench,3,0.01
675,238,184,9204.543703703705,2417.5807407407406,1106.5377777777778,QwQ-32B,olympiadbench,3,0.05
675,238,180,9204.543703703705,2393.6266666666666,1096.1244444444444,QwQ-32B,olympiadbench,3,0.1
675,238,179,9204.543703703705,2383.5807407407406,1092.4903703703703,QwQ-32B,olympiadbench,3,0.15
675,238,179,9204.543703703705,2376.614814814815,1089.5274074074075,QwQ-32B,olympiadbench,3,0.2
675,238,219,9204.543703703705,3442.4266666666667,1717.4074074074074,QwQ-32B,olympiadbench,5,0.01
675,238,219,9204.543703703705,3373.657777777778,1689.111111111111,QwQ-32B,olympiadbench,5,0.05
675,238,219,9204.543703703705,3366.72,1686.3274074074075,QwQ-32B,olympiadbench,5,0.1
675,238,218,9204.543703703705,3357.6237037037035,1682.8103703703703,QwQ-32B,olympiadbench,5,0.15
675,238,218,9204.543703703705,3355.345185185185,1681.8874074074074,QwQ-32B,olympiadbench,5,0.2
675,238,234,9204.543703703705,4187.072592592593,2180.6133333333332,QwQ-32B,olympiadbench,7,0.01
675,238,235,9204.543703703705,4114.881481481481,2147.988148148148,QwQ-32B,olympiadbench,7,0.05
675,238,235,9204.543703703705,4109.980740740741,2145.3288888888887,QwQ-32B,olympiadbench,7,0.1
675,238,235,9204.543703703705,4101.965925925926,2142.0725925925926,QwQ-32B,olympiadbench,7,0.15
675,238,235,9204.543703703705,4100.1407407407405,2141.3555555555554,QwQ-32B,olympiadbench,7,0.2
675,238,261,9204.543703703705,5061.917037037037,2773.68,QwQ-32B,olympiadbench,10,0.01
675,238,261,9204.543703703705,4998.08,2743.9333333333334,QwQ-32B,olympiadbench,10,0.05
675,238,261,9204.543703703705,4994.567407407408,2741.0,QwQ-32B,olympiadbench,10,0.1
675,238,261,9204.543703703705,4987.725925925926,2738.511111111111,QwQ-32B,olympiadbench,10,0.15
675,238,261,9204.543703703705,4986.888888888889,2737.9777777777776,QwQ-32B,olympiadbench,10,0.2
675,238,275,9204.543703703705,5841.838518518519,3258.611851851852,QwQ-32B,olympiadbench,15,0.01
675,238,275,9204.543703703705,5794.602962962963,3233.137777777778,QwQ-32B,olympiadbench,15,0.05
675,238,275,9204.543703703705,5791.463703703704,3230.5333333333333,QwQ-32B,olympiadbench,15,0.1
675,238,275,9204.543703703705,5788.989629629629,3229.822222222222,QwQ-32B,olympiadbench,15,0.15
675,238,275,9204.543703703705,5788.208888888889,3229.348148148148,QwQ-32B,olympiadbench,15,0.2
675,238,282,9204.543703703705,6380.955555555555,3614.4903703703703,QwQ-32B,olympiadbench,20,0.01
675,238,282,9204.543703703705,6368.7674074074075,3607.657777777778,QwQ-32B,olympiadbench,20,0.05
675,238,282,9204.543703703705,6367.008888888889,3606.742222222222,QwQ-32B,olympiadbench,20,0.1
675,238,282,9204.543703703705,6364.722962962963,3606.179259259259,QwQ-32B,olympiadbench,20,0.15
675,238,282,9204.543703703705,6363.982222222222,3605.734814814815,QwQ-32B,olympiadbench,20,0.2
30,22,10,15066.566666666668,5574.766666666666,1508.5666666666666,Qwen3-8B,aime,3,0.01
30,22,10,15066.566666666668,5521.866666666667,1499.9,Qwen3-8B,aime,3,0.05
30,22,10,15066.566666666668,5445.6,1492.0666666666666,Qwen3-8B,aime,3,0.1
30,22,10,15066.566666666668,5438.633333333333,1491.7666666666667,Qwen3-8B,aime,3,0.15
30,22,10,15066.566666666668,5438.633333333333,1491.7666666666667,Qwen3-8B,aime,3,0.2
30,22,13,15066.566666666668,5921.566666666667,1604.5,Qwen3-8B,aime,5,0.01
30,22,13,15066.566666666668,5910.166666666667,1602.7333333333333,Qwen3-8B,aime,5,0.05
30,22,13,15066.566666666668,5836.433333333333,1594.3666666666666,Qwen3-8B,aime,5,0.1
30,22,13,15066.566666666668,5836.433333333333,1594.3666666666666,Qwen3-8B,aime,5,0.15
30,22,13,15066.566666666668,5836.433333333333,1594.3666666666666,Qwen3-8B,aime,5,0.2
30,22,13,15066.566666666668,6819.333333333333,1830.8,Qwen3-8B,aime,7,0.01
30,22,13,15066.566666666668,6811.766666666666,1829.3333333333333,Qwen3-8B,aime,7,0.05
30,22,13,15066.566666666668,6811.766666666666,1829.3333333333333,Qwen3-8B,aime,7,0.1
30,22,13,15066.566666666668,6811.766666666666,1829.3333333333333,Qwen3-8B,aime,7,0.15
30,22,13,15066.566666666668,6811.766666666666,1829.3333333333333,Qwen3-8B,aime,7,0.2
30,22,15,15066.566666666668,7327.566666666667,1944.8666666666666,Qwen3-8B,aime,10,0.01
30,22,15,15066.566666666668,7323.4,1943.8,Qwen3-8B,aime,10,0.05
30,22,15,15066.566666666668,7323.4,1943.8,Qwen3-8B,aime,10,0.1
30,22,15,15066.566666666668,7323.4,1943.8,Qwen3-8B,aime,10,0.15
30,22,15,15066.566666666668,7323.4,1943.8,Qwen3-8B,aime,10,0.2
30,22,21,15066.566666666668,8089.266666666666,2128.6,Qwen3-8B,aime,15,0.01
30,22,21,15066.566666666668,8089.266666666666,2128.6,Qwen3-8B,aime,15,0.05
30,22,21,15066.566666666668,8089.266666666666,2128.6,Qwen3-8B,aime,15,0.1
30,22,21,15066.566666666668,8089.266666666666,2128.6,Qwen3-8B,aime,15,0.15
30,22,21,15066.566666666668,8089.266666666666,2128.6,Qwen3-8B,aime,15,0.2
30,22,21,15066.566666666668,9339.933333333332,2570.7,Qwen3-8B,aime,20,0.01
30,22,21,15066.566666666668,9339.933333333332,2570.7,Qwen3-8B,aime,20,0.05
30,22,21,15066.566666666668,9339.933333333332,2570.7,Qwen3-8B,aime,20,0.1
30,22,21,15066.566666666668,9339.933333333332,2570.7,Qwen3-8B,aime,20,0.15
30,22,21,15066.566666666668,9339.933333333332,2570.7,Qwen3-8B,aime,20,0.2
40,0,0,8506.9,2585.175,596.25,Qwen3-8B,amc,3,0.01
40,0,0,8506.9,2551.675,584.675,Qwen3-8B,amc,3,0.05
40,0,0,8506.9,2528.95,574.375,Qwen3-8B,amc,3,0.1
40,0,0,8506.9,2522.575,573.175,Qwen3-8B,amc,3,0.15
40,0,0,8506.9,2477.175,557.675,Qwen3-8B,amc,3,0.2
40,0,0,8506.9,2974.125,712.15,Qwen3-8B,amc,5,0.01
40,0,0,8506.9,2883.625,685.425,Qwen3-8B,amc,5,0.05
40,0,0,8506.9,2860.2,680.325,Qwen3-8B,amc,5,0.1
40,0,0,8506.9,2856.75,679.75,Qwen3-8B,amc,5,0.15
40,0,0,8506.9,2806.05,660.25,Qwen3-8B,amc,5,0.2
40,0,0,8506.9,3105.575,742.825,Qwen3-8B,amc,7,0.01
40,0,0,8506.9,3015.55,716.175,Qwen3-8B,amc,7,0.05
40,0,0,8506.9,2997.55,712.3,Qwen3-8B,amc,7,0.1
40,0,0,8506.9,2997.55,712.3,Qwen3-8B,amc,7,0.15
40,0,0,8506.9,2944.525,691.8,Qwen3-8B,amc,7,0.2
40,0,0,8506.9,3742.3,1040.2,Qwen3-8B,amc,10,0.01
40,0,0,8506.9,3652.275,1013.55,Qwen3-8B,amc,10,0.05
40,0,0,8506.9,3647.9,1012.425,Qwen3-8B,amc,10,0.1
40,0,0,8506.9,3647.9,1012.425,Qwen3-8B,amc,10,0.15
40,0,0,8506.9,3601.9,990.925,Qwen3-8B,amc,10,0.2
40,0,0,8506.9,4315.65,1228.4,Qwen3-8B,amc,15,0.01
40,0,0,8506.9,4251.025,1211.125,Qwen3-8B,amc,15,0.05
40,0,0,8506.9,4251.025,1211.125,Qwen3-8B,amc,15,0.1
40,0,0,8506.9,4251.025,1211.125,Qwen3-8B,amc,15,0.15
40,0,0,8506.9,4251.025,1211.125,Qwen3-8B,amc,15,0.2
40,0,0,8506.9,4519.45,1269.55,Qwen3-8B,amc,20,0.01
40,0,0,8506.9,4511.175,1267.925,Qwen3-8B,amc,20,0.05
40,0,0,8506.9,4511.175,1267.925,Qwen3-8B,amc,20,0.1
40,0,0,8506.9,4511.175,1267.925,Qwen3-8B,amc,20,0.15
40,0,0,8506.9,4511.175,1267.925,Qwen3-8B,amc,20,0.2
198,103,88,7733.666666666667,3197.1464646464647,167.27272727272728,Qwen3-8B,gpqa,3,0.01
198,103,86,7733.666666666667,3056.2020202020203,155.67676767676767,Qwen3-8B,gpqa,3,0.05
198,103,85,7733.666666666667,2988.570707070707,149.85353535353536,Qwen3-8B,gpqa,3,0.1
198,103,85,7733.666666666667,2954.2070707070707,147.37878787878788,Qwen3-8B,gpqa,3,0.15
198,103,85,7733.666666666667,2941.7020202020203,146.5909090909091,Qwen3-8B,gpqa,3,0.2
198,103,94,7733.666666666667,3643.757575757576,191.77272727272728,Qwen3-8B,gpqa,5,0.01
198,103,93,7733.666666666667,3529.373737373737,180.9747474747475,Qwen3-8B,gpqa,5,0.05
198,103,92,7733.666666666667,3405.848484848485,174.58585858585857,Qwen3-8B,gpqa,5,0.1
198,103,92,7733.666666666667,3374.6060606060605,172.2929292929293,Qwen3-8B,gpqa,5,0.15
198,103,92,7733.666666666667,3362.8030303030305,171.56565656565655,Qwen3-8B,gpqa,5,0.2
198,103,100,7733.666666666667,4066.6363636363635,220.62121212121212,Qwen3-8B,gpqa,7,0.01
198,103,97,7733.666666666667,3943.4545454545455,209.63131313131314,Qwen3-8B,gpqa,7,0.05
198,103,96,7733.666666666667,3854.010101010101,199.88888888888889,Qwen3-8B,gpqa,7,0.1
198,103,96,7733.666666666667,3829.843434343434,198.27777777777777,Qwen3-8B,gpqa,7,0.15
198,103,96,7733.666666666667,3817.752525252525,197.55050505050505,Qwen3-8B,gpqa,7,0.2
198,103,100,7733.666666666667,4668.454545454545,259.3686868686869,Qwen3-8B,gpqa,10,0.01
198,103,99,7733.666666666667,4568.661616161616,249.27777777777777,Qwen3-8B,gpqa,10,0.05
198,103,99,7733.666666666667,4484.242424242424,239.17676767676767,Qwen3-8B,gpqa,10,0.1
198,103,99,7733.666666666667,4461.787878787879,237.67171717171718,Qwen3-8B,gpqa,10,0.15
198,103,99,7733.666666666667,4451.368686868687,237.0050505050505,Qwen3-8B,gpqa,10,0.2
198,103,106,7733.666666666667,5195.030303030303,299.8181818181818,Qwen3-8B,gpqa,15,0.01
198,103,105,7733.666666666667,5114.843434343435,290.7676767676768,Qwen3-8B,gpqa,15,0.05
198,103,105,7733.666666666667,5036.469696969697,281.1111111111111,Qwen3-8B,gpqa,15,0.1
198,103,104,7733.666666666667,5012.277777777777,278.9040404040404,Qwen3-8B,gpqa,15,0.15
198,103,104,7733.666666666667,5010.146464646465,278.6818181818182,Qwen3-8B,gpqa,15,0.2
198,103,105,7733.666666666667,5710.757575757576,347.0959595959596,Qwen3-8B,gpqa,20,0.01
198,103,103,7733.666666666667,5605.489898989899,336.45959595959596,Qwen3-8B,gpqa,20,0.05
198,103,103,7733.666666666667,5524.292929292929,326.47474747474746,Qwen3-8B,gpqa,20,0.1
198,103,102,7733.666666666667,5515.757575757576,325.6212121212121,Qwen3-8B,gpqa,20,0.15
198,103,102,7733.666666666667,5512.459595959596,325.25757575757575,Qwen3-8B,gpqa,20,0.2
500,342,260,4750.032,1786.524,417.022,Qwen3-8B,math,3,0.01
500,342,252,4750.032,1656.264,385.94,Qwen3-8B,math,3,0.05
500,342,246,4750.032,1589.124,370.304,Qwen3-8B,math,3,0.1
500,342,243,4750.032,1575.576,365.918,Qwen3-8B,math,3,0.15
500,342,240,4750.032,1562.246,362.206,Qwen3-8B,math,3,0.2
500,342,287,4750.032,2063.032,506.054,Qwen3-8B,math,5,0.01
500,342,282,4750.032,1939.9,475.992,Qwen3-8B,math,5,0.05
500,342,280,4750.032,1881.906,462.696,Qwen3-8B,math,5,0.1
500,342,280,4750.032,1870.906,458.902,Qwen3-8B,math,5,0.15
500,342,279,4750.032,1863.106,456.29,Qwen3-8B,math,5,0.2
500,342,307,4750.032,2248.988,568.758,Qwen3-8B,math,7,0.01
500,342,303,4750.032,2134.252,541.306,Qwen3-8B,math,7,0.05
500,342,300,4750.032,2090.32,530.946,Qwen3-8B,math,7,0.1
500,342,300,4750.032,2079.734,526.974,Qwen3-8B,math,7,0.15
500,342,300,4750.032,2063.816,523.408,Qwen3-8B,math,7,0.2
500,342,314,4750.032,2450.952,625.672,Qwen3-8B,math,10,0.01
500,342,312,4750.032,2332.474,599.548,Qwen3-8B,math,10,0.05
500,342,309,4750.032,2291.518,587.478,Qwen3-8B,math,10,0.1
500,342,309,4750.032,2278.188,582.496,Qwen3-8B,math,10,0.15
500,342,309,4750.032,2262.088,578.638,Qwen3-8B,math,10,0.2
500,342,321,4750.032,2805.306,727.318,Qwen3-8B,math,15,0.01
500,342,322,4750.032,2708.89,706.25,Qwen3-8B,math,15,0.05
500,342,322,4750.032,2682.032,698.524,Qwen3-8B,math,15,0.1
500,342,322,4750.032,2675.064,695.754,Qwen3-8B,math,15,0.15
500,342,322,4750.032,2673.19,694.962,Qwen3-8B,math,15,0.2
500,342,327,4750.032,3069.856,802.77,Qwen3-8B,math,20,0.01
500,342,326,4750.032,3006.506,789.448,Qwen3-8B,math,20,0.05
500,342,326,4750.032,2981.898,781.34,Qwen3-8B,math,20,0.1
500,342,326,4750.032,2976.476,778.994,Qwen3-8B,math,20,0.15
500,342,326,4750.032,2974.762,778.232,Qwen3-8B,math,20,0.2
272,69,57,5920.952205882353,1704.7757352941176,448.1875,Qwen3-8B,minerva,3,0.01
272,69,55,5920.952205882353,1623.1433823529412,424.69485294117646,Qwen3-8B,minerva,3,0.05
272,69,55,5920.952205882353,1596.889705882353,412.93014705882354,Qwen3-8B,minerva,3,0.1
272,69,55,5920.952205882353,1588.4595588235295,410.91911764705884,Qwen3-8B,minerva,3,0.15
272,69,55,5920.952205882353,1581.6801470588234,407.9007352941176,Qwen3-8B,minerva,3,0.2
272,69,60,5920.952205882353,2267.9301470588234,661.2536764705883,Qwen3-8B,minerva,5,0.01
272,69,60,5920.952205882353,2189.360294117647,638.8125,Qwen3-8B,minerva,5,0.05
272,69,60,5920.952205882353,2169.2904411764707,631.75,Qwen3-8B,minerva,5,0.1
272,69,60,5920.952205882353,2161.823529411765,630.1323529411765,Qwen3-8B,minerva,5,0.15
272,69,60,5920.952205882353,2160.143382352941,629.5,Qwen3-8B,minerva,5,0.2
272,69,63,5920.952205882353,2635.393382352941,797.2205882352941,Qwen3-8B,minerva,7,0.01
272,69,63,5920.952205882353,2567.8786764705883,775.1911764705883,Qwen3-8B,minerva,7,0.05
272,69,63,5920.952205882353,2550.4779411764707,769.5220588235294,Qwen3-8B,minerva,7,0.1
272,69,63,5920.952205882353,2536.466911764706,765.2058823529412,Qwen3-8B,minerva,7,0.15
272,69,63,5920.952205882353,2535.2426470588234,764.6286764705883,Qwen3-8B,minerva,7,0.2
272,69,68,5920.952205882353,3046.393382352941,956.9963235294117,Qwen3-8B,minerva,10,0.01
272,69,68,5920.952205882353,2981.8345588235293,934.3014705882352,Qwen3-8B,minerva,10,0.05
272,69,68,5920.952205882353,2972.9926470588234,930.1948529411765,Qwen3-8B,minerva,10,0.1
272,69,68,5920.952205882353,2959.8676470588234,925.9301470588235,Qwen3-8B,minerva,10,0.15
272,69,68,5920.952205882353,2958.7536764705883,925.4889705882352,Qwen3-8B,minerva,10,0.2
272,69,68,5920.952205882353,3470.875,1115.8933823529412,Qwen3-8B,minerva,15,0.01
272,69,68,5920.952205882353,3440.8088235294117,1107.0845588235295,Qwen3-8B,minerva,15,0.05
272,69,68,5920.952205882353,3433.533088235294,1103.1360294117646,Qwen3-8B,minerva,15,0.1
272,69,68,5920.952205882353,3432.823529411765,1102.6948529411766,Qwen3-8B,minerva,15,0.15
272,69,68,5920.952205882353,3432.143382352941,1102.5625,Qwen3-8B,minerva,15,0.2
272,69,70,5920.952205882353,3911.047794117647,1281.4889705882354,Qwen3-8B,minerva,20,0.01
272,69,70,5920.952205882353,3899.2573529411766,1277.8198529411766,Qwen3-8B,minerva,20,0.05
272,69,70,5920.952205882353,3896.294117647059,1276.0404411764705,Qwen3-8B,minerva,20,0.1
272,69,70,5920.952205882353,3896.0698529411766,1275.9669117647059,Qwen3-8B,minerva,20,0.15
272,69,70,5920.952205882353,3895.389705882353,1275.8345588235295,Qwen3-8B,minerva,20,0.2
675,281,156,10652.425185185186,2670.3866666666668,674.0903703703704,Qwen3-8B,olympiadbench,3,0.01
675,281,150,10652.425185185186,2497.271111111111,630.2074074074075,Qwen3-8B,olympiadbench,3,0.05
675,281,146,10652.425185185186,2419.1007407407405,609.6874074074074,Qwen3-8B,olympiadbench,3,0.1
675,281,145,10652.425185185186,2382.4666666666667,599.1185185185185,Qwen3-8B,olympiadbench,3,0.15
675,281,144,10652.425185185186,2346.5437037037036,591.8266666666667,Qwen3-8B,olympiadbench,3,0.2
675,281,196,10652.425185185186,3353.2533333333336,915.9244444444445,Qwen3-8B,olympiadbench,5,0.01
675,281,191,10652.425185185186,3179.9496296296297,869.7066666666667,Qwen3-8B,olympiadbench,5,0.05
675,281,190,10652.425185185186,3102.2385185185185,853.3244444444445,Qwen3-8B,olympiadbench,5,0.1
675,281,188,10652.425185185186,3067.1896296296295,844.0637037037037,Qwen3-8B,olympiadbench,5,0.15
675,281,187,10652.425185185186,3048.7614814814815,838.9170370370371,Qwen3-8B,olympiadbench,5,0.2
675,281,215,10652.425185185186,3890.776296296296,1097.3407407407408,Qwen3-8B,olympiadbench,7,0.01
675,281,212,10652.425185185186,3721.2266666666665,1050.1392592592592,Qwen3-8B,olympiadbench,7,0.05
675,281,211,10652.425185185186,3652.4933333333333,1037.0518518518518,Qwen3-8B,olympiadbench,7,0.1
675,281,209,10652.425185185186,3615.562962962963,1026.1674074074074,Qwen3-8B,olympiadbench,7,0.15
675,281,208,10652.425185185186,3601.9851851851854,1021.5392592592592,Qwen3-8B,olympiadbench,7,0.2
675,281,241,10652.425185185186,4793.902222222222,1407.7777777777778,Qwen3-8B,olympiadbench,10,0.01
675,281,238,10652.425185185186,4623.991111111111,1361.4385185185185,Qwen3-8B,olympiadbench,10,0.05
675,281,237,10652.425185185186,4522.93037037037,1326.3111111111111,Qwen3-8B,olympiadbench,10,0.1
675,281,236,10652.425185185186,4487.44,1314.1244444444444,Qwen3-8B,olympiadbench,10,0.15
675,281,236,10652.425185185186,4477.358518518518,1310.9496296296297,Qwen3-8B,olympiadbench,10,0.2
675,281,250,10652.425185185186,5541.14962962963,1652.645925925926,Qwen3-8B,olympiadbench,15,0.01
675,281,251,10652.425185185186,5410.16,1615.0607407407408,Qwen3-8B,olympiadbench,15,0.05
675,281,250,10652.425185185186,5352.838518518519,1603.3703703703704,Qwen3-8B,olympiadbench,15,0.1
675,281,250,10652.425185185186,5326.616296296296,1595.2562962962963,Qwen3-8B,olympiadbench,15,0.15
675,281,250,10652.425185185186,5319.011851851852,1593.0207407407408,Qwen3-8B,olympiadbench,15,0.2
675,281,259,10652.425185185186,6395.592592592592,1940.9762962962964,Qwen3-8B,olympiadbench,20,0.01
675,281,260,10652.425185185186,6284.902222222222,1904.0844444444444,Qwen3-8B,olympiadbench,20,0.05
675,281,260,10652.425185185186,6229.688888888889,1892.1214814814814,Qwen3-8B,olympiadbench,20,0.1
675,281,260,10652.425185185186,6205.768888888889,1884.6577777777777,Qwen3-8B,olympiadbench,20,0.15
675,281,260,10652.425185185186,6199.202962962963,1882.8444444444444,Qwen3-8B,olympiadbench,20,0.2
